diff options
-rwxr-xr-x | scripts/gen-s-parser.py | 2 | ||||
-rw-r--r-- | src/gen-s-parser.inc | 32 | ||||
-rw-r--r-- | src/ir/cost.h | 2 | ||||
-rw-r--r-- | src/literal.h | 2 | ||||
-rw-r--r-- | src/passes/Print.cpp | 6 | ||||
-rw-r--r-- | src/wasm-binary.h | 2 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 4 | ||||
-rw-r--r-- | src/wasm.h | 2 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 27 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 8 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 6 | ||||
-rw-r--r-- | test/lit/basic/f16.wast | 126 | ||||
-rw-r--r-- | test/spec/f16.wast | 29 |
13 files changed, 207 insertions, 41 deletions
diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index 0b5703b9e..9c39acb48 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -547,6 +547,8 @@ instructions = [ ("i32x4.relaxed_trunc_f32x4_u", "makeUnary(UnaryOp::RelaxedTruncUVecF32x4ToVecI32x4)"), ("i32x4.relaxed_trunc_f64x2_s_zero", "makeUnary(UnaryOp::RelaxedTruncZeroSVecF64x2ToVecI32x4)"), ("i32x4.relaxed_trunc_f64x2_u_zero", "makeUnary(UnaryOp::RelaxedTruncZeroUVecF64x2ToVecI32x4)"), + ("f16x8.relaxed_madd", "makeSIMDTernary(SIMDTernaryOp::RelaxedMaddVecF16x8)"), + ("f16x8.relaxed_nmadd", "makeSIMDTernary(SIMDTernaryOp::RelaxedNmaddVecF16x8)"), ("f32x4.relaxed_madd", "makeSIMDTernary(SIMDTernaryOp::RelaxedMaddVecF32x4)"), ("f32x4.relaxed_nmadd", "makeSIMDTernary(SIMDTernaryOp::RelaxedNmaddVecF32x4)"), ("f64x2.relaxed_madd", "makeSIMDTernary(SIMDTernaryOp::RelaxedMaddVecF64x2)"), diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 56a7c1cce..f7d12a1be 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -458,12 +458,34 @@ switch (buf[0]) { default: goto parse_error; } } - case 'r': - if (op == "f16x8.replace_lane"sv) { - CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8)); - return Ok{}; + case 'r': { + switch (buf[8]) { + case 'l': { + switch (buf[14]) { + case 'm': + if (op == "f16x8.relaxed_madd"sv) { + CHECK_ERR(makeSIMDTernary(ctx, pos, annotations, SIMDTernaryOp::RelaxedMaddVecF16x8)); + return Ok{}; + } + goto parse_error; + case 'n': + if (op == "f16x8.relaxed_nmadd"sv) { + CHECK_ERR(makeSIMDTernary(ctx, pos, annotations, SIMDTernaryOp::RelaxedNmaddVecF16x8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; + } + } + case 'p': + if (op == "f16x8.replace_lane"sv) { + CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; } - goto parse_error; + } case 's': { switch (buf[7]) { case 'p': diff 
--git a/src/ir/cost.h b/src/ir/cost.h index 99b945815..d11a9bfac 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -582,6 +582,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { case LaneselectI16x8: case LaneselectI32x4: case LaneselectI64x2: + case RelaxedMaddVecF16x8: + case RelaxedNmaddVecF16x8: case RelaxedMaddVecF32x4: case RelaxedNmaddVecF32x4: case RelaxedMaddVecF64x2: diff --git a/src/literal.h b/src/literal.h index 73289c83b..424121f5a 100644 --- a/src/literal.h +++ b/src/literal.h @@ -694,6 +694,8 @@ public: Literal demoteZeroToF32x4() const; Literal promoteLowToF64x2() const; Literal swizzleI8x16(const Literal& other) const; + Literal relaxedMaddF16x8(const Literal& left, const Literal& right) const; + Literal relaxedNmaddF16x8(const Literal& left, const Literal& right) const; Literal relaxedMaddF32x4(const Literal& left, const Literal& right) const; Literal relaxedNmaddF32x4(const Literal& left, const Literal& right) const; Literal relaxedMaddF64x2(const Literal& left, const Literal& right) const; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 024b6af5c..46d519e9e 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -770,6 +770,12 @@ struct PrintExpressionContents case LaneselectI64x2: o << "i64x2.laneselect"; break; + case RelaxedMaddVecF16x8: + o << "f16x8.relaxed_madd"; + break; + case RelaxedNmaddVecF16x8: + o << "f16x8.relaxed_nmadd"; + break; case RelaxedMaddVecF32x4: o << "f32x4.relaxed_madd"; break; diff --git a/src/wasm-binary.h b/src/wasm-binary.h index e8ed293e7..d10b6c312 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1037,6 +1037,8 @@ enum ASTNodes { I32x4RelaxedTruncF32x4U = 0x102, I32x4RelaxedTruncF64x2SZero = 0x103, I32x4RelaxedTruncF64x2UZero = 0x104, + F16x8RelaxedMadd = 0x14e, + F16x8RelaxedNmadd = 0x14f, F32x4RelaxedMadd = 0x105, F32x4RelaxedNmadd = 0x106, F64x2RelaxedMadd = 0x107, diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 578b0a569..86ba3b3a2 
100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1201,6 +1201,10 @@ public: case LaneselectI64x2: return c.bitselectV128(a, b); + case RelaxedMaddVecF16x8: + return a.relaxedMaddF16x8(b, c); + case RelaxedNmaddVecF16x8: + return a.relaxedNmaddF16x8(b, c); case RelaxedMaddVecF32x4: return a.relaxedMaddF32x4(b, c); case RelaxedNmaddVecF32x4: diff --git a/src/wasm.h b/src/wasm.h index 86ee12972..a7ad6ec6c 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -574,6 +574,8 @@ enum SIMDTernaryOp { Bitselect, // Relaxed SIMD + RelaxedMaddVecF16x8, + RelaxedNmaddVecF16x8, RelaxedMaddVecF32x4, RelaxedNmaddVecF32x4, RelaxedMaddVecF64x2, diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index c76856d15..e332db305 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -1674,23 +1674,25 @@ Literal Literal::copysign(const Literal& other) const { Literal Literal::madd(const Literal& left, const Literal& right) const { switch (type.getBasic()) { case Type::f32: - return Literal(::fmaf(left.getf32(), right.getf32(), getf32())); + return Literal(::fmaf(getf32(), left.getf32(), right.getf32())); break; case Type::f64: - return Literal(::fma(left.getf64(), right.getf64(), getf64())); + return Literal(::fma(getf64(), left.getf64(), right.getf64())); break; default: WASM_UNREACHABLE("unexpected type"); } } +// XXX: This is not an actual fused negated multiply implementation, but +// the relaxed spec allows a double rounding implementation like below. 
Literal Literal::nmadd(const Literal& left, const Literal& right) const { switch (type.getBasic()) { case Type::f32: - return Literal(::fmaf(-left.getf32(), right.getf32(), getf32())); + return Literal(-(getf32() * left.getf32()) + right.getf32()); break; case Type::f64: - return Literal(::fma(-left.getf64(), right.getf64(), getf64())); + return Literal(-(getf64() * left.getf64()) + right.getf64()); break; default: WASM_UNREACHABLE("unexpected type"); @@ -2749,19 +2751,32 @@ Literal Literal::swizzleI8x16(const Literal& other) const { namespace { template<int Lanes, LaneArray<Lanes> (Literal::*IntoLanes)() const, - Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const> + Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const, + Literal (*Convert)(const Literal&) = passThrough> static Literal ternary(const Literal& a, const Literal& b, const Literal& c) { LaneArray<Lanes> x = (a.*IntoLanes)(); LaneArray<Lanes> y = (b.*IntoLanes)(); LaneArray<Lanes> z = (c.*IntoLanes)(); LaneArray<Lanes> r; for (size_t i = 0; i < Lanes; ++i) { - r[i] = (x[i].*TernaryOp)(y[i], z[i]); + r[i] = Convert((x[i].*TernaryOp)(y[i], z[i])); } return Literal(r); } } // namespace +Literal Literal::relaxedMaddF16x8(const Literal& left, + const Literal& right) const { + return ternary<8, &Literal::getLanesF16x8, &Literal::madd, &toFP16>( + *this, left, right); +} + +Literal Literal::relaxedNmaddF16x8(const Literal& left, + const Literal& right) const { + return ternary<8, &Literal::getLanesF16x8, &Literal::nmadd, &toFP16>( + *this, left, right); +} + Literal Literal::relaxedMaddF32x4(const Literal& left, const Literal& right) const { return ternary<4, &Literal::getLanesF32x4, &Literal::madd>( diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 8c684dc2f..542eab4b4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -6823,6 +6823,14 @@ bool WasmBinaryReader::maybeVisitSIMDTernary(Expression*& out, uint32_t code) { curr = 
allocator.alloc<SIMDTernary>(); curr->op = LaneselectI64x2; break; + case BinaryConsts::F16x8RelaxedMadd: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedMaddVecF16x8; + break; + case BinaryConsts::F16x8RelaxedNmadd: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedNmaddVecF16x8; + break; case BinaryConsts::F32x4RelaxedMadd: curr = allocator.alloc<SIMDTernary>(); curr->op = RelaxedMaddVecF32x4; diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 140205b85..ac79d0135 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -657,6 +657,12 @@ void BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) { case LaneselectI64x2: o << U32LEB(BinaryConsts::I64x2Laneselect); break; + case RelaxedMaddVecF16x8: + o << U32LEB(BinaryConsts::F16x8RelaxedMadd); + break; + case RelaxedNmaddVecF16x8: + o << U32LEB(BinaryConsts::F16x8RelaxedNmadd); + break; case RelaxedMaddVecF32x4: o << U32LEB(BinaryConsts::F32x4RelaxedMadd); break; diff --git a/test/lit/basic/f16.wast b/test/lit/basic/f16.wast index 2e5ac57dd..ba806bb57 100644 --- a/test/lit/basic/f16.wast +++ b/test/lit/basic/f16.wast @@ -17,19 +17,21 @@ ;; CHECK-TEXT: (type $1 (func (param v128) (result v128))) - ;; CHECK-TEXT: (type $2 (func (param i32) (result f32))) + ;; CHECK-TEXT: (type $2 (func (param v128 v128 v128) (result v128))) - ;; CHECK-TEXT: (type $3 (func (param i32 f32))) + ;; CHECK-TEXT: (type $3 (func (param i32) (result f32))) - ;; CHECK-TEXT: (type $4 (func (param f32) (result v128))) + ;; CHECK-TEXT: (type $4 (func (param i32 f32))) - ;; CHECK-TEXT: (type $5 (func (param v128) (result f32))) + ;; CHECK-TEXT: (type $5 (func (param f32) (result v128))) - ;; CHECK-TEXT: (type $6 (func (param v128 f32) (result v128))) + ;; CHECK-TEXT: (type $6 (func (param v128) (result f32))) + + ;; CHECK-TEXT: (type $7 (func (param v128 f32) (result v128))) ;; CHECK-TEXT: (memory $0 1 1) - ;; CHECK-TEXT: (func $f32.load_f16 (type $2) (param $0 i32) (result f32) + ;; 
CHECK-TEXT: (func $f32.load_f16 (type $3) (param $0 i32) (result f32) ;; CHECK-TEXT-NEXT: (f32.load_f16 ;; CHECK-TEXT-NEXT: (local.get $0) ;; CHECK-TEXT-NEXT: ) @@ -38,19 +40,21 @@ ;; CHECK-BIN: (type $1 (func (param v128) (result v128))) - ;; CHECK-BIN: (type $2 (func (param i32) (result f32))) + ;; CHECK-BIN: (type $2 (func (param v128 v128 v128) (result v128))) + + ;; CHECK-BIN: (type $3 (func (param i32) (result f32))) - ;; CHECK-BIN: (type $3 (func (param i32 f32))) + ;; CHECK-BIN: (type $4 (func (param i32 f32))) - ;; CHECK-BIN: (type $4 (func (param f32) (result v128))) + ;; CHECK-BIN: (type $5 (func (param f32) (result v128))) - ;; CHECK-BIN: (type $5 (func (param v128) (result f32))) + ;; CHECK-BIN: (type $6 (func (param v128) (result f32))) - ;; CHECK-BIN: (type $6 (func (param v128 f32) (result v128))) + ;; CHECK-BIN: (type $7 (func (param v128 f32) (result v128))) ;; CHECK-BIN: (memory $0 1 1) - ;; CHECK-BIN: (func $f32.load_f16 (type $2) (param $0 i32) (result f32) + ;; CHECK-BIN: (func $f32.load_f16 (type $3) (param $0 i32) (result f32) ;; CHECK-BIN-NEXT: (f32.load_f16 ;; CHECK-BIN-NEXT: (local.get $0) ;; CHECK-BIN-NEXT: ) @@ -60,13 +64,13 @@ (local.get $0) ) ) - ;; CHECK-TEXT: (func $f32.store_f16 (type $3) (param $0 i32) (param $1 f32) + ;; CHECK-TEXT: (func $f32.store_f16 (type $4) (param $0 i32) (param $1 f32) ;; CHECK-TEXT-NEXT: (f32.store_f16 ;; CHECK-TEXT-NEXT: (local.get $0) ;; CHECK-TEXT-NEXT: (local.get $1) ;; CHECK-TEXT-NEXT: ) ;; CHECK-TEXT-NEXT: ) - ;; CHECK-BIN: (func $f32.store_f16 (type $3) (param $0 i32) (param $1 f32) + ;; CHECK-BIN: (func $f32.store_f16 (type $4) (param $0 i32) (param $1 f32) ;; CHECK-BIN-NEXT: (f32.store_f16 ;; CHECK-BIN-NEXT: (local.get $0) ;; CHECK-BIN-NEXT: (local.get $1) @@ -79,12 +83,12 @@ ) ) - ;; CHECK-TEXT: (func $f16x8.splat (type $4) (param $0 f32) (result v128) + ;; CHECK-TEXT: (func $f16x8.splat (type $5) (param $0 f32) (result v128) ;; CHECK-TEXT-NEXT: (f16x8.splat ;; CHECK-TEXT-NEXT: (local.get $0) ;; 
CHECK-TEXT-NEXT: ) ;; CHECK-TEXT-NEXT: ) - ;; CHECK-BIN: (func $f16x8.splat (type $4) (param $0 f32) (result v128) + ;; CHECK-BIN: (func $f16x8.splat (type $5) (param $0 f32) (result v128) ;; CHECK-BIN-NEXT: (f16x8.splat ;; CHECK-BIN-NEXT: (local.get $0) ;; CHECK-BIN-NEXT: ) @@ -95,12 +99,12 @@ ) ) - ;; CHECK-TEXT: (func $f16x8.extract_lane (type $5) (param $0 v128) (result f32) + ;; CHECK-TEXT: (func $f16x8.extract_lane (type $6) (param $0 v128) (result f32) ;; CHECK-TEXT-NEXT: (f16x8.extract_lane 0 ;; CHECK-TEXT-NEXT: (local.get $0) ;; CHECK-TEXT-NEXT: ) ;; CHECK-TEXT-NEXT: ) - ;; CHECK-BIN: (func $f16x8.extract_lane (type $5) (param $0 v128) (result f32) + ;; CHECK-BIN: (func $f16x8.extract_lane (type $6) (param $0 v128) (result f32) ;; CHECK-BIN-NEXT: (f16x8.extract_lane 0 ;; CHECK-BIN-NEXT: (local.get $0) ;; CHECK-BIN-NEXT: ) @@ -111,13 +115,13 @@ ) ) - ;; CHECK-TEXT: (func $f16x8.replace_lane (type $6) (param $0 v128) (param $1 f32) (result v128) + ;; CHECK-TEXT: (func $f16x8.replace_lane (type $7) (param $0 v128) (param $1 f32) (result v128) ;; CHECK-TEXT-NEXT: (f16x8.replace_lane 0 ;; CHECK-TEXT-NEXT: (local.get $0) ;; CHECK-TEXT-NEXT: (local.get $1) ;; CHECK-TEXT-NEXT: ) ;; CHECK-TEXT-NEXT: ) - ;; CHECK-BIN: (func $f16x8.replace_lane (type $6) (param $0 v128) (param $1 f32) (result v128) + ;; CHECK-BIN: (func $f16x8.replace_lane (type $7) (param $0 v128) (param $1 f32) (result v128) ;; CHECK-BIN-NEXT: (f16x8.replace_lane 0 ;; CHECK-BIN-NEXT: (local.get $0) ;; CHECK-BIN-NEXT: (local.get $1) @@ -486,49 +490,95 @@ (local.get $0) ) ) + ;; CHECK-TEXT: (func $f16x8.relaxed_madd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f16x8.relaxed_madd + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.relaxed_madd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; 
CHECK-BIN-NEXT: (f16x8.relaxed_madd + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: (local.get $1) + ;; CHECK-BIN-NEXT: (local.get $2) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.relaxed_madd (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (f16x8.relaxed_madd + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + + + ;; CHECK-TEXT: (func $f16x8.relaxed_nmadd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f16x8.relaxed_nmadd + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.relaxed_nmadd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BIN-NEXT: (f16x8.relaxed_nmadd + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: (local.get $1) + ;; CHECK-BIN-NEXT: (local.get $2) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.relaxed_nmadd (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (f16x8.relaxed_nmadd + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) ) ;; CHECK-BIN-NODEBUG: (type $0 (func (param v128 v128) (result v128))) ;; CHECK-BIN-NODEBUG: (type $1 (func (param v128) (result v128))) -;; CHECK-BIN-NODEBUG: (type $2 (func (param i32) (result f32))) +;; CHECK-BIN-NODEBUG: (type $2 (func (param v128 v128 v128) (result v128))) -;; CHECK-BIN-NODEBUG: (type $3 (func (param i32 f32))) +;; CHECK-BIN-NODEBUG: (type $3 (func (param i32) (result f32))) -;; CHECK-BIN-NODEBUG: (type $4 (func (param f32) (result v128))) +;; CHECK-BIN-NODEBUG: (type $4 (func (param i32 f32))) -;; CHECK-BIN-NODEBUG: (type $5 (func (param v128) (result f32))) +;; CHECK-BIN-NODEBUG: (type $5 (func (param f32) (result v128))) -;; CHECK-BIN-NODEBUG: (type $6 (func (param v128 f32) (result v128))) +;; CHECK-BIN-NODEBUG: (type $6 (func (param v128) (result f32))) + +;; CHECK-BIN-NODEBUG: (type $7 (func 
(param v128 f32) (result v128))) ;; CHECK-BIN-NODEBUG: (memory $0 1 1) -;; CHECK-BIN-NODEBUG: (func $0 (type $2) (param $0 i32) (result f32) +;; CHECK-BIN-NODEBUG: (func $0 (type $3) (param $0 i32) (result f32) ;; CHECK-BIN-NODEBUG-NEXT: (f32.load_f16 ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) -;; CHECK-BIN-NODEBUG: (func $1 (type $3) (param $0 i32) (param $1 f32) +;; CHECK-BIN-NODEBUG: (func $1 (type $4) (param $0 i32) (param $1 f32) ;; CHECK-BIN-NODEBUG-NEXT: (f32.store_f16 ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: (local.get $1) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) -;; CHECK-BIN-NODEBUG: (func $2 (type $4) (param $0 f32) (result v128) +;; CHECK-BIN-NODEBUG: (func $2 (type $5) (param $0 f32) (result v128) ;; CHECK-BIN-NODEBUG-NEXT: (f16x8.splat ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) -;; CHECK-BIN-NODEBUG: (func $3 (type $5) (param $0 v128) (result f32) +;; CHECK-BIN-NODEBUG: (func $3 (type $6) (param $0 v128) (result f32) ;; CHECK-BIN-NODEBUG-NEXT: (f16x8.extract_lane 0 ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) -;; CHECK-BIN-NODEBUG: (func $4 (type $6) (param $0 v128) (param $1 f32) (result v128) +;; CHECK-BIN-NODEBUG: (func $4 (type $7) (param $0 v128) (param $1 f32) (result v128) ;; CHECK-BIN-NODEBUG-NEXT: (f16x8.replace_lane 0 ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: (local.get $1) @@ -674,3 +724,19 @@ ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $26 (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.relaxed_madd +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: (local.get $1) +;; CHECK-BIN-NODEBUG-NEXT: (local.get $2) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; 
CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $27 (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.relaxed_nmadd +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: (local.get $1) +;; CHECK-BIN-NODEBUG-NEXT: (local.get $2) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; CHECK-BIN-NODEBUG-NEXT: ) diff --git a/test/spec/f16.wast b/test/spec/f16.wast index 09ee9328b..d5de0c0e8 100644 --- a/test/spec/f16.wast +++ b/test/spec/f16.wast @@ -32,6 +32,8 @@ (func (export "f16x8.floor") (param $0 v128) (result v128) (f16x8.floor (local.get $0))) (func (export "f16x8.trunc") (param $0 v128) (result v128) (f16x8.trunc (local.get $0))) (func (export "f16x8.nearest") (param $0 v128) (result v128) (f16x8.nearest (local.get $0))) + (func (export "f16x8.relaxed_madd") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f16x8.relaxed_madd (local.get $0) (local.get $1) (local.get $2))) + (func (export "f16x8.relaxed_nmadd") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f16x8.relaxed_nmadd (local.get $0) (local.get $1) (local.get $2))) ) (assert_return (invoke "f32.load_f16") (f32.const 42.0)) @@ -187,3 +189,30 @@ (v128.const i16x8 0x7e00 0 0x7c00 0xfc00 0xbc00 0x3c00 0x3e00 0x3ccd)) ;; nan 0 inf -inf -1 1 2 1 (v128.const i16x8 0x7e00 0 0x7c00 0xfc00 0xbc00 0x3c00 0x4000 0x3c00)) +;; ternary operations +(assert_return (invoke "f16x8.relaxed_madd" + ;; Lane 0 illustrates the difference between fused/unfused. e.g. 
+ ;; fused: (positive overflow) + -inf = -inf + ;; unfused: (inf) + -inf = NaN + ;; + ;; 1e4 inf -1 0 1 1.5 -2 1 + (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0xc000 0x3c00) + ;; 1e4 inf -1 0 1 1.5 4 1 + (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0x4400 0x3c00) + ;; -inf inf -1 0 1 2 1 -1 + (v128.const i16x8 0xfc00 0x7c00 0xbc00 0 0x3c00 0x4000 0x3c00 0xbc00)) + ;; -inf inf 0 0 2 4.25 -7 0 + (v128.const i16x8 0xfc00 0x7c00 0 0 0x4000 0x4440 0xc700 0)) +(assert_return (invoke "f16x8.relaxed_nmadd" + ;; Lane 0 illustrates the difference between fused/unfused. e.g. + ;; fused: -(positive overflow) + inf = inf + ;; unfused: (-inf) + inf = NaN + ;; + ;; 1e4 -inf -1 0 1 1.5 -2 1 + (v128.const i16x8 0x70e2 0xfc00 0xbc00 0 0x3c00 0x3e00 0xc000 0x3c00) + ;; 1e4 inf -1 0 1 1.5 4 1 + (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0x4400 0x3c00) + ;; inf inf -1 0 1 2 1 -1 + (v128.const i16x8 0x7c00 0x7c00 0xbc00 0 0x3c00 0x4000 0x3c00 0xbc00)) + ;; inf inf -2 0 0 -0.25 9 -2 + (v128.const i16x8 0x7c00 0x7c00 0xc000 0 0 0xb400 0x4880 0xc000)) |