diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gen-s-parser.inc | 32 | ||||
-rw-r--r-- | src/ir/cost.h | 2 | ||||
-rw-r--r-- | src/literal.h | 2 | ||||
-rw-r--r-- | src/passes/Print.cpp | 6 | ||||
-rw-r--r-- | src/wasm-binary.h | 2 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 4 | ||||
-rw-r--r-- | src/wasm.h | 2 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 27 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 8 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 6 |
10 files changed, 80 insertions, 11 deletions
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 56a7c1cce..f7d12a1be 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -458,12 +458,34 @@ switch (buf[0]) { default: goto parse_error; } } - case 'r': - if (op == "f16x8.replace_lane"sv) { - CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8)); - return Ok{}; + case 'r': { + switch (buf[8]) { + case 'l': { + switch (buf[14]) { + case 'm': + if (op == "f16x8.relaxed_madd"sv) { + CHECK_ERR(makeSIMDTernary(ctx, pos, annotations, SIMDTernaryOp::RelaxedMaddVecF16x8)); + return Ok{}; + } + goto parse_error; + case 'n': + if (op == "f16x8.relaxed_nmadd"sv) { + CHECK_ERR(makeSIMDTernary(ctx, pos, annotations, SIMDTernaryOp::RelaxedNmaddVecF16x8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; + } + } + case 'p': + if (op == "f16x8.replace_lane"sv) { + CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; } - goto parse_error; + } case 's': { switch (buf[7]) { case 'p': diff --git a/src/ir/cost.h b/src/ir/cost.h index 99b945815..d11a9bfac 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -582,6 +582,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { case LaneselectI16x8: case LaneselectI32x4: case LaneselectI64x2: + case RelaxedMaddVecF16x8: + case RelaxedNmaddVecF16x8: case RelaxedMaddVecF32x4: case RelaxedNmaddVecF32x4: case RelaxedMaddVecF64x2: diff --git a/src/literal.h b/src/literal.h index 73289c83b..424121f5a 100644 --- a/src/literal.h +++ b/src/literal.h @@ -694,6 +694,8 @@ public: Literal demoteZeroToF32x4() const; Literal promoteLowToF64x2() const; Literal swizzleI8x16(const Literal& other) const; + Literal relaxedMaddF16x8(const Literal& left, const Literal& right) const; + Literal relaxedNmaddF16x8(const Literal& left, const Literal& right) const; Literal relaxedMaddF32x4(const Literal& left, const Literal& right) const; Literal relaxedNmaddF32x4(const Literal& left, const Literal& right) const; Literal relaxedMaddF64x2(const Literal& left, const Literal& right) const; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 024b6af5c..46d519e9e 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -770,6 +770,12 @@ struct PrintExpressionContents case LaneselectI64x2: o << "i64x2.laneselect"; break; + case RelaxedMaddVecF16x8: + o << "f16x8.relaxed_madd"; + break; + case RelaxedNmaddVecF16x8: + o << "f16x8.relaxed_nmadd"; + break; case RelaxedMaddVecF32x4: o << "f32x4.relaxed_madd"; break; diff --git a/src/wasm-binary.h b/src/wasm-binary.h index e8ed293e7..d10b6c312 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1037,6 +1037,8 @@ enum ASTNodes { I32x4RelaxedTruncF32x4U = 0x102, I32x4RelaxedTruncF64x2SZero = 0x103, I32x4RelaxedTruncF64x2UZero = 0x104, + F16x8RelaxedMadd = 0x14e, + F16x8RelaxedNmadd = 0x14f, F32x4RelaxedMadd = 0x105, F32x4RelaxedNmadd = 0x106, F64x2RelaxedMadd = 0x107, diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 578b0a569..86ba3b3a2 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1201,6 +1201,10 @@ public: case LaneselectI64x2: return c.bitselectV128(a, b); + case RelaxedMaddVecF16x8: + return a.relaxedMaddF16x8(b, c); + case RelaxedNmaddVecF16x8: + return a.relaxedNmaddF16x8(b, c); case RelaxedMaddVecF32x4: return a.relaxedMaddF32x4(b, c); case RelaxedNmaddVecF32x4: diff --git a/src/wasm.h b/src/wasm.h index 86ee12972..a7ad6ec6c 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -574,6 +574,8 @@ enum SIMDTernaryOp { Bitselect, // Relaxed SIMD + RelaxedMaddVecF16x8, + RelaxedNmaddVecF16x8, RelaxedMaddVecF32x4, RelaxedNmaddVecF32x4, RelaxedMaddVecF64x2, diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index c76856d15..e332db305 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -1674,23 +1674,25 @@ Literal Literal::copysign(const Literal& other) const { Literal Literal::madd(const Literal& left, const Literal& right) const { switch (type.getBasic()) { case Type::f32: - return Literal(::fmaf(left.getf32(), right.getf32(), getf32())); + return Literal(::fmaf(getf32(), left.getf32(), right.getf32())); break; case Type::f64: - return Literal(::fma(left.getf64(), right.getf64(), getf64())); + return Literal(::fma(getf64(), left.getf64(), right.getf64())); break; default: WASM_UNREACHABLE("unexpected type"); } } +// XXX: This is not an actual fused negated multiply implementation, but +// the relaxed spec allows a double rounding implementation like below. Literal Literal::nmadd(const Literal& left, const Literal& right) const { switch (type.getBasic()) { case Type::f32: - return Literal(::fmaf(-left.getf32(), right.getf32(), getf32())); + return Literal(-(getf32() * left.getf32()) + right.getf32()); break; case Type::f64: - return Literal(::fma(-left.getf64(), right.getf64(), getf64())); + return Literal(-(getf64() * left.getf64()) + right.getf64()); break; default: WASM_UNREACHABLE("unexpected type"); @@ -2749,19 +2751,32 @@ Literal Literal::swizzleI8x16(const Literal& other) const { namespace { template<int Lanes, LaneArray<Lanes> (Literal::*IntoLanes)() const, - Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const> + Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const, + Literal (*Convert)(const Literal&) = passThrough> static Literal ternary(const Literal& a, const Literal& b, const Literal& c) { LaneArray<Lanes> x = (a.*IntoLanes)(); LaneArray<Lanes> y = (b.*IntoLanes)(); LaneArray<Lanes> z = (c.*IntoLanes)(); LaneArray<Lanes> r; for (size_t i = 0; i < Lanes; ++i) { - r[i] = (x[i].*TernaryOp)(y[i], z[i]); + r[i] = Convert((x[i].*TernaryOp)(y[i], z[i])); } return Literal(r); } } // namespace +Literal Literal::relaxedMaddF16x8(const Literal& left, + const Literal& right) const { + return ternary<8, &Literal::getLanesF16x8, &Literal::madd, &toFP16>( + *this, left, right); +} + +Literal Literal::relaxedNmaddF16x8(const Literal& left, + const Literal& right) const { + return ternary<8, &Literal::getLanesF16x8, &Literal::nmadd, &toFP16>( + *this, left, right); +} + Literal Literal::relaxedMaddF32x4(const Literal& left, const Literal& right) const { return ternary<4, &Literal::getLanesF32x4, &Literal::madd>( diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 8c684dc2f..542eab4b4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -6823,6 +6823,14 @@ bool WasmBinaryReader::maybeVisitSIMDTernary(Expression*& out, uint32_t code) { curr = allocator.alloc<SIMDTernary>(); curr->op = LaneselectI64x2; break; + case BinaryConsts::F16x8RelaxedMadd: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedMaddVecF16x8; + break; + case BinaryConsts::F16x8RelaxedNmadd: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedNmaddVecF16x8; + break; case BinaryConsts::F32x4RelaxedMadd: curr = allocator.alloc<SIMDTernary>(); curr->op = RelaxedMaddVecF32x4; diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 140205b85..ac79d0135 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -657,6 +657,12 @@ void BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) { case LaneselectI64x2: o << U32LEB(BinaryConsts::I64x2Laneselect); break; + case RelaxedMaddVecF16x8: + o << U32LEB(BinaryConsts::F16x8RelaxedMadd); + break; + case RelaxedNmaddVecF16x8: + o << U32LEB(BinaryConsts::F16x8RelaxedNmadd); + break; case RelaxedMaddVecF32x4: o << U32LEB(BinaryConsts::F32x4RelaxedMadd); break; |