diff options
author | Ng Zhi An <zhin@chromium.org> | 2021-11-15 13:43:43 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-15 13:43:43 -0800 |
commit | 3549e3040340c706349b1ee9ab3e994279805afc (patch) | |
tree | 4764d49a6c0ec600102dd07ec677c6755b6da338 | |
parent | ed1f0d8427f330a18b2ca98adeadcb1be56d59bc (diff) | |
download | binaryen-3549e3040340c706349b1ee9ab3e994279805afc.tar.gz binaryen-3549e3040340c706349b1ee9ab3e994279805afc.tar.bz2 binaryen-3549e3040340c706349b1ee9ab3e994279805afc.zip |
Add support for relaxed-simd instructions (#4320)
This adds relaxed-simd instructions based on the current status of the
proposal
https://github.com/WebAssembly/relaxed-simd/blob/main/proposals/relaxed-simd/Overview.md.
Binary opcodes are based on what is listed in
https://github.com/WebAssembly/relaxed-simd/blob/main/proposals/relaxed-simd/Overview.md#binary-format.
Text names are not fixed yet, so names that map onto
the non-relaxed versions have been chosen for this prototype.
Support for these instructions has been added to LLVM via builtins,
adding support here will allow Emscripten to successfully compile files
that use those builtins.
Interpreter support has also been added: the relaxed instructions delegate to
the non-relaxed versions of the instructions.
Most instructions are implemented in the interpreter the same way as the non-relaxed
simd128 instructions, except for fma/fms, which are always fused.
-rwxr-xr-x | scripts/gen-s-parser.py | 19 | ||||
-rw-r--r-- | src/gen-s-parser.inc | 144 | ||||
-rw-r--r-- | src/ir/cost.h | 17 | ||||
-rw-r--r-- | src/literal.h | 9 | ||||
-rw-r--r-- | src/passes/Print.cpp | 52 | ||||
-rw-r--r-- | src/wasm-binary.h | 53 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 24 | ||||
-rw-r--r-- | src/wasm.h | 23 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 62 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 68 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 61 | ||||
-rw-r--r-- | src/wasm/wasm-validator.cpp | 11 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 4 | ||||
-rw-r--r-- | test/lit/relaxed-simd.wast | 483 |
14 files changed, 997 insertions, 33 deletions
diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index 3736ce918..512d929b7 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -515,6 +515,25 @@ instructions = [ ("f32x4.demote_f64x2_zero", "makeUnary(s, UnaryOp::DemoteZeroVecF64x2ToVecF32x4)"), ("f64x2.promote_low_f32x4", "makeUnary(s, UnaryOp::PromoteLowVecF32x4ToVecF64x2)"), + # relaxed SIMD ops + ("i8x16.relaxed_swizzle", "makeBinary(s, BinaryOp::RelaxedSwizzleVec8x16)"), + ("i32x4.relaxed_trunc_f32x4_s", "makeUnary(s, UnaryOp::RelaxedTruncSVecF32x4ToVecI32x4)"), + ("i32x4.relaxed_trunc_f32x4_u", "makeUnary(s, UnaryOp::RelaxedTruncUVecF32x4ToVecI32x4)"), + ("i32x4.relaxed_trunc_f64x2_s_zero", "makeUnary(s, UnaryOp::RelaxedTruncZeroSVecF64x2ToVecI32x4)"), + ("i32x4.relaxed_trunc_f64x2_u_zero", "makeUnary(s, UnaryOp::RelaxedTruncZeroUVecF64x2ToVecI32x4)"), + ("f32x4.relaxed_fma", "makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmaVecF32x4)"), + ("f32x4.relaxed_fms", "makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmsVecF32x4)"), + ("f64x2.relaxed_fma", "makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmaVecF64x2)"), + ("f64x2.relaxed_fms", "makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmsVecF64x2)"), + ("i8x16.laneselect", "makeSIMDTernary(s, SIMDTernaryOp::LaneselectI8x16)"), + ("i16x8.laneselect", "makeSIMDTernary(s, SIMDTernaryOp::LaneselectI16x8)"), + ("i32x4.laneselect", "makeSIMDTernary(s, SIMDTernaryOp::LaneselectI32x4)"), + ("i64x2.laneselect", "makeSIMDTernary(s, SIMDTernaryOp::LaneselectI64x2)"), + ("f32x4.relaxed_min", "makeBinary(s, BinaryOp::RelaxedMinVecF32x4)"), + ("f32x4.relaxed_max", "makeBinary(s, BinaryOp::RelaxedMaxVecF32x4)"), + ("f64x2.relaxed_min", "makeBinary(s, BinaryOp::RelaxedMinVecF64x2)"), + ("f64x2.relaxed_max", "makeBinary(s, BinaryOp::RelaxedMaxVecF64x2)"), + # reference types instructions ("ref.null", "makeRefNull(s)"), ("ref.is_null", "makeRefIs(s, RefIsNull)"), diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 47ba1e1df..d82914678 100644 --- 
a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -4,8 +4,8 @@ #ifdef INSTRUCTION_PARSER #undef INSTRUCTION_PARSER -char op[30] = {'\0'}; -strncpy(op, s[0]->c_str(), 29); +char op[33] = {'\0'}; +strncpy(op, s[0]->c_str(), 32); switch (op[0]) { case 'a': { switch (op[1]) { @@ -491,9 +491,41 @@ switch (op[0]) { default: goto parse_error; } } - case 'r': - if (strcmp(op, "f32x4.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecF32x4, 4); } - goto parse_error; + case 'r': { + switch (op[8]) { + case 'l': { + switch (op[14]) { + case 'f': { + switch (op[16]) { + case 'a': + if (strcmp(op, "f32x4.relaxed_fma") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmaVecF32x4); } + goto parse_error; + case 's': + if (strcmp(op, "f32x4.relaxed_fms") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmsVecF32x4); } + goto parse_error; + default: goto parse_error; + } + } + case 'm': { + switch (op[15]) { + case 'a': + if (strcmp(op, "f32x4.relaxed_max") == 0) { return makeBinary(s, BinaryOp::RelaxedMaxVecF32x4); } + goto parse_error; + case 'i': + if (strcmp(op, "f32x4.relaxed_min") == 0) { return makeBinary(s, BinaryOp::RelaxedMinVecF32x4); } + goto parse_error; + default: goto parse_error; + } + } + default: goto parse_error; + } + } + case 'p': + if (strcmp(op, "f32x4.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecF32x4, 4); } + goto parse_error; + default: goto parse_error; + } + } case 's': { switch (op[7]) { case 'p': @@ -789,9 +821,41 @@ switch (op[0]) { default: goto parse_error; } } - case 'r': - if (strcmp(op, "f64x2.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecF64x2, 2); } - goto parse_error; + case 'r': { + switch (op[8]) { + case 'l': { + switch (op[14]) { + case 'f': { + switch (op[16]) { + case 'a': + if (strcmp(op, "f64x2.relaxed_fma") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmaVecF64x2); } + goto parse_error; + case 's': + if (strcmp(op, 
"f64x2.relaxed_fms") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::RelaxedFmsVecF64x2); } + goto parse_error; + default: goto parse_error; + } + } + case 'm': { + switch (op[15]) { + case 'a': + if (strcmp(op, "f64x2.relaxed_max") == 0) { return makeBinary(s, BinaryOp::RelaxedMaxVecF64x2); } + goto parse_error; + case 'i': + if (strcmp(op, "f64x2.relaxed_min") == 0) { return makeBinary(s, BinaryOp::RelaxedMinVecF64x2); } + goto parse_error; + default: goto parse_error; + } + } + default: goto parse_error; + } + } + case 'p': + if (strcmp(op, "f64x2.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecF64x2, 2); } + goto parse_error; + default: goto parse_error; + } + } case 's': { switch (op[7]) { case 'p': @@ -987,6 +1051,9 @@ switch (op[0]) { } case 'l': { switch (op[7]) { + case 'a': + if (strcmp(op, "i16x8.laneselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::LaneselectI16x8); } + goto parse_error; case 'e': { switch (op[9]) { case 's': @@ -1730,6 +1797,9 @@ switch (op[0]) { } case 'l': { switch (op[7]) { + case 'a': + if (strcmp(op, "i32x4.laneselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::LaneselectI32x4); } + goto parse_error; case 'e': { switch (op[9]) { case 's': @@ -1796,9 +1866,41 @@ switch (op[0]) { default: goto parse_error; } } - case 'r': - if (strcmp(op, "i32x4.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecI32x4, 4); } - goto parse_error; + case 'r': { + switch (op[8]) { + case 'l': { + switch (op[21]) { + case '3': { + switch (op[26]) { + case 's': + if (strcmp(op, "i32x4.relaxed_trunc_f32x4_s") == 0) { return makeUnary(s, UnaryOp::RelaxedTruncSVecF32x4ToVecI32x4); } + goto parse_error; + case 'u': + if (strcmp(op, "i32x4.relaxed_trunc_f32x4_u") == 0) { return makeUnary(s, UnaryOp::RelaxedTruncUVecF32x4ToVecI32x4); } + goto parse_error; + default: goto parse_error; + } + } + case '6': { + switch (op[26]) { + case 's': + if (strcmp(op, "i32x4.relaxed_trunc_f64x2_s_zero") 
== 0) { return makeUnary(s, UnaryOp::RelaxedTruncZeroSVecF64x2ToVecI32x4); } + goto parse_error; + case 'u': + if (strcmp(op, "i32x4.relaxed_trunc_f64x2_u_zero") == 0) { return makeUnary(s, UnaryOp::RelaxedTruncZeroUVecF64x2ToVecI32x4); } + goto parse_error; + default: goto parse_error; + } + } + default: goto parse_error; + } + } + case 'p': + if (strcmp(op, "i32x4.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecI32x4, 4); } + goto parse_error; + default: goto parse_error; + } + } case 's': { switch (op[7]) { case 'h': { @@ -2487,6 +2589,9 @@ switch (op[0]) { } case 'l': { switch (op[7]) { + case 'a': + if (strcmp(op, "i64x2.laneselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::LaneselectI64x2); } + goto parse_error; case 'e': if (strcmp(op, "i64x2.le_s") == 0) { return makeBinary(s, BinaryOp::LeSVecI64x2); } goto parse_error; @@ -2635,6 +2740,9 @@ switch (op[0]) { } case 'l': { switch (op[7]) { + case 'a': + if (strcmp(op, "i8x16.laneselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::LaneselectI8x16); } + goto parse_error; case 'e': { switch (op[9]) { case 's': @@ -2717,9 +2825,17 @@ switch (op[0]) { case 'p': if (strcmp(op, "i8x16.popcnt") == 0) { return makeUnary(s, UnaryOp::PopcntVecI8x16); } goto parse_error; - case 'r': - if (strcmp(op, "i8x16.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecI8x16, 16); } - goto parse_error; + case 'r': { + switch (op[8]) { + case 'l': + if (strcmp(op, "i8x16.relaxed_swizzle") == 0) { return makeBinary(s, BinaryOp::RelaxedSwizzleVec8x16); } + goto parse_error; + case 'p': + if (strcmp(op, "i8x16.replace_lane") == 0) { return makeSIMDReplace(s, SIMDReplaceOp::ReplaceLaneVecI8x16, 16); } + goto parse_error; + default: goto parse_error; + } + } case 's': { switch (op[7]) { case 'h': { diff --git a/src/ir/cost.h b/src/ir/cost.h index bf9e48308..93e243a80 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -232,6 +232,10 @@ struct CostAnalyzer : public 
OverriddenVisitor<CostAnalyzer, CostType> { case TruncSatZeroUVecF64x2ToVecI32x4: case DemoteZeroVecF64x2ToVecF32x4: case PromoteLowVecF32x4ToVecF64x2: + case RelaxedTruncSVecF32x4ToVecI32x4: + case RelaxedTruncUVecF32x4ToVecI32x4: + case RelaxedTruncZeroSVecF64x2ToVecI32x4: + case RelaxedTruncZeroUVecF64x2ToVecI32x4: ret = 1; break; case InvalidUnary: @@ -465,6 +469,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { case MaxVecF32x4: case PMinVecF32x4: case PMaxVecF32x4: + case RelaxedMinVecF32x4: + case RelaxedMaxVecF32x4: case AddVecF64x2: case SubVecF64x2: ret = 1; @@ -479,11 +485,14 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { case MaxVecF64x2: case PMinVecF64x2: case PMaxVecF64x2: + case RelaxedMinVecF64x2: + case RelaxedMaxVecF64x2: case NarrowSVecI16x8ToVecI8x16: case NarrowUVecI16x8ToVecI8x16: case NarrowSVecI32x4ToVecI16x8: case NarrowUVecI32x4ToVecI16x8: case SwizzleVec8x16: + case RelaxedSwizzleVec8x16: ret = 1; break; case InvalidBinary: @@ -523,6 +532,14 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { CostType ret = 0; switch (curr->op) { case Bitselect: + case LaneselectI8x16: + case LaneselectI16x8: + case LaneselectI32x4: + case LaneselectI64x2: + case RelaxedFmaVecF32x4: + case RelaxedFmsVecF32x4: + case RelaxedFmaVecF64x2: + case RelaxedFmsVecF64x2: ret = 1; break; } diff --git a/src/literal.h b/src/literal.h index ae0d4253f..727865c94 100644 --- a/src/literal.h +++ b/src/literal.h @@ -425,6 +425,11 @@ public: Literal pmax(const Literal& other) const; Literal copysign(const Literal& other) const; + // Fused multiply add and subtract. + // Computes this + (left * right) to infinite precision then round once. 
+ Literal fma(const Literal& left, const Literal& right) const; + Literal fms(const Literal& left, const Literal& right) const; + std::array<Literal, 16> getLanesSI8x16() const; std::array<Literal, 16> getLanesUI8x16() const; std::array<Literal, 8> getLanesSI16x8() const; @@ -647,6 +652,10 @@ public: Literal demoteZeroToF32x4() const; Literal promoteLowToF64x2() const; Literal swizzleI8x16(const Literal& other) const; + Literal relaxedFmaF32x4(const Literal& left, const Literal& right) const; + Literal relaxedFmsF32x4(const Literal& left, const Literal& right) const; + Literal relaxedFmaF64x2(const Literal& left, const Literal& right) const; + Literal relaxedFmsF64x2(const Literal& left, const Literal& right) const; // Checks if an RTT value is a sub-rtt of another, that is, whether GC data // with this object's RTT can be successfuly cast using the other RTT diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index ce4238f34..3476199d6 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -659,6 +659,30 @@ struct PrintExpressionContents case Bitselect: o << "v128.bitselect"; break; + case LaneselectI8x16: + o << "i8x16.laneselect"; + break; + case LaneselectI16x8: + o << "i16x8.laneselect"; + break; + case LaneselectI32x4: + o << "i32x4.laneselect"; + break; + case LaneselectI64x2: + o << "i64x2.laneselect"; + break; + case RelaxedFmaVecF32x4: + o << "f32x4.relaxed_fma"; + break; + case RelaxedFmsVecF32x4: + o << "f32x4.relaxed_fms"; + break; + case RelaxedFmaVecF64x2: + o << "f64x2.relaxed_fma"; + break; + case RelaxedFmsVecF64x2: + o << "f64x2.relaxed_fms"; + break; } restoreNormalColor(o); } @@ -1192,6 +1216,18 @@ struct PrintExpressionContents case PromoteLowVecF32x4ToVecF64x2: o << "f64x2.promote_low_f32x4"; break; + case RelaxedTruncSVecF32x4ToVecI32x4: + o << "i32x4.relaxed_trunc_f32x4_s"; + break; + case RelaxedTruncUVecF32x4ToVecI32x4: + o << "i32x4.relaxed_trunc_f32x4_u"; + break; + case RelaxedTruncZeroSVecF64x2ToVecI32x4: + o << 
"i32x4.relaxed_trunc_f64x2_s_zero"; + break; + case RelaxedTruncZeroUVecF64x2ToVecI32x4: + o << "i32x4.relaxed_trunc_f64x2_u_zero"; + break; case InvalidUnary: WASM_UNREACHABLE("unvalid unary operator"); } @@ -1800,6 +1836,22 @@ struct PrintExpressionContents o << "i8x16.swizzle"; break; + case RelaxedMinVecF32x4: + o << "f32x4.relaxed_min"; + break; + case RelaxedMaxVecF32x4: + o << "f32x4.relaxed_max"; + break; + case RelaxedMinVecF64x2: + o << "f64x2.relaxed_min"; + break; + case RelaxedMaxVecF64x2: + o << "f64x2.relaxed_max"; + break; + case RelaxedSwizzleVec8x16: + o << "i8x16.relaxed_swizzle"; + break; + case InvalidBinary: WASM_UNREACHABLE("unvalid binary operator"); } diff --git a/src/wasm-binary.h b/src/wasm-binary.h index e523d0994..01f37d9aa 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -917,11 +917,11 @@ enum ASTNodes { I32x4Abs = 0xa0, I32x4Neg = 0xa1, - // 0xa2 unused + // 0xa2 for relaxed SIMD I32x4AllTrue = 0xa3, I32x4Bitmask = 0xa4, - // 0xa5 unused - // 0xa6 unused + // 0xa5 for relaxed SIMD + // 0xa6 for relaxed SIMD I32x4ExtendLowI16x8S = 0xa7, I32x4ExtendHighI16x8S = 0xa8, I32x4ExtendLowI16x8U = 0xa9, @@ -930,12 +930,12 @@ enum ASTNodes { I32x4ShrS = 0xac, I32x4ShrU = 0xad, I32x4Add = 0xae, - // 0xaf unused - // 0xb0 unused + // 0xaf for relaxed SIMD + // 0xb0 for relaxed SIMD I32x4Sub = 0xb1, - // 0xb2 unused - // 0xb3 unused - // 0xb4 unused + // 0xb2 for relaxed SIMD + // 0xb3 for relaxed SIMD + // 0xb4 for relaxed SIMD I32x4Mul = 0xb5, I32x4MinS = 0xb6, I32x4MinU = 0xb7, @@ -953,8 +953,8 @@ enum ASTNodes { // 0xc2 unused I64x2AllTrue = 0xc3, I64x2Bitmask = 0xc4, - // 0xc5 unused - // 0xc6 unused + // 0xc5 for relaxed SIMD + // 0xc6 for relaxed SIMD I64x2ExtendLowI32x4S = 0xc7, I64x2ExtendHighI32x4S = 0xc8, I64x2ExtendLowI32x4U = 0xc9, @@ -963,12 +963,12 @@ enum ASTNodes { I64x2ShrS = 0xcc, I64x2ShrU = 0xcd, I64x2Add = 0xce, - // 0xcf unused - // 0xd0 unused + // 0xcf for relaxed SIMD + // 0xd0 for relaxed SIMD I64x2Sub = 0xd1, - 
// 0xd2 unused - // 0xd3 unused - // 0xd4 unused + // 0xd2 for relaxed SIMD + // 0xd3 for relaxed SIMD + // 0xd4 for relaxed SIMD I64x2Mul = 0xd5, I64x2Eq = 0xd6, I64x2Ne = 0xd7, @@ -983,7 +983,7 @@ enum ASTNodes { F32x4Abs = 0xe0, F32x4Neg = 0xe1, - // 0xe2 unused + // 0xe2 for relaxed SIMD F32x4Sqrt = 0xe3, F32x4Add = 0xe4, F32x4Sub = 0xe5, @@ -996,7 +996,7 @@ enum ASTNodes { F64x2Abs = 0xec, F64x2Neg = 0xed, - // 0xee unused + // 0xee for relaxed SIMD F64x2Sqrt = 0xef, F64x2Add = 0xf0, F64x2Sub = 0xf1, @@ -1016,6 +1016,25 @@ enum ASTNodes { F64x2ConvertLowI32x4S = 0xfe, F64x2ConvertLowI32x4U = 0xff, + // relaxed SIMD opcodes + I8x16RelaxedSwizzle = 0xa2, + I32x4RelaxedTruncF32x4S = 0xa5, + I32x4RelaxedTruncF32x4U = 0xa6, + I32x4RelaxedTruncF64x2SZero = 0xc5, + I32x4RelaxedTruncF64x2UZero = 0xc6, + F32x4RelaxedFma = 0xaf, + F32x4RelaxedFms = 0xb0, + F64x2RelaxedFma = 0xcf, + F64x2RelaxedFms = 0xd0, + I8x16Laneselect = 0xb2, + I16x8Laneselect = 0xb3, + I32x4Laneselect = 0xd2, + I64x2Laneselect = 0xd3, + F32x4RelaxedMin = 0xb4, + F32x4RelaxedMax = 0xe2, + F64x2RelaxedMin = 0xd4, + F64x2RelaxedMax = 0xee, + // bulk memory opcodes MemoryInit = 0x08, diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 8bb250993..7eb13c002 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -538,8 +538,10 @@ public: case ExtAddPairwiseUVecI16x8ToI32x4: return value.extAddPairwiseToUI32x4(); case TruncSatSVecF32x4ToVecI32x4: + case RelaxedTruncSVecF32x4ToVecI32x4: return value.truncSatToSI32x4(); case TruncSatUVecF32x4ToVecI32x4: + case RelaxedTruncUVecF32x4ToVecI32x4: return value.truncSatToUI32x4(); case ConvertSVecI32x4ToVecF32x4: return value.convertSToF32x4(); @@ -574,8 +576,10 @@ public: case ConvertLowUVecI32x4ToVecF64x2: return value.convertLowUToF64x2(); case TruncSatZeroSVecF64x2ToVecI32x4: + case RelaxedTruncZeroSVecF64x2ToVecI32x4: return value.truncSatZeroSToI32x4(); case TruncSatZeroUVecF64x2ToVecI32x4: + case 
RelaxedTruncZeroUVecF64x2ToVecI32x4: return value.truncSatZeroUToI32x4(); case DemoteZeroVecF64x2ToVecF32x4: return value.demoteZeroToF32x4(); @@ -976,8 +980,10 @@ public: case DivVecF32x4: return left.divF32x4(right); case MinVecF32x4: + case RelaxedMinVecF32x4: return left.minF32x4(right); case MaxVecF32x4: + case RelaxedMaxVecF32x4: return left.maxF32x4(right); case PMinVecF32x4: return left.pminF32x4(right); @@ -992,8 +998,10 @@ public: case DivVecF64x2: return left.divF64x2(right); case MinVecF64x2: + case RelaxedMinVecF64x2: return left.minF64x2(right); case MaxVecF64x2: + case RelaxedMaxVecF64x2: return left.maxF64x2(right); case PMinVecF64x2: return left.pminF64x2(right); @@ -1010,6 +1018,7 @@ public: return left.narrowUToI16x8(right); case SwizzleVec8x16: + case RelaxedSwizzleVec8x16: return left.swizzleI8x16(right); case InvalidBinary: @@ -1105,9 +1114,22 @@ public: Literal c = flow.getSingleValue(); switch (curr->op) { case Bitselect: + case LaneselectI8x16: + case LaneselectI16x8: + case LaneselectI32x4: + case LaneselectI64x2: return c.bitselectV128(a, b); + + case RelaxedFmaVecF32x4: + return a.relaxedFmaF32x4(b, c); + case RelaxedFmsVecF32x4: + return a.relaxedFmsF32x4(b, c); + case RelaxedFmaVecF64x2: + return a.relaxedFmaF64x2(b, c); + case RelaxedFmsVecF64x2: + return a.relaxedFmsF64x2(b, c); default: - // TODO: implement qfma/qfms and signselect + // TODO: implement signselect WASM_UNREACHABLE("not implemented"); } } diff --git a/src/wasm.h b/src/wasm.h index 72fe28bbd..4e981935e 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -219,6 +219,12 @@ enum UnaryOp { DemoteZeroVecF64x2ToVecF32x4, PromoteLowVecF32x4ToVecF64x2, + // Relaxed SIMD + RelaxedTruncSVecF32x4ToVecI32x4, + RelaxedTruncUVecF32x4ToVecI32x4, + RelaxedTruncZeroSVecF64x2ToVecI32x4, + RelaxedTruncZeroUVecF64x2ToVecI32x4, + InvalidUnary }; @@ -459,6 +465,13 @@ enum BinaryOp { // SIMD Swizzle SwizzleVec8x16, + // Relaxed SIMD + RelaxedSwizzleVec8x16, + RelaxedMinVecF32x4, + 
RelaxedMaxVecF32x4, + RelaxedMinVecF64x2, + RelaxedMaxVecF64x2, + InvalidBinary }; @@ -527,6 +540,16 @@ enum SIMDLoadStoreLaneOp { enum SIMDTernaryOp { Bitselect, + + // Relaxed SIMD + RelaxedFmaVecF32x4, + RelaxedFmsVecF32x4, + RelaxedFmaVecF64x2, + RelaxedFmsVecF64x2, + LaneselectI8x16, + LaneselectI16x8, + LaneselectI32x4, + LaneselectI64x2, }; enum RefIsOp { diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 4753e5ca1..e761206bc 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -1610,6 +1610,32 @@ Literal Literal::copysign(const Literal& other) const { } } +Literal Literal::fma(const Literal& left, const Literal& right) const { + switch (type.getBasic()) { + case Type::f32: + return Literal(::fmaf(left.getf32(), right.getf32(), getf32())); + break; + case Type::f64: + return Literal(::fma(left.getf64(), right.getf64(), getf64())); + break; + default: + WASM_UNREACHABLE("unexpected type"); + } +} + +Literal Literal::fms(const Literal& left, const Literal& right) const { + switch (type.getBasic()) { + case Type::f32: + return Literal(::fmaf(-left.getf32(), right.getf32(), getf32())); + break; + case Type::f64: + return Literal(::fma(-left.getf64(), right.getf64(), getf64())); + break; + default: + WASM_UNREACHABLE("unexpected type"); + } +} + template<typename LaneT, int Lanes> static LaneArray<Lanes> getLanes(const Literal& val) { assert(val.type == Type::v128); @@ -2548,6 +2574,42 @@ Literal Literal::swizzleI8x16(const Literal& other) const { return Literal(result); } +namespace { +template<int Lanes, + LaneArray<Lanes> (Literal::*IntoLanes)() const, + Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const> +static Literal ternary(const Literal& a, const Literal& b, const Literal& c) { + LaneArray<Lanes> x = (a.*IntoLanes)(); + LaneArray<Lanes> y = (b.*IntoLanes)(); + LaneArray<Lanes> z = (c.*IntoLanes)(); + LaneArray<Lanes> r; + for (size_t i = 0; i < Lanes; ++i) { + r[i] = (x[i].*TernaryOp)(y[i], z[i]); + } + return 
Literal(r); +} +} // namespace + +Literal Literal::relaxedFmaF32x4(const Literal& left, + const Literal& right) const { + return ternary<4, &Literal::getLanesF32x4, &Literal::fma>(*this, left, right); +} + +Literal Literal::relaxedFmsF32x4(const Literal& left, + const Literal& right) const { + return ternary<4, &Literal::getLanesF32x4, &Literal::fms>(*this, left, right); +} + +Literal Literal::relaxedFmaF64x2(const Literal& left, + const Literal& right) const { + return ternary<2, &Literal::getLanesF64x2, &Literal::fma>(*this, left, right); +} + +Literal Literal::relaxedFmsF64x2(const Literal& left, + const Literal& right) const { + return ternary<2, &Literal::getLanesF64x2, &Literal::fms>(*this, left, right); +} + bool Literal::isSubRtt(const Literal& other) const { assert(type.isRtt() && other.type.isRtt()); // For this literal to be a sub-rtt of the other rtt, the supers must be a diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 6ce2c7979..0ccb9d6db 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -5524,6 +5524,26 @@ bool WasmBinaryBuilder::maybeVisitSIMDBinary(Expression*& out, uint32_t code) { curr = allocator.alloc<Binary>(); curr->op = SwizzleVec8x16; break; + case BinaryConsts::I8x16RelaxedSwizzle: + curr = allocator.alloc<Binary>(); + curr->op = RelaxedSwizzleVec8x16; + break; + case BinaryConsts::F32x4RelaxedMin: + curr = allocator.alloc<Binary>(); + curr->op = RelaxedMinVecF32x4; + break; + case BinaryConsts::F32x4RelaxedMax: + curr = allocator.alloc<Binary>(); + curr->op = RelaxedMaxVecF32x4; + break; + case BinaryConsts::F64x2RelaxedMin: + curr = allocator.alloc<Binary>(); + curr->op = RelaxedMinVecF64x2; + break; + case BinaryConsts::F64x2RelaxedMax: + curr = allocator.alloc<Binary>(); + curr->op = RelaxedMaxVecF64x2; + break; default: return false; } @@ -5797,6 +5817,22 @@ bool WasmBinaryBuilder::maybeVisitSIMDUnary(Expression*& out, uint32_t code) { curr = allocator.alloc<Unary>(); curr->op = 
PromoteLowVecF32x4ToVecF64x2; break; + case BinaryConsts::I32x4RelaxedTruncF32x4S: + curr = allocator.alloc<Unary>(); + curr->op = RelaxedTruncSVecF32x4ToVecI32x4; + break; + case BinaryConsts::I32x4RelaxedTruncF32x4U: + curr = allocator.alloc<Unary>(); + curr->op = RelaxedTruncUVecF32x4ToVecI32x4; + break; + case BinaryConsts::I32x4RelaxedTruncF64x2SZero: + curr = allocator.alloc<Unary>(); + curr->op = RelaxedTruncZeroSVecF64x2ToVecI32x4; + break; + case BinaryConsts::I32x4RelaxedTruncF64x2UZero: + curr = allocator.alloc<Unary>(); + curr->op = RelaxedTruncZeroUVecF64x2ToVecI32x4; + break; default: return false; } @@ -5950,6 +5986,38 @@ bool WasmBinaryBuilder::maybeVisitSIMDTernary(Expression*& out, uint32_t code) { curr = allocator.alloc<SIMDTernary>(); curr->op = Bitselect; break; + case BinaryConsts::I8x16Laneselect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = LaneselectI8x16; + break; + case BinaryConsts::I16x8Laneselect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = LaneselectI16x8; + break; + case BinaryConsts::I32x4Laneselect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = LaneselectI32x4; + break; + case BinaryConsts::I64x2Laneselect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = LaneselectI64x2; + break; + case BinaryConsts::F32x4RelaxedFma: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedFmaVecF32x4; + break; + case BinaryConsts::F32x4RelaxedFms: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedFmsVecF32x4; + break; + case BinaryConsts::F64x2RelaxedFma: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedFmaVecF64x2; + break; + case BinaryConsts::F64x2RelaxedFms: + curr = allocator.alloc<SIMDTernary>(); + curr->op = RelaxedFmsVecF64x2; + break; default: return false; } diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index f5be5d2c5..59398839e 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -546,6 +546,30 @@ void 
BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) { case Bitselect: o << U32LEB(BinaryConsts::V128Bitselect); break; + case LaneselectI8x16: + o << U32LEB(BinaryConsts::I8x16Laneselect); + break; + case LaneselectI16x8: + o << U32LEB(BinaryConsts::I16x8Laneselect); + break; + case LaneselectI32x4: + o << U32LEB(BinaryConsts::I32x4Laneselect); + break; + case LaneselectI64x2: + o << U32LEB(BinaryConsts::I64x2Laneselect); + break; + case RelaxedFmaVecF32x4: + o << U32LEB(BinaryConsts::F32x4RelaxedFma); + break; + case RelaxedFmsVecF32x4: + o << U32LEB(BinaryConsts::F32x4RelaxedFms); + break; + case RelaxedFmaVecF64x2: + o << U32LEB(BinaryConsts::F64x2RelaxedFma); + break; + case RelaxedFmsVecF64x2: + o << U32LEB(BinaryConsts::F64x2RelaxedFms); + break; } } @@ -1153,6 +1177,22 @@ void BinaryInstWriter::visitUnary(Unary* curr) { o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F64x2PromoteLowF32x4); break; + case RelaxedTruncSVecF32x4ToVecI32x4: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I32x4RelaxedTruncF32x4S); + break; + case RelaxedTruncUVecF32x4ToVecI32x4: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I32x4RelaxedTruncF32x4U); + break; + case RelaxedTruncZeroSVecF64x2ToVecI32x4: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I32x4RelaxedTruncF64x2SZero); + break; + case RelaxedTruncZeroUVecF64x2ToVecI32x4: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I32x4RelaxedTruncF64x2UZero); + break; case InvalidUnary: WASM_UNREACHABLE("invalid unary op"); } @@ -1783,6 +1823,27 @@ void BinaryInstWriter::visitBinary(Binary* curr) { << U32LEB(BinaryConsts::I8x16Swizzle); break; + case RelaxedSwizzleVec8x16: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I8x16RelaxedSwizzle); + break; + case RelaxedMinVecF32x4: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::F32x4RelaxedMin); + break; + case RelaxedMaxVecF32x4: + o << 
int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::F32x4RelaxedMax); + break; + case RelaxedMinVecF64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::F64x2RelaxedMin); + break; + case RelaxedMaxVecF64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::F64x2RelaxedMax); + break; + case InvalidBinary: WASM_UNREACHABLE("invalid binary op"); } diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 86b537bf5..696c50d8b 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -1613,6 +1613,8 @@ void FunctionValidator::visitBinary(Binary* curr) { case MaxVecF32x4: case PMinVecF32x4: case PMaxVecF32x4: + case RelaxedMinVecF32x4: + case RelaxedMaxVecF32x4: case AddVecF64x2: case SubVecF64x2: case MulVecF64x2: @@ -1621,11 +1623,14 @@ void FunctionValidator::visitBinary(Binary* curr) { case MaxVecF64x2: case PMinVecF64x2: case PMaxVecF64x2: + case RelaxedMinVecF64x2: + case RelaxedMaxVecF64x2: case NarrowSVecI16x8ToVecI8x16: case NarrowUVecI16x8ToVecI8x16: case NarrowSVecI32x4ToVecI16x8: case NarrowUVecI32x4ToVecI16x8: - case SwizzleVec8x16: { + case SwizzleVec8x16: + case RelaxedSwizzleVec8x16: { shouldBeEqualOrFirstIsUnreachable( curr->left->type, Type(Type::v128), curr, "v128 op"); shouldBeEqualOrFirstIsUnreachable( @@ -1898,6 +1903,10 @@ void FunctionValidator::visitUnary(Unary* curr) { case TruncSatZeroUVecF64x2ToVecI32x4: case DemoteZeroVecF64x2ToVecF32x4: case PromoteLowVecF32x4ToVecF64x2: + case RelaxedTruncSVecF32x4ToVecI32x4: + case RelaxedTruncUVecF32x4ToVecI32x4: + case RelaxedTruncZeroSVecF64x2ToVecI32x4: + case RelaxedTruncZeroUVecF64x2ToVecI32x4: shouldBeEqual(curr->type, Type(Type::v128), curr, "expected v128 type"); shouldBeEqual( curr->value->type, Type(Type::v128), curr, "expected v128 operand"); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 015df26ac..fd3cec8d8 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -711,6 +711,10 @@ void 
Unary::finalize() { case TruncSatZeroUVecF64x2ToVecI32x4: case DemoteZeroVecF64x2ToVecF32x4: case PromoteLowVecF32x4ToVecF64x2: + case RelaxedTruncSVecF32x4ToVecI32x4: + case RelaxedTruncUVecF32x4ToVecI32x4: + case RelaxedTruncZeroSVecF64x2ToVecI32x4: + case RelaxedTruncZeroUVecF64x2ToVecI32x4: type = Type::v128; break; case AnyTrueVec128: diff --git a/test/lit/relaxed-simd.wast b/test/lit/relaxed-simd.wast new file mode 100644 index 000000000..a21eb8a48 --- /dev/null +++ b/test/lit/relaxed-simd.wast @@ -0,0 +1,483 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-as %s -all -g -o %t.wasm +;; RUN: wasm-dis %t.wasm -all -o %t.wast +;; RUN: wasm-as %s -all -o %t.nodebug.wasm +;; RUN: wasm-dis %t.nodebug.wasm -all -o %t.nodebug.wast +;; RUN: wasm-opt %t.wast -all -o %t.text.wast -g -S +;; RUN: cat %t.wast | filecheck %s --check-prefix=CHECK-BINARY +;; RUN: cat %t.nodebug.wast | filecheck %s --check-prefix=CHECK-NODEBUG +;; RUN: cat %t.text.wast | filecheck %s --check-prefix=CHECK-TEXT +(module + (memory 1 1) + + ;; CHECK-BINARY: (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) + + ;; CHECK-BINARY: (type $v128_v128_=>_v128 (func (param v128 v128) (result v128))) + + ;; CHECK-BINARY: (type $v128_=>_v128 (func (param v128) (result v128))) + + ;; CHECK-BINARY: (memory $0 1 1) + + ;; CHECK-BINARY: (func $i8x16.relaxed_swizzle (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i8x16.relaxed_swizzle + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) + + ;; CHECK-TEXT: (type $v128_v128_=>_v128 (func (param v128 v128) (result v128))) + + ;; CHECK-TEXT: (type $v128_=>_v128 (func (param v128) (result v128))) + + ;; CHECK-TEXT: (memory $0 1 1) + + ;; CHECK-TEXT: (func $i8x16.relaxed_swizzle 
(param $0 v128) (param $1 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i8x16.relaxed_swizzle + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i8x16.relaxed_swizzle (param $0 v128) (param $1 v128) (result v128) + (i8x16.relaxed_swizzle + (local.get $0) + (local.get $1) + ) + ) + + ;; CHECK-BINARY: (func $i32x4.relaxed_trunc_f32x4_s (param $0 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i32x4.relaxed_trunc_f32x4_s + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i32x4.relaxed_trunc_f32x4_s (param $0 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i32x4.relaxed_trunc_f32x4_s + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i32x4.relaxed_trunc_f32x4_s (param $0 v128) (result v128) + (i32x4.relaxed_trunc_f32x4_s + (local.get $0) + ) + ) + ;; CHECK-BINARY: (func $i32x4.relaxed_trunc_f32x4_u (param $0 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i32x4.relaxed_trunc_f32x4_u + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i32x4.relaxed_trunc_f32x4_u (param $0 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i32x4.relaxed_trunc_f32x4_u + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i32x4.relaxed_trunc_f32x4_u (param $0 v128) (result v128) + (i32x4.relaxed_trunc_f32x4_u + (local.get $0) + ) + ) + ;; CHECK-BINARY: (func $i32x4.relaxed_trunc_f64x2_s_zero (param $0 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i32x4.relaxed_trunc_f64x2_s_zero + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i32x4.relaxed_trunc_f64x2_s_zero (param $0 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i32x4.relaxed_trunc_f64x2_s_zero + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func 
$i32x4.relaxed_trunc_f64x2_s_zero (param $0 v128) (result v128) + (i32x4.relaxed_trunc_f64x2_s_zero + (local.get $0) + ) + ) + ;; CHECK-BINARY: (func $i32x4.relaxed_trunc_f64x2_u_zero (param $0 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i32x4.relaxed_trunc_f64x2_u_zero + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i32x4.relaxed_trunc_f64x2_u_zero (param $0 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i32x4.relaxed_trunc_f64x2_u_zero + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i32x4.relaxed_trunc_f64x2_u_zero (param $0 v128) (result v128) + (i32x4.relaxed_trunc_f64x2_u_zero + (local.get $0) + ) + ) + + ;; CHECK-BINARY: (func $f32x4.relaxed_fma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f32x4.relaxed_fma + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f32x4.relaxed_fma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f32x4.relaxed_fma + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f32x4.relaxed_fma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (f32x4.relaxed_fma + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ;; CHECK-BINARY: (func $f32x4.relaxed_fms (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f32x4.relaxed_fms + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f32x4.relaxed_fms (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f32x4.relaxed_fms + ;; 
CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f32x4.relaxed_fms (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (f32x4.relaxed_fms + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ;; CHECK-BINARY: (func $f64x2.relaxed_fma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f64x2.relaxed_fma + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f64x2.relaxed_fma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f64x2.relaxed_fma + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f64x2.relaxed_fma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (f64x2.relaxed_fma + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ;; CHECK-BINARY: (func $f64x2.relaxed_fms (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f64x2.relaxed_fms + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f64x2.relaxed_fms (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f64x2.relaxed_fms + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f64x2.relaxed_fms (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (f64x2.relaxed_fms + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + + ;; CHECK-BINARY: (func $i8x16.laneselect (param $0 v128) (param $1 v128) (param $2 v128) 
(result v128) + ;; CHECK-BINARY-NEXT: (i8x16.laneselect + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i8x16.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i8x16.laneselect + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i8x16.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (i8x16.laneselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ;; CHECK-BINARY: (func $i16x8.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i16x8.laneselect + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i16x8.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i16x8.laneselect + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i16x8.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (i16x8.laneselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ;; CHECK-BINARY: (func $i32x4.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i32x4.laneselect + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i32x4.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i32x4.laneselect + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: 
(local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i32x4.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (i32x4.laneselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ;; CHECK-BINARY: (func $i64x2.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-BINARY-NEXT: (i64x2.laneselect + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: (local.get $2) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $i64x2.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + ;; CHECK-TEXT-NEXT: (i64x2.laneselect + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: (local.get $2) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $i64x2.laneselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (i64x2.laneselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + + ;; CHECK-BINARY: (func $f32x4.relaxed_min (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f32x4.relaxed_min + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f32x4.relaxed_min (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f32x4.relaxed_min + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f32x4.relaxed_min (param $0 v128) (param $1 v128) (result v128) + (f32x4.relaxed_min + (local.get $0) + (local.get $1) + ) + ) + ;; CHECK-BINARY: (func $f32x4.relaxed_max (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f32x4.relaxed_max + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func 
$f32x4.relaxed_max (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f32x4.relaxed_max + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f32x4.relaxed_max (param $0 v128) (param $1 v128) (result v128) + (f32x4.relaxed_max + (local.get $0) + (local.get $1) + ) + ) + ;; CHECK-BINARY: (func $f64x2.relaxed_min (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f64x2.relaxed_min + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f64x2.relaxed_min (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f64x2.relaxed_min + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f64x2.relaxed_min (param $0 v128) (param $1 v128) (result v128) + (f64x2.relaxed_min + (local.get $0) + (local.get $1) + ) + ) + ;; CHECK-BINARY: (func $f64x2.relaxed_max (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-BINARY-NEXT: (f64x2.relaxed_max + ;; CHECK-BINARY-NEXT: (local.get $0) + ;; CHECK-BINARY-NEXT: (local.get $1) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-BINARY-NEXT: ) + ;; CHECK-TEXT: (func $f64x2.relaxed_max (param $0 v128) (param $1 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f64x2.relaxed_max + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + (func $f64x2.relaxed_max (param $0 v128) (param $1 v128) (result v128) + (f64x2.relaxed_max + (local.get $0) + (local.get $1) + ) + ) + +) +;; CHECK-NODEBUG: (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) + +;; CHECK-NODEBUG: (type $v128_v128_=>_v128 (func (param v128 v128) (result v128))) + +;; CHECK-NODEBUG: (type $v128_=>_v128 (func (param v128) (result v128))) + +;; CHECK-NODEBUG: (memory $0 1 1) + +;; CHECK-NODEBUG: (func $0 (param 
$0 v128) (param $1 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i8x16.relaxed_swizzle +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $1 (param $0 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i32x4.relaxed_trunc_f32x4_s +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $2 (param $0 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i32x4.relaxed_trunc_f32x4_u +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $3 (param $0 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i32x4.relaxed_trunc_f64x2_s_zero +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $4 (param $0 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i32x4.relaxed_trunc_f64x2_u_zero +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $5 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f32x4.relaxed_fma +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $6 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f32x4.relaxed_fms +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $7 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f64x2.relaxed_fma +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $8 (param $0 v128) 
(param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f64x2.relaxed_fms +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $9 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i8x16.laneselect +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $10 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i16x8.laneselect +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $11 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i32x4.laneselect +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $12 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (i64x2.laneselect +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: (local.get $2) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $13 (param $0 v128) (param $1 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f32x4.relaxed_min +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $14 (param $0 v128) (param $1 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f32x4.relaxed_max +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func 
$15 (param $0 v128) (param $1 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f64x2.relaxed_min +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) + +;; CHECK-NODEBUG: (func $16 (param $0 v128) (param $1 v128) (result v128) +;; CHECK-NODEBUG-NEXT: (f64x2.relaxed_max +;; CHECK-NODEBUG-NEXT: (local.get $0) +;; CHECK-NODEBUG-NEXT: (local.get $1) +;; CHECK-NODEBUG-NEXT: ) +;; CHECK-NODEBUG-NEXT: ) |