diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2020-12-11 19:06:34 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-11 19:06:34 -0500 |
commit | dd33ae0512aa97f537d40e61631d268c8a8fd17e (patch) | |
tree | e5c219ba349e56940669bb873bb57a3f2626bf77 | |
parent | 290147d8d43a7448d68939ec711b524ba4fb3fbd (diff) | |
download | binaryen-dd33ae0512aa97f537d40e61631d268c8a8fd17e.tar.gz binaryen-dd33ae0512aa97f537d40e61631d268c8a8fd17e.tar.bz2 binaryen-dd33ae0512aa97f537d40e61631d268c8a8fd17e.zip |
Prototype SIMD instructions implemented in LLVM (#3440)
- i64x2.eq (https://github.com/WebAssembly/simd/pull/381)
- i64x2 widens (https://github.com/WebAssembly/simd/pull/290)
- i64x2.bitmask (https://github.com/WebAssembly/simd/pull/368)
- signselect ops (https://github.com/WebAssembly/simd/pull/124)
-rwxr-xr-x | scripts/gen-s-parser.py | 10 |
-rw-r--r-- | src/gen-s-parser.inc | 131 |
-rw-r--r-- | src/ir/cost.h | 10 |
-rw-r--r-- | src/literal.h | 1 |
-rw-r--r-- | src/passes/Print.cpp | 30 |
-rw-r--r-- | src/wasm-binary.h | 11 |
-rw-r--r-- | src/wasm-interpreter.h | 11 |
-rw-r--r-- | src/wasm.h | 18 |
-rw-r--r-- | src/wasm/literal.cpp | 4 |
-rw-r--r-- | src/wasm/wasm-binary.cpp | 40 |
-rw-r--r-- | src/wasm/wasm-stack.cpp | 35 |
-rw-r--r-- | src/wasm/wasm-validator.cpp | 6 |
-rw-r--r-- | src/wasm/wasm.cpp | 5 |
-rw-r--r-- | test/simd.wast | 59 |
-rw-r--r-- | test/simd.wast.from-wast | 61 |
-rw-r--r-- | test/simd.wast.fromBinary | 61 |
-rw-r--r-- | test/simd.wast.fromBinary.noDebugInfo | 415 |
-rw-r--r-- | test/spec/simd.wast | 24 |
18 files changed, 717 insertions, 215 deletions
diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index e94c58086..545d48f9c 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -332,6 +332,7 @@ instructions = [ ("i32x4.le_u", "makeBinary(s, BinaryOp::LeUVecI32x4)"), ("i32x4.ge_s", "makeBinary(s, BinaryOp::GeSVecI32x4)"), ("i32x4.ge_u", "makeBinary(s, BinaryOp::GeUVecI32x4)"), + ("i64x2.eq", "makeBinary(s, BinaryOp::EqVecI64x2)"), ("f32x4.eq", "makeBinary(s, BinaryOp::EqVecF32x4)"), ("f32x4.ne", "makeBinary(s, BinaryOp::NeVecF32x4)"), ("f32x4.lt", "makeBinary(s, BinaryOp::LtVecF32x4)"), @@ -350,6 +351,10 @@ instructions = [ ("v128.xor", "makeBinary(s, BinaryOp::XorVec128)"), ("v128.andnot", "makeBinary(s, BinaryOp::AndNotVec128)"), ("v128.bitselect", "makeSIMDTernary(s, SIMDTernaryOp::Bitselect)"), + ("v8x16.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec8x16)"), + ("v16x8.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec16x8)"), + ("v32x4.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec32x4)"), + ("v64x2.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec64x2)"), ("v128.load8_lane", "makeSIMDLoadStoreLane(s, LoadLaneVec8x16)"), ("v128.load16_lane", "makeSIMDLoadStoreLane(s, LoadLaneVec16x8)"), ("v128.load32_lane", "makeSIMDLoadStoreLane(s, LoadLaneVec32x4)"), @@ -427,6 +432,7 @@ instructions = [ ("i64x2.neg", "makeUnary(s, UnaryOp::NegVecI64x2)"), ("i64x2.any_true", "makeUnary(s, UnaryOp::AnyTrueVecI64x2)"), ("i64x2.all_true", "makeUnary(s, UnaryOp::AllTrueVecI64x2)"), + ("i64x2.bitmask", "makeUnary(s, UnaryOp::BitmaskVecI64x2)"), ("i64x2.shl", "makeSIMDShift(s, SIMDShiftOp::ShlVecI64x2)"), ("i64x2.shr_s", "makeSIMDShift(s, SIMDShiftOp::ShrSVecI64x2)"), ("i64x2.shr_u", "makeSIMDShift(s, SIMDShiftOp::ShrUVecI64x2)"), @@ -503,6 +509,10 @@ instructions = [ ("i32x4.widen_high_i16x8_s", "makeUnary(s, UnaryOp::WidenHighSVecI16x8ToVecI32x4)"), ("i32x4.widen_low_i16x8_u", "makeUnary(s, UnaryOp::WidenLowUVecI16x8ToVecI32x4)"), 
("i32x4.widen_high_i16x8_u", "makeUnary(s, UnaryOp::WidenHighUVecI16x8ToVecI32x4)"), + ("i64x2.widen_low_i32x4_s", "makeUnary(s, UnaryOp::WidenLowSVecI32x4ToVecI64x2)"), + ("i64x2.widen_high_i32x4_s", "makeUnary(s, UnaryOp::WidenHighSVecI32x4ToVecI64x2)"), + ("i64x2.widen_low_i32x4_u", "makeUnary(s, UnaryOp::WidenLowUVecI32x4ToVecI64x2)"), + ("i64x2.widen_high_i32x4_u", "makeUnary(s, UnaryOp::WidenHighUVecI32x4ToVecI64x2)"), ("v8x16.swizzle", "makeBinary(s, BinaryOp::SwizzleVec8x16)"), # reference types instructions # TODO Add table instructions diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 8afcea917..d4e1afe38 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -2312,38 +2312,49 @@ switch (op[0]) { default: goto parse_error; } } + case 'b': + if (strcmp(op, "i64x2.bitmask") == 0) { return makeUnary(s, UnaryOp::BitmaskVecI64x2); } + goto parse_error; case 'e': { - switch (op[9]) { - case 'm': { - switch (op[13]) { - case 'h': { - switch (op[24]) { - case 's': - if (strcmp(op, "i64x2.extmul_high_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulHighSVecI64x2); } - goto parse_error; - case 'u': - if (strcmp(op, "i64x2.extmul_high_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulHighUVecI64x2); } - goto parse_error; - default: goto parse_error; - } - } - case 'l': { - switch (op[23]) { - case 's': - if (strcmp(op, "i64x2.extmul_low_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulLowSVecI64x2); } - goto parse_error; - case 'u': - if (strcmp(op, "i64x2.extmul_low_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulLowUVecI64x2); } - goto parse_error; + switch (op[7]) { + case 'q': + if (strcmp(op, "i64x2.eq") == 0) { return makeBinary(s, BinaryOp::EqVecI64x2); } + goto parse_error; + case 'x': { + switch (op[9]) { + case 'm': { + switch (op[13]) { + case 'h': { + switch (op[24]) { + case 's': + if (strcmp(op, "i64x2.extmul_high_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulHighSVecI64x2); } + goto parse_error; + 
case 'u': + if (strcmp(op, "i64x2.extmul_high_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulHighUVecI64x2); } + goto parse_error; + default: goto parse_error; + } + } + case 'l': { + switch (op[23]) { + case 's': + if (strcmp(op, "i64x2.extmul_low_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulLowSVecI64x2); } + goto parse_error; + case 'u': + if (strcmp(op, "i64x2.extmul_low_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulLowUVecI64x2); } + goto parse_error; + default: goto parse_error; + } + } default: goto parse_error; } } + case 'r': + if (strcmp(op, "i64x2.extract_lane") == 0) { return makeSIMDExtract(s, SIMDExtractOp::ExtractLaneVecI64x2, 2); } + goto parse_error; default: goto parse_error; } } - case 'r': - if (strcmp(op, "i64x2.extract_lane") == 0) { return makeSIMDExtract(s, SIMDExtractOp::ExtractLaneVecI64x2, 2); } - goto parse_error; default: goto parse_error; } } @@ -2408,6 +2419,33 @@ switch (op[0]) { default: goto parse_error; } } + case 'w': { + switch (op[12]) { + case 'h': { + switch (op[23]) { + case 's': + if (strcmp(op, "i64x2.widen_high_i32x4_s") == 0) { return makeUnary(s, UnaryOp::WidenHighSVecI32x4ToVecI64x2); } + goto parse_error; + case 'u': + if (strcmp(op, "i64x2.widen_high_i32x4_u") == 0) { return makeUnary(s, UnaryOp::WidenHighUVecI32x4ToVecI64x2); } + goto parse_error; + default: goto parse_error; + } + } + case 'l': { + switch (op[22]) { + case 's': + if (strcmp(op, "i64x2.widen_low_i32x4_s") == 0) { return makeUnary(s, UnaryOp::WidenLowSVecI32x4ToVecI64x2); } + goto parse_error; + case 'u': + if (strcmp(op, "i64x2.widen_low_i32x4_u") == 0) { return makeUnary(s, UnaryOp::WidenLowUVecI32x4ToVecI64x2); } + goto parse_error; + default: goto parse_error; + } + } + default: goto parse_error; + } + } default: goto parse_error; } } @@ -2962,18 +3000,42 @@ switch (op[0]) { default: goto parse_error; } } - case '6': - if (strcmp(op, "v16x8.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec16x8); } + 
case '6': { + switch (op[6]) { + case 'l': + if (strcmp(op, "v16x8.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec16x8); } + goto parse_error; + case 's': + if (strcmp(op, "v16x8.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec16x8); } + goto parse_error; + default: goto parse_error; + } + } + default: goto parse_error; + } + } + case '3': { + switch (op[6]) { + case 'l': + if (strcmp(op, "v32x4.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec32x4); } + goto parse_error; + case 's': + if (strcmp(op, "v32x4.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec32x4); } + goto parse_error; + default: goto parse_error; + } + } + case '6': { + switch (op[6]) { + case 'l': + if (strcmp(op, "v64x2.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec64x2); } + goto parse_error; + case 's': + if (strcmp(op, "v64x2.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec64x2); } goto parse_error; default: goto parse_error; } } - case '3': - if (strcmp(op, "v32x4.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec32x4); } - goto parse_error; - case '6': - if (strcmp(op, "v64x2.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec64x2); } - goto parse_error; case '8': { switch (op[6]) { case 'l': @@ -2984,6 +3046,9 @@ switch (op[0]) { case 'h': if (strcmp(op, "v8x16.shuffle") == 0) { return makeSIMDShuffle(s); } goto parse_error; + case 'i': + if (strcmp(op, "v8x16.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec8x16); } + goto parse_error; case 'w': if (strcmp(op, "v8x16.swizzle") == 0) { return makeBinary(s, BinaryOp::SwizzleVec8x16); } goto parse_error; diff --git a/src/ir/cost.h b/src/ir/cost.h index 66a316bf6..c5b77ea01 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -191,6 +191,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> { case NegVecI64x2: case 
AnyTrueVecI64x2: case AllTrueVecI64x2: + case BitmaskVecI64x2: case AbsVecF32x4: case NegVecF32x4: case SqrtVecF32x4: @@ -221,6 +222,10 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> { case WidenHighSVecI16x8ToVecI32x4: case WidenLowUVecI16x8ToVecI32x4: case WidenHighUVecI16x8ToVecI32x4: + case WidenLowSVecI32x4ToVecI64x2: + case WidenHighSVecI32x4ToVecI64x2: + case WidenLowUVecI32x4ToVecI64x2: + case WidenHighUVecI32x4ToVecI64x2: ret = 1; break; case InvalidUnary: @@ -363,6 +368,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> { case GtUVecI32x4: case GeSVecI32x4: case GeUVecI32x4: + case EqVecI64x2: case EqVecF32x4: case NeVecF32x4: case LtVecF32x4: @@ -508,6 +514,10 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> { Index ret = 0; switch (curr->op) { case Bitselect: + case SignSelectVec8x16: + case SignSelectVec16x8: + case SignSelectVec32x4: + case SignSelectVec64x2: ret = 1; break; case QFMAF32x4: diff --git a/src/literal.h b/src/literal.h index 80a7cea7a..8a1829d10 100644 --- a/src/literal.h +++ b/src/literal.h @@ -480,6 +480,7 @@ public: Literal leUI32x4(const Literal& other) const; Literal geSI32x4(const Literal& other) const; Literal geUI32x4(const Literal& other) const; + Literal eqI64x2(const Literal& other) const; Literal eqF32x4(const Literal& other) const; Literal neF32x4(const Literal& other) const; Literal ltF32x4(const Literal& other) const; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 4007b6daa..b61a72720 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -533,6 +533,18 @@ struct PrintExpressionContents case QFMSF64x2: o << "f64x2.qfms"; break; + case SignSelectVec8x16: + o << "v8x16.signselect"; + break; + case SignSelectVec16x8: + o << "v16x8.signselect"; + break; + case SignSelectVec32x4: + o << "v32x4.signselect"; + break; + case SignSelectVec64x2: + o << "v64x2.signselect"; + break; } } void visitSIMDShift(SIMDShift* curr) { @@ -941,6 +953,9 
@@ struct PrintExpressionContents case AllTrueVecI64x2: o << "i64x2.all_true"; break; + case BitmaskVecI64x2: + o << "i64x2.bitmask"; + break; case AbsVecF32x4: o << "f32x4.abs"; break; @@ -1031,6 +1046,18 @@ struct PrintExpressionContents case WidenHighUVecI16x8ToVecI32x4: o << "i32x4.widen_high_i16x8_u"; break; + case WidenLowSVecI32x4ToVecI64x2: + o << "i64x2.widen_low_i32x4_s"; + break; + case WidenHighSVecI32x4ToVecI64x2: + o << "i64x2.widen_high_i32x4_s"; + break; + case WidenLowUVecI32x4ToVecI64x2: + o << "i64x2.widen_low_i32x4_u"; + break; + case WidenHighUVecI32x4ToVecI64x2: + o << "i64x2.widen_high_i32x4_u"; + break; case InvalidUnary: WASM_UNREACHABLE("unvalid unary operator"); } @@ -1360,6 +1387,9 @@ struct PrintExpressionContents case GeUVecI32x4: o << "i32x4.ge_u"; break; + case EqVecI64x2: + o << "i64x2.eq"; + break; case EqVecF32x4: o << "f32x4.eq"; break; diff --git a/src/wasm-binary.h b/src/wasm-binary.h index dffc1507c..1d250aa2b 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -782,6 +782,7 @@ enum ASTNodes { I32x4LeU = 0x3e, I32x4GeS = 0x3f, I32x4GeU = 0x40, + I64x2Eq = 0xc0, F32x4Eq = 0x41, F32x4Ne = 0x42, F32x4Lt = 0x43, @@ -802,6 +803,11 @@ enum ASTNodes { V128Xor = 0x51, V128Bitselect = 0x52, + V8x16SignSelect = 0x7d, + V16x8SignSelect = 0x7e, + V32x4SignSelect = 0x7f, + V64x2SignSelect = 0x94, + V128Load8Lane = 0x58, V128Load16Lane = 0x59, V128Load32Lane = 0x5a, @@ -885,6 +891,11 @@ enum ASTNodes { I32x4MaxU = 0xb9, I32x4DotSVecI16x8 = 0xba, + I64x2Bitmask = 0xc4, + I64x2WidenLowSI32x4 = 0xc7, + I64x2WidenHighSI32x4 = 0xc8, + I64x2WidenLowUI32x4 = 0xc9, + I64x2WidenHighUI32x4 = 0xca, I64x2Neg = 0xc1, I64x2AnyTrue = 0xc2, I64x2AllTrue = 0xc3, diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index e4c20082e..dbc14de5f 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -491,6 +491,8 @@ public: return value.anyTrueI64x2(); case AllTrueVecI64x2: return value.allTrueI64x2(); + case BitmaskVecI64x2: + 
WASM_UNREACHABLE("unimp"); case AbsVecF32x4: return value.absF32x4(); case NegVecF32x4: @@ -551,6 +553,11 @@ public: return value.widenLowUToVecI32x4(); case WidenHighUVecI16x8ToVecI32x4: return value.widenHighUToVecI32x4(); + case WidenLowSVecI32x4ToVecI64x2: + case WidenHighSVecI32x4ToVecI64x2: + case WidenLowUVecI32x4ToVecI64x2: + case WidenHighUVecI32x4ToVecI64x2: + WASM_UNREACHABLE("unimp"); case InvalidUnary: WASM_UNREACHABLE("invalid unary op"); } @@ -796,6 +803,8 @@ public: return left.geSI32x4(right); case GeUVecI32x4: return left.geUI32x4(right); + case EqVecI64x2: + return left.eqI64x2(right); case EqVecF32x4: return left.eqF32x4(right); case NeVecF32x4: @@ -1067,7 +1076,7 @@ public: case Bitselect: return c.bitselectV128(a, b); default: - // TODO: implement qfma/qfms + // TODO: implement qfma/qfms and signselect WASM_UNREACHABLE("not implemented"); } } diff --git a/src/wasm.h b/src/wasm.h index 0c6566407..bc39eae7b 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -175,6 +175,7 @@ enum UnaryOp { NegVecI64x2, AnyTrueVecI64x2, AllTrueVecI64x2, + BitmaskVecI64x2, AbsVecF32x4, NegVecF32x4, SqrtVecF32x4, @@ -207,6 +208,10 @@ enum UnaryOp { WidenHighSVecI16x8ToVecI32x4, WidenLowUVecI16x8ToVecI32x4, WidenHighUVecI16x8ToVecI32x4, + WidenLowSVecI32x4ToVecI64x2, + WidenHighSVecI32x4ToVecI64x2, + WidenLowUVecI32x4ToVecI64x2, + WidenHighUVecI32x4ToVecI64x2, InvalidUnary }; @@ -351,6 +356,7 @@ enum BinaryOp { LeUVecI32x4, GeSVecI32x4, GeUVecI32x4, + EqVecI64x2, EqVecF32x4, NeVecF32x4, LtVecF32x4, @@ -509,7 +515,17 @@ enum SIMDLoadStoreLaneOp { StoreLaneVec64x2, }; -enum SIMDTernaryOp { Bitselect, QFMAF32x4, QFMSF32x4, QFMAF64x2, QFMSF64x2 }; +enum SIMDTernaryOp { + Bitselect, + QFMAF32x4, + QFMSF32x4, + QFMAF64x2, + QFMSF64x2, + SignSelectVec8x16, + SignSelectVec16x8, + SignSelectVec32x4, + SignSelectVec64x2 +}; // // Expressions diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index c19efa225..9bdf886b3 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp 
@@ -1986,6 +1986,10 @@ Literal Literal::geSI32x4(const Literal& other) const { Literal Literal::geUI32x4(const Literal& other) const { return compare<4, &Literal::getLanesI32x4, &Literal::geU>(*this, other); } +Literal Literal::eqI64x2(const Literal& other) const { + return compare<2, &Literal::getLanesI64x2, &Literal::eq, int64_t>(*this, + other); +} Literal Literal::eqF32x4(const Literal& other) const { return compare<4, &Literal::getLanesF32x4, &Literal::eq>(*this, other); } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 1b3d50a41..796ba4b4e 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -4371,6 +4371,10 @@ bool WasmBinaryBuilder::maybeVisitSIMDBinary(Expression*& out, uint32_t code) { curr = allocator.alloc<Binary>(); curr->op = GeUVecI32x4; break; + case BinaryConsts::I64x2Eq: + curr = allocator.alloc<Binary>(); + curr->op = EqVecI64x2; + break; case BinaryConsts::F32x4Eq: curr = allocator.alloc<Binary>(); curr->op = EqVecF32x4; @@ -4828,6 +4832,10 @@ bool WasmBinaryBuilder::maybeVisitSIMDUnary(Expression*& out, uint32_t code) { curr = allocator.alloc<Unary>(); curr->op = AllTrueVecI64x2; break; + case BinaryConsts::I64x2Bitmask: + curr = allocator.alloc<Unary>(); + curr->op = BitmaskVecI64x2; + break; case BinaryConsts::F32x4Abs: curr = allocator.alloc<Unary>(); curr->op = AbsVecF32x4; @@ -4948,6 +4956,22 @@ bool WasmBinaryBuilder::maybeVisitSIMDUnary(Expression*& out, uint32_t code) { curr = allocator.alloc<Unary>(); curr->op = WidenHighUVecI16x8ToVecI32x4; break; + case BinaryConsts::I64x2WidenLowSI32x4: + curr = allocator.alloc<Unary>(); + curr->op = WidenLowSVecI32x4ToVecI64x2; + break; + case BinaryConsts::I64x2WidenHighSI32x4: + curr = allocator.alloc<Unary>(); + curr->op = WidenHighSVecI32x4ToVecI64x2; + break; + case BinaryConsts::I64x2WidenLowUI32x4: + curr = allocator.alloc<Unary>(); + curr->op = WidenLowUVecI32x4ToVecI64x2; + break; + case BinaryConsts::I64x2WidenHighUI32x4: + curr = 
allocator.alloc<Unary>(); + curr->op = WidenHighUVecI32x4ToVecI64x2; + break; default: return false; } @@ -5101,6 +5125,22 @@ bool WasmBinaryBuilder::maybeVisitSIMDTernary(Expression*& out, uint32_t code) { curr = allocator.alloc<SIMDTernary>(); curr->op = Bitselect; break; + case BinaryConsts::V8x16SignSelect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = SignSelectVec8x16; + break; + case BinaryConsts::V16x8SignSelect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = SignSelectVec16x8; + break; + case BinaryConsts::V32x4SignSelect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = SignSelectVec32x4; + break; + case BinaryConsts::V64x2SignSelect: + curr = allocator.alloc<SIMDTernary>(); + curr->op = SignSelectVec64x2; + break; case BinaryConsts::F32x4QFMA: curr = allocator.alloc<SIMDTernary>(); curr->op = QFMAF32x4; diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 741aef5ce..3a24d0cc0 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -560,6 +560,18 @@ void BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) { case QFMSF64x2: o << U32LEB(BinaryConsts::F64x2QFMS); break; + case SignSelectVec8x16: + o << U32LEB(BinaryConsts::V8x16SignSelect); + break; + case SignSelectVec16x8: + o << U32LEB(BinaryConsts::V16x8SignSelect); + break; + case SignSelectVec32x4: + o << U32LEB(BinaryConsts::V32x4SignSelect); + break; + case SignSelectVec64x2: + o << U32LEB(BinaryConsts::V64x2SignSelect); + break; } } @@ -1024,6 +1036,10 @@ void BinaryInstWriter::visitUnary(Unary* curr) { o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I64x2AllTrue); break; + case BitmaskVecI64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I64x2Bitmask); + break; case AbsVecF32x4: o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F32x4Abs); break; @@ -1132,6 +1148,22 @@ void BinaryInstWriter::visitUnary(Unary* curr) { o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I32x4WidenHighUI16x8); 
break; + case WidenLowSVecI32x4ToVecI64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I64x2WidenLowSI32x4); + break; + case WidenHighSVecI32x4ToVecI64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I64x2WidenHighSI32x4); + break; + case WidenLowUVecI32x4ToVecI64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I64x2WidenLowUI32x4); + break; + case WidenHighUVecI32x4ToVecI64x2: + o << int8_t(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::I64x2WidenHighUI32x4); + break; case InvalidUnary: WASM_UNREACHABLE("invalid unary op"); } @@ -1461,6 +1493,9 @@ void BinaryInstWriter::visitBinary(Binary* curr) { case GeUVecI32x4: o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I32x4GeU); break; + case EqVecI64x2: + o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I64x2Eq); + break; case EqVecF32x4: o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F32x4Eq); break; diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 56cf9c114..b294c953c 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -1534,6 +1534,7 @@ void FunctionValidator::visitBinary(Binary* curr) { case GtUVecI32x4: case GeSVecI32x4: case GeUVecI32x4: + case EqVecI64x2: case EqVecF32x4: case NeVecF32x4: case LtVecF32x4: @@ -1880,6 +1881,10 @@ void FunctionValidator::visitUnary(Unary* curr) { case WidenHighSVecI16x8ToVecI32x4: case WidenLowUVecI16x8ToVecI32x4: case WidenHighUVecI16x8ToVecI32x4: + case WidenLowSVecI32x4ToVecI64x2: + case WidenHighSVecI32x4ToVecI64x2: + case WidenLowUVecI32x4ToVecI64x2: + case WidenHighUVecI32x4ToVecI64x2: shouldBeEqual(curr->type, Type(Type::v128), curr, "expected v128 type"); shouldBeEqual( curr->value->type, Type(Type::v128), curr, "expected v128 operand"); @@ -1895,6 +1900,7 @@ void FunctionValidator::visitUnary(Unary* curr) { case BitmaskVecI8x16: case BitmaskVecI16x8: case BitmaskVecI32x4: + case BitmaskVecI64x2: 
shouldBeEqual(curr->type, Type(Type::i32), curr, "expected i32 type"); shouldBeEqual( curr->value->type, Type(Type::v128), curr, "expected v128 operand"); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 93a14aecc..8e6550c27 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -869,6 +869,10 @@ void Unary::finalize() { case WidenHighSVecI16x8ToVecI32x4: case WidenLowUVecI16x8ToVecI32x4: case WidenHighUVecI16x8ToVecI32x4: + case WidenLowSVecI32x4ToVecI64x2: + case WidenHighSVecI32x4ToVecI64x2: + case WidenLowUVecI32x4ToVecI64x2: + case WidenHighUVecI32x4ToVecI64x2: type = Type::v128; break; case AnyTrueVecI8x16: @@ -882,6 +886,7 @@ void Unary::finalize() { case BitmaskVecI8x16: case BitmaskVecI16x8: case BitmaskVecI32x4: + case BitmaskVecI64x2: type = Type::i32; break; diff --git a/test/simd.wast b/test/simd.wast index 8ff2298a8..7296a075e 100644 --- a/test/simd.wast +++ b/test/simd.wast @@ -316,6 +316,12 @@ (local.get $1) ) ) + (func $i64x2.eq (param $0 v128) (param $1 v128) (result v128) + (i64x2.eq + (local.get $0) + (local.get $1) + ) + ) (func $f32x4.eq (param $0 v128) (param $1 v128) (result v128) (f32x4.eq (local.get $0) @@ -424,6 +430,34 @@ (local.get $2) ) ) + (func $v8x16.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v8x16.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v16x8.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v16x8.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v32x4.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v32x4.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v64x2.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v64x2.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) (func $v128.load8_lane (param $0 i32) (param $1 v128) (result v128) (v128.load8_lane 0 (local.get $0) @@ -903,6 +937,11 @@ (local.get 
$0) ) ) + (func $i64x2.bitmask (param $0 v128) (result i32) + (i64x2.bitmask + (local.get $0) + ) + ) (func $i64x2.shl (param $0 v128) (param $1 i32) (result v128) (i64x2.shl (local.get $0) @@ -1281,6 +1320,26 @@ (local.get $0) ) ) + (func $i64x2.widen_low_i32x4_s (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_s + (local.get $0) + ) + ) + (func $i64x2.widen_high_i32x4_s (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_s + (local.get $0) + ) + ) + (func $i64x2.widen_low_i32x4_u (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_u + (local.get $0) + ) + ) + (func $i64x2.widen_high_i32x4_u (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_u + (local.get $0) + ) + ) (func $i16x8.load8x8_u (param $0 i32) (result v128) (i16x8.load8x8_u (local.get $0) diff --git a/test/simd.wast.from-wast b/test/simd.wast.from-wast index bf71a0ac3..ab30772d3 100644 --- a/test/simd.wast.from-wast +++ b/test/simd.wast.from-wast @@ -4,10 +4,10 @@ (type $v128_=>_i32 (func (param v128) (result i32))) (type $i32_=>_v128 (func (param i32) (result v128))) (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) + (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $i32_v128_=>_none (func (param i32 v128))) (type $i32_v128_=>_v128 (func (param i32 v128) (result v128))) (type $none_=>_v128 (func (result v128))) - (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $v128_=>_i64 (func (param v128) (result i64))) (type $v128_=>_f32 (func (param v128) (result f32))) (type $v128_=>_f64 (func (param v128) (result f64))) @@ -333,6 +333,12 @@ (local.get $1) ) ) + (func $i64x2.eq (param $0 v128) (param $1 v128) (result v128) + (i64x2.eq + (local.get $0) + (local.get $1) + ) + ) (func $f32x4.eq (param $0 v128) (param $1 v128) (result v128) (f32x4.eq (local.get $0) @@ -441,6 +447,34 @@ (local.get $2) ) ) + (func $v8x16.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v8x16.signselect + 
(local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v16x8.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v16x8.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v32x4.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v32x4.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v64x2.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v64x2.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) (func $v128.load8_lane (param $0 i32) (param $1 v128) (result v128) (v128.load8_lane 0 (local.get $0) @@ -920,6 +954,11 @@ (local.get $0) ) ) + (func $i64x2.bitmask (param $0 v128) (result i32) + (i64x2.bitmask + (local.get $0) + ) + ) (func $i64x2.shl (param $0 v128) (param $1 i32) (result v128) (i64x2.shl (local.get $0) @@ -1298,6 +1337,26 @@ (local.get $0) ) ) + (func $i64x2.widen_low_i32x4_s (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_s + (local.get $0) + ) + ) + (func $i64x2.widen_high_i32x4_s (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_s + (local.get $0) + ) + ) + (func $i64x2.widen_low_i32x4_u (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_u + (local.get $0) + ) + ) + (func $i64x2.widen_high_i32x4_u (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_u + (local.get $0) + ) + ) (func $i16x8.load8x8_u (param $0 i32) (result v128) (i16x8.load8x8_u (local.get $0) diff --git a/test/simd.wast.fromBinary b/test/simd.wast.fromBinary index 94435e153..935de0c09 100644 --- a/test/simd.wast.fromBinary +++ b/test/simd.wast.fromBinary @@ -4,10 +4,10 @@ (type $v128_=>_i32 (func (param v128) (result i32))) (type $i32_=>_v128 (func (param i32) (result v128))) (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) + (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $i32_v128_=>_none (func (param i32 v128))) (type $i32_v128_=>_v128 (func (param i32 v128) 
(result v128))) (type $none_=>_v128 (func (result v128))) - (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $v128_=>_i64 (func (param v128) (result i64))) (type $v128_=>_f32 (func (param v128) (result f32))) (type $v128_=>_f64 (func (param v128) (result f64))) @@ -333,6 +333,12 @@ (local.get $1) ) ) + (func $i64x2.eq (param $0 v128) (param $1 v128) (result v128) + (i64x2.eq + (local.get $0) + (local.get $1) + ) + ) (func $f32x4.eq (param $0 v128) (param $1 v128) (result v128) (f32x4.eq (local.get $0) @@ -441,6 +447,34 @@ (local.get $2) ) ) + (func $v8x16.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v8x16.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v16x8.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v16x8.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v32x4.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v32x4.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $v64x2.signselect (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v64x2.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) (func $v128.load8_lane (param $0 i32) (param $1 v128) (result v128) (v128.load8_lane 0 (local.get $0) @@ -920,6 +954,11 @@ (local.get $0) ) ) + (func $i64x2.bitmask (param $0 v128) (result i32) + (i64x2.bitmask + (local.get $0) + ) + ) (func $i64x2.shl (param $0 v128) (param $1 i32) (result v128) (i64x2.shl (local.get $0) @@ -1298,6 +1337,26 @@ (local.get $0) ) ) + (func $i64x2.widen_low_i32x4_s (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_s + (local.get $0) + ) + ) + (func $i64x2.widen_high_i32x4_s (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_s + (local.get $0) + ) + ) + (func $i64x2.widen_low_i32x4_u (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_u + (local.get $0) + ) + ) + (func $i64x2.widen_high_i32x4_u 
(param $0 v128) (result v128) + (i64x2.widen_high_i32x4_u + (local.get $0) + ) + ) (func $i16x8.load8x8_u (param $0 i32) (result v128) (i16x8.load8x8_u (local.get $0) diff --git a/test/simd.wast.fromBinary.noDebugInfo b/test/simd.wast.fromBinary.noDebugInfo index 25d3ef4e2..42543f448 100644 --- a/test/simd.wast.fromBinary.noDebugInfo +++ b/test/simd.wast.fromBinary.noDebugInfo @@ -4,10 +4,10 @@ (type $v128_=>_i32 (func (param v128) (result i32))) (type $i32_=>_v128 (func (param i32) (result v128))) (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) + (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $i32_v128_=>_none (func (param i32 v128))) (type $i32_v128_=>_v128 (func (param i32 v128) (result v128))) (type $none_=>_v128 (func (result v128))) - (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $v128_=>_i64 (func (param v128) (result i64))) (type $v128_=>_f32 (func (param v128) (result f32))) (type $v128_=>_f64 (func (param v128) (result f64))) @@ -334,1011 +334,1070 @@ ) ) (func $58 (param $0 v128) (param $1 v128) (result v128) - (f32x4.eq + (i64x2.eq (local.get $0) (local.get $1) ) ) (func $59 (param $0 v128) (param $1 v128) (result v128) - (f32x4.ne + (f32x4.eq (local.get $0) (local.get $1) ) ) (func $60 (param $0 v128) (param $1 v128) (result v128) - (f32x4.lt + (f32x4.ne (local.get $0) (local.get $1) ) ) (func $61 (param $0 v128) (param $1 v128) (result v128) - (f32x4.gt + (f32x4.lt (local.get $0) (local.get $1) ) ) (func $62 (param $0 v128) (param $1 v128) (result v128) - (f32x4.le + (f32x4.gt (local.get $0) (local.get $1) ) ) (func $63 (param $0 v128) (param $1 v128) (result v128) - (f32x4.ge + (f32x4.le (local.get $0) (local.get $1) ) ) (func $64 (param $0 v128) (param $1 v128) (result v128) - (f64x2.eq + (f32x4.ge (local.get $0) (local.get $1) ) ) (func $65 (param $0 v128) (param $1 v128) (result v128) - (f64x2.ne + (f64x2.eq (local.get $0) (local.get $1) ) ) (func $66 (param $0 
v128) (param $1 v128) (result v128) - (f64x2.lt + (f64x2.ne (local.get $0) (local.get $1) ) ) (func $67 (param $0 v128) (param $1 v128) (result v128) - (f64x2.gt + (f64x2.lt (local.get $0) (local.get $1) ) ) (func $68 (param $0 v128) (param $1 v128) (result v128) - (f64x2.le + (f64x2.gt (local.get $0) (local.get $1) ) ) (func $69 (param $0 v128) (param $1 v128) (result v128) + (f64x2.le + (local.get $0) + (local.get $1) + ) + ) + (func $70 (param $0 v128) (param $1 v128) (result v128) (f64x2.ge (local.get $0) (local.get $1) ) ) - (func $70 (param $0 v128) (result v128) + (func $71 (param $0 v128) (result v128) (v128.not (local.get $0) ) ) - (func $71 (param $0 v128) (param $1 v128) (result v128) + (func $72 (param $0 v128) (param $1 v128) (result v128) (v128.and (local.get $0) (local.get $1) ) ) - (func $72 (param $0 v128) (param $1 v128) (result v128) + (func $73 (param $0 v128) (param $1 v128) (result v128) (v128.or (local.get $0) (local.get $1) ) ) - (func $73 (param $0 v128) (param $1 v128) (result v128) + (func $74 (param $0 v128) (param $1 v128) (result v128) (v128.xor (local.get $0) (local.get $1) ) ) - (func $74 (param $0 v128) (param $1 v128) (result v128) + (func $75 (param $0 v128) (param $1 v128) (result v128) (v128.andnot (local.get $0) (local.get $1) ) ) - (func $75 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (func $76 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (v128.bitselect (local.get $0) (local.get $1) (local.get $2) ) ) - (func $76 (param $0 i32) (param $1 v128) (result v128) + (func $77 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v8x16.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $78 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v16x8.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $79 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v32x4.signselect + (local.get $0) + (local.get $1) + 
(local.get $2) + ) + ) + (func $80 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v64x2.signselect + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (func $81 (param $0 i32) (param $1 v128) (result v128) (v128.load8_lane 0 (local.get $0) (local.get $1) ) ) - (func $77 (param $0 i32) (param $1 v128) (result v128) + (func $82 (param $0 i32) (param $1 v128) (result v128) (v128.load16_lane 0 (local.get $0) (local.get $1) ) ) - (func $78 (param $0 i32) (param $1 v128) (result v128) + (func $83 (param $0 i32) (param $1 v128) (result v128) (v128.load32_lane 0 (local.get $0) (local.get $1) ) ) - (func $79 (param $0 i32) (param $1 v128) (result v128) + (func $84 (param $0 i32) (param $1 v128) (result v128) (v128.load64_lane 0 (local.get $0) (local.get $1) ) ) - (func $80 (param $0 i32) (param $1 v128) (result v128) + (func $85 (param $0 i32) (param $1 v128) (result v128) (v128.load64_lane align=1 0 (local.get $0) (local.get $1) ) ) - (func $81 (param $0 i32) (param $1 v128) (result v128) + (func $86 (param $0 i32) (param $1 v128) (result v128) (v128.load64_lane offset=32 0 (local.get $0) (local.get $1) ) ) - (func $82 (param $0 i32) (param $1 v128) (result v128) + (func $87 (param $0 i32) (param $1 v128) (result v128) (v128.load64_lane offset=32 align=1 0 (local.get $0) (local.get $1) ) ) - (func $83 (param $0 i32) (param $1 v128) + (func $88 (param $0 i32) (param $1 v128) (v128.store8_lane 0 (local.get $0) (local.get $1) ) ) - (func $84 (param $0 i32) (param $1 v128) + (func $89 (param $0 i32) (param $1 v128) (v128.store16_lane 0 (local.get $0) (local.get $1) ) ) - (func $85 (param $0 i32) (param $1 v128) + (func $90 (param $0 i32) (param $1 v128) (v128.store32_lane 0 (local.get $0) (local.get $1) ) ) - (func $86 (param $0 i32) (param $1 v128) + (func $91 (param $0 i32) (param $1 v128) (v128.store64_lane 0 (local.get $0) (local.get $1) ) ) - (func $87 (param $0 i32) (param $1 v128) + (func $92 (param $0 i32) (param $1 v128) (v128.store64_lane 
align=1 0 (local.get $0) (local.get $1) ) ) - (func $88 (param $0 i32) (param $1 v128) + (func $93 (param $0 i32) (param $1 v128) (v128.store64_lane offset=32 0 (local.get $0) (local.get $1) ) ) - (func $89 (param $0 i32) (param $1 v128) + (func $94 (param $0 i32) (param $1 v128) (v128.store64_lane offset=32 align=1 0 (local.get $0) (local.get $1) ) ) - (func $90 (param $0 v128) (result v128) + (func $95 (param $0 v128) (result v128) (i8x16.popcnt (local.get $0) ) ) - (func $91 (param $0 v128) (result v128) + (func $96 (param $0 v128) (result v128) (i8x16.abs (local.get $0) ) ) - (func $92 (param $0 v128) (result v128) + (func $97 (param $0 v128) (result v128) (i8x16.neg (local.get $0) ) ) - (func $93 (param $0 v128) (result i32) + (func $98 (param $0 v128) (result i32) (i8x16.any_true (local.get $0) ) ) - (func $94 (param $0 v128) (result i32) + (func $99 (param $0 v128) (result i32) (i8x16.all_true (local.get $0) ) ) - (func $95 (param $0 v128) (result i32) + (func $100 (param $0 v128) (result i32) (i8x16.bitmask (local.get $0) ) ) - (func $96 (param $0 v128) (param $1 i32) (result v128) + (func $101 (param $0 v128) (param $1 i32) (result v128) (i8x16.shl (local.get $0) (local.get $1) ) ) - (func $97 (param $0 v128) (param $1 i32) (result v128) + (func $102 (param $0 v128) (param $1 i32) (result v128) (i8x16.shr_s (local.get $0) (local.get $1) ) ) - (func $98 (param $0 v128) (param $1 i32) (result v128) + (func $103 (param $0 v128) (param $1 i32) (result v128) (i8x16.shr_u (local.get $0) (local.get $1) ) ) - (func $99 (param $0 v128) (param $1 v128) (result v128) + (func $104 (param $0 v128) (param $1 v128) (result v128) (i8x16.add (local.get $0) (local.get $1) ) ) - (func $100 (param $0 v128) (param $1 v128) (result v128) + (func $105 (param $0 v128) (param $1 v128) (result v128) (i8x16.add_saturate_s (local.get $0) (local.get $1) ) ) - (func $101 (param $0 v128) (param $1 v128) (result v128) + (func $106 (param $0 v128) (param $1 v128) (result v128) 
(i8x16.add_saturate_u (local.get $0) (local.get $1) ) ) - (func $102 (param $0 v128) (param $1 v128) (result v128) + (func $107 (param $0 v128) (param $1 v128) (result v128) (i8x16.sub (local.get $0) (local.get $1) ) ) - (func $103 (param $0 v128) (param $1 v128) (result v128) + (func $108 (param $0 v128) (param $1 v128) (result v128) (i8x16.sub_saturate_s (local.get $0) (local.get $1) ) ) - (func $104 (param $0 v128) (param $1 v128) (result v128) + (func $109 (param $0 v128) (param $1 v128) (result v128) (i8x16.sub_saturate_u (local.get $0) (local.get $1) ) ) - (func $105 (param $0 v128) (param $1 v128) (result v128) + (func $110 (param $0 v128) (param $1 v128) (result v128) (i8x16.mul (local.get $0) (local.get $1) ) ) - (func $106 (param $0 v128) (param $1 v128) (result v128) + (func $111 (param $0 v128) (param $1 v128) (result v128) (i8x16.min_s (local.get $0) (local.get $1) ) ) - (func $107 (param $0 v128) (param $1 v128) (result v128) + (func $112 (param $0 v128) (param $1 v128) (result v128) (i8x16.min_u (local.get $0) (local.get $1) ) ) - (func $108 (param $0 v128) (param $1 v128) (result v128) + (func $113 (param $0 v128) (param $1 v128) (result v128) (i8x16.max_s (local.get $0) (local.get $1) ) ) - (func $109 (param $0 v128) (param $1 v128) (result v128) + (func $114 (param $0 v128) (param $1 v128) (result v128) (i8x16.max_u (local.get $0) (local.get $1) ) ) - (func $110 (param $0 v128) (param $1 v128) (result v128) + (func $115 (param $0 v128) (param $1 v128) (result v128) (i8x16.avgr_u (local.get $0) (local.get $1) ) ) - (func $111 (param $0 v128) (result v128) + (func $116 (param $0 v128) (result v128) (i16x8.abs (local.get $0) ) ) - (func $112 (param $0 v128) (result v128) + (func $117 (param $0 v128) (result v128) (i16x8.neg (local.get $0) ) ) - (func $113 (param $0 v128) (result i32) + (func $118 (param $0 v128) (result i32) (i16x8.any_true (local.get $0) ) ) - (func $114 (param $0 v128) (result i32) + (func $119 (param $0 v128) (result i32) 
(i16x8.all_true (local.get $0) ) ) - (func $115 (param $0 v128) (result i32) + (func $120 (param $0 v128) (result i32) (i16x8.bitmask (local.get $0) ) ) - (func $116 (param $0 v128) (param $1 i32) (result v128) + (func $121 (param $0 v128) (param $1 i32) (result v128) (i16x8.shl (local.get $0) (local.get $1) ) ) - (func $117 (param $0 v128) (param $1 i32) (result v128) + (func $122 (param $0 v128) (param $1 i32) (result v128) (i16x8.shr_s (local.get $0) (local.get $1) ) ) - (func $118 (param $0 v128) (param $1 i32) (result v128) + (func $123 (param $0 v128) (param $1 i32) (result v128) (i16x8.shr_u (local.get $0) (local.get $1) ) ) - (func $119 (param $0 v128) (param $1 v128) (result v128) + (func $124 (param $0 v128) (param $1 v128) (result v128) (i16x8.add (local.get $0) (local.get $1) ) ) - (func $120 (param $0 v128) (param $1 v128) (result v128) + (func $125 (param $0 v128) (param $1 v128) (result v128) (i16x8.add_saturate_s (local.get $0) (local.get $1) ) ) - (func $121 (param $0 v128) (param $1 v128) (result v128) + (func $126 (param $0 v128) (param $1 v128) (result v128) (i16x8.add_saturate_u (local.get $0) (local.get $1) ) ) - (func $122 (param $0 v128) (param $1 v128) (result v128) + (func $127 (param $0 v128) (param $1 v128) (result v128) (i16x8.sub (local.get $0) (local.get $1) ) ) - (func $123 (param $0 v128) (param $1 v128) (result v128) + (func $128 (param $0 v128) (param $1 v128) (result v128) (i16x8.sub_saturate_s (local.get $0) (local.get $1) ) ) - (func $124 (param $0 v128) (param $1 v128) (result v128) + (func $129 (param $0 v128) (param $1 v128) (result v128) (i16x8.sub_saturate_u (local.get $0) (local.get $1) ) ) - (func $125 (param $0 v128) (param $1 v128) (result v128) + (func $130 (param $0 v128) (param $1 v128) (result v128) (i16x8.mul (local.get $0) (local.get $1) ) ) - (func $126 (param $0 v128) (param $1 v128) (result v128) + (func $131 (param $0 v128) (param $1 v128) (result v128) (i16x8.min_s (local.get $0) (local.get $1) ) ) - (func 
$127 (param $0 v128) (param $1 v128) (result v128) + (func $132 (param $0 v128) (param $1 v128) (result v128) (i16x8.min_u (local.get $0) (local.get $1) ) ) - (func $128 (param $0 v128) (param $1 v128) (result v128) + (func $133 (param $0 v128) (param $1 v128) (result v128) (i16x8.max_s (local.get $0) (local.get $1) ) ) - (func $129 (param $0 v128) (param $1 v128) (result v128) + (func $134 (param $0 v128) (param $1 v128) (result v128) (i16x8.max_u (local.get $0) (local.get $1) ) ) - (func $130 (param $0 v128) (param $1 v128) (result v128) + (func $135 (param $0 v128) (param $1 v128) (result v128) (i16x8.avgr_u (local.get $0) (local.get $1) ) ) - (func $131 (param $0 v128) (param $1 v128) (result v128) + (func $136 (param $0 v128) (param $1 v128) (result v128) (i16x8.q15mulr_sat_s (local.get $0) (local.get $1) ) ) - (func $132 (param $0 v128) (param $1 v128) (result v128) + (func $137 (param $0 v128) (param $1 v128) (result v128) (i16x8.extmul_low_i8x16_s (local.get $0) (local.get $1) ) ) - (func $133 (param $0 v128) (param $1 v128) (result v128) + (func $138 (param $0 v128) (param $1 v128) (result v128) (i16x8.extmul_high_i8x16_s (local.get $0) (local.get $1) ) ) - (func $134 (param $0 v128) (param $1 v128) (result v128) + (func $139 (param $0 v128) (param $1 v128) (result v128) (i16x8.extmul_low_i8x16_u (local.get $0) (local.get $1) ) ) - (func $135 (param $0 v128) (param $1 v128) (result v128) + (func $140 (param $0 v128) (param $1 v128) (result v128) (i16x8.extmul_high_i8x16_u (local.get $0) (local.get $1) ) ) - (func $136 (param $0 v128) (result v128) + (func $141 (param $0 v128) (result v128) (i32x4.abs (local.get $0) ) ) - (func $137 (param $0 v128) (result v128) + (func $142 (param $0 v128) (result v128) (i32x4.neg (local.get $0) ) ) - (func $138 (param $0 v128) (result i32) + (func $143 (param $0 v128) (result i32) (i32x4.any_true (local.get $0) ) ) - (func $139 (param $0 v128) (result i32) + (func $144 (param $0 v128) (result i32) (i32x4.all_true 
(local.get $0) ) ) - (func $140 (param $0 v128) (result i32) + (func $145 (param $0 v128) (result i32) (i32x4.bitmask (local.get $0) ) ) - (func $141 (param $0 v128) (param $1 i32) (result v128) + (func $146 (param $0 v128) (param $1 i32) (result v128) (i32x4.shl (local.get $0) (local.get $1) ) ) - (func $142 (param $0 v128) (param $1 i32) (result v128) + (func $147 (param $0 v128) (param $1 i32) (result v128) (i32x4.shr_s (local.get $0) (local.get $1) ) ) - (func $143 (param $0 v128) (param $1 i32) (result v128) + (func $148 (param $0 v128) (param $1 i32) (result v128) (i32x4.shr_u (local.get $0) (local.get $1) ) ) - (func $144 (param $0 v128) (param $1 v128) (result v128) + (func $149 (param $0 v128) (param $1 v128) (result v128) (i32x4.add (local.get $0) (local.get $1) ) ) - (func $145 (param $0 v128) (param $1 v128) (result v128) + (func $150 (param $0 v128) (param $1 v128) (result v128) (i32x4.sub (local.get $0) (local.get $1) ) ) - (func $146 (param $0 v128) (param $1 v128) (result v128) + (func $151 (param $0 v128) (param $1 v128) (result v128) (i32x4.mul (local.get $0) (local.get $1) ) ) - (func $147 (param $0 v128) (param $1 v128) (result v128) + (func $152 (param $0 v128) (param $1 v128) (result v128) (i32x4.min_s (local.get $0) (local.get $1) ) ) - (func $148 (param $0 v128) (param $1 v128) (result v128) + (func $153 (param $0 v128) (param $1 v128) (result v128) (i32x4.min_u (local.get $0) (local.get $1) ) ) - (func $149 (param $0 v128) (param $1 v128) (result v128) + (func $154 (param $0 v128) (param $1 v128) (result v128) (i32x4.max_s (local.get $0) (local.get $1) ) ) - (func $150 (param $0 v128) (param $1 v128) (result v128) + (func $155 (param $0 v128) (param $1 v128) (result v128) (i32x4.max_u (local.get $0) (local.get $1) ) ) - (func $151 (param $0 v128) (param $1 v128) (result v128) + (func $156 (param $0 v128) (param $1 v128) (result v128) (i32x4.dot_i16x8_s (local.get $0) (local.get $1) ) ) - (func $152 (param $0 v128) (param $1 v128) (result 
v128) + (func $157 (param $0 v128) (param $1 v128) (result v128) (i32x4.extmul_low_i16x8_s (local.get $0) (local.get $1) ) ) - (func $153 (param $0 v128) (param $1 v128) (result v128) + (func $158 (param $0 v128) (param $1 v128) (result v128) (i32x4.extmul_high_i16x8_s (local.get $0) (local.get $1) ) ) - (func $154 (param $0 v128) (param $1 v128) (result v128) + (func $159 (param $0 v128) (param $1 v128) (result v128) (i32x4.extmul_low_i16x8_u (local.get $0) (local.get $1) ) ) - (func $155 (param $0 v128) (param $1 v128) (result v128) + (func $160 (param $0 v128) (param $1 v128) (result v128) (i32x4.extmul_high_i16x8_u (local.get $0) (local.get $1) ) ) - (func $156 (param $0 v128) (result v128) + (func $161 (param $0 v128) (result v128) (i64x2.neg (local.get $0) ) ) - (func $157 (param $0 v128) (result i32) + (func $162 (param $0 v128) (result i32) (i64x2.any_true (local.get $0) ) ) - (func $158 (param $0 v128) (result i32) + (func $163 (param $0 v128) (result i32) (i64x2.all_true (local.get $0) ) ) - (func $159 (param $0 v128) (param $1 i32) (result v128) + (func $164 (param $0 v128) (result i32) + (i64x2.bitmask + (local.get $0) + ) + ) + (func $165 (param $0 v128) (param $1 i32) (result v128) (i64x2.shl (local.get $0) (local.get $1) ) ) - (func $160 (param $0 v128) (param $1 i32) (result v128) + (func $166 (param $0 v128) (param $1 i32) (result v128) (i64x2.shr_s (local.get $0) (local.get $1) ) ) - (func $161 (param $0 v128) (param $1 i32) (result v128) + (func $167 (param $0 v128) (param $1 i32) (result v128) (i64x2.shr_u (local.get $0) (local.get $1) ) ) - (func $162 (param $0 v128) (param $1 v128) (result v128) + (func $168 (param $0 v128) (param $1 v128) (result v128) (i64x2.add (local.get $0) (local.get $1) ) ) - (func $163 (param $0 v128) (param $1 v128) (result v128) + (func $169 (param $0 v128) (param $1 v128) (result v128) (i64x2.sub (local.get $0) (local.get $1) ) ) - (func $164 (param $0 v128) (param $1 v128) (result v128) + (func $170 (param $0 v128) 
(param $1 v128) (result v128) (i64x2.mul (local.get $0) (local.get $1) ) ) - (func $165 (param $0 v128) (param $1 v128) (result v128) + (func $171 (param $0 v128) (param $1 v128) (result v128) (i64x2.extmul_low_i32x4_s (local.get $0) (local.get $1) ) ) - (func $166 (param $0 v128) (param $1 v128) (result v128) + (func $172 (param $0 v128) (param $1 v128) (result v128) (i64x2.extmul_high_i32x4_s (local.get $0) (local.get $1) ) ) - (func $167 (param $0 v128) (param $1 v128) (result v128) + (func $173 (param $0 v128) (param $1 v128) (result v128) (i64x2.extmul_low_i32x4_u (local.get $0) (local.get $1) ) ) - (func $168 (param $0 v128) (param $1 v128) (result v128) + (func $174 (param $0 v128) (param $1 v128) (result v128) (i64x2.extmul_high_i32x4_u (local.get $0) (local.get $1) ) ) - (func $169 (param $0 v128) (param $1 v128) (result v128) + (func $175 (param $0 v128) (param $1 v128) (result v128) (f32x4.add (local.get $0) (local.get $1) ) ) - (func $170 (param $0 v128) (param $1 v128) (result v128) + (func $176 (param $0 v128) (param $1 v128) (result v128) (f32x4.sub (local.get $0) (local.get $1) ) ) - (func $171 (param $0 v128) (param $1 v128) (result v128) + (func $177 (param $0 v128) (param $1 v128) (result v128) (f32x4.mul (local.get $0) (local.get $1) ) ) - (func $172 (param $0 v128) (param $1 v128) (result v128) + (func $178 (param $0 v128) (param $1 v128) (result v128) (f32x4.div (local.get $0) (local.get $1) ) ) - (func $173 (param $0 v128) (param $1 v128) (result v128) + (func $179 (param $0 v128) (param $1 v128) (result v128) (f32x4.min (local.get $0) (local.get $1) ) ) - (func $174 (param $0 v128) (param $1 v128) (result v128) + (func $180 (param $0 v128) (param $1 v128) (result v128) (f32x4.max (local.get $0) (local.get $1) ) ) - (func $175 (param $0 v128) (param $1 v128) (result v128) + (func $181 (param $0 v128) (param $1 v128) (result v128) (f32x4.pmin (local.get $0) (local.get $1) ) ) - (func $176 (param $0 v128) (param $1 v128) (result v128) + (func 
$182 (param $0 v128) (param $1 v128) (result v128) (f32x4.pmax (local.get $0) (local.get $1) ) ) - (func $177 (param $0 v128) (result v128) + (func $183 (param $0 v128) (result v128) (f32x4.ceil (local.get $0) ) ) - (func $178 (param $0 v128) (result v128) + (func $184 (param $0 v128) (result v128) (f32x4.floor (local.get $0) ) ) - (func $179 (param $0 v128) (result v128) + (func $185 (param $0 v128) (result v128) (f32x4.trunc (local.get $0) ) ) - (func $180 (param $0 v128) (result v128) + (func $186 (param $0 v128) (result v128) (f32x4.nearest (local.get $0) ) ) - (func $181 (param $0 v128) (result v128) + (func $187 (param $0 v128) (result v128) (f32x4.abs (local.get $0) ) ) - (func $182 (param $0 v128) (result v128) + (func $188 (param $0 v128) (result v128) (f32x4.neg (local.get $0) ) ) - (func $183 (param $0 v128) (result v128) + (func $189 (param $0 v128) (result v128) (f32x4.sqrt (local.get $0) ) ) - (func $184 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (func $190 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f32x4.qfma (local.get $0) (local.get $1) (local.get $2) ) ) - (func $185 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (func $191 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f32x4.qfms (local.get $0) (local.get $1) (local.get $2) ) ) - (func $186 (param $0 v128) (param $1 v128) (result v128) + (func $192 (param $0 v128) (param $1 v128) (result v128) (f64x2.add (local.get $0) (local.get $1) ) ) - (func $187 (param $0 v128) (param $1 v128) (result v128) + (func $193 (param $0 v128) (param $1 v128) (result v128) (f64x2.sub (local.get $0) (local.get $1) ) ) - (func $188 (param $0 v128) (param $1 v128) (result v128) + (func $194 (param $0 v128) (param $1 v128) (result v128) (f64x2.mul (local.get $0) (local.get $1) ) ) - (func $189 (param $0 v128) (param $1 v128) (result v128) + (func $195 (param $0 v128) (param $1 v128) (result v128) (f64x2.div (local.get $0) (local.get $1) ) ) - 
(func $190 (param $0 v128) (param $1 v128) (result v128) + (func $196 (param $0 v128) (param $1 v128) (result v128) (f64x2.min (local.get $0) (local.get $1) ) ) - (func $191 (param $0 v128) (param $1 v128) (result v128) + (func $197 (param $0 v128) (param $1 v128) (result v128) (f64x2.max (local.get $0) (local.get $1) ) ) - (func $192 (param $0 v128) (param $1 v128) (result v128) + (func $198 (param $0 v128) (param $1 v128) (result v128) (f64x2.pmin (local.get $0) (local.get $1) ) ) - (func $193 (param $0 v128) (param $1 v128) (result v128) + (func $199 (param $0 v128) (param $1 v128) (result v128) (f64x2.pmax (local.get $0) (local.get $1) ) ) - (func $194 (param $0 v128) (result v128) + (func $200 (param $0 v128) (result v128) (f64x2.ceil (local.get $0) ) ) - (func $195 (param $0 v128) (result v128) + (func $201 (param $0 v128) (result v128) (f64x2.floor (local.get $0) ) ) - (func $196 (param $0 v128) (result v128) + (func $202 (param $0 v128) (result v128) (f64x2.trunc (local.get $0) ) ) - (func $197 (param $0 v128) (result v128) + (func $203 (param $0 v128) (result v128) (f64x2.nearest (local.get $0) ) ) - (func $198 (param $0 v128) (result v128) + (func $204 (param $0 v128) (result v128) (f64x2.abs (local.get $0) ) ) - (func $199 (param $0 v128) (result v128) + (func $205 (param $0 v128) (result v128) (f64x2.neg (local.get $0) ) ) - (func $200 (param $0 v128) (result v128) + (func $206 (param $0 v128) (result v128) (f64x2.sqrt (local.get $0) ) ) - (func $201 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (func $207 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f64x2.qfma (local.get $0) (local.get $1) (local.get $2) ) ) - (func $202 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (func $208 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f64x2.qfms (local.get $0) (local.get $1) (local.get $2) ) ) - (func $203 (param $0 v128) (result v128) + (func $209 (param $0 v128) (result v128) 
(i32x4.trunc_sat_f32x4_s (local.get $0) ) ) - (func $204 (param $0 v128) (result v128) + (func $210 (param $0 v128) (result v128) (i32x4.trunc_sat_f32x4_u (local.get $0) ) ) - (func $205 (param $0 v128) (result v128) + (func $211 (param $0 v128) (result v128) (i64x2.trunc_sat_f64x2_s (local.get $0) ) ) - (func $206 (param $0 v128) (result v128) + (func $212 (param $0 v128) (result v128) (i64x2.trunc_sat_f64x2_u (local.get $0) ) ) - (func $207 (param $0 v128) (result v128) + (func $213 (param $0 v128) (result v128) (f32x4.convert_i32x4_s (local.get $0) ) ) - (func $208 (param $0 v128) (result v128) + (func $214 (param $0 v128) (result v128) (f32x4.convert_i32x4_u (local.get $0) ) ) - (func $209 (param $0 v128) (result v128) + (func $215 (param $0 v128) (result v128) (f64x2.convert_i64x2_s (local.get $0) ) ) - (func $210 (param $0 v128) (result v128) + (func $216 (param $0 v128) (result v128) (f64x2.convert_i64x2_u (local.get $0) ) ) - (func $211 (param $0 i32) (result v128) + (func $217 (param $0 i32) (result v128) (v8x16.load_splat (local.get $0) ) ) - (func $212 (param $0 i32) (result v128) + (func $218 (param $0 i32) (result v128) (v16x8.load_splat (local.get $0) ) ) - (func $213 (param $0 i32) (result v128) + (func $219 (param $0 i32) (result v128) (v32x4.load_splat (local.get $0) ) ) - (func $214 (param $0 i32) (result v128) + (func $220 (param $0 i32) (result v128) (v64x2.load_splat (local.get $0) ) ) - (func $215 (param $0 v128) (param $1 v128) (result v128) + (func $221 (param $0 v128) (param $1 v128) (result v128) (i8x16.narrow_i16x8_s (local.get $0) (local.get $1) ) ) - (func $216 (param $0 v128) (param $1 v128) (result v128) + (func $222 (param $0 v128) (param $1 v128) (result v128) (i8x16.narrow_i16x8_u (local.get $0) (local.get $1) ) ) - (func $217 (param $0 v128) (param $1 v128) (result v128) + (func $223 (param $0 v128) (param $1 v128) (result v128) (i16x8.narrow_i32x4_s (local.get $0) (local.get $1) ) ) - (func $218 (param $0 v128) (param $1 v128) 
(result v128) + (func $224 (param $0 v128) (param $1 v128) (result v128) (i16x8.narrow_i32x4_u (local.get $0) (local.get $1) ) ) - (func $219 (param $0 v128) (result v128) + (func $225 (param $0 v128) (result v128) (i16x8.widen_low_i8x16_s (local.get $0) ) ) - (func $220 (param $0 v128) (result v128) + (func $226 (param $0 v128) (result v128) (i16x8.widen_high_i8x16_s (local.get $0) ) ) - (func $221 (param $0 v128) (result v128) + (func $227 (param $0 v128) (result v128) (i16x8.widen_low_i8x16_u (local.get $0) ) ) - (func $222 (param $0 v128) (result v128) + (func $228 (param $0 v128) (result v128) (i16x8.widen_high_i8x16_u (local.get $0) ) ) - (func $223 (param $0 v128) (result v128) + (func $229 (param $0 v128) (result v128) (i32x4.widen_low_i16x8_s (local.get $0) ) ) - (func $224 (param $0 v128) (result v128) + (func $230 (param $0 v128) (result v128) (i32x4.widen_high_i16x8_s (local.get $0) ) ) - (func $225 (param $0 v128) (result v128) + (func $231 (param $0 v128) (result v128) (i32x4.widen_low_i16x8_u (local.get $0) ) ) - (func $226 (param $0 v128) (result v128) + (func $232 (param $0 v128) (result v128) (i32x4.widen_high_i16x8_u (local.get $0) ) ) - (func $227 (param $0 i32) (result v128) + (func $233 (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_s + (local.get $0) + ) + ) + (func $234 (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_s + (local.get $0) + ) + ) + (func $235 (param $0 v128) (result v128) + (i64x2.widen_low_i32x4_u + (local.get $0) + ) + ) + (func $236 (param $0 v128) (result v128) + (i64x2.widen_high_i32x4_u + (local.get $0) + ) + ) + (func $237 (param $0 i32) (result v128) (i16x8.load8x8_u (local.get $0) ) ) - (func $228 (param $0 i32) (result v128) + (func $238 (param $0 i32) (result v128) (i16x8.load8x8_s (local.get $0) ) ) - (func $229 (param $0 i32) (result v128) + (func $239 (param $0 i32) (result v128) (i32x4.load16x4_s (local.get $0) ) ) - (func $230 (param $0 i32) (result v128) + (func $240 (param $0 i32) (result v128) 
(i32x4.load16x4_u (local.get $0) ) ) - (func $231 (param $0 i32) (result v128) + (func $241 (param $0 i32) (result v128) (i64x2.load32x2_s (local.get $0) ) ) - (func $232 (param $0 i32) (result v128) + (func $242 (param $0 i32) (result v128) (i64x2.load32x2_u (local.get $0) ) ) - (func $233 (param $0 i32) (result v128) + (func $243 (param $0 i32) (result v128) (v128.load32_zero (local.get $0) ) ) - (func $234 (param $0 i32) (result v128) + (func $244 (param $0 i32) (result v128) (v128.load64_zero (local.get $0) ) ) - (func $235 (param $0 v128) (param $1 v128) (result v128) + (func $245 (param $0 v128) (param $1 v128) (result v128) (v8x16.swizzle (local.get $0) (local.get $1) diff --git a/test/spec/simd.wast b/test/spec/simd.wast index 332d48d01..51bea4deb 100644 --- a/test/spec/simd.wast +++ b/test/spec/simd.wast @@ -84,6 +84,7 @@ (func (export "i32x4.le_u") (param $0 v128) (param $1 v128) (result v128) (i32x4.le_u (local.get $0) (local.get $1))) (func (export "i32x4.ge_s") (param $0 v128) (param $1 v128) (result v128) (i32x4.ge_s (local.get $0) (local.get $1))) (func (export "i32x4.ge_u") (param $0 v128) (param $1 v128) (result v128) (i32x4.ge_u (local.get $0) (local.get $1))) + (func (export "i64x2.eq") (param $0 v128) (param $1 v128) (result v128) (i64x2.eq (local.get $0) (local.get $1))) (func (export "f32x4.eq") (param $0 v128) (param $1 v128) (result v128) (f32x4.eq (local.get $0) (local.get $1))) (func (export "f32x4.ne") (param $0 v128) (param $1 v128) (result v128) (f32x4.ne (local.get $0) (local.get $1))) (func (export "f32x4.lt") (param $0 v128) (param $1 v128) (result v128) (f32x4.lt (local.get $0) (local.get $1))) @@ -104,6 +105,18 @@ (func (export "v128.bitselect") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (v128.bitselect (local.get $0) (local.get $1) (local.get $2)) ) + (func (export "v8x16.signselect") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v8x16.signselect (local.get $0) (local.get $1) (local.get $2)) + 
) + (func (export "v16x8.signselect") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v16x8.signselect (local.get $0) (local.get $1) (local.get $2)) + ) + (func (export "v32x4.signselect") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v32x4.signselect (local.get $0) (local.get $1) (local.get $2)) + ) + (func (export "v64x2.signselect") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) + (v64x2.signselect (local.get $0) (local.get $1) (local.get $2)) + ) (func (export "v128.load8_lane") (param $0 i32) (param $1 v128) (result v128) (v128.load8_lane 0 (local.get $0) (local.get $1))) (func (export "v128.load16_lane") (param $0 i32) (param $1 v128) (result v128) (v128.load16_lane 0 (local.get $0) (local.get $1))) (func (export "v128.load32_lane") (param $0 i32) (param $1 v128) (result v128) (v128.load32_lane 0 (local.get $0) (local.get $1))) @@ -174,6 +187,7 @@ (func (export "i64x2.neg") (param $0 v128) (result v128) (i64x2.neg (local.get $0))) (func (export "i64x2.any_true") (param $0 v128) (result i32) (i64x2.any_true (local.get $0))) (func (export "i64x2.all_true") (param $0 v128) (result i32) (i64x2.all_true (local.get $0))) + (func (export "i64x2.bitmask") (param $0 v128) (result i32) (i64x2.bitmask (local.get $0))) (func (export "i64x2.shl") (param $0 v128) (param $1 i32) (result v128) (i64x2.shl (local.get $0) (local.get $1))) (func (export "i64x2.shr_s") (param $0 v128) (param $1 i32) (result v128) (i64x2.shr_s (local.get $0) (local.get $1))) (func (export "i64x2.shr_u") (param $0 v128) (param $1 i32) (result v128) (i64x2.shr_u (local.get $0) (local.get $1))) @@ -238,6 +252,10 @@ (func (export "i32x4.widen_high_i16x8_s") (param $0 v128) (result v128) (i32x4.widen_high_i16x8_s (local.get $0))) (func (export "i32x4.widen_low_i16x8_u") (param $0 v128) (result v128) (i32x4.widen_low_i16x8_u (local.get $0))) (func (export "i32x4.widen_high_i16x8_u") (param $0 v128) (result v128) (i32x4.widen_high_i16x8_u (local.get 
$0))) + (func (export "i64x2.widen_low_i32x4_s") (param $0 v128) (result v128) (i64x2.widen_low_i32x4_s (local.get $0))) + (func (export "i64x2.widen_high_i32x4_s") (param $0 v128) (result v128) (i64x2.widen_high_i32x4_s (local.get $0))) + (func (export "i64x2.widen_low_i32x4_u") (param $0 v128) (result v128) (i64x2.widen_low_i32x4_u (local.get $0))) + (func (export "i64x2.widen_high_i32x4_u") (param $0 v128) (result v128) (i64x2.widen_high_i32x4_u (local.get $0))) (func (export "i16x8.load8x8_u") (param $0 i32) (result v128) (i16x8.load8x8_u (local.get $0))) (func (export "i16x8.load8x8_s") (param $0 i32) (result v128) (i16x8.load8x8_s (local.get $0))) (func (export "i32x4.load16x4_u") (param $0 i32) (result v128) (i32x4.load16x4_u (local.get $0))) @@ -474,6 +492,9 @@ (assert_return (invoke "i32x4.ge_s" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1)) (v128.const i32x4 -1 0 -1 0)) (assert_return (invoke "i32x4.ge_u" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1)) (v128.const i32x4 -1 -1 0 0)) +;; i64x2 comparisons +(assert_return (invoke "i64x2.eq" (v128.const i64x2 0 -1) (v128.const i64x2 -1 -1)) (v128.const i64x2 0 -1)) + ;; f32x4 comparisons (assert_return (invoke "f32x4.eq" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1)) (v128.const i32x4 -1 0 0 0)) (assert_return (invoke "f32x4.ne" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1)) (v128.const i32x4 0 -1 -1 -1)) @@ -521,6 +542,7 @@ ) (v128.const i32x4 0xABABABAB 0xAAAAAAAA 0xBBBBBBBB 0xAABBAABB) ) +;; TODO: signselect tests ;; load/store lane (assert_return (invoke "v128.load8_lane" @@ -851,6 +873,7 @@ (assert_return (invoke "i64x2.all_true" (v128.const i64x2 0 0)) (i32.const 0)) (assert_return (invoke "i64x2.all_true" (v128.const i64x2 1 0)) (i32.const 0)) (assert_return (invoke "i64x2.all_true" (v128.const i64x2 1 1)) (i32.const 1)) +;; TODO: test i64x2.bitmask (assert_return (invoke "i64x2.shl" (v128.const i64x2 1 0x8000000000000000) (i32.const 1)) (v128.const i64x2 
2 0)) (assert_return (invoke "i64x2.shl" (v128.const i64x2 1 0x8000000000000000) (i32.const 64)) (v128.const i64x2 1 0x8000000000000000)) (assert_return (invoke "i64x2.shr_s" (v128.const i64x2 1 0x8000000000000000) (i32.const 1)) (v128.const i64x2 0 0xc000000000000000)) @@ -1005,6 +1028,7 @@ (assert_return (invoke "i32x4.widen_high_i16x8_s" (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)) (v128.const i32x4 32767 -32767 16384 -16384)) (assert_return (invoke "i32x4.widen_low_i16x8_u" (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)) (v128.const i32x4 0 1 65535 32768)) (assert_return (invoke "i32x4.widen_high_i16x8_u" (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)) (v128.const i32x4 32767 32769 16384 49152)) +;; TODO: test i64x2 widens (assert_return (invoke "i16x8.load8x8_s" (i32.const 256)) (v128.const i16x8 0xff80 0xff90 0xffa0 0xffb0 0xffc0 0xffd0 0xffe0 0xfff0)) (assert_return (invoke "i16x8.load8x8_u" (i32.const 256)) (v128.const i16x8 0x0080 0x0090 0x00a0 0x00b0 0x00c0 0x00d0 0x00e0 0x00f0)) (assert_return (invoke "i32x4.load16x4_s" (i32.const 256)) (v128.const i32x4 0xffff9080 0xffffb0a0 0xffffd0c0 0xfffff0e0)) |