diff options
author | Soni L <EnderMoneyMod@gmail.com> | 2024-02-07 00:24:24 -0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-06 19:24:24 -0800 |
commit | 786f8ea1e45efdb25694f97afe0f002977cfca83 (patch) | |
tree | fde557a900b2e319f29727fc676853b08b72b286 | |
parent | 0388727827064172ba49ca222fc221a1be4a3694 (diff) | |
download | wabt-786f8ea1e45efdb25694f97afe0f002977cfca83.tar.gz wabt-786f8ea1e45efdb25694f97afe0f002977cfca83.tar.bz2 wabt-786f8ea1e45efdb25694f97afe0f002977cfca83.zip |
wasm2c: Implement big-endian SIMD (#2340)
-rw-r--r-- | src/c-writer.cc | 130 | ||||
-rw-r--r-- | src/prebuilt/wasm2c_simd_source_declarations.cc | 444 | ||||
-rw-r--r-- | src/template/wasm2c_simd.declarations.c | 222 | ||||
-rw-r--r-- | test/harness/wasm2c/simd_formatting.txt | 2 | ||||
-rwxr-xr-x | test/run-spec-wasm2c.py | 2 | ||||
-rw-r--r-- | test/spec-wasm2c-prefix.c | 37 |
6 files changed, 762 insertions, 75 deletions
diff --git a/src/c-writer.cc b/src/c-writer.cc index 24bc0057..d7a61f42 100644 --- a/src/c-writer.cc +++ b/src/c-writer.cc @@ -1297,9 +1297,11 @@ void CWriter::Write(const Const& const_) { break; } case Type::V128: { - Writef("simde_wasm_i32x4_const(0x%08x, 0x%08x, 0x%08x, 0x%08x)", - const_.vec128().u32(0), const_.vec128().u32(1), - const_.vec128().u32(2), const_.vec128().u32(3)); + Writef("v128_const(0x%02x", const_.vec128().u8(0)); + for (int i = 1; i < 16; i++) { + Writef(", 0x%02x", const_.vec128().u8(i)); + } + Write(")"); break; } @@ -4178,11 +4180,11 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I8X16NarrowI16X8S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i8x16_narrow_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_i8x16_narrow_i16x8"); break; case Opcode::I8X16NarrowI16X8U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u8x16_narrow_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_u8x16_narrow_i16x8"); break; case Opcode::I8X16Shl: @@ -4210,7 +4212,7 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I8X16Swizzle: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i8x16_swizzle"); + WritePrefixBinaryExpr(expr.opcode, "v128_i8x16_swizzle"); break; case Opcode::I16X8Add: @@ -4230,19 +4232,19 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I16X8ExtmulHighI8X16S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i16x8_extmul_high_i8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_i16x8_extmul_high_i8x16"); break; case Opcode::I16X8ExtmulHighI8X16U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u16x8_extmul_high_u8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_u16x8_extmul_high_u8x16"); break; case Opcode::I16X8ExtmulLowI8X16S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i16x8_extmul_low_i8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_i16x8_extmul_low_i8x16"); break; case Opcode::I16X8ExtmulLowI8X16U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u16x8_extmul_low_u8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_u16x8_extmul_low_u8x16"); break; case Opcode::I16X8MaxS: @@ -4266,11 +4268,11 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I16X8NarrowI32X4S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i16x8_narrow_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_i16x8_narrow_i32x4"); break; case Opcode::I16X8NarrowI32X4U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u16x8_narrow_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_u16x8_narrow_i32x4"); break; case Opcode::I16X8Q15mulrSatS: @@ -4310,19 +4312,19 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I32X4ExtmulHighI16X8S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i32x4_extmul_high_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_i32x4_extmul_high_i16x8"); break; case Opcode::I32X4ExtmulHighI16X8U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u32x4_extmul_high_u16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_u32x4_extmul_high_u16x8"); break; case Opcode::I32X4ExtmulLowI16X8S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i32x4_extmul_low_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_i32x4_extmul_low_i16x8"); break; case Opcode::I32X4ExtmulLowI16X8U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u32x4_extmul_low_u16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_u32x4_extmul_low_u16x8"); break; case Opcode::I32X4MaxS: @@ -4366,19 +4368,19 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I64X2ExtmulHighI32X4S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i64x2_extmul_high_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_i64x2_extmul_high_i32x4"); break; case Opcode::I64X2ExtmulHighI32X4U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u64x2_extmul_high_u32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_u64x2_extmul_high_u32x4"); break; case Opcode::I64X2ExtmulLowI32X4S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i64x2_extmul_low_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_i64x2_extmul_low_i32x4"); break; case Opcode::I64X2ExtmulLowI32X4U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u64x2_extmul_low_u32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_u64x2_extmul_low_u32x4"); break; case Opcode::I64X2Mul: @@ -4898,13 +4900,11 @@ void CWriter::Write(const ConvertExpr& expr) { break; case Opcode::I32X4TruncSatF64X2SZero: - WriteSimpleUnaryExpr(expr.opcode, - "simde_wasm_i32x4_trunc_sat_f64x2_zero"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_trunc_sat_f64x2_zero"); break; case Opcode::I32X4TruncSatF64X2UZero: - WriteSimpleUnaryExpr(expr.opcode, - "simde_wasm_u32x4_trunc_sat_f64x2_zero"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u32x4_trunc_sat_f64x2_zero"); break; case Opcode::F32X4ConvertI32X4S: @@ -4916,19 +4916,19 @@ void CWriter::Write(const ConvertExpr& expr) { break; case Opcode::F32X4DemoteF64X2Zero: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f32x4_demote_f64x2_zero"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f32x4_demote_f64x2_zero"); break; case Opcode::F64X2ConvertLowI32X4S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f64x2_convert_low_i32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f64x2_convert_low_i32x4"); break; case Opcode::F64X2ConvertLowI32X4U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f64x2_convert_low_u32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f64x2_convert_low_u32x4"); break; case Opcode::F64X2PromoteLowF32X4: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f64x2_promote_low_f32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f64x2_promote_low_f32x4"); break; default: @@ -5120,7 +5120,7 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I8X16Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i8x16_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i8x16_bitmask"); break; case Opcode::I8X16Neg: @@ -5144,7 +5144,7 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I16X8Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i16x8_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i16x8_bitmask"); break; case Opcode::I16X8ExtaddPairwiseI8X16S: @@ -5158,19 +5158,19 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I16X8ExtendHighI8X16S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i16x8_extend_high_i8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i16x8_extend_high_i8x16"); break; case Opcode::I16X8ExtendHighI8X16U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u16x8_extend_high_u8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u16x8_extend_high_u8x16"); break; case Opcode::I16X8ExtendLowI8X16S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i16x8_extend_low_i8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i16x8_extend_low_i8x16"); break; case Opcode::I16X8ExtendLowI8X16U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u16x8_extend_low_u8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u16x8_extend_low_u8x16"); break; case Opcode::I16X8Neg: @@ -5190,7 +5190,7 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I32X4Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i32x4_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_bitmask"); break; case Opcode::I32X4ExtaddPairwiseI16X8S: @@ -5204,19 +5204,19 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I32X4ExtendHighI16X8S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i32x4_extend_high_i16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_extend_high_i16x8"); break; case Opcode::I32X4ExtendHighI16X8U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u32x4_extend_high_u16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u32x4_extend_high_u16x8"); break; case Opcode::I32X4ExtendLowI16X8S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i32x4_extend_low_i16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_extend_low_i16x8"); break; case Opcode::I32X4ExtendLowI16X8U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u32x4_extend_low_u16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u32x4_extend_low_u16x8"); break; case Opcode::I32X4Neg: @@ -5236,23 +5236,23 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I64X2Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i64x2_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i64x2_bitmask"); break; case Opcode::I64X2ExtendHighI32X4S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i64x2_extend_high_i32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i64x2_extend_high_i32x4"); break; case Opcode::I64X2ExtendHighI32X4U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u64x2_extend_high_u32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u64x2_extend_high_u32x4"); break; case Opcode::I64X2ExtendLowI32X4S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i64x2_extend_low_i32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i64x2_extend_low_i32x4"); break; case Opcode::I64X2ExtendLowI32X4U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u64x2_extend_low_u32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u64x2_extend_low_u32x4"); break; case Opcode::I64X2Neg: @@ -5360,85 +5360,85 @@ void CWriter::Write(const SimdLaneOpExpr& expr) { switch (expr.opcode) { case Opcode::I8X16ExtractLaneS: { - Write(StackVar(0, result_type), " = simde_wasm_i8x16_extract_lane(", + Write(StackVar(0, result_type), " = v128_i8x16_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I8X16ExtractLaneU: { - Write(StackVar(0, result_type), " = simde_wasm_u8x16_extract_lane(", + Write(StackVar(0, result_type), " = v128_u8x16_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I16X8ExtractLaneS: { - Write(StackVar(0, result_type), " = simde_wasm_i16x8_extract_lane(", + Write(StackVar(0, result_type), " = v128_i16x8_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I16X8ExtractLaneU: { - Write(StackVar(0, result_type), " = simde_wasm_u16x8_extract_lane(", + Write(StackVar(0, result_type), " = v128_u16x8_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I32X4ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_i32x4_extract_lane(", + Write(StackVar(0, result_type), " = v128_i32x4_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I64X2ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_i64x2_extract_lane(", + Write(StackVar(0, result_type), " = v128_i64x2_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::F32X4ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_f32x4_extract_lane(", + Write(StackVar(0, result_type), " = v128_f32x4_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::F64X2ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_f64x2_extract_lane(", + Write(StackVar(0, result_type), " = v128_f64x2_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I8X16ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i8x16_replace_lane(", + Write(StackVar(1, result_type), " = v128_i8x16_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::I16X8ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i16x8_replace_lane(", + Write(StackVar(1, result_type), " = v128_i16x8_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::I32X4ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i32x4_replace_lane(", + Write(StackVar(1, result_type), " = v128_i32x4_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::I64X2ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i64x2_replace_lane(", + Write(StackVar(1, result_type), " = v128_i64x2_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::F32X4ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_f32x4_replace_lane(", + Write(StackVar(1, result_type), " = v128_f32x4_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::F64X2ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_f64x2_replace_lane(", + Write(StackVar(1, result_type), " = v128_f64x2_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; @@ -5507,14 +5507,12 @@ void CWriter::Write(const SimdShuffleOpExpr& expr) { Type result_type = expr.opcode.GetResultType(); switch (expr.opcode) { case Opcode::I8X16Shuffle: { - Write(StackVar(1, result_type), " = simde_wasm_i8x16_shuffle(", - StackVar(1), ", ", StackVar(0), ", ", expr.val.u8(0), ", ", - expr.val.u8(1), ", ", expr.val.u8(2), ", ", expr.val.u8(3), ", ", - expr.val.u8(4), ", ", expr.val.u8(5), ", ", expr.val.u8(6), ", ", - expr.val.u8(7), ", ", expr.val.u8(8), ", ", expr.val.u8(9), ", ", - expr.val.u8(10), ", ", expr.val.u8(11), ", ", expr.val.u8(12), ", ", - expr.val.u8(13), ", ", expr.val.u8(14), ", ", expr.val.u8(15), ");", - Newline()); + Write(StackVar(1, result_type), " = v128_i8x16_shuffle(", StackVar(1), + ", ", StackVar(0)); + for (int i = 0; i < 16; i++) { + Write(", ", expr.val.u8(i)); + } + Write(");", Newline()); DropTypes(2); break; } diff --git a/src/prebuilt/wasm2c_simd_source_declarations.cc b/src/prebuilt/wasm2c_simd_source_declarations.cc index 07425377..7c0c9fb4 100644 --- a/src/prebuilt/wasm2c_simd_source_declarations.cc +++ b/src/prebuilt/wasm2c_simd_source_declarations.cc @@ -65,7 +65,38 @@ R"w2c_template( } R"w2c_template( // clang-format off )w2c_template" -R"w2c_template(DEFINE_SIMD_LOAD_FUNC(v128_load, simde_wasm_v128_load, v128) +R"w2c_template(#if WABT_BIG_ENDIAN +)w2c_template" +R"w2c_template(static inline v128 v128_impl_load32_zero(const void* a) { +)w2c_template" +R"w2c_template( return simde_wasm_i8x16_swizzle( +)w2c_template" +R"w2c_template( simde_wasm_v128_load32_zero(a), +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3)); +)w2c_template" +R"w2c_template(} +)w2c_template" +R"w2c_template(static inline v128 v128_impl_load64_zero(const void* a) { +)w2c_template" +R"w2c_template( return simde_wasm_i8x16_swizzle( +)w2c_template" +R"w2c_template( simde_wasm_v128_load64_zero(a), +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); +)w2c_template" +R"w2c_template(} +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define v128_impl_load32_zero simde_wasm_v128_load32_zero +)w2c_template" +R"w2c_template(#define v128_impl_load64_zero simde_wasm_v128_load64_zero +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( +DEFINE_SIMD_LOAD_FUNC(v128_load, simde_wasm_v128_load, v128) )w2c_template" R"w2c_template( DEFINE_SIMD_LOAD_FUNC(v128_load8_splat, simde_wasm_v128_load8_splat, u8) @@ -90,12 +121,76 @@ R"w2c_template(DEFINE_SIMD_LOAD_FUNC(i64x2_load32x2, simde_wasm_i64x2_load32x2, R"w2c_template(DEFINE_SIMD_LOAD_FUNC(u64x2_load32x2, simde_wasm_u64x2_load32x2, u64) )w2c_template" R"w2c_template( -DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, simde_wasm_v128_load32_zero, u32) +DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, v128_impl_load32_zero, u32) )w2c_template" -R"w2c_template(DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, simde_wasm_v128_load64_zero, u64) +R"w2c_template(DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, v128_impl_load64_zero, u64) )w2c_template" R"w2c_template( -DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 0) +#if WABT_BIG_ENDIAN +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 15) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 14) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane2, simde_wasm_v128_load8_lane, u8, 13) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane3, simde_wasm_v128_load8_lane, u8, 12) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane4, simde_wasm_v128_load8_lane, u8, 11) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane5, simde_wasm_v128_load8_lane, u8, 10) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane6, simde_wasm_v128_load8_lane, u8, 9) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane7, simde_wasm_v128_load8_lane, u8, 8) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane8, simde_wasm_v128_load8_lane, u8, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane9, simde_wasm_v128_load8_lane, u8, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane10, simde_wasm_v128_load8_lane, u8, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane11, simde_wasm_v128_load8_lane, u8, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane12, simde_wasm_v128_load8_lane, u8, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane13, simde_wasm_v128_load8_lane, u8, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane14, simde_wasm_v128_load8_lane, u8, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane15, simde_wasm_v128_load8_lane, u8, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane0, simde_wasm_v128_load16_lane, u16, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane1, simde_wasm_v128_load16_lane, u16, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane2, simde_wasm_v128_load16_lane, u16, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane3, simde_wasm_v128_load16_lane, u16, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane4, simde_wasm_v128_load16_lane, u16, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane5, simde_wasm_v128_load16_lane, u16, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane6, simde_wasm_v128_load16_lane, u16, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane7, simde_wasm_v128_load16_lane, u16, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane0, simde_wasm_v128_load32_lane, u32, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane1, simde_wasm_v128_load32_lane, u32, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane2, simde_wasm_v128_load32_lane, u32, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane3, simde_wasm_v128_load32_lane, u32, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_lane, u64, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 0) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 0) )w2c_template" R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 1) )w2c_template" @@ -155,11 +250,77 @@ R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_l )w2c_template" R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 1) )w2c_template" +R"w2c_template(#endif +)w2c_template" R"w2c_template( DEFINE_SIMD_STORE(v128_store, v128) )w2c_template" R"w2c_template( -DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 0) +#if WABT_BIG_ENDIAN +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 15) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 14) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane2, simde_wasm_v128_store8_lane, u8, 13) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane3, simde_wasm_v128_store8_lane, u8, 12) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane4, simde_wasm_v128_store8_lane, u8, 11) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane5, simde_wasm_v128_store8_lane, u8, 10) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane6, simde_wasm_v128_store8_lane, u8, 9) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane7, simde_wasm_v128_store8_lane, u8, 8) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane8, simde_wasm_v128_store8_lane, u8, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane9, simde_wasm_v128_store8_lane, u8, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane10, simde_wasm_v128_store8_lane, u8, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane11, simde_wasm_v128_store8_lane, u8, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane12, simde_wasm_v128_store8_lane, u8, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane13, simde_wasm_v128_store8_lane, u8, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane14, simde_wasm_v128_store8_lane, u8, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane15, simde_wasm_v128_store8_lane, u8, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane0, simde_wasm_v128_store16_lane, u16, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane1, simde_wasm_v128_store16_lane, u16, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane2, simde_wasm_v128_store16_lane, u16, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane3, simde_wasm_v128_store16_lane, u16, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane4, simde_wasm_v128_store16_lane, u16, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane5, simde_wasm_v128_store16_lane, u16, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane6, simde_wasm_v128_store16_lane, u16, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane7, simde_wasm_v128_store16_lane, u16, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane0, simde_wasm_v128_store32_lane, u32, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane1, simde_wasm_v128_store32_lane, u32, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane2, simde_wasm_v128_store32_lane, u32, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane3, simde_wasm_v128_store32_lane, u32, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store64_lane, u64, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 0) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 0) )w2c_template" R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 1) )w2c_template" @@ -219,6 +380,279 @@ R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store6 )w2c_template" R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 1) )w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( +#if WABT_BIG_ENDIAN +)w2c_template" +R"w2c_template(#define v128_const(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_const(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +)w2c_template" +R"w2c_template(#define v128_i8x16_extract_lane(v, l) simde_wasm_i8x16_extract_lane(v, 15-(l)) +)w2c_template" +R"w2c_template(#define v128_u8x16_extract_lane(v, l) simde_wasm_u8x16_extract_lane(v, 15-(l)) +)w2c_template" +R"w2c_template(#define v128_i16x8_extract_lane(v, l) simde_wasm_i16x8_extract_lane(v, 7-(l)) +)w2c_template" +R"w2c_template(#define v128_u16x8_extract_lane(v, l) simde_wasm_u16x8_extract_lane(v, 7-(l)) +)w2c_template" +R"w2c_template(#define v128_i32x4_extract_lane(v, l) simde_wasm_i32x4_extract_lane(v, 3-(l)) +)w2c_template" +R"w2c_template(#define v128_i64x2_extract_lane(v, l) simde_wasm_i64x2_extract_lane(v, 1-(l)) +)w2c_template" +R"w2c_template(#define v128_f32x4_extract_lane(v, l) simde_wasm_f32x4_extract_lane(v, 3-(l)) +)w2c_template" +R"w2c_template(#define v128_f64x2_extract_lane(v, l) simde_wasm_f64x2_extract_lane(v, 1-(l)) +)w2c_template" +R"w2c_template(#define v128_i8x16_replace_lane(v, l, x) simde_wasm_i8x16_replace_lane(v, 15-(l), x) +)w2c_template" +R"w2c_template(#define v128_u8x16_replace_lane(v, l, x) simde_wasm_u8x16_replace_lane(v, 15-(l), x) +)w2c_template" +R"w2c_template(#define v128_i16x8_replace_lane(v, l, x) simde_wasm_i16x8_replace_lane(v, 7-(l), x) +)w2c_template" +R"w2c_template(#define v128_u16x8_replace_lane(v, l, x) simde_wasm_u16x8_replace_lane(v, 7-(l), x) +)w2c_template" +R"w2c_template(#define v128_i32x4_replace_lane(v, l, x) simde_wasm_i32x4_replace_lane(v, 3-(l), x) +)w2c_template" +R"w2c_template(#define v128_i64x2_replace_lane(v, l, x) simde_wasm_i64x2_replace_lane(v, 1-(l), x) +)w2c_template" +R"w2c_template(#define v128_f32x4_replace_lane(v, l, x) simde_wasm_f32x4_replace_lane(v, 3-(l), x) +)w2c_template" +R"w2c_template(#define v128_f64x2_replace_lane(v, l, x) simde_wasm_f64x2_replace_lane(v, 1-(l), x) +)w2c_template" +R"w2c_template(#define v128_i8x16_bitmask(v) simde_wasm_i8x16_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0))) +)w2c_template" +R"w2c_template(#define v128_i16x8_bitmask(v) simde_wasm_i16x8_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1))) +)w2c_template" +R"w2c_template(#define v128_i32x4_bitmask(v) simde_wasm_i32x4_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3))) +)w2c_template" +R"w2c_template(#define v128_i64x2_bitmask(v) simde_wasm_i64x2_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#define v128_i8x16_swizzle(v1, v2) simde_wasm_i8x16_swizzle(v1, simde_wasm_v128_xor(v2, simde_wasm_i8x16_splat(15))) +)w2c_template" +R"w2c_template(#define v128_i8x16_shuffle(v1,v2,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_shuffle(v2,v1,31-(p),31-(o),31-(n),31-(m),31-(l),31-(k),31-(j),31-(i),31-(h),31-(g),31-(f),31-(e),31-(d),31-(c),31-(b),31-(a)) +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i32x4_trunc_sat_f64x2_zero(a) \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( simde_wasm_i32x4_trunc_sat_f64x2_zero(a), \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +)w2c_template" +R"w2c_template(#define v128_u32x4_trunc_sat_f64x2_zero(a) \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( simde_wasm_u32x4_trunc_sat_f64x2_zero(a), \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +)w2c_template" +R"w2c_template(#define v128_i16x8_narrow_i32x4(a,b) simde_wasm_i16x8_narrow_i32x4(b,a) +)w2c_template" +R"w2c_template(#define v128_u16x8_narrow_i32x4(a,b) simde_wasm_u16x8_narrow_i32x4(b,a) +)w2c_template" +R"w2c_template(#define v128_i8x16_narrow_i16x8(a,b) simde_wasm_i8x16_narrow_i16x8(b,a) +)w2c_template" +R"w2c_template(#define v128_u8x16_narrow_i16x8(a,b) simde_wasm_u8x16_narrow_i16x8(b,a) +)w2c_template" +R"w2c_template(#define v128_f64x2_promote_low_f32x4(a) \ +)w2c_template" +R"w2c_template( simde_wasm_f64x2_promote_low_f32x4(simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( a, \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#define v128_f32x4_demote_f64x2_zero(a) \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( simde_wasm_f32x4_demote_f64x2_zero(a), \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_i32x4(a) \ +)w2c_template" +R"w2c_template( simde_wasm_f64x2_convert_low_i32x4(simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( a, \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_u32x4(a) \ +)w2c_template" +R"w2c_template( simde_wasm_f64x2_convert_low_u32x4(simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( a, \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define v128_const simde_wasm_i8x16_const +)w2c_template" +R"w2c_template(#define v128_i8x16_extract_lane simde_wasm_i8x16_extract_lane +)w2c_template" +R"w2c_template(#define v128_u8x16_extract_lane simde_wasm_u8x16_extract_lane +)w2c_template" +R"w2c_template(#define v128_i16x8_extract_lane simde_wasm_i16x8_extract_lane +)w2c_template" +R"w2c_template(#define v128_u16x8_extract_lane simde_wasm_u16x8_extract_lane +)w2c_template" +R"w2c_template(#define v128_i32x4_extract_lane simde_wasm_i32x4_extract_lane +)w2c_template" +R"w2c_template(#define v128_i64x2_extract_lane simde_wasm_i64x2_extract_lane +)w2c_template" +R"w2c_template(#define v128_f32x4_extract_lane simde_wasm_f32x4_extract_lane +)w2c_template" +R"w2c_template(#define v128_f64x2_extract_lane simde_wasm_f64x2_extract_lane +)w2c_template" +R"w2c_template(#define v128_i8x16_replace_lane simde_wasm_i8x16_replace_lane +)w2c_template" +R"w2c_template(#define v128_u8x16_replace_lane simde_wasm_u8x16_replace_lane +)w2c_template" +R"w2c_template(#define v128_i16x8_replace_lane simde_wasm_i16x8_replace_lane +)w2c_template" +R"w2c_template(#define v128_u16x8_replace_lane simde_wasm_u16x8_replace_lane +)w2c_template" +R"w2c_template(#define v128_i32x4_replace_lane simde_wasm_i32x4_replace_lane +)w2c_template" +R"w2c_template(#define v128_i64x2_replace_lane simde_wasm_i64x2_replace_lane +)w2c_template" +R"w2c_template(#define v128_f32x4_replace_lane simde_wasm_f32x4_replace_lane +)w2c_template" +R"w2c_template(#define v128_f64x2_replace_lane simde_wasm_f64x2_replace_lane +)w2c_template" +R"w2c_template(#define v128_i8x16_bitmask simde_wasm_i8x16_bitmask +)w2c_template" +R"w2c_template(#define v128_i16x8_bitmask simde_wasm_i16x8_bitmask +)w2c_template" +R"w2c_template(#define v128_i32x4_bitmask simde_wasm_i32x4_bitmask +)w2c_template" +R"w2c_template(#define v128_i64x2_bitmask simde_wasm_i64x2_bitmask +)w2c_template" +R"w2c_template(#define v128_i8x16_swizzle simde_wasm_i8x16_swizzle +)w2c_template" +R"w2c_template(#define v128_i8x16_shuffle simde_wasm_i8x16_shuffle +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i32x4_trunc_sat_f64x2_zero simde_wasm_i32x4_trunc_sat_f64x2_zero +)w2c_template" +R"w2c_template(#define v128_u32x4_trunc_sat_f64x2_zero simde_wasm_u32x4_trunc_sat_f64x2_zero +)w2c_template" +R"w2c_template(#define v128_i16x8_narrow_i32x4 simde_wasm_i16x8_narrow_i32x4 +)w2c_template" +R"w2c_template(#define v128_u16x8_narrow_i32x4 simde_wasm_u16x8_narrow_i32x4 +)w2c_template" +R"w2c_template(#define v128_i8x16_narrow_i16x8 simde_wasm_i8x16_narrow_i16x8 +)w2c_template" +R"w2c_template(#define v128_u8x16_narrow_i16x8 simde_wasm_u8x16_narrow_i16x8 +)w2c_template" +R"w2c_template(#define v128_f64x2_promote_low_f32x4 simde_wasm_f64x2_promote_low_f32x4 +)w2c_template" +R"w2c_template(#define v128_f32x4_demote_f64x2_zero simde_wasm_f32x4_demote_f64x2_zero +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_i32x4 simde_wasm_f64x2_convert_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_u32x4 simde_wasm_f64x2_convert_low_u32x4 +)w2c_template" +R"w2c_template(#endif +)w2c_template" R"w2c_template(// clang-format on )w2c_template" ; diff --git a/src/template/wasm2c_simd.declarations.c b/src/template/wasm2c_simd.declarations.c index cf397856..d37631ad 100644 --- a/src/template/wasm2c_simd.declarations.c +++ b/src/template/wasm2c_simd.declarations.c @@ -34,6 +34,22 @@ } // clang-format off +#if WABT_BIG_ENDIAN +static inline v128 v128_impl_load32_zero(const void* a) { + return simde_wasm_i8x16_swizzle( + simde_wasm_v128_load32_zero(a), + simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3)); +} +static inline v128 v128_impl_load64_zero(const void* a) { + return simde_wasm_i8x16_swizzle( + simde_wasm_v128_load64_zero(a), + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); +} +#else +#define v128_impl_load32_zero simde_wasm_v128_load32_zero +#define v128_impl_load64_zero simde_wasm_v128_load64_zero +#endif + DEFINE_SIMD_LOAD_FUNC(v128_load, simde_wasm_v128_load, v128) DEFINE_SIMD_LOAD_FUNC(v128_load8_splat, simde_wasm_v128_load8_splat, u8) @@ -48,9 +64,41 @@ DEFINE_SIMD_LOAD_FUNC(u32x4_load16x4, simde_wasm_u32x4_load16x4, u64) DEFINE_SIMD_LOAD_FUNC(i64x2_load32x2, simde_wasm_i64x2_load32x2, u64) DEFINE_SIMD_LOAD_FUNC(u64x2_load32x2, simde_wasm_u64x2_load32x2, u64) -DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, simde_wasm_v128_load32_zero, u32) -DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, simde_wasm_v128_load64_zero, u64) +DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, v128_impl_load32_zero, u32) +DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, v128_impl_load64_zero, u64) +#if WABT_BIG_ENDIAN +DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 15) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 14) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane2, simde_wasm_v128_load8_lane, u8, 13) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane3, simde_wasm_v128_load8_lane, u8, 12) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane4, simde_wasm_v128_load8_lane, u8, 11) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane5, simde_wasm_v128_load8_lane, u8, 10) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane6, simde_wasm_v128_load8_lane, u8, 9) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane7, simde_wasm_v128_load8_lane, u8, 8) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane8, simde_wasm_v128_load8_lane, u8, 7) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane9, simde_wasm_v128_load8_lane, u8, 6) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane10, simde_wasm_v128_load8_lane, u8, 5) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane11, simde_wasm_v128_load8_lane, u8, 4) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane12, simde_wasm_v128_load8_lane, u8, 3) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane13, simde_wasm_v128_load8_lane, u8, 2) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane14, simde_wasm_v128_load8_lane, u8, 1) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane15, simde_wasm_v128_load8_lane, u8, 0) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane0, simde_wasm_v128_load16_lane, u16, 7) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane1, simde_wasm_v128_load16_lane, u16, 6) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane2, simde_wasm_v128_load16_lane, u16, 5) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane3, simde_wasm_v128_load16_lane, u16, 4) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane4, simde_wasm_v128_load16_lane, u16, 3) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane5, simde_wasm_v128_load16_lane, u16, 2) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane6, simde_wasm_v128_load16_lane, u16, 1) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane7, simde_wasm_v128_load16_lane, u16, 0) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane0, simde_wasm_v128_load32_lane, u32, 3) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane1, simde_wasm_v128_load32_lane, u32, 2) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane2, simde_wasm_v128_load32_lane, u32, 1) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane3, simde_wasm_v128_load32_lane, u32, 0) +DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_lane, u64, 1) +DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 0) +#else DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 0) DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 1) DEFINE_SIMD_LOAD_LANE(v128_load8_lane2, simde_wasm_v128_load8_lane, u8, 2) @@ -81,9 +129,42 @@ DEFINE_SIMD_LOAD_LANE(v128_load32_lane2, simde_wasm_v128_load32_lane, u32, 2) DEFINE_SIMD_LOAD_LANE(v128_load32_lane3, simde_wasm_v128_load32_lane, u32, 3) DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_lane, u64, 0) DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 1) +#endif DEFINE_SIMD_STORE(v128_store, v128) +#if WABT_BIG_ENDIAN +DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 15) +DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 14) +DEFINE_SIMD_STORE_LANE(v128_store8_lane2, simde_wasm_v128_store8_lane, u8, 13) +DEFINE_SIMD_STORE_LANE(v128_store8_lane3, simde_wasm_v128_store8_lane, u8, 12) +DEFINE_SIMD_STORE_LANE(v128_store8_lane4, simde_wasm_v128_store8_lane, u8, 11) +DEFINE_SIMD_STORE_LANE(v128_store8_lane5, simde_wasm_v128_store8_lane, u8, 10) +DEFINE_SIMD_STORE_LANE(v128_store8_lane6, simde_wasm_v128_store8_lane, u8, 9) +DEFINE_SIMD_STORE_LANE(v128_store8_lane7, simde_wasm_v128_store8_lane, u8, 8) +DEFINE_SIMD_STORE_LANE(v128_store8_lane8, simde_wasm_v128_store8_lane, u8, 7) +DEFINE_SIMD_STORE_LANE(v128_store8_lane9, simde_wasm_v128_store8_lane, u8, 6) +DEFINE_SIMD_STORE_LANE(v128_store8_lane10, simde_wasm_v128_store8_lane, u8, 5) +DEFINE_SIMD_STORE_LANE(v128_store8_lane11, simde_wasm_v128_store8_lane, u8, 4) +DEFINE_SIMD_STORE_LANE(v128_store8_lane12, simde_wasm_v128_store8_lane, u8, 3) +DEFINE_SIMD_STORE_LANE(v128_store8_lane13, simde_wasm_v128_store8_lane, u8, 2) +DEFINE_SIMD_STORE_LANE(v128_store8_lane14, simde_wasm_v128_store8_lane, u8, 1) +DEFINE_SIMD_STORE_LANE(v128_store8_lane15, simde_wasm_v128_store8_lane, u8, 0) +DEFINE_SIMD_STORE_LANE(v128_store16_lane0, simde_wasm_v128_store16_lane, u16, 7) +DEFINE_SIMD_STORE_LANE(v128_store16_lane1, simde_wasm_v128_store16_lane, u16, 6) +DEFINE_SIMD_STORE_LANE(v128_store16_lane2, simde_wasm_v128_store16_lane, u16, 5) +DEFINE_SIMD_STORE_LANE(v128_store16_lane3, simde_wasm_v128_store16_lane, u16, 4) +DEFINE_SIMD_STORE_LANE(v128_store16_lane4, simde_wasm_v128_store16_lane, u16, 3) +DEFINE_SIMD_STORE_LANE(v128_store16_lane5, simde_wasm_v128_store16_lane, u16, 2) +DEFINE_SIMD_STORE_LANE(v128_store16_lane6, simde_wasm_v128_store16_lane, u16, 1) +DEFINE_SIMD_STORE_LANE(v128_store16_lane7, simde_wasm_v128_store16_lane, u16, 0) +DEFINE_SIMD_STORE_LANE(v128_store32_lane0, simde_wasm_v128_store32_lane, u32, 3) +DEFINE_SIMD_STORE_LANE(v128_store32_lane1, simde_wasm_v128_store32_lane, u32, 2) +DEFINE_SIMD_STORE_LANE(v128_store32_lane2, simde_wasm_v128_store32_lane, u32, 1) +DEFINE_SIMD_STORE_LANE(v128_store32_lane3, simde_wasm_v128_store32_lane, u32, 0) +DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store64_lane, u64, 1) +DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 0) +#else DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 0) DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 1) DEFINE_SIMD_STORE_LANE(v128_store8_lane2, simde_wasm_v128_store8_lane, u8, 2) @@ -114,4 +195,141 @@ DEFINE_SIMD_STORE_LANE(v128_store32_lane2, simde_wasm_v128_store32_lane, u32, 2) DEFINE_SIMD_STORE_LANE(v128_store32_lane3, simde_wasm_v128_store32_lane, u32, 3) DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store64_lane, u64, 0) DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 1) +#endif + +#if WABT_BIG_ENDIAN +#define v128_const(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_const(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +#define v128_i8x16_extract_lane(v, l) simde_wasm_i8x16_extract_lane(v, 15-(l)) +#define v128_u8x16_extract_lane(v, l) simde_wasm_u8x16_extract_lane(v, 15-(l)) +#define v128_i16x8_extract_lane(v, l) simde_wasm_i16x8_extract_lane(v, 7-(l)) +#define v128_u16x8_extract_lane(v, l) simde_wasm_u16x8_extract_lane(v, 7-(l)) +#define v128_i32x4_extract_lane(v, l) simde_wasm_i32x4_extract_lane(v, 3-(l)) +#define v128_i64x2_extract_lane(v, l) simde_wasm_i64x2_extract_lane(v, 1-(l)) +#define v128_f32x4_extract_lane(v, l) simde_wasm_f32x4_extract_lane(v, 3-(l)) +#define v128_f64x2_extract_lane(v, l) simde_wasm_f64x2_extract_lane(v, 1-(l)) +#define v128_i8x16_replace_lane(v, l, x) simde_wasm_i8x16_replace_lane(v, 15-(l), x) +#define v128_u8x16_replace_lane(v, l, x) simde_wasm_u8x16_replace_lane(v, 15-(l), x) +#define v128_i16x8_replace_lane(v, l, x) simde_wasm_i16x8_replace_lane(v, 7-(l), x) +#define v128_u16x8_replace_lane(v, l, x) simde_wasm_u16x8_replace_lane(v, 7-(l), x) +#define v128_i32x4_replace_lane(v, l, x) simde_wasm_i32x4_replace_lane(v, 3-(l), x) +#define v128_i64x2_replace_lane(v, l, x) simde_wasm_i64x2_replace_lane(v, 1-(l), x) +#define v128_f32x4_replace_lane(v, l, x) simde_wasm_f32x4_replace_lane(v, 3-(l), x) +#define v128_f64x2_replace_lane(v, l, x) simde_wasm_f64x2_replace_lane(v, 1-(l), x) +#define v128_i8x16_bitmask(v) simde_wasm_i8x16_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0))) +#define v128_i16x8_bitmask(v) simde_wasm_i16x8_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1))) +#define v128_i32x4_bitmask(v) simde_wasm_i32x4_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3))) +#define v128_i64x2_bitmask(v) simde_wasm_i64x2_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#define v128_i8x16_swizzle(v1, v2) simde_wasm_i8x16_swizzle(v1, simde_wasm_v128_xor(v2, simde_wasm_i8x16_splat(15))) +#define v128_i8x16_shuffle(v1,v2,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_shuffle(v2,v1,31-(p),31-(o),31-(n),31-(m),31-(l),31-(k),31-(j),31-(i),31-(h),31-(g),31-(f),31-(e),31-(d),31-(c),31-(b),31-(a)) +#define v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_low_i8x16 +#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_low_u8x16 +#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_high_i8x16 +#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_high_u8x16 +#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_low_i16x8 +#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_low_u16x8 +#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_high_i16x8 +#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_high_u16x8 +#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_low_i32x4 +#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_low_u32x4 +#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_high_i32x4 +#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_high_u32x4 +#define v128_i32x4_trunc_sat_f64x2_zero(a) \ + simde_wasm_i8x16_swizzle( \ + simde_wasm_i32x4_trunc_sat_f64x2_zero(a), \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define v128_u32x4_trunc_sat_f64x2_zero(a) \ + simde_wasm_i8x16_swizzle( \ + simde_wasm_u32x4_trunc_sat_f64x2_zero(a), \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define v128_i16x8_narrow_i32x4(a,b) simde_wasm_i16x8_narrow_i32x4(b,a) +#define v128_u16x8_narrow_i32x4(a,b) simde_wasm_u16x8_narrow_i32x4(b,a) +#define v128_i8x16_narrow_i16x8(a,b) simde_wasm_i8x16_narrow_i16x8(b,a) +#define v128_u8x16_narrow_i16x8(a,b) simde_wasm_u8x16_narrow_i16x8(b,a) +#define v128_f64x2_promote_low_f32x4(a) \ + simde_wasm_f64x2_promote_low_f32x4(simde_wasm_i8x16_swizzle( \ + a, \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#define v128_f32x4_demote_f64x2_zero(a) \ + simde_wasm_i8x16_swizzle( \ + simde_wasm_f32x4_demote_f64x2_zero(a), \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define v128_f64x2_convert_low_i32x4(a) \ + simde_wasm_f64x2_convert_low_i32x4(simde_wasm_i8x16_swizzle( \ + a, \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#define v128_f64x2_convert_low_u32x4(a) \ + simde_wasm_f64x2_convert_low_u32x4(simde_wasm_i8x16_swizzle( \ + a, \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#else +#define v128_const simde_wasm_i8x16_const +#define v128_i8x16_extract_lane simde_wasm_i8x16_extract_lane +#define v128_u8x16_extract_lane simde_wasm_u8x16_extract_lane +#define v128_i16x8_extract_lane simde_wasm_i16x8_extract_lane +#define v128_u16x8_extract_lane simde_wasm_u16x8_extract_lane +#define v128_i32x4_extract_lane simde_wasm_i32x4_extract_lane +#define v128_i64x2_extract_lane simde_wasm_i64x2_extract_lane +#define v128_f32x4_extract_lane simde_wasm_f32x4_extract_lane +#define v128_f64x2_extract_lane simde_wasm_f64x2_extract_lane +#define v128_i8x16_replace_lane simde_wasm_i8x16_replace_lane +#define v128_u8x16_replace_lane simde_wasm_u8x16_replace_lane +#define v128_i16x8_replace_lane simde_wasm_i16x8_replace_lane +#define v128_u16x8_replace_lane simde_wasm_u16x8_replace_lane +#define v128_i32x4_replace_lane simde_wasm_i32x4_replace_lane +#define v128_i64x2_replace_lane simde_wasm_i64x2_replace_lane +#define v128_f32x4_replace_lane simde_wasm_f32x4_replace_lane +#define v128_f64x2_replace_lane simde_wasm_f64x2_replace_lane +#define v128_i8x16_bitmask simde_wasm_i8x16_bitmask +#define v128_i16x8_bitmask simde_wasm_i16x8_bitmask +#define v128_i32x4_bitmask simde_wasm_i32x4_bitmask +#define v128_i64x2_bitmask simde_wasm_i64x2_bitmask +#define v128_i8x16_swizzle simde_wasm_i8x16_swizzle +#define v128_i8x16_shuffle simde_wasm_i8x16_shuffle +#define v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_high_i8x16 +#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_high_u8x16 +#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_low_i8x16 +#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_low_u8x16 +#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_high_i16x8 +#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_high_u16x8 +#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_low_i16x8 +#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_low_u16x8 +#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_high_i32x4 +#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_high_u32x4 +#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_low_i32x4 +#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_low_u32x4 +#define v128_i32x4_trunc_sat_f64x2_zero simde_wasm_i32x4_trunc_sat_f64x2_zero +#define v128_u32x4_trunc_sat_f64x2_zero simde_wasm_u32x4_trunc_sat_f64x2_zero +#define v128_i16x8_narrow_i32x4 simde_wasm_i16x8_narrow_i32x4 +#define v128_u16x8_narrow_i32x4 simde_wasm_u16x8_narrow_i32x4 +#define v128_i8x16_narrow_i16x8 simde_wasm_i8x16_narrow_i16x8 +#define v128_u8x16_narrow_i16x8 simde_wasm_u8x16_narrow_i16x8 +#define v128_f64x2_promote_low_f32x4 simde_wasm_f64x2_promote_low_f32x4 +#define v128_f32x4_demote_f64x2_zero simde_wasm_f32x4_demote_f64x2_zero +#define v128_f64x2_convert_low_i32x4 simde_wasm_f64x2_convert_low_i32x4 +#define v128_f64x2_convert_low_u32x4 simde_wasm_f64x2_convert_low_u32x4 +#endif // clang-format on diff --git a/test/harness/wasm2c/simd_formatting.txt b/test/harness/wasm2c/simd_formatting.txt index c92ca050..8b17ef1a 100644 --- a/test/harness/wasm2c/simd_formatting.txt +++ b/test/harness/wasm2c/simd_formatting.txt @@ -6,7 +6,7 @@ (assert_return (invoke "x" (v128.const i8x16 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F)) (v128.const i8x16 0xFF 0xFE 0xFD 0xFC 0xFB 0xFA 0xF9 0xF8 0xF7 0xF6 0xF5 0xF4 0xF3 0xF2 0xF1 0xF0)) (;; STDERR ;;; -simd_formatting.txt:6: assertion failed: in w2c_simd__formatting__0__wasm_x(&simd__formatting__0__wasm_instance, simde_wasm_i8x16_make(0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u)): expected <255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 >, got <0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 >. +simd_formatting.txt:6: assertion failed: in w2c_simd__formatting__0__wasm_x(&simd__formatting__0__wasm_instance, v128_i8x16_make(0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u)): expected <255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 >, got <0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 >. ;;; STDERR ;;) (;; STDOUT ;;; 0/1 tests passed. diff --git a/test/run-spec-wasm2c.py b/test/run-spec-wasm2c.py index 6a59471d..122174e2 100755 --- a/test/run-spec-wasm2c.py +++ b/test/run-spec-wasm2c.py @@ -374,7 +374,7 @@ class CWriter(object): return '"(f64 %s)"' % value return F64ToC(int(value)) elif type_ == 'v128': - return 'simde_wasm_' + const['lane_type'] + 'x' + str(len(const['value'])) + '_make(' + ','.join([self._Constant({'type': const['lane_type'], 'value': x}) for x in value]) + ')' + return 'v128_' + const['lane_type'] + 'x' + str(len(const['value'])) + '_make(' + ','.join([self._Constant({'type': const['lane_type'], 'value': x}) for x in value]) + ')' elif type_ == 'externref': if value == 'null': return 'wasm_rt_externref_null_value' diff --git a/test/spec-wasm2c-prefix.c b/test/spec-wasm2c-prefix.c index e107325c..529e4620 100644 --- a/test/spec-wasm2c-prefix.c +++ b/test/spec-wasm2c-prefix.c @@ -13,6 +13,42 @@ #include "wasm-rt-impl.h" #include "wasm-rt-exceptions.h" +/* NOTE: function argument evaluation order is implementation-defined in C, + so it SHOULD NOT be relied on by tests. */ +#if WABT_BIG_ENDIAN +#define v128_i8x16_make(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ + simde_wasm_i8x16_make(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +#define v128_u8x16_make(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ + simde_wasm_u8x16_make(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +#define v128_i16x8_make(a,b,c,d,e,f,g,h) simde_wasm_i16x8_make(h,g,f,e,d,c,b,a) +#define v128_u16x8_make(a,b,c,d,e,f,g,h) simde_wasm_u16x8_make(h,g,f,e,d,c,b,a) +#define v128_i32x4_make(a,b,c,d) simde_wasm_i32x4_make(d,c,b,a) +#define v128_u32x4_make(a,b,c,d) simde_wasm_u32x4_make(d,c,b,a) +#define v128_i64x2_make(a,b) simde_wasm_i64x2_make(b,a) +#define v128_u64x2_make(a,b) simde_wasm_u64x2_make(b,a) +#define v128_f32x4_make(a,b,c,d) simde_wasm_f32x4_make(d,c,b,a) +#define v128_f64x2_make(a,b) simde_wasm_f64x2_make(b,a) +#define v128_i8x16_extract_lane(a,n) simde_wasm_u8x16_extract_lane(a,15-(n)) +#define v128_u8x16_extract_lane(a,n) simde_wasm_u8x16_extract_lane(a,15-(n)) +#define v128_i16x8_extract_lane(a,n) simde_wasm_u16x8_extract_lane(a,7-(n)) +#define v128_u16x8_extract_lane(a,n) simde_wasm_u16x8_extract_lane(a,7-(n)) +#define v128_i32x4_extract_lane(a,n) simde_wasm_u32x4_extract_lane(a,3-(n)) +#define v128_u32x4_extract_lane(a,n) simde_wasm_u32x4_extract_lane(a,3-(n)) +#define v128_i64x2_extract_lane(a,n) simde_wasm_u64x2_extract_lane(a,1-(n)) +#define v128_u64x2_extract_lane(a,n) simde_wasm_u64x2_extract_lane(a,1-(n)) +#define v128_f32x4_extract_lane(a,n) simde_wasm_f32x4_extract_lane(a,3-(n)) +#define v128_f64x2_extract_lane(a,n) simde_wasm_f64x2_extract_lane(a,1-(n)) +#else +#define v128_i8x16_make simde_wasm_i8x16_make +#define v128_u8x16_make simde_wasm_u8x16_make +#define v128_i16x8_make simde_wasm_i16x8_make +#define v128_u16x8_make simde_wasm_u16x8_make +#define v128_i32x4_make simde_wasm_i32x4_make +#define v128_u32x4_make simde_wasm_u32x4_make +#define v128_i64x2_make simde_wasm_i64x2_make +#define v128_u64x2_make simde_wasm_u64x2_make +#define v128_f32x4_make simde_wasm_f32x4_make +#define v128_f64x2_make simde_wasm_f64x2_make // like is_equal_TYPE below, always use unsigned for these #define v128_i8x16_extract_lane simde_wasm_u8x16_extract_lane #define v128_u8x16_extract_lane simde_wasm_u8x16_extract_lane @@ -24,6 +60,7 @@ #define v128_u64x2_extract_lane simde_wasm_u64x2_extract_lane #define v128_f32x4_extract_lane simde_wasm_f32x4_extract_lane #define v128_f64x2_extract_lane simde_wasm_f64x2_extract_lane +#endif static int g_tests_run; static int g_tests_passed; |