summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xscripts/gen-s-parser.py2
-rw-r--r--src/gen-s-parser.inc32
-rw-r--r--src/ir/cost.h2
-rw-r--r--src/literal.h2
-rw-r--r--src/passes/Print.cpp6
-rw-r--r--src/wasm-binary.h2
-rw-r--r--src/wasm-interpreter.h4
-rw-r--r--src/wasm.h2
-rw-r--r--src/wasm/literal.cpp27
-rw-r--r--src/wasm/wasm-binary.cpp8
-rw-r--r--src/wasm/wasm-stack.cpp6
-rw-r--r--test/lit/basic/f16.wast126
-rw-r--r--test/spec/f16.wast29
13 files changed, 207 insertions, 41 deletions
diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py
index 0b5703b9e..9c39acb48 100755
--- a/scripts/gen-s-parser.py
+++ b/scripts/gen-s-parser.py
@@ -547,6 +547,8 @@ instructions = [
("i32x4.relaxed_trunc_f32x4_u", "makeUnary(UnaryOp::RelaxedTruncUVecF32x4ToVecI32x4)"),
("i32x4.relaxed_trunc_f64x2_s_zero", "makeUnary(UnaryOp::RelaxedTruncZeroSVecF64x2ToVecI32x4)"),
("i32x4.relaxed_trunc_f64x2_u_zero", "makeUnary(UnaryOp::RelaxedTruncZeroUVecF64x2ToVecI32x4)"),
+ ("f16x8.relaxed_madd", "makeSIMDTernary(SIMDTernaryOp::RelaxedMaddVecF16x8)"),
+ ("f16x8.relaxed_nmadd", "makeSIMDTernary(SIMDTernaryOp::RelaxedNmaddVecF16x8)"),
("f32x4.relaxed_madd", "makeSIMDTernary(SIMDTernaryOp::RelaxedMaddVecF32x4)"),
("f32x4.relaxed_nmadd", "makeSIMDTernary(SIMDTernaryOp::RelaxedNmaddVecF32x4)"),
("f64x2.relaxed_madd", "makeSIMDTernary(SIMDTernaryOp::RelaxedMaddVecF64x2)"),
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc
index 56a7c1cce..f7d12a1be 100644
--- a/src/gen-s-parser.inc
+++ b/src/gen-s-parser.inc
@@ -458,12 +458,34 @@ switch (buf[0]) {
default: goto parse_error;
}
}
- case 'r':
- if (op == "f16x8.replace_lane"sv) {
- CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8));
- return Ok{};
+ case 'r': {
+ switch (buf[8]) {
+ case 'l': {
+ switch (buf[14]) {
+ case 'm':
+ if (op == "f16x8.relaxed_madd"sv) {
+ CHECK_ERR(makeSIMDTernary(ctx, pos, annotations, SIMDTernaryOp::RelaxedMaddVecF16x8));
+ return Ok{};
+ }
+ goto parse_error;
+ case 'n':
+ if (op == "f16x8.relaxed_nmadd"sv) {
+ CHECK_ERR(makeSIMDTernary(ctx, pos, annotations, SIMDTernaryOp::RelaxedNmaddVecF16x8));
+ return Ok{};
+ }
+ goto parse_error;
+ default: goto parse_error;
+ }
+ }
+ case 'p':
+ if (op == "f16x8.replace_lane"sv) {
+ CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8));
+ return Ok{};
+ }
+ goto parse_error;
+ default: goto parse_error;
}
- goto parse_error;
+ }
case 's': {
switch (buf[7]) {
case 'p':
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 99b945815..d11a9bfac 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -582,6 +582,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
case LaneselectI16x8:
case LaneselectI32x4:
case LaneselectI64x2:
+ case RelaxedMaddVecF16x8:
+ case RelaxedNmaddVecF16x8:
case RelaxedMaddVecF32x4:
case RelaxedNmaddVecF32x4:
case RelaxedMaddVecF64x2:
diff --git a/src/literal.h b/src/literal.h
index 73289c83b..424121f5a 100644
--- a/src/literal.h
+++ b/src/literal.h
@@ -694,6 +694,8 @@ public:
Literal demoteZeroToF32x4() const;
Literal promoteLowToF64x2() const;
Literal swizzleI8x16(const Literal& other) const;
+ Literal relaxedMaddF16x8(const Literal& left, const Literal& right) const;
+ Literal relaxedNmaddF16x8(const Literal& left, const Literal& right) const;
Literal relaxedMaddF32x4(const Literal& left, const Literal& right) const;
Literal relaxedNmaddF32x4(const Literal& left, const Literal& right) const;
Literal relaxedMaddF64x2(const Literal& left, const Literal& right) const;
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 024b6af5c..46d519e9e 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -770,6 +770,12 @@ struct PrintExpressionContents
case LaneselectI64x2:
o << "i64x2.laneselect";
break;
+ case RelaxedMaddVecF16x8:
+ o << "f16x8.relaxed_madd";
+ break;
+ case RelaxedNmaddVecF16x8:
+ o << "f16x8.relaxed_nmadd";
+ break;
case RelaxedMaddVecF32x4:
o << "f32x4.relaxed_madd";
break;
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index e8ed293e7..d10b6c312 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1037,6 +1037,8 @@ enum ASTNodes {
I32x4RelaxedTruncF32x4U = 0x102,
I32x4RelaxedTruncF64x2SZero = 0x103,
I32x4RelaxedTruncF64x2UZero = 0x104,
+ F16x8RelaxedMadd = 0x14e,
+ F16x8RelaxedNmadd = 0x14f,
F32x4RelaxedMadd = 0x105,
F32x4RelaxedNmadd = 0x106,
F64x2RelaxedMadd = 0x107,
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index 578b0a569..86ba3b3a2 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1201,6 +1201,10 @@ public:
case LaneselectI64x2:
return c.bitselectV128(a, b);
+ case RelaxedMaddVecF16x8:
+ return a.relaxedMaddF16x8(b, c);
+ case RelaxedNmaddVecF16x8:
+ return a.relaxedNmaddF16x8(b, c);
case RelaxedMaddVecF32x4:
return a.relaxedMaddF32x4(b, c);
case RelaxedNmaddVecF32x4:
diff --git a/src/wasm.h b/src/wasm.h
index 86ee12972..a7ad6ec6c 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -574,6 +574,8 @@ enum SIMDTernaryOp {
Bitselect,
// Relaxed SIMD
+ RelaxedMaddVecF16x8,
+ RelaxedNmaddVecF16x8,
RelaxedMaddVecF32x4,
RelaxedNmaddVecF32x4,
RelaxedMaddVecF64x2,
diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp
index c76856d15..e332db305 100644
--- a/src/wasm/literal.cpp
+++ b/src/wasm/literal.cpp
@@ -1674,23 +1674,25 @@ Literal Literal::copysign(const Literal& other) const {
Literal Literal::madd(const Literal& left, const Literal& right) const {
switch (type.getBasic()) {
case Type::f32:
- return Literal(::fmaf(left.getf32(), right.getf32(), getf32()));
+ return Literal(::fmaf(getf32(), left.getf32(), right.getf32()));
break;
case Type::f64:
- return Literal(::fma(left.getf64(), right.getf64(), getf64()));
+ return Literal(::fma(getf64(), left.getf64(), right.getf64()));
break;
default:
WASM_UNREACHABLE("unexpected type");
}
}
+// XXX: This is not an actual fused negated multiply-add implementation, but
+// the relaxed spec allows a double-rounding implementation like the one below.
Literal Literal::nmadd(const Literal& left, const Literal& right) const {
switch (type.getBasic()) {
case Type::f32:
- return Literal(::fmaf(-left.getf32(), right.getf32(), getf32()));
+ return Literal(-(getf32() * left.getf32()) + right.getf32());
break;
case Type::f64:
- return Literal(::fma(-left.getf64(), right.getf64(), getf64()));
+ return Literal(-(getf64() * left.getf64()) + right.getf64());
break;
default:
WASM_UNREACHABLE("unexpected type");
@@ -2749,19 +2751,32 @@ Literal Literal::swizzleI8x16(const Literal& other) const {
namespace {
template<int Lanes,
LaneArray<Lanes> (Literal::*IntoLanes)() const,
- Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const>
+ Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const,
+ Literal (*Convert)(const Literal&) = passThrough>
static Literal ternary(const Literal& a, const Literal& b, const Literal& c) {
LaneArray<Lanes> x = (a.*IntoLanes)();
LaneArray<Lanes> y = (b.*IntoLanes)();
LaneArray<Lanes> z = (c.*IntoLanes)();
LaneArray<Lanes> r;
for (size_t i = 0; i < Lanes; ++i) {
- r[i] = (x[i].*TernaryOp)(y[i], z[i]);
+ r[i] = Convert((x[i].*TernaryOp)(y[i], z[i]));
}
return Literal(r);
}
} // namespace
+Literal Literal::relaxedMaddF16x8(const Literal& left,
+ const Literal& right) const {
+ return ternary<8, &Literal::getLanesF16x8, &Literal::madd, &toFP16>(
+ *this, left, right);
+}
+
+Literal Literal::relaxedNmaddF16x8(const Literal& left,
+ const Literal& right) const {
+ return ternary<8, &Literal::getLanesF16x8, &Literal::nmadd, &toFP16>(
+ *this, left, right);
+}
+
Literal Literal::relaxedMaddF32x4(const Literal& left,
const Literal& right) const {
return ternary<4, &Literal::getLanesF32x4, &Literal::madd>(
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 8c684dc2f..542eab4b4 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -6823,6 +6823,14 @@ bool WasmBinaryReader::maybeVisitSIMDTernary(Expression*& out, uint32_t code) {
curr = allocator.alloc<SIMDTernary>();
curr->op = LaneselectI64x2;
break;
+ case BinaryConsts::F16x8RelaxedMadd:
+ curr = allocator.alloc<SIMDTernary>();
+ curr->op = RelaxedMaddVecF16x8;
+ break;
+ case BinaryConsts::F16x8RelaxedNmadd:
+ curr = allocator.alloc<SIMDTernary>();
+ curr->op = RelaxedNmaddVecF16x8;
+ break;
case BinaryConsts::F32x4RelaxedMadd:
curr = allocator.alloc<SIMDTernary>();
curr->op = RelaxedMaddVecF32x4;
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 140205b85..ac79d0135 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -657,6 +657,12 @@ void BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) {
case LaneselectI64x2:
o << U32LEB(BinaryConsts::I64x2Laneselect);
break;
+ case RelaxedMaddVecF16x8:
+ o << U32LEB(BinaryConsts::F16x8RelaxedMadd);
+ break;
+ case RelaxedNmaddVecF16x8:
+ o << U32LEB(BinaryConsts::F16x8RelaxedNmadd);
+ break;
case RelaxedMaddVecF32x4:
o << U32LEB(BinaryConsts::F32x4RelaxedMadd);
break;
diff --git a/test/lit/basic/f16.wast b/test/lit/basic/f16.wast
index 2e5ac57dd..ba806bb57 100644
--- a/test/lit/basic/f16.wast
+++ b/test/lit/basic/f16.wast
@@ -17,19 +17,21 @@
;; CHECK-TEXT: (type $1 (func (param v128) (result v128)))
- ;; CHECK-TEXT: (type $2 (func (param i32) (result f32)))
+ ;; CHECK-TEXT: (type $2 (func (param v128 v128 v128) (result v128)))
- ;; CHECK-TEXT: (type $3 (func (param i32 f32)))
+ ;; CHECK-TEXT: (type $3 (func (param i32) (result f32)))
- ;; CHECK-TEXT: (type $4 (func (param f32) (result v128)))
+ ;; CHECK-TEXT: (type $4 (func (param i32 f32)))
- ;; CHECK-TEXT: (type $5 (func (param v128) (result f32)))
+ ;; CHECK-TEXT: (type $5 (func (param f32) (result v128)))
- ;; CHECK-TEXT: (type $6 (func (param v128 f32) (result v128)))
+ ;; CHECK-TEXT: (type $6 (func (param v128) (result f32)))
+
+ ;; CHECK-TEXT: (type $7 (func (param v128 f32) (result v128)))
;; CHECK-TEXT: (memory $0 1 1)
- ;; CHECK-TEXT: (func $f32.load_f16 (type $2) (param $0 i32) (result f32)
+ ;; CHECK-TEXT: (func $f32.load_f16 (type $3) (param $0 i32) (result f32)
;; CHECK-TEXT-NEXT: (f32.load_f16
;; CHECK-TEXT-NEXT: (local.get $0)
;; CHECK-TEXT-NEXT: )
@@ -38,19 +40,21 @@
;; CHECK-BIN: (type $1 (func (param v128) (result v128)))
- ;; CHECK-BIN: (type $2 (func (param i32) (result f32)))
+ ;; CHECK-BIN: (type $2 (func (param v128 v128 v128) (result v128)))
+
+ ;; CHECK-BIN: (type $3 (func (param i32) (result f32)))
- ;; CHECK-BIN: (type $3 (func (param i32 f32)))
+ ;; CHECK-BIN: (type $4 (func (param i32 f32)))
- ;; CHECK-BIN: (type $4 (func (param f32) (result v128)))
+ ;; CHECK-BIN: (type $5 (func (param f32) (result v128)))
- ;; CHECK-BIN: (type $5 (func (param v128) (result f32)))
+ ;; CHECK-BIN: (type $6 (func (param v128) (result f32)))
- ;; CHECK-BIN: (type $6 (func (param v128 f32) (result v128)))
+ ;; CHECK-BIN: (type $7 (func (param v128 f32) (result v128)))
;; CHECK-BIN: (memory $0 1 1)
- ;; CHECK-BIN: (func $f32.load_f16 (type $2) (param $0 i32) (result f32)
+ ;; CHECK-BIN: (func $f32.load_f16 (type $3) (param $0 i32) (result f32)
;; CHECK-BIN-NEXT: (f32.load_f16
;; CHECK-BIN-NEXT: (local.get $0)
;; CHECK-BIN-NEXT: )
@@ -60,13 +64,13 @@
(local.get $0)
)
)
- ;; CHECK-TEXT: (func $f32.store_f16 (type $3) (param $0 i32) (param $1 f32)
+ ;; CHECK-TEXT: (func $f32.store_f16 (type $4) (param $0 i32) (param $1 f32)
;; CHECK-TEXT-NEXT: (f32.store_f16
;; CHECK-TEXT-NEXT: (local.get $0)
;; CHECK-TEXT-NEXT: (local.get $1)
;; CHECK-TEXT-NEXT: )
;; CHECK-TEXT-NEXT: )
- ;; CHECK-BIN: (func $f32.store_f16 (type $3) (param $0 i32) (param $1 f32)
+ ;; CHECK-BIN: (func $f32.store_f16 (type $4) (param $0 i32) (param $1 f32)
;; CHECK-BIN-NEXT: (f32.store_f16
;; CHECK-BIN-NEXT: (local.get $0)
;; CHECK-BIN-NEXT: (local.get $1)
@@ -79,12 +83,12 @@
)
)
- ;; CHECK-TEXT: (func $f16x8.splat (type $4) (param $0 f32) (result v128)
+ ;; CHECK-TEXT: (func $f16x8.splat (type $5) (param $0 f32) (result v128)
;; CHECK-TEXT-NEXT: (f16x8.splat
;; CHECK-TEXT-NEXT: (local.get $0)
;; CHECK-TEXT-NEXT: )
;; CHECK-TEXT-NEXT: )
- ;; CHECK-BIN: (func $f16x8.splat (type $4) (param $0 f32) (result v128)
+ ;; CHECK-BIN: (func $f16x8.splat (type $5) (param $0 f32) (result v128)
;; CHECK-BIN-NEXT: (f16x8.splat
;; CHECK-BIN-NEXT: (local.get $0)
;; CHECK-BIN-NEXT: )
@@ -95,12 +99,12 @@
)
)
- ;; CHECK-TEXT: (func $f16x8.extract_lane (type $5) (param $0 v128) (result f32)
+ ;; CHECK-TEXT: (func $f16x8.extract_lane (type $6) (param $0 v128) (result f32)
;; CHECK-TEXT-NEXT: (f16x8.extract_lane 0
;; CHECK-TEXT-NEXT: (local.get $0)
;; CHECK-TEXT-NEXT: )
;; CHECK-TEXT-NEXT: )
- ;; CHECK-BIN: (func $f16x8.extract_lane (type $5) (param $0 v128) (result f32)
+ ;; CHECK-BIN: (func $f16x8.extract_lane (type $6) (param $0 v128) (result f32)
;; CHECK-BIN-NEXT: (f16x8.extract_lane 0
;; CHECK-BIN-NEXT: (local.get $0)
;; CHECK-BIN-NEXT: )
@@ -111,13 +115,13 @@
)
)
- ;; CHECK-TEXT: (func $f16x8.replace_lane (type $6) (param $0 v128) (param $1 f32) (result v128)
+ ;; CHECK-TEXT: (func $f16x8.replace_lane (type $7) (param $0 v128) (param $1 f32) (result v128)
;; CHECK-TEXT-NEXT: (f16x8.replace_lane 0
;; CHECK-TEXT-NEXT: (local.get $0)
;; CHECK-TEXT-NEXT: (local.get $1)
;; CHECK-TEXT-NEXT: )
;; CHECK-TEXT-NEXT: )
- ;; CHECK-BIN: (func $f16x8.replace_lane (type $6) (param $0 v128) (param $1 f32) (result v128)
+ ;; CHECK-BIN: (func $f16x8.replace_lane (type $7) (param $0 v128) (param $1 f32) (result v128)
;; CHECK-BIN-NEXT: (f16x8.replace_lane 0
;; CHECK-BIN-NEXT: (local.get $0)
;; CHECK-BIN-NEXT: (local.get $1)
@@ -486,49 +490,95 @@
(local.get $0)
)
)
+ ;; CHECK-TEXT: (func $f16x8.relaxed_madd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ ;; CHECK-TEXT-NEXT: (f16x8.relaxed_madd
+ ;; CHECK-TEXT-NEXT: (local.get $0)
+ ;; CHECK-TEXT-NEXT: (local.get $1)
+ ;; CHECK-TEXT-NEXT: (local.get $2)
+ ;; CHECK-TEXT-NEXT: )
+ ;; CHECK-TEXT-NEXT: )
+ ;; CHECK-BIN: (func $f16x8.relaxed_madd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ ;; CHECK-BIN-NEXT: (f16x8.relaxed_madd
+ ;; CHECK-BIN-NEXT: (local.get $0)
+ ;; CHECK-BIN-NEXT: (local.get $1)
+ ;; CHECK-BIN-NEXT: (local.get $2)
+ ;; CHECK-BIN-NEXT: )
+ ;; CHECK-BIN-NEXT: )
+ (func $f16x8.relaxed_madd (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ (f16x8.relaxed_madd
+ (local.get $0)
+ (local.get $1)
+ (local.get $2)
+ )
+ )
+
+
+ ;; CHECK-TEXT: (func $f16x8.relaxed_nmadd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ ;; CHECK-TEXT-NEXT: (f16x8.relaxed_nmadd
+ ;; CHECK-TEXT-NEXT: (local.get $0)
+ ;; CHECK-TEXT-NEXT: (local.get $1)
+ ;; CHECK-TEXT-NEXT: (local.get $2)
+ ;; CHECK-TEXT-NEXT: )
+ ;; CHECK-TEXT-NEXT: )
+ ;; CHECK-BIN: (func $f16x8.relaxed_nmadd (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ ;; CHECK-BIN-NEXT: (f16x8.relaxed_nmadd
+ ;; CHECK-BIN-NEXT: (local.get $0)
+ ;; CHECK-BIN-NEXT: (local.get $1)
+ ;; CHECK-BIN-NEXT: (local.get $2)
+ ;; CHECK-BIN-NEXT: )
+ ;; CHECK-BIN-NEXT: )
+ (func $f16x8.relaxed_nmadd (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ (f16x8.relaxed_nmadd
+ (local.get $0)
+ (local.get $1)
+ (local.get $2)
+ )
+ )
)
;; CHECK-BIN-NODEBUG: (type $0 (func (param v128 v128) (result v128)))
;; CHECK-BIN-NODEBUG: (type $1 (func (param v128) (result v128)))
-;; CHECK-BIN-NODEBUG: (type $2 (func (param i32) (result f32)))
+;; CHECK-BIN-NODEBUG: (type $2 (func (param v128 v128 v128) (result v128)))
-;; CHECK-BIN-NODEBUG: (type $3 (func (param i32 f32)))
+;; CHECK-BIN-NODEBUG: (type $3 (func (param i32) (result f32)))
-;; CHECK-BIN-NODEBUG: (type $4 (func (param f32) (result v128)))
+;; CHECK-BIN-NODEBUG: (type $4 (func (param i32 f32)))
-;; CHECK-BIN-NODEBUG: (type $5 (func (param v128) (result f32)))
+;; CHECK-BIN-NODEBUG: (type $5 (func (param f32) (result v128)))
-;; CHECK-BIN-NODEBUG: (type $6 (func (param v128 f32) (result v128)))
+;; CHECK-BIN-NODEBUG: (type $6 (func (param v128) (result f32)))
+
+;; CHECK-BIN-NODEBUG: (type $7 (func (param v128 f32) (result v128)))
;; CHECK-BIN-NODEBUG: (memory $0 1 1)
-;; CHECK-BIN-NODEBUG: (func $0 (type $2) (param $0 i32) (result f32)
+;; CHECK-BIN-NODEBUG: (func $0 (type $3) (param $0 i32) (result f32)
;; CHECK-BIN-NODEBUG-NEXT: (f32.load_f16
;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
;; CHECK-BIN-NODEBUG-NEXT: )
;; CHECK-BIN-NODEBUG-NEXT: )
-;; CHECK-BIN-NODEBUG: (func $1 (type $3) (param $0 i32) (param $1 f32)
+;; CHECK-BIN-NODEBUG: (func $1 (type $4) (param $0 i32) (param $1 f32)
;; CHECK-BIN-NODEBUG-NEXT: (f32.store_f16
;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
;; CHECK-BIN-NODEBUG-NEXT: (local.get $1)
;; CHECK-BIN-NODEBUG-NEXT: )
;; CHECK-BIN-NODEBUG-NEXT: )
-;; CHECK-BIN-NODEBUG: (func $2 (type $4) (param $0 f32) (result v128)
+;; CHECK-BIN-NODEBUG: (func $2 (type $5) (param $0 f32) (result v128)
;; CHECK-BIN-NODEBUG-NEXT: (f16x8.splat
;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
;; CHECK-BIN-NODEBUG-NEXT: )
;; CHECK-BIN-NODEBUG-NEXT: )
-;; CHECK-BIN-NODEBUG: (func $3 (type $5) (param $0 v128) (result f32)
+;; CHECK-BIN-NODEBUG: (func $3 (type $6) (param $0 v128) (result f32)
;; CHECK-BIN-NODEBUG-NEXT: (f16x8.extract_lane 0
;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
;; CHECK-BIN-NODEBUG-NEXT: )
;; CHECK-BIN-NODEBUG-NEXT: )
-;; CHECK-BIN-NODEBUG: (func $4 (type $6) (param $0 v128) (param $1 f32) (result v128)
+;; CHECK-BIN-NODEBUG: (func $4 (type $7) (param $0 v128) (param $1 f32) (result v128)
;; CHECK-BIN-NODEBUG-NEXT: (f16x8.replace_lane 0
;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
;; CHECK-BIN-NODEBUG-NEXT: (local.get $1)
@@ -674,3 +724,19 @@
;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
;; CHECK-BIN-NODEBUG-NEXT: )
;; CHECK-BIN-NODEBUG-NEXT: )
+
+;; CHECK-BIN-NODEBUG: (func $26 (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+;; CHECK-BIN-NODEBUG-NEXT: (f16x8.relaxed_madd
+;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
+;; CHECK-BIN-NODEBUG-NEXT: (local.get $1)
+;; CHECK-BIN-NODEBUG-NEXT: (local.get $2)
+;; CHECK-BIN-NODEBUG-NEXT: )
+;; CHECK-BIN-NODEBUG-NEXT: )
+
+;; CHECK-BIN-NODEBUG: (func $27 (type $2) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+;; CHECK-BIN-NODEBUG-NEXT: (f16x8.relaxed_nmadd
+;; CHECK-BIN-NODEBUG-NEXT: (local.get $0)
+;; CHECK-BIN-NODEBUG-NEXT: (local.get $1)
+;; CHECK-BIN-NODEBUG-NEXT: (local.get $2)
+;; CHECK-BIN-NODEBUG-NEXT: )
+;; CHECK-BIN-NODEBUG-NEXT: )
diff --git a/test/spec/f16.wast b/test/spec/f16.wast
index 09ee9328b..d5de0c0e8 100644
--- a/test/spec/f16.wast
+++ b/test/spec/f16.wast
@@ -32,6 +32,8 @@
(func (export "f16x8.floor") (param $0 v128) (result v128) (f16x8.floor (local.get $0)))
(func (export "f16x8.trunc") (param $0 v128) (result v128) (f16x8.trunc (local.get $0)))
(func (export "f16x8.nearest") (param $0 v128) (result v128) (f16x8.nearest (local.get $0)))
+ (func (export "f16x8.relaxed_madd") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f16x8.relaxed_madd (local.get $0) (local.get $1) (local.get $2)))
+ (func (export "f16x8.relaxed_nmadd") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f16x8.relaxed_nmadd (local.get $0) (local.get $1) (local.get $2)))
)
(assert_return (invoke "f32.load_f16") (f32.const 42.0))
@@ -187,3 +189,30 @@
(v128.const i16x8 0x7e00 0 0x7c00 0xfc00 0xbc00 0x3c00 0x3e00 0x3ccd))
;; nan 0 inf -inf -1 1 2 1
(v128.const i16x8 0x7e00 0 0x7c00 0xfc00 0xbc00 0x3c00 0x4000 0x3c00))
+;; ternary operations
+(assert_return (invoke "f16x8.relaxed_madd"
+ ;; Lane 0 illustrates the difference between fused/unfused. e.g.
+ ;; fused: (positive overflow) + -inf = -inf
+ ;; unfused: (inf) + -inf = NaN
+ ;;
+ ;; 1e4 inf -1 0 1 1.5 -2 1
+ (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0xc000 0x3c00)
+ ;; 1e4 inf -1 0 1 1.5 4 1
+ (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0x4400 0x3c00)
+ ;; -inf inf -1 0 1 2 1 -1
+ (v128.const i16x8 0xfc00 0x7c00 0xbc00 0 0x3c00 0x4000 0x3c00 0xbc00))
+ ;; -inf inf 0 0 2 4.25 -7 0
+ (v128.const i16x8 0xfc00 0x7c00 0 0 0x4000 0x4440 0xc700 0))
+(assert_return (invoke "f16x8.relaxed_nmadd"
+ ;; Lane 0 illustrates the difference between fused/unfused. e.g.
+ ;; fused: -(positive overflow) + inf = inf
+ ;; unfused: (-inf) + inf = NaN
+ ;;
+ ;; 1e4 -inf -1 0 1 1.5 -2 1
+ (v128.const i16x8 0x70e2 0xfc00 0xbc00 0 0x3c00 0x3e00 0xc000 0x3c00)
+ ;; 1e4 inf -1 0 1 1.5 4 1
+ (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0x4400 0x3c00)
+ ;; inf inf -1 0 1 2 1 -1
+ (v128.const i16x8 0x7c00 0x7c00 0xbc00 0 0x3c00 0x4000 0x3c00 0xbc00))
+ ;; inf inf -2 0 0 -0.25 9 -2
+ (v128.const i16x8 0x7c00 0x7c00 0xc000 0 0 0xb400 0x4880 0xc000))