3 files changed, 35 insertions, 6 deletions
diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp
index c76856d15..e332db305 100644
--- a/src/wasm/literal.cpp
+++ b/src/wasm/literal.cpp
@@ -1674,23 +1674,25 @@ Literal Literal::copysign(const Literal& other) const {
 Literal Literal::madd(const Literal& left, const Literal& right) const {
   switch (type.getBasic()) {
     case Type::f32:
-      return Literal(::fmaf(left.getf32(), right.getf32(), getf32()));
+      return Literal(::fmaf(getf32(), left.getf32(), right.getf32()));
       break;
     case Type::f64:
-      return Literal(::fma(left.getf64(), right.getf64(), getf64()));
+      return Literal(::fma(getf64(), left.getf64(), right.getf64()));
       break;
     default:
       WASM_UNREACHABLE("unexpected type");
   }
 }
 
+// XXX: This is not an actual fused negated multiply-add implementation, but
+// the relaxed spec allows a double-rounding implementation like the one below.
 Literal Literal::nmadd(const Literal& left, const Literal& right) const {
   switch (type.getBasic()) {
     case Type::f32:
-      return Literal(::fmaf(-left.getf32(), right.getf32(), getf32()));
+      return Literal(-(getf32() * left.getf32()) + right.getf32());
       break;
     case Type::f64:
-      return Literal(::fma(-left.getf64(), right.getf64(), getf64()));
+      return Literal(-(getf64() * left.getf64()) + right.getf64());
       break;
     default:
       WASM_UNREACHABLE("unexpected type");
@@ -2749,19 +2751,32 @@ Literal Literal::swizzleI8x16(const Literal& other) const {
 namespace {
 template<int Lanes,
          LaneArray<Lanes> (Literal::*IntoLanes)() const,
-         Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const>
+         Literal (Literal::*TernaryOp)(const Literal&, const Literal&) const,
+         Literal (*Convert)(const Literal&) = passThrough>
 static Literal ternary(const Literal& a, const Literal& b, const Literal& c) {
   LaneArray<Lanes> x = (a.*IntoLanes)();
   LaneArray<Lanes> y = (b.*IntoLanes)();
   LaneArray<Lanes> z = (c.*IntoLanes)();
   LaneArray<Lanes> r;
   for (size_t i = 0; i < Lanes; ++i) {
-    r[i] = (x[i].*TernaryOp)(y[i], z[i]);
+    r[i] = Convert((x[i].*TernaryOp)(y[i], z[i]));
   }
   return Literal(r);
 }
 } // namespace
 
+Literal Literal::relaxedMaddF16x8(const Literal& left,
+                                  const Literal& right) const {
+  return ternary<8, &Literal::getLanesF16x8, &Literal::madd, &toFP16>(
+    *this, left, right);
+}
+
+Literal Literal::relaxedNmaddF16x8(const Literal& left,
+                                   const Literal& right) const {
+  return ternary<8, &Literal::getLanesF16x8, &Literal::nmadd, &toFP16>(
+    *this, left, right);
+}
+
 Literal Literal::relaxedMaddF32x4(const Literal& left,
                                   const Literal& right) const {
   return ternary<4, &Literal::getLanesF32x4, &Literal::madd>(
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 8c684dc2f..542eab4b4 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -6823,6 +6823,14 @@ bool WasmBinaryReader::maybeVisitSIMDTernary(Expression*& out, uint32_t code) {
       curr = allocator.alloc<SIMDTernary>();
       curr->op = LaneselectI64x2;
       break;
+    case BinaryConsts::F16x8RelaxedMadd:
+      curr = allocator.alloc<SIMDTernary>();
+      curr->op = RelaxedMaddVecF16x8;
+      break;
+    case BinaryConsts::F16x8RelaxedNmadd:
+      curr = allocator.alloc<SIMDTernary>();
+      curr->op = RelaxedNmaddVecF16x8;
+      break;
     case BinaryConsts::F32x4RelaxedMadd:
       curr = allocator.alloc<SIMDTernary>();
       curr->op = RelaxedMaddVecF32x4;
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 140205b85..ac79d0135 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -657,6 +657,12 @@ void BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) {
     case LaneselectI64x2:
       o << U32LEB(BinaryConsts::I64x2Laneselect);
       break;
+    case RelaxedMaddVecF16x8:
+      o << U32LEB(BinaryConsts::F16x8RelaxedMadd);
+      break;
+    case RelaxedNmaddVecF16x8:
+      o << U32LEB(BinaryConsts::F16x8RelaxedNmadd);
+      break;
     case RelaxedMaddVecF32x4:
       o << U32LEB(BinaryConsts::F32x4RelaxedMadd);
       break;