13 files changed, 275 insertions, 10 deletions
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc
index 2fa658e65..f4afd1f1b 100644
--- a/src/gen-s-parser.inc
+++ b/src/gen-s-parser.inc
@@ -309,6 +309,18 @@ switch (buf[0]) {
     switch (buf[1]) {
       case '1': {
         switch (buf[6]) {
+          case 'a':
+            if (op == "f16x8.add"sv) {
+              CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::AddVecF16x8));
+              return Ok{};
+            }
+            goto parse_error;
+          case 'd':
+            if (op == "f16x8.div"sv) {
+              CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::DivVecF16x8));
+              return Ok{};
+            }
+            goto parse_error;
           case 'e': {
             switch (buf[7]) {
               case 'q':
@@ -360,24 +372,75 @@ switch (buf[0]) {
               default: goto parse_error;
             }
           }
+          case 'm': {
+            switch (buf[7]) {
+              case 'a':
+                if (op == "f16x8.max"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::MaxVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              case 'i':
+                if (op == "f16x8.min"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::MinVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              case 'u':
+                if (op == "f16x8.mul"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::MulVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              default: goto parse_error;
+            }
+          }
           case 'n':
             if (op == "f16x8.ne"sv) {
               CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::NeVecF16x8));
               return Ok{};
             }
             goto parse_error;
+          case 'p': {
+            switch (buf[8]) {
+              case 'a':
+                if (op == "f16x8.pmax"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::PMaxVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              case 'i':
+                if (op == "f16x8.pmin"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::PMinVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              default: goto parse_error;
+            }
+          }
           case 'r':
             if (op == "f16x8.replace_lane"sv) {
               CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8));
               return Ok{};
             }
             goto parse_error;
-          case 's':
-            if (op == "f16x8.splat"sv) {
-              CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::SplatVecF16x8));
-              return Ok{};
+          case 's': {
+            switch (buf[7]) {
+              case 'p':
+                if (op == "f16x8.splat"sv) {
+                  CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::SplatVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              case 'u':
+                if (op == "f16x8.sub"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::SubVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              default: goto parse_error;
             }
-            goto parse_error;
+          }
           default: goto parse_error;
         }
       }
diff --git a/src/ir/child-typer.h b/src/ir/child-typer.h
index 725bc842e..fe65b209b 100644
--- a/src/ir/child-typer.h
+++ b/src/ir/child-typer.h
@@ -627,6 +627,14 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
       case ExtMulHighSVecI64x2:
       case ExtMulLowUVecI64x2:
       case ExtMulHighUVecI64x2:
+      case AddVecF16x8:
+      case SubVecF16x8:
+      case MulVecF16x8:
+      case DivVecF16x8:
+      case MinVecF16x8:
+      case MaxVecF16x8:
+      case PMinVecF16x8:
+      case PMaxVecF16x8:
       case AddVecF32x4:
       case SubVecF32x4:
       case MulVecF32x4:
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 11370acab..0b2b6315b 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -475,6 +475,22 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
       case ExtMulHighSVecI64x2:
       case ExtMulLowUVecI64x2:
       case ExtMulHighUVecI64x2:
+      case AddVecF16x8:
+      case SubVecF16x8:
+        ret = 1;
+        break;
+      case MulVecF16x8:
+        ret = 2;
+        break;
+      case DivVecF16x8:
+        ret = 3;
+        break;
+      case MinVecF16x8:
+      case MaxVecF16x8:
+      case PMinVecF16x8:
+      case PMaxVecF16x8:
+        ret = 1;
+        break;
       case AddVecF32x4:
       case SubVecF32x4:
         ret = 1;
diff --git a/src/literal.h b/src/literal.h
index dd6247d00..882a99027 100644
--- a/src/literal.h
+++ b/src/literal.h
@@ -382,6 +382,7 @@ public:
   Literal convertUIToF32() const;
   Literal convertSIToF64() const;
   Literal convertUIToF64() const;
+  Literal convertF32ToF16() const;
 
   Literal truncSatToSI32() const;
   Literal truncSatToSI64() const;
@@ -618,6 +619,14 @@ public:
   Literal extMulHighSI64x2(const Literal& other) const;
   Literal extMulLowUI64x2(const Literal& other) const;
   Literal extMulHighUI64x2(const Literal& other) const;
+  Literal addF16x8(const Literal& other) const;
+  Literal subF16x8(const Literal& other) const;
+  Literal mulF16x8(const Literal& other) const;
+  Literal divF16x8(const Literal& other) const;
+  Literal minF16x8(const Literal& other) const;
+  Literal maxF16x8(const Literal& other) const;
+  Literal pminF16x8(const Literal& other) const;
+  Literal pmaxF16x8(const Literal& other) const;
   Literal absF32x4() const;
   Literal negF32x4() const;
   Literal sqrtF32x4() const;
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 6350347c4..4986982c7 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -1901,6 +1901,31 @@ struct PrintExpressionContents
         o << "i64x2.extmul_high_i32x4_u";
         break;
 
+      case AddVecF16x8:
+        o << "f16x8.add";
+        break;
+      case SubVecF16x8:
+        o << "f16x8.sub";
+        break;
+      case MulVecF16x8:
+        o << "f16x8.mul";
+        break;
+      case DivVecF16x8:
+        o << "f16x8.div";
+        break;
+      case MinVecF16x8:
+        o << "f16x8.min";
+        break;
+      case MaxVecF16x8:
+        o << "f16x8.max";
+        break;
+      case PMinVecF16x8:
+        o << "f16x8.pmin";
+        break;
+      case PMaxVecF16x8:
+        o << "f16x8.pmax";
+        break;
+
       case AddVecF32x4:
         o << "f32x4.add";
         break;
diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp
index bebf4f989..a143d2ff2 100644
--- a/src/tools/fuzzing/fuzzing.cpp
+++ b/src/tools/fuzzing/fuzzing.cpp
@@ -3358,6 +3358,12 @@ Expression* TranslateToFuzzReader::makeBinary(Type type) {
                                DotSVecI16x8ToVecI32x4,
                                AddVecI64x2,
                                SubVecI64x2,
+                               AddVecF16x8,
+                               SubVecF16x8,
+                               MulVecF16x8,
+                               DivVecF16x8,
+                               MinVecF16x8,
+                               MaxVecF16x8,
                                AddVecF32x4,
                                SubVecF32x4,
                                MulVecF32x4,
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 8c1990224..1fbe7535c 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1064,6 +1064,14 @@ enum ASTNodes {
   F16x8Gt = 0x13a,
   F16x8Le = 0x13b,
   F16x8Ge = 0x13c,
+  F16x8Add = 0x13d, // FP16 arithmetic opcodes; contiguous after F16x8Ge (0x13c)
+  F16x8Sub = 0x13e,
+  F16x8Mul = 0x13f,
+  F16x8Div = 0x140,
+  F16x8Min = 0x141,
+  F16x8Max = 0x142, // was 0xe142 (typo): must sit between Min (0x141) and Pmin (0x143)
+  F16x8Pmin = 0x143,
+  F16x8Pmax = 0x144,
 
   // bulk memory opcodes
 
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index cbd2b31d6..81b755f9f 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1014,6 +1014,23 @@ public:
       case ExtMulHighUVecI64x2:
         return left.extMulHighUI64x2(right);
 
+      case AddVecF16x8:
+        return left.addF16x8(right);
+      case SubVecF16x8:
+        return left.subF16x8(right);
+      case MulVecF16x8:
+        return left.mulF16x8(right);
+      case DivVecF16x8:
+        return left.divF16x8(right);
+      case MinVecF16x8:
+        return left.minF16x8(right);
+      case MaxVecF16x8:
+        return left.maxF16x8(right);
+      case PMinVecF16x8:
+        return left.pminF16x8(right);
+      case PMaxVecF16x8:
+        return left.pmaxF16x8(right);
+
       case AddVecF32x4:
         return left.addF32x4(right);
       case SubVecF32x4:
diff --git a/src/wasm.h b/src/wasm.h
index 9d43a4f9f..6f82196b7 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -452,6 +452,14 @@ enum BinaryOp {
   ExtMulHighSVecI64x2,
   ExtMulLowUVecI64x2,
   ExtMulHighUVecI64x2,
+  AddVecF16x8,
+  SubVecF16x8,
+  MulVecF16x8,
+  DivVecF16x8,
+  MinVecF16x8,
+  MaxVecF16x8,
+  PMinVecF16x8,
+  PMaxVecF16x8,
   AddVecF32x4,
   SubVecF32x4,
   MulVecF32x4,
diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp
index d60e2f8a9..65c2b4e62 100644
--- a/src/wasm/literal.cpp
+++ b/src/wasm/literal.cpp
@@ -856,6 +856,10 @@ Literal Literal::convertUIToF64() const {
   WASM_UNREACHABLE("invalid type");
 }
 
+Literal Literal::convertF32ToF16() const { // wraps fp16_ieee_from_fp32_value(getf32()) in a Literal; shared by splatF16x8/replaceLaneF16x8
+  return Literal(fp16_ieee_from_fp32_value(getf32()));
+}
+
 template<typename F> struct AsInt { using type = void; };
 template<> struct AsInt<float> { using type = int32_t; };
 template<> struct AsInt<double> { using type = int64_t; };
@@ -1791,8 +1795,7 @@ Literal Literal::splatI16x8() const { return splat<Type::i32, 8>(*this); }
 Literal Literal::splatI32x4() const { return splat<Type::i32, 4>(*this); }
 Literal Literal::splatI64x2() const { return splat<Type::i64, 2>(*this); }
 Literal Literal::splatF16x8() const {
-  uint16_t f16 = fp16_ieee_from_fp32_value(getf32());
-  return splat<Type::i32, 8>(Literal(f16));
+  return splat<Type::i32, 8>(convertF32ToF16()); // the f32 operand is converted to its f16 form before being splatted
 }
 Literal Literal::splatF32x4() const { return splat<Type::f32, 4>(*this); }
 Literal Literal::splatF64x2() const { return splat<Type::f64, 2>(*this); }
@@ -1848,7 +1851,7 @@ Literal Literal::replaceLaneI64x2(const Literal& other, uint8_t index) const {
 }
 Literal Literal::replaceLaneF16x8(const Literal& other, uint8_t index) const {
   return replace<8, &Literal::getLanesF16x8>(
-    *this, Literal(fp16_ieee_from_fp32_value(other.getf32())), index);
+    *this, other.convertF32ToF16(), index); // `other` is converted from f32 to its f16 form before replacing the lane
 }
 Literal Literal::replaceLaneF32x4(const Literal& other, uint8_t index) const {
   return replace<4, &Literal::getLanesF32x4>(*this, other, index);
@@ -2286,14 +2289,20 @@ Literal Literal::geF64x2(const Literal& other) const {
                                                                     other);
 }
 
+static Literal passThrough(const Literal& literal) { return literal; } // identity; the default Convert argument of binary<>
+static Literal toFP16(const Literal& literal) { // Convert argument used by the F16x8 ops: forwards to convertF32ToF16()
+  return literal.convertF32ToF16();
+}
+
 template<int Lanes,
          LaneArray<Lanes> (Literal::*IntoLanes)() const,
-         Literal (Literal::*BinaryOp)(const Literal&) const>
+         Literal (Literal::*BinaryOp)(const Literal&) const,
+         Literal (*Convert)(const Literal&) = passThrough> // optional per-lane post-processing hook; defaults to the identity
 static Literal binary(const Literal& val, const Literal& other) {
   LaneArray<Lanes> lanes = (val.*IntoLanes)();
   LaneArray<Lanes> other_lanes = (other.*IntoLanes)();
   for (size_t i = 0; i < Lanes; ++i) {
-    lanes[i] = (lanes[i].*BinaryOp)(other_lanes[i]);
+    lanes[i] = Convert((lanes[i].*BinaryOp)(other_lanes[i])); // apply the scalar op to lane i, then run the result through Convert
   }
   return Literal(lanes);
 }
@@ -2418,6 +2427,38 @@ Literal Literal::subI64x2(const Literal& other) const {
 Literal Literal::mulI64x2(const Literal& other) const {
   return binary<2, &Literal::getLanesI64x2, &Literal::mul>(*this, other);
 }
+Literal Literal::addF16x8(const Literal& other) const { // F16x8 family: per-lane scalar op over getLanesF16x8(), each result passed through toFP16
+  return binary<8, &Literal::getLanesF16x8, &Literal::add, &toFP16>(*this,
+                                                                    other);
+}
+Literal Literal::subF16x8(const Literal& other) const {
+  return binary<8, &Literal::getLanesF16x8, &Literal::sub, &toFP16>(*this,
+                                                                    other);
+}
+Literal Literal::mulF16x8(const Literal& other) const {
+  return binary<8, &Literal::getLanesF16x8, &Literal::mul, &toFP16>(*this,
+                                                                    other);
+}
+Literal Literal::divF16x8(const Literal& other) const {
+  return binary<8, &Literal::getLanesF16x8, &Literal::div, &toFP16>(*this,
+                                                                    other);
+}
+Literal Literal::minF16x8(const Literal& other) const {
+  return binary<8, &Literal::getLanesF16x8, &Literal::min, &toFP16>(*this,
+                                                                    other);
+}
+Literal Literal::maxF16x8(const Literal& other) const {
+  return binary<8, &Literal::getLanesF16x8, &Literal::max, &toFP16>(*this,
+                                                                    other);
+}
+Literal Literal::pminF16x8(const Literal& other) const { // pmin/pmax follow the same pattern, using Literal::pmin / Literal::pmax
+  return binary<8, &Literal::getLanesF16x8, &Literal::pmin, &toFP16>(*this,
+                                                                     other);
+}
+Literal Literal::pmaxF16x8(const Literal& other) const {
+  return binary<8, &Literal::getLanesF16x8, &Literal::pmax, &toFP16>(*this,
+                                                                     other);
+}
 Literal Literal::addF32x4(const Literal& other) const {
   return binary<4, &Literal::getLanesF32x4, &Literal::add>(*this, other);
 }
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 865ca39ca..e84639801 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -6174,6 +6174,38 @@ bool WasmBinaryReader::maybeVisitSIMDBinary(Expression*& out, uint32_t code) {
       curr = allocator.alloc<Binary>();
       curr->op = ExtMulHighUVecI64x2;
       break;
+    case BinaryConsts::F16x8Add:
+      curr = allocator.alloc<Binary>();
+      curr->op = AddVecF16x8;
+      break;
+    case BinaryConsts::F16x8Sub:
+      curr = allocator.alloc<Binary>();
+      curr->op = SubVecF16x8;
+      break;
+    case BinaryConsts::F16x8Mul:
+      curr = allocator.alloc<Binary>();
+      curr->op = MulVecF16x8;
+      break;
+    case BinaryConsts::F16x8Div:
+      curr = allocator.alloc<Binary>();
+      curr->op = DivVecF16x8;
+      break;
+    case BinaryConsts::F16x8Min:
+      curr = allocator.alloc<Binary>();
+      curr->op = MinVecF16x8;
+      break;
+    case BinaryConsts::F16x8Max:
+      curr = allocator.alloc<Binary>();
+      curr->op = MaxVecF16x8;
+      break;
+    case BinaryConsts::F16x8Pmin:
+      curr = allocator.alloc<Binary>();
+      curr->op = PMinVecF16x8;
+      break;
+    case BinaryConsts::F16x8Pmax:
+      curr = allocator.alloc<Binary>();
+      curr->op = PMaxVecF16x8;
+      break;
     case BinaryConsts::F32x4Add:
       curr = allocator.alloc<Binary>();
       curr->op = AddVecF32x4;
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 1c2c2c42b..b7bfea617 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -1873,6 +1873,30 @@ void BinaryInstWriter::visitBinary(Binary* curr) {
         << U32LEB(BinaryConsts::I64x2ExtmulHighI32x4U);
       break;
 
+    case AddVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Add);
+      break;
+    case SubVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Sub);
+      break;
+    case MulVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Mul);
+      break;
+    case DivVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Div);
+      break;
+    case MinVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Min);
+      break;
+    case MaxVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Max);
+      break;
+    case PMinVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Pmin);
+      break;
+    case PMaxVecF16x8:
+      o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Pmax);
+      break;
     case AddVecF32x4:
       o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F32x4Add);
       break;
diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp
index 0bdc18658..24f5379fb 100644
--- a/src/wasm/wasm-validator.cpp
+++ b/src/wasm/wasm-validator.cpp
@@ -1813,6 +1813,14 @@ void FunctionValidator::visitBinary(Binary* curr) {
     case ExtMulHighSVecI64x2:
     case ExtMulLowUVecI64x2:
     case ExtMulHighUVecI64x2:
+    case AddVecF16x8:
+    case SubVecF16x8:
+    case MulVecF16x8:
+    case DivVecF16x8:
+    case MinVecF16x8:
+    case MaxVecF16x8:
+    case PMinVecF16x8:
+    case PMaxVecF16x8:
     case AddVecF32x4:
     case SubVecF32x4:
     case MulVecF32x4: