2 files changed, 98 insertions, 31 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index af7369994..44078545b 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -2178,37 +2178,49 @@ struct OptimizeInstructions
 
     Builder builder(*getModule());
 
-    // Parallel to the code in visitRefCast
-    switch (GCTypeUtils::evaluateCastCheck(curr->ref->type, curr->castType)) {
-      case GCTypeUtils::Unknown:
-        break;
-      case GCTypeUtils::Success:
-        replaceCurrent(builder.makeBlock(
-          {builder.makeDrop(curr->ref), builder.makeConst(int32_t(1))}));
-        break;
-      case GCTypeUtils::Unreachable:
-        // Make sure to emit a block with the same type as us, to avoid other
-        // code in this pass needing to handle unexpected unreachable code
-        // (which is only properly propagated at the end of this pass when we
-        // refinalize).
-        replaceCurrent(builder.makeBlock(
-          {builder.makeDrop(curr->ref), builder.makeUnreachable()}, Type::i32));
-        break;
-      case GCTypeUtils::Failure:
-        replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
-                                            builder.makeConst(int32_t(0))));
-        break;
-      case GCTypeUtils::SuccessOnlyIfNull:
-        replaceCurrent(builder.makeRefIsNull(curr->ref));
-        break;
-      case GCTypeUtils::SuccessOnlyIfNonNull:
-        // This adds an EqZ, but code size does not regress since ref.test also
-        // encodes a type, and ref.is_null does not. The EqZ may also add some
-        // work, but a cast is likely more expensive than a null check + a fast
-        // int operation.
-        replaceCurrent(
-          builder.makeUnary(EqZInt32, builder.makeRefIsNull(curr->ref)));
-        break;
+    // Parallel to the code in visitRefCast: we look not just at the final type
+    // we are given, but at fallthrough values as well.
+    auto* ref = curr->ref;
+    while (1) {
+      switch (GCTypeUtils::evaluateCastCheck(ref->type, curr->castType)) {
+        case GCTypeUtils::Unknown:
+          break;
+        case GCTypeUtils::Success:
+          replaceCurrent(builder.makeBlock(
+            {builder.makeDrop(curr->ref), builder.makeConst(int32_t(1))}));
+          return;
+        case GCTypeUtils::Unreachable:
+          // Make sure to emit a block with the same type as us, to avoid other
+          // code in this pass needing to handle unexpected unreachable code
+          // (which is only properly propagated at the end of this pass when we
+          // refinalize).
+          replaceCurrent(builder.makeBlock(
+            {builder.makeDrop(curr->ref), builder.makeUnreachable()},
+            Type::i32));
+          return;
+        case GCTypeUtils::Failure:
+          replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
+                                              builder.makeConst(int32_t(0))));
+          return;
+        case GCTypeUtils::SuccessOnlyIfNull:
+          replaceCurrent(builder.makeRefIsNull(curr->ref));
+          return;
+        case GCTypeUtils::SuccessOnlyIfNonNull:
+          // This adds an EqZ, but code size does not regress since ref.test
+          // also encodes a type, and ref.is_null does not. The EqZ may also add
+          // some work, but a cast is likely more expensive than a null check +
+          // a fast int operation.
+          replaceCurrent(
+            builder.makeUnary(EqZInt32, builder.makeRefIsNull(curr->ref)));
+          return;
+      }
+
+      auto* fallthrough = Properties::getImmediateFallthrough(
+        ref, getPassOptions(), *getModule());
+      if (fallthrough == ref) {
+        return;
+      }
+      ref = fallthrough;
     }
   }
 
diff --git a/test/lit/passes/optimize-instructions-gc.wast b/test/lit/passes/optimize-instructions-gc.wast
index c55ca8b05..c581c7e42 100644
--- a/test/lit/passes/optimize-instructions-gc.wast
+++ b/test/lit/passes/optimize-instructions-gc.wast
@@ -2426,6 +2426,61 @@
     )
   )
 
+  ;; CHECK:      (func $ref.test-fallthrough (type $void)
+  ;; CHECK-NEXT:  (local $A (ref $A))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.test $B
+  ;; CHECK-NEXT:    (local.tee $A
+  ;; CHECK-NEXT:     (struct.new $A
+  ;; CHECK-NEXT:      (i32.const 10)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.tee $A
+  ;; CHECK-NEXT:      (struct.new $B
+  ;; CHECK-NEXT:       (i32.const 20)
+  ;; CHECK-NEXT:       (i32.const 30)
+  ;; CHECK-NEXT:       (f32.const 40.5)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $ref.test-fallthrough
+    (local $A (ref $A))
+    ;; This test will fail at runtime, as the value is a struct.new of $A and
+    ;; not $B. But this pass does not track exact types, so it assumes the test
+    ;; may succeed and leaves it as is (GUFA can optimize this, however).
+    (drop
+      (ref.test $B
+        (local.tee $A
+          (struct.new $A
+            (i32.const 10)
+          )
+        )
+      )
+    )
+    ;; This test will succeed: the tee has the parent type $A, but the value
+    ;; that falls through it is a struct.new of $B, which the pass looks at.
+    (drop
+      (ref.test $B
+        (local.tee $A
+          (struct.new $B
+            (i32.const 20)
+            (i32.const 30)
+            (f32.const 40.50)
+          )
+        )
+      )
+    )
+  )
+
   ;; CHECK:      (func $ref.test-then-optimizeAddedConstants (type $none_=>_i32) (result i32)
   ;; CHECK-NEXT:  (i32.add
   ;; CHECK-NEXT:   (block