OptimizeInstructions: Loop on fallthrough values in RefTest (#5797)

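This parallels the code in visitRefCast. Previously we only looked at the static type of the value reaching the ref.test (curr->ref->type), but the types of intermediate fallthrough values can let us optimize too. In particular, we were not optimizing (ref.test (local.tee ..)) if the tee was to a local of a less-refined type than the value being teed, e.g. a (struct.new $B) teed into a local of the parent type $A.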
author: Alon Zakai <azakai@google.com> 2023-07-05 13:12:26 -0700
committer: GitHub <noreply@github.com> 2023-07-05 13:12:26 -0700
commit: f963083cfb7ca42d76928f52889faf4aeaec54cf (patch)
tree: 3076145eb9281c320cd0d3fc526482c349195b75
parent: ef7f98e50662374b17d88c149a2ba1c11f918e5c (diff)
download: binaryen-f963083cfb7ca42d76928f52889faf4aeaec54cf.tar.gz
binaryen-f963083cfb7ca42d76928f52889faf4aeaec54cf.tar.bz2
binaryen-f963083cfb7ca42d76928f52889faf4aeaec54cf.zip
2 files changed, 98 insertions, 31 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index af7369994..44078545b 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -2178,37 +2178,49 @@ struct OptimizeInstructions
 
     Builder builder(*getModule());
 
-    // Parallel to the code in visitRefCast
-    switch (GCTypeUtils::evaluateCastCheck(curr->ref->type, curr->castType)) {
-      case GCTypeUtils::Unknown:
-        break;
-      case GCTypeUtils::Success:
-        replaceCurrent(builder.makeBlock(
-          {builder.makeDrop(curr->ref), builder.makeConst(int32_t(1))}));
-        break;
-      case GCTypeUtils::Unreachable:
-        // Make sure to emit a block with the same type as us, to avoid other
-        // code in this pass needing to handle unexpected unreachable code
-        // (which is only properly propagated at the end of this pass when we
-        // refinalize).
-        replaceCurrent(builder.makeBlock(
-          {builder.makeDrop(curr->ref), builder.makeUnreachable()}, Type::i32));
-        break;
-      case GCTypeUtils::Failure:
-        replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
-                                            builder.makeConst(int32_t(0))));
-        break;
-      case GCTypeUtils::SuccessOnlyIfNull:
-        replaceCurrent(builder.makeRefIsNull(curr->ref));
-        break;
-      case GCTypeUtils::SuccessOnlyIfNonNull:
-        // This adds an EqZ, but code size does not regress since ref.test also
-        // encodes a type, and ref.is_null does not. The EqZ may also add some
-        // work, but a cast is likely more expensive than a null check + a fast
-        // int operation.
-        replaceCurrent(
-          builder.makeUnary(EqZInt32, builder.makeRefIsNull(curr->ref)));
-        break;
+    // Parallel to the code in visitRefCast: we look not just at the final type
+    // we are given, but at fallthrough values as well.
+    auto* ref = curr->ref;
+    while (1) {
+      switch (GCTypeUtils::evaluateCastCheck(ref->type, curr->castType)) {
+        case GCTypeUtils::Unknown:
+          break;
+        case GCTypeUtils::Success:
+          replaceCurrent(builder.makeBlock(
+            {builder.makeDrop(curr->ref), builder.makeConst(int32_t(1))}));
+          return;
+        case GCTypeUtils::Unreachable:
+          // Make sure to emit a block with the same type as us, to avoid other
+          // code in this pass needing to handle unexpected unreachable code
+          // (which is only properly propagated at the end of this pass when we
+          // refinalize).
+          replaceCurrent(builder.makeBlock(
+            {builder.makeDrop(curr->ref), builder.makeUnreachable()},
+            Type::i32));
+          return;
+        case GCTypeUtils::Failure:
+          replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
+                                              builder.makeConst(int32_t(0))));
+          return;
+        case GCTypeUtils::SuccessOnlyIfNull:
+          replaceCurrent(builder.makeRefIsNull(curr->ref));
+          return;
+        case GCTypeUtils::SuccessOnlyIfNonNull:
+          // This adds an EqZ, but code size does not regress since ref.test
+          // also encodes a type, and ref.is_null does not. The EqZ may also add
+          // some work, but a cast is likely more expensive than a null check +
+          // a fast int operation.
+          replaceCurrent(
+            builder.makeUnary(EqZInt32, builder.makeRefIsNull(curr->ref)));
+          return;
+      }
+
+      auto* fallthrough = Properties::getImmediateFallthrough(
+        ref, getPassOptions(), *getModule());
+      if (fallthrough == ref) {
+        return;
+      }
+      ref = fallthrough;
     }
   }
 
diff --git a/test/lit/passes/optimize-instructions-gc.wast b/test/lit/passes/optimize-instructions-gc.wast
index c55ca8b05..c581c7e42 100644
--- a/test/lit/passes/optimize-instructions-gc.wast
+++ b/test/lit/passes/optimize-instructions-gc.wast
@@ -2426,6 +2426,61 @@
     )
   )
 
+  ;; CHECK:      (func $ref.test-fallthrough (type $void)
+  ;; CHECK-NEXT:  (local $A (ref $A))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.test $B
+  ;; CHECK-NEXT:    (local.tee $A
+  ;; CHECK-NEXT:     (struct.new $A
+  ;; CHECK-NEXT:      (i32.const 10)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.tee $A
+  ;; CHECK-NEXT:      (struct.new $B
+  ;; CHECK-NEXT:       (i32.const 20)
+  ;; CHECK-NEXT:       (i32.const 30)
+  ;; CHECK-NEXT:       (f32.const 40.5)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $ref.test-fallthrough
+    (local $A (ref $A))
+    ;; The test will fail, but this pass does not have exact type info, so it
+    ;; thinks it can succeed and nothing happens here (GUFA can optimize this,
+    ;; however).
+    (drop
+      (ref.test $B
+        (local.tee $A
+          (struct.new $A
+            (i32.const 10)
+          )
+        )
+      )
+    )
+    ;; This test will succeed, even though we tee to the parent type in the
+    ;; middle.
+    (drop
+      (ref.test $B
+        (local.tee $A
+          (struct.new $B
+            (i32.const 20)
+            (i32.const 30)
+            (f32.const 40.50)
+          )
+        )
+      )
+    )
+  )
+
   ;; CHECK:      (func $ref.test-then-optimizeAddedConstants (type $none_=>_i32) (result i32)
   ;; CHECK-NEXT:  (i32.add
   ;; CHECK-NEXT:   (block
author	Alon Zakai <azakai@google.com>	2023-07-05 13:12:26 -0700
committer	GitHub <noreply@github.com>	2023-07-05 13:12:26 -0700
commit	f963083cfb7ca42d76928f52889faf4aeaec54cf (patch)
tree	3076145eb9281c320cd0d3fc526482c349195b75
parent	ef7f98e50662374b17d88c149a2ba1c11f918e5c (diff)
download	binaryen-f963083cfb7ca42d76928f52889faf4aeaec54cf.tar.gz binaryen-f963083cfb7ca42d76928f52889faf4aeaec54cf.tar.bz2 binaryen-f963083cfb7ca42d76928f52889faf4aeaec54cf.zip