7 files changed, 239 insertions, 12 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index f21747370..b468755ff 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -1510,6 +1510,7 @@ opt_choices = [
     ("--gsi",),
     ("--gto",),
     ("--gufa",),
+    ("--gufa-cast-all",),
     ("--gufa-optimizing",),
     ("--local-cse",),
     ("--heap2local",),
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp
index 8f58eaf8a..d07a4539b 100644
--- a/src/passes/GUFA.cpp
+++ b/src/passes/GUFA.cpp
@@ -33,6 +33,9 @@
 // such followup opts automatically in functions where we make changes, and so
 // it is useful if GUFA is run near the end of the optimization pipeline.
 //
+// A variation of this pass will add casts anywhere we can infer a more specific
+// type, see |castAll| below.
+//
 // TODO: GUFA + polymorphic devirtualization + traps-never-happen. If we see
 //       that the possible call targets are {A, B, C}, and GUFA info lets us
 //       prove that A, C will trap if called - say, if they cast the first
@@ -58,13 +61,27 @@ struct GUFAOptimizer
   bool isFunctionParallel() override { return true; }
 
   ContentOracle& oracle;
+
+  // Whether to run further optimizations in functions we modify.
   bool optimizing;
 
-  GUFAOptimizer(ContentOracle& oracle, bool optimizing)
-    : oracle(oracle), optimizing(optimizing) {}
+  // Whether to add casts to all things that we have inferred a more refined
+  // type for. This increases code size immediately, but later optimizations
+  // generally benefit enough from these casts that overall code size actually
+  // decreases, even if some of these casts remain. However, aside from code
+  // size there may be an increase in the number of casts performed at runtime,
+  // so benchmark carefully.
+  // TODO: Add a pass to remove casts not needed for validation, which users
+  //       could run at the very end. However, even with such a pass we may end
+  //       up with casts that are needed for validation that were not present
+  //       before.
+  bool castAll;
+
+  GUFAOptimizer(ContentOracle& oracle, bool optimizing, bool castAll)
+    : oracle(oracle), optimizing(optimizing), castAll(castAll) {}
 
   std::unique_ptr<Pass> create() override {
-    return std::make_unique<GUFAOptimizer>(oracle, optimizing);
+    return std::make_unique<GUFAOptimizer>(oracle, optimizing, castAll);
   }
 
   bool optimized = false;
@@ -265,7 +282,7 @@ struct GUFAOptimizer
       //
       // Note that we could in principle apply this in all expressions by adding
       // a cast. However, to be careful with code size, we only refine existing
-      // casts for now.
+      // casts here. See addNewCasts() for where we add entirely new casts.
       curr->type = inferredType;
     }
 
@@ -284,14 +301,23 @@ struct GUFAOptimizer
   //       information about parents.
 
   void visitFunction(Function* func) {
+    if (optimized) {
+      // Optimization may introduce more unreachables, which we need to
+      // propagate.
+      ReFinalize().walkFunctionInModule(func, getModule());
+    }
+
+    // Potentially add new casts after we do our first pass of optimizations +
+    // refinalize (doing it after refinalizing lets us add as few new casts as
+    // possible).
+    if (castAll && addNewCasts(func)) {
+      optimized = true;
+    }
+
     if (!optimized) {
       return;
     }
 
-    // Optimization may introduce more unreachables, which we need to
-    // propagate.
-    ReFinalize().walkFunctionInModule(func, getModule());
-
     // We may add blocks around pops, which we must fix up.
     EHUtils::handleBlockNestedPops(func, *getModule());
 
@@ -333,22 +359,64 @@ struct GUFAOptimizer
     runner.add("vacuum");
     runner.runOnFunction(func);
   }
+
+  // Add a new cast whenever we know a value contains a more refined type than
+  // in the IR. Returns whether we optimized anything.
+  bool addNewCasts(Function* func) {
+    // Subtyping and casts only make sense if GC is enabled.
+    if (!getModule()->features.hasGC()) {
+      return false;
+    }
+
+    struct Adder : public PostWalker<Adder, UnifiedExpressionVisitor<Adder>> {
+      GUFAOptimizer& parent;
+
+      Adder(GUFAOptimizer& parent) : parent(parent) {}
+
+      bool optimized = false;
+
+      void visitExpression(Expression* curr) {
+        if (!curr->type.isRef()) {
+          // Casts only apply to references, so ignore anything else.
+          return;
+        }
+
+        auto oracleType = parent.getContents(curr).getType();
+        if (oracleType.isRef() && oracleType != curr->type &&
+            Type::isSubType(oracleType, curr->type)) {
+          replaceCurrent(Builder(*getModule()).makeRefCast(curr, oracleType));
+          optimized = true;
+        }
+      }
+    };
+
+    Adder adder(*this);
+    adder.walkFunctionInModule(func, getModule());
+    if (adder.optimized) {
+      ReFinalize().walkFunctionInModule(func, getModule());
+      return true;
+    }
+    return false;
+  }
 };
 
 struct GUFAPass : public Pass {
   bool optimizing;
+  bool castAll;
 
-  GUFAPass(bool optimizing) : optimizing(optimizing) {}
+  GUFAPass(bool optimizing, bool castAll)
+    : optimizing(optimizing), castAll(castAll) {}
 
   void run(Module* module) override {
     ContentOracle oracle(*module);
-    GUFAOptimizer(oracle, optimizing).run(getPassRunner(), module);
+    GUFAOptimizer(oracle, optimizing, castAll).run(getPassRunner(), module);
   }
 };
 
 } // anonymous namespace
 
-Pass* createGUFAPass() { return new GUFAPass(false); }
-Pass* createGUFAOptimizingPass() { return new GUFAPass(true); }
+Pass* createGUFAPass() { return new GUFAPass(false, false); }
+Pass* createGUFAOptimizingPass() { return new GUFAPass(true, false); }
+Pass* createGUFACastAllPass() { return new GUFAPass(false, true); }
 
 } // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 2b89371c4..0d47cd78e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -187,6 +187,9 @@ void PassRegistry::registerPasses() {
                "information about what content can actually appear in each "
                "location",
                createGUFAPass);
+  registerPass("gufa-cast-all",
+               "GUFA plus add casts for all inferences",
+               createGUFACastAllPass);
   registerPass("gufa-optimizing",
                "GUFA plus local optimizations in functions we modified",
                createGUFAOptimizingPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 8cc092f98..fa6d98111 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -58,6 +58,7 @@ Pass* createGlobalRefiningPass();
 Pass* createGlobalStructInferencePass();
 Pass* createGlobalTypeOptimizationPass();
 Pass* createGUFAPass();
+Pass* createGUFACastAllPass();
 Pass* createGUFAOptimizingPass();
 Pass* createHeap2LocalPass();
 Pass* createI64ToI32LoweringPass();
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index d57e5fd1d..1aa794d3d 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -187,6 +187,9 @@
 ;; CHECK-NEXT:                                                 content can actually appear in
 ;; CHECK-NEXT:                                                 each location
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --gufa-cast-all                               GUFA plus add casts for all
+;; CHECK-NEXT:                                                 inferences
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --gufa-optimizing                             GUFA plus local optimizations in
 ;; CHECK-NEXT:                                                 functions we modified
 ;; CHECK-NEXT:
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 2994a1f59..32dda7f4c 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -146,6 +146,9 @@
 ;; CHECK-NEXT:                                                 content can actually appear in
 ;; CHECK-NEXT:                                                 each location
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --gufa-cast-all                               GUFA plus add casts for all
+;; CHECK-NEXT:                                                 inferences
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --gufa-optimizing                             GUFA plus local optimizations in
 ;; CHECK-NEXT:                                                 functions we modified
 ;; CHECK-NEXT:
diff --git a/test/lit/passes/gufa-cast-all.wast b/test/lit/passes/gufa-cast-all.wast
new file mode 100644
index 000000000..b57b996a0
--- /dev/null
+++ b/test/lit/passes/gufa-cast-all.wast
@@ -0,0 +1,148 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt -all --gufa-cast-all -S -o - | filecheck %s
+
+(module
+  ;; CHECK:      (type $none_=>_none (func))
+  (type $none_=>_none (func))
+
+  ;; CHECK:      (type $A (struct ))
+  (type $A (struct))
+
+  ;; CHECK:      (type $B (sub $A (struct )))
+  (type $B (sub $A (struct)))
+
+  ;; CHECK:      (type $none_=>_i32 (func (result i32)))
+
+  ;; CHECK:      (import "a" "b" (func $import (type $none_=>_i32) (result i32)))
+  (import "a" "b" (func $import (result i32)))
+
+  ;; CHECK:      (elem declare func $func $funcs)
+
+  ;; CHECK:      (export "export1" (func $ref))
+
+  ;; CHECK:      (export "export2" (func $int))
+
+  ;; CHECK:      (export "export3" (func $func))
+
+  ;; CHECK:      (export "export4" (func $funcs))
+
+  ;; CHECK:      (export "export5" (func $unreachable))
+
+  ;; CHECK:      (func $ref (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a (ref $A))
+  ;; CHECK-NEXT:  (local.set $a
+  ;; CHECK-NEXT:   (struct.new_default $B)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast $B
+  ;; CHECK-NEXT:    (local.get $a)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $ref (export "export1")
+    (local $a (ref $A))
+    (local.set $a
+      (struct.new $B)
+    )
+    (drop
+      ;; We can infer that this contains B, and add a cast to that type.
+      (local.get $a)
+    )
+  )
+
+  ;; CHECK:      (func $int (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a i32)
+  ;; CHECK-NEXT:  (local.set $a
+  ;; CHECK-NEXT:   (i32.const 1)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 1)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $int (export "export2")
+    (local $a i32)
+    (local.set $a
+      (i32.const 1)
+    )
+    (drop
+      ;; We can infer that this contains 1, but there is nothing to do regarding
+      ;; the type, which is not a reference.
+      (local.get $a)
+    )
+  )
+
+  ;; CHECK:      (func $func (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a funcref)
+  ;; CHECK-NEXT:  (local.set $a
+  ;; CHECK-NEXT:   (ref.func $func)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.func $func)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $func (export "export3") (type $none_=>_none)
+    (local $a funcref)
+    (local.set $a
+      (ref.func $func)
+    )
+    (drop
+      ;; We can infer that this contains a ref to $func, which we can apply
+      ;; here. We don't need to add a cast in addition to that, as the ref.func
+      ;; we add has the refined type already.
+      (local.get $a)
+    )
+  )
+
+  ;; CHECK:      (func $funcs (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a funcref)
+  ;; CHECK-NEXT:  (local.set $a
+  ;; CHECK-NEXT:   (select (result (ref $none_=>_none))
+  ;; CHECK-NEXT:    (ref.func $func)
+  ;; CHECK-NEXT:    (ref.func $funcs)
+  ;; CHECK-NEXT:    (call $import)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast $none_=>_none
+  ;; CHECK-NEXT:    (local.get $a)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $funcs (export "export4") (type $none_=>_none)
+    (local $a funcref)
+    (local.set $a
+      (select
+        (ref.func $func)
+        (ref.func $funcs)
+        (call $import)
+      )
+    )
+    (drop
+      ;; We can infer that this contains a ref to $func or $funcs, so all we
+      ;; can infer is the type, and we add a cast to $none_=>_none.
+      (local.get $a)
+    )
+  )
+
+  ;; CHECK:      (func $unreachable (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a (ref $A))
+  ;; CHECK-NEXT:  (local.tee $a
+  ;; CHECK-NEXT:   (unreachable)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (unreachable)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $unreachable (export "export5")
+    (local $a (ref $A))
+    (local.set $a
+      (unreachable)
+    )
+    (drop
+      ;; We can infer that the type here is unreachable, and emit that in the
+      ;; IR. This checks we don't error on the inferred type not being a ref.
+      (local.get $a)
+    )
+  )
+)