diff options
author: Alon Zakai <azakai@google.com> (2023-07-27 13:30:21 -0700)
committer: GitHub <noreply@github.com> (2023-07-27 20:30:21 +0000)
commit: 8375f80fa789afaac58b0c86ba5c9396c2003635 (patch)
tree: d735c6473d3e293e5349a1467e61864059160661 /src
parent: 5d787c8d89915e257bd11d294ad5cbf882bac2e1 (diff)
download: binaryen-8375f80fa789afaac58b0c86ba5c9396c2003635.tar.gz binaryen-8375f80fa789afaac58b0c86ba5c9396c2003635.tar.bz2 binaryen-8375f80fa789afaac58b0c86ba5c9396c2003635.zip
GUFA: Add a version that casts all of our inferences (#5846)
GUFA refines existing casts, but does not add new casts for fear of increasing code size
and adding more cast operations at runtime. This PR adds a version that does add all
those casts, and it looks like code size at least improves rather than regresses,
on J2Wasm and Kotlin. That is, this pass adds a lot more casts, but subsequent
optimizations benefit enough to shrink overall code size.
However, this may still not be worthwhile, as even if code size decreases we may end
up doing more casts at runtime, and those casts might be hard to remove, e.g.:
(call $foo
(x) ;; inferred to be non-null
)
(func $foo (param (ref null $A)
=>
(call $foo
(ref.cast $A (x) ;; add a cast here
)
(func $foo (param (ref $A) ;; later pass refines here
That new cast cannot be removed after we refine the function parameter. If the
function never benefits from the fact that the input is non-null, then the cast is
wasted work (e.g. if the function only compares the input to another value).
To use this new pass, try --gufa-cast-all rather than --gufa. As with normal GUFA,
running the full optimizer afterwards is important, and even more important in
order to get rid of as many of the new casts as possible.
Diffstat (limited to 'src')
-rw-r--r--  src/passes/GUFA.cpp  | 92
-rw-r--r--  src/passes/pass.cpp  |  3
-rw-r--r--  src/passes/passes.h  |  1
3 files changed, 84 insertions, 12 deletions
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp index 8f58eaf8a..d07a4539b 100644 --- a/src/passes/GUFA.cpp +++ b/src/passes/GUFA.cpp @@ -33,6 +33,9 @@ // such followup opts automatically in functions where we make changes, and so // it is useful if GUFA is run near the end of the optimization pipeline. // +// A variation of this pass will add casts anywhere we can infer a more specific +// type, see |castAll| below. +// // TODO: GUFA + polymorphic devirtualization + traps-never-happen. If we see // that the possible call targets are {A, B, C}, and GUFA info lets us // prove that A, C will trap if called - say, if they cast the first @@ -58,13 +61,27 @@ struct GUFAOptimizer bool isFunctionParallel() override { return true; } ContentOracle& oracle; + + // Whether to run further optimizations in functions we modify. bool optimizing; - GUFAOptimizer(ContentOracle& oracle, bool optimizing) - : oracle(oracle), optimizing(optimizing) {} + // Whether to add casts to all things that we have inferred a more refined + // type for. This increases code size immediately, but later optimizations + // generally benefit enough from these casts that overall code size actually + // decreases, even if some of these casts remain. However, aside from code + // size there may be an increase in the number of casts performed at runtime, + // so benchmark carefully. + // TODO: Add a pass to remove casts not needed for validation, which users + // could run at the very end. However, even with such a pass we may end + // up with casts that are needed for validation that were not present + // before. 
+ bool castAll; + + GUFAOptimizer(ContentOracle& oracle, bool optimizing, bool castAll) + : oracle(oracle), optimizing(optimizing), castAll(castAll) {} std::unique_ptr<Pass> create() override { - return std::make_unique<GUFAOptimizer>(oracle, optimizing); + return std::make_unique<GUFAOptimizer>(oracle, optimizing, castAll); } bool optimized = false; @@ -265,7 +282,7 @@ struct GUFAOptimizer // // Note that we could in principle apply this in all expressions by adding // a cast. However, to be careful with code size, we only refine existing - // casts for now. + // here. See addNewCasts() for where we add entirely new casts. curr->type = inferredType; } @@ -284,14 +301,23 @@ struct GUFAOptimizer // information about parents. void visitFunction(Function* func) { + if (optimized) { + // Optimization may introduce more unreachables, which we need to + // propagate. + ReFinalize().walkFunctionInModule(func, getModule()); + } + + // Potentially add new casts after we do our first pass of optimizations + + // refinalize (doing it after refinalizing lets us add as few new casts as + // possible). + if (castAll && addNewCasts(func)) { + optimized = true; + } + if (!optimized) { return; } - // Optimization may introduce more unreachables, which we need to - // propagate. - ReFinalize().walkFunctionInModule(func, getModule()); - // We may add blocks around pops, which we must fix up. EHUtils::handleBlockNestedPops(func, *getModule()); @@ -333,22 +359,64 @@ struct GUFAOptimizer runner.add("vacuum"); runner.runOnFunction(func); } + + // Add a new cast whenever we know a value contains a more refined type than + // in the IR. Returns whether we optimized anything. + bool addNewCasts(Function* func) { + // Subtyping and casts only make sense if GC is enabled. 
+ if (!getModule()->features.hasGC()) { + return false; + } + + struct Adder : public PostWalker<Adder, UnifiedExpressionVisitor<Adder>> { + GUFAOptimizer& parent; + + Adder(GUFAOptimizer& parent) : parent(parent) {} + + bool optimized = false; + + void visitExpression(Expression* curr) { + if (!curr->type.isRef()) { + // Ignore anything we cannot infer a type for. + return; + } + + auto oracleType = parent.getContents(curr).getType(); + if (oracleType.isRef() && oracleType != curr->type && + Type::isSubType(oracleType, curr->type)) { + replaceCurrent(Builder(*getModule()).makeRefCast(curr, oracleType)); + optimized = true; + } + } + }; + + Adder adder(*this); + adder.walkFunctionInModule(func, getModule()); + if (adder.optimized) { + ReFinalize().walkFunctionInModule(func, getModule()); + return true; + } + return false; + } }; struct GUFAPass : public Pass { bool optimizing; + bool castAll; - GUFAPass(bool optimizing) : optimizing(optimizing) {} + GUFAPass(bool optimizing, bool castAll) + : optimizing(optimizing), castAll(castAll) {} void run(Module* module) override { ContentOracle oracle(*module); - GUFAOptimizer(oracle, optimizing).run(getPassRunner(), module); + GUFAOptimizer(oracle, optimizing, castAll).run(getPassRunner(), module); } }; } // anonymous namespace -Pass* createGUFAPass() { return new GUFAPass(false); } -Pass* createGUFAOptimizingPass() { return new GUFAPass(true); } +Pass* createGUFAPass() { return new GUFAPass(false, false); } +Pass* createGUFAOptimizingPass() { return new GUFAPass(true, false); } +Pass* createGUFACastAllPass() { return new GUFAPass(false, true); } } // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 2b89371c4..0d47cd78e 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -187,6 +187,9 @@ void PassRegistry::registerPasses() { "information about what content can actually appear in each " "location", createGUFAPass); + registerPass("gufa-cast-all", + "GUFA plus add casts for all 
inferences", + createGUFACastAllPass); registerPass("gufa-optimizing", "GUFA plus local optimizations in functions we modified", createGUFAOptimizingPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index 8cc092f98..fa6d98111 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -58,6 +58,7 @@ Pass* createGlobalRefiningPass(); Pass* createGlobalStructInferencePass(); Pass* createGlobalTypeOptimizationPass(); Pass* createGUFAPass(); +Pass* createGUFACastAllPass(); Pass* createGUFAOptimizingPass(); Pass* createHeap2LocalPass(); Pass* createI64ToI32LoweringPass(); |