author     Alon Zakai <azakai@google.com>    2024-07-11 15:09:08 -0700
committer  GitHub <noreply@github.com>       2024-07-11 15:09:08 -0700
commit     6d2f101b3bcb8d5a7d1ead40f1245bbbead36580 (patch)
tree       432a900f484d8fc4a172d39ffaeba1cb7f26b1c4
parent     ae4800bebd0d479813d99e31e098296c9167e34a (diff)
Monomorphize: Use -O3 over -O1 + tweaks (#6732)
Eventually we will need to do some tuning of compile-time speed, but for now it is simpler to run all the optimizations, in particular because that makes writing tests easier.
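
For context, the check this commit changes works roughly as follows: in CAREFUL (onlyWhenHelpful) mode the pass optimizes both the original function and the monomorphized copy, then compares their costs. A minimal sketch of that decision, assuming a hypothetical helper name isWorthwhile and a strict-decrease threshold (the real code may use a different criterion; CostAnalyzer and doOpts appear in the diff below):

    // Sketch of the CAREFUL-mode decision (simplified; isWorthwhile is a
    // hypothetical name, not Binaryen's).
    bool isWorthwhile(Function* func, Function* monoFunc) {
      // After this commit, doOpts() runs full -O3 rather than -O1 plus
      // hand-picked extra passes, so both versions are compared in
      // fully-optimized form.
      doOpts(func);
      doOpts(monoFunc);
      auto costBefore = CostAnalyzer(func->body).cost;
      auto costAfter = CostAnalyzer(monoFunc->body).cost;
      // Assumption for illustration: keep the copy only if it got strictly
      // cheaper.
      return costAfter < costBefore;
    }
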
-rw-r--r--  src/passes/Monomorphize.cpp               | 34
-rw-r--r--  test/lit/passes/monomorphize-consts.wast  | 13
-rw-r--r--  test/lit/passes/monomorphize-types.wast   | 32
3 files changed, 36 insertions(+), 43 deletions(-)
diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp
index 5237f7864..c27f5d6eb 100644
--- a/src/passes/Monomorphize.cpp
+++ b/src/passes/Monomorphize.cpp
@@ -374,7 +374,7 @@ struct Monomorphize : public Pass {
// Decide whether it is worth using the monomorphized function.
auto worthwhile = true;
if (onlyWhenHelpful) {
- // Optimize both functions using minimal opts, hopefully enough to see if
+ // Run the optimizer on both functions, hopefully just enough to see if
// there is a benefit to the context. We optimize both to avoid confusion
// from the function benefiting from simply running another cycle of
// optimization.
@@ -400,8 +400,8 @@ struct Monomorphize : public Pass {
// of the function, which uses memory, which is avoided if we just
// keep optimizing from the current contents as we go. It's not
// obvious which approach is best here.
- doMinimalOpts(func);
- doMinimalOpts(monoFunc.get());
+ doOpts(func);
+ doOpts(monoFunc.get());
auto costBefore = CostAnalyzer(func->body).cost;
auto costAfter = CostAnalyzer(monoFunc->body).cost;
@@ -552,26 +552,16 @@ struct Monomorphize : public Pass {
return newFunc;
}
- // Run minimal function-level optimizations on a function. This optimizes at
- // -O1 which is very fast and runs in linear time basically, and so it should
- // be reasonable to run as part of this pass: -O1 is several times faster than
- // a full -O2, in particular, and so if we run this as part of -O2 we should
- // not be making it much slower overall.
- // TODO: Perhaps we don't need all of -O1, and can focus on specific things we
- // expect to help. That would be faster, but we'd always run the risk of
- // missing things, especially as new passes are added later and we don't
- // think to add them here.
- // Alternatively, perhaps we should have a mode that does use -O1 or
- // even -O2 or above, as in theory any optimization could end up
- // mattering a lot here.
- void doMinimalOpts(Function* func) {
+ // Run some function-level optimizations on a function. Ideally we would run a
+ // minimal amount of optimizations here, but we do want to give the optimizer
+ // as much of a chance to work as possible, so for now do all of -O3 (in
+ // particular, we really need to run --precompute-propagate so constants are
+ // applied in the optimized function).
+ // TODO: Perhaps run -O2 or even -O1 if the function is large (or has many
+ // locals, etc.), to ensure linear time, though we might then miss things.
+ void doOpts(Function* func) {
PassRunner runner(getPassRunner());
- runner.options.optimizeLevel = 1;
- // Local subtyping is not run in -O1, but we really do want it here since
- // the entire point is that parameters now have more refined types, which
- // can lead to locals reading them being refinable as well.
- runner.add("local-subtyping");
- // TODO: we need local propagation and escape analysis etc. -O3?
+ runner.options.optimizeLevel = 3;
runner.addDefaultFunctionOptimizationPasses();
runner.setIsNested(true);
runner.runOnFunction(func);
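
The TODO in the new comment suggests one way compile time could later be bounded: pick the optimization level based on the function's size. A hypothetical sketch of that idea (not part of this commit; the cutoff value and the use of Measurer from ir/utils.h are illustrative assumptions):

    // Hypothetical variant of doOpts() following the TODO above: drop to a
    // cheaper optimization level for large functions, to keep the pass
    // closer to linear time.
    void doOptsScaled(Function* func) {
      PassRunner runner(getPassRunner());
      // Assumed heuristic: measure the body and scale the level. The 10000
      // cutoff is an arbitrary illustrative number, not a tuned threshold.
      auto size = Measurer::measure(func->body);
      runner.options.optimizeLevel = size > 10000 ? 1 : 3;
      runner.addDefaultFunctionOptimizationPasses();
      runner.setIsNested(true);
      runner.runOnFunction(func);
    }
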
diff --git a/test/lit/passes/monomorphize-consts.wast b/test/lit/passes/monomorphize-consts.wast
index 1dbdf1592..ec59edfea 100644
--- a/test/lit/passes/monomorphize-consts.wast
+++ b/test/lit/passes/monomorphize-consts.wast
@@ -29,6 +29,8 @@
;; CAREFUL: (type $3 (func (param i32) (result i32)))
+ ;; CAREFUL: (type $4 (func (result i32)))
+
;; CAREFUL: (import "a" "b" (func $import (type $2) (param i32)))
(import "a" "b" (func $import (param i32)))
@@ -314,16 +316,13 @@
;; ALWAYS-NEXT: )
;; CAREFUL: (func $mutual-recursion-b (type $3) (param $0 i32) (result i32)
;; CAREFUL-NEXT: (i32.add
- ;; CAREFUL-NEXT: (call $mutual-recursion-a
- ;; CAREFUL-NEXT: (i32.const 0)
- ;; CAREFUL-NEXT: )
+ ;; CAREFUL-NEXT: (call $mutual-recursion-a_10)
;; CAREFUL-NEXT: (i32.const 1337)
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
(func $mutual-recursion-b (param $x i32) (result i32)
(i32.add
- ;; This can be optimized (in ALWAYS; to see the benefit in CAREFUL, we
- ;; need additional cycles, which we do not do yet).
+ ;; This can be optimized (as the constant 0 allows work to happen).
(call $mutual-recursion-a
(i32.const 0)
)
@@ -633,3 +632,7 @@
;; CAREFUL-NEXT: (local.get $0)
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
+
+;; CAREFUL: (func $mutual-recursion-a_10 (type $4) (result i32)
+;; CAREFUL-NEXT: (i32.const 42)
+;; CAREFUL-NEXT: )
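
The new $mutual-recursion-a_10 above is the payoff in this test: once the argument is pinned to the constant 0, full -O3 (in particular precompute-propagate, which the new code comment calls out) folds the specialized copy down to (i32.const 42), and the caller just adds 1337. A C++ analogy of the same transformation, with hypothetical names:

    // C++ analogy (for illustration only, not Binaryen code) of the CAREFUL
    // output above: the call with a constant argument is split out as a
    // specialized copy, which then folds to a constant.
    int a(int x);                          // plays $mutual-recursion-a
    int a_for_0() { return 42; }           // plays $mutual-recursion-a_10
    int b() { return a_for_0() + 1337; }   // plays $mutual-recursion-b
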
diff --git a/test/lit/passes/monomorphize-types.wast b/test/lit/passes/monomorphize-types.wast
index 3133d88c5..f98fb08be 100644
--- a/test/lit/passes/monomorphize-types.wast
+++ b/test/lit/passes/monomorphize-types.wast
@@ -447,35 +447,35 @@
;; ALWAYS-NEXT: )
;; ALWAYS-NEXT: )
;; CAREFUL: (func $refinable (type $4) (param $0 (ref $A))
- ;; CAREFUL-NEXT: (local $1 (ref $A))
+ ;; CAREFUL-NEXT: (local $1 (ref $B))
+ ;; CAREFUL-NEXT: (local $2 (ref $B))
;; CAREFUL-NEXT: (call $import
- ;; CAREFUL-NEXT: (ref.cast (ref $B)
- ;; CAREFUL-NEXT: (local.get $0)
+ ;; CAREFUL-NEXT: (local.tee $1
+ ;; CAREFUL-NEXT: (ref.cast (ref $B)
+ ;; CAREFUL-NEXT: (local.get $0)
+ ;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: (call $import
- ;; CAREFUL-NEXT: (ref.cast (ref $B)
- ;; CAREFUL-NEXT: (local.tee $1
- ;; CAREFUL-NEXT: (select (result (ref $A))
- ;; CAREFUL-NEXT: (local.get $0)
- ;; CAREFUL-NEXT: (struct.new_default $B)
- ;; CAREFUL-NEXT: (global.get $global)
- ;; CAREFUL-NEXT: )
+ ;; CAREFUL-NEXT: (local.tee $2
+ ;; CAREFUL-NEXT: (select (result (ref $B))
+ ;; CAREFUL-NEXT: (local.get $1)
+ ;; CAREFUL-NEXT: (struct.new_default $B)
+ ;; CAREFUL-NEXT: (global.get $global)
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: (call $import
- ;; CAREFUL-NEXT: (ref.cast (ref $B)
- ;; CAREFUL-NEXT: (local.get $1)
- ;; CAREFUL-NEXT: )
+ ;; CAREFUL-NEXT: (local.get $2)
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: (call $import
- ;; CAREFUL-NEXT: (ref.cast (ref $B)
- ;; CAREFUL-NEXT: (local.get $0)
- ;; CAREFUL-NEXT: )
+ ;; CAREFUL-NEXT: (local.get $1)
;; CAREFUL-NEXT: )
;; CAREFUL-NEXT: )
(func $refinable (param $ref (ref $A))
+ ;; Note that this large function ends up optimized in CAREFUL mode, as a
+ ;; side effect of keeping the optimized version made for the cost comparison.
+
(local $x (ref $A))
;; The refined version of this function will not have the cast, since
;; optimizations manage to remove it using the more refined type.