3 files changed, 36 insertions, 43 deletions
diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp
index 5237f7864..c27f5d6eb 100644
--- a/src/passes/Monomorphize.cpp
+++ b/src/passes/Monomorphize.cpp
@@ -374,7 +374,7 @@ struct Monomorphize : public Pass {
     // Decide whether it is worth using the monomorphized function.
     auto worthwhile = true;
     if (onlyWhenHelpful) {
-      // Optimize both functions using minimal opts, hopefully enough to see if
+      // Run the optimizer on both functions, hopefully just enough to see if
       // there is a benefit to the context. We optimize both to avoid confusion
       // from the function benefiting from simply running another cycle of
       // optimization.
@@ -400,8 +400,8 @@ struct Monomorphize : public Pass {
       //       of the function, which uses memory, which is avoided if we just
       //       keep optimizing from the current contents as we go. It's not
       //       obvious which approach is best here.
-      doMinimalOpts(func);
-      doMinimalOpts(monoFunc.get());
+      doOpts(func);
+      doOpts(monoFunc.get());
 
       auto costBefore = CostAnalyzer(func->body).cost;
       auto costAfter = CostAnalyzer(monoFunc->body).cost;
@@ -552,26 +552,16 @@ struct Monomorphize : public Pass {
     return newFunc;
   }
 
-  // Run minimal function-level optimizations on a function. This optimizes at
-  // -O1 which is very fast and runs in linear time basically, and so it should
-  // be reasonable to run as part of this pass: -O1 is several times faster than
-  // a full -O2, in particular, and so if we run this as part of -O2 we should
-  // not be making it much slower overall.
-  // TODO: Perhaps we don't need all of -O1, and can focus on specific things we
-  //       expect to help. That would be faster, but we'd always run the risk of
-  //       missing things, especially as new passes are added later and we don't
-  //       think to add them here.
-  //       Alternatively, perhaps we should have a mode that does use -O1 or
-  //       even -O2 or above, as in theory any optimization could end up
-  //       mattering a lot here.
-  void doMinimalOpts(Function* func) {
+  // Run some function-level optimizations on a function. Ideally we would run a
+  // minimal set of optimizations here, but we do want to give the optimizer
+  // as much of a chance to work as possible, so for now do all of -O3 (in
+  // particular, we really need to run --precompute-propagate so constants are
+  // applied in the optimized function).
+  // TODO: Perhaps run -O2 or even -O1 if the function is large (or has many
+  //       locals, etc.), to ensure linear time, but we might miss benefits.
+  void doOpts(Function* func) {
     PassRunner runner(getPassRunner());
-    runner.options.optimizeLevel = 1;
-    // Local subtyping is not run in -O1, but we really do want it here since
-    // the entire point is that parameters now have more refined types, which
-    // can lead to locals reading them being refinable as well.
-    runner.add("local-subtyping");
-    // TODO: we need local propagation and escape analysis etc. -O3?
+    runner.options.optimizeLevel = 3;
     runner.addDefaultFunctionOptimizationPasses();
     runner.setIsNested(true);
     runner.runOnFunction(func);
diff --git a/test/lit/passes/monomorphize-consts.wast b/test/lit/passes/monomorphize-consts.wast
index 1dbdf1592..ec59edfea 100644
--- a/test/lit/passes/monomorphize-consts.wast
+++ b/test/lit/passes/monomorphize-consts.wast
@@ -29,6 +29,8 @@
 
   ;; CAREFUL:      (type $3 (func (param i32) (result i32)))
 
+  ;; CAREFUL:      (type $4 (func (result i32)))
+
   ;; CAREFUL:      (import "a" "b" (func $import (type $2) (param i32)))
   (import "a" "b" (func $import (param i32)))
 
@@ -314,16 +316,13 @@
   ;; ALWAYS-NEXT: )
   ;; CAREFUL:      (func $mutual-recursion-b (type $3) (param $0 i32) (result i32)
   ;; CAREFUL-NEXT:  (i32.add
-  ;; CAREFUL-NEXT:   (call $mutual-recursion-a
-  ;; CAREFUL-NEXT:    (i32.const 0)
-  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (call $mutual-recursion-a_10)
   ;; CAREFUL-NEXT:   (i32.const 1337)
   ;; CAREFUL-NEXT:  )
   ;; CAREFUL-NEXT: )
   (func $mutual-recursion-b (param $x i32) (result i32)
     (i32.add
-      ;; This can be optimized (in ALWAYS; to see the benefit in CAREFUL, we
-      ;; need additional cycles, which we do not do yet).
+      ;; This can be optimized (the constant 0 lets the specialized callee fold).
       (call $mutual-recursion-a
         (i32.const 0)
       )
@@ -633,3 +632,7 @@
 ;; CAREFUL-NEXT:   (local.get $0)
 ;; CAREFUL-NEXT:  )
 ;; CAREFUL-NEXT: )
+
+;; CAREFUL:      (func $mutual-recursion-a_10 (type $4) (result i32)
+;; CAREFUL-NEXT:  (i32.const 42)
+;; CAREFUL-NEXT: )
diff --git a/test/lit/passes/monomorphize-types.wast b/test/lit/passes/monomorphize-types.wast
index 3133d88c5..f98fb08be 100644
--- a/test/lit/passes/monomorphize-types.wast
+++ b/test/lit/passes/monomorphize-types.wast
@@ -447,35 +447,35 @@
   ;; ALWAYS-NEXT:  )
   ;; ALWAYS-NEXT: )
   ;; CAREFUL:      (func $refinable (type $4) (param $0 (ref $A))
-  ;; CAREFUL-NEXT:  (local $1 (ref $A))
+  ;; CAREFUL-NEXT:  (local $1 (ref $B))
+  ;; CAREFUL-NEXT:  (local $2 (ref $B))
   ;; CAREFUL-NEXT:  (call $import
-  ;; CAREFUL-NEXT:   (ref.cast (ref $B)
-  ;; CAREFUL-NEXT:    (local.get $0)
+  ;; CAREFUL-NEXT:   (local.tee $1
+  ;; CAREFUL-NEXT:    (ref.cast (ref $B)
+  ;; CAREFUL-NEXT:     (local.get $0)
+  ;; CAREFUL-NEXT:    )
   ;; CAREFUL-NEXT:   )
   ;; CAREFUL-NEXT:  )
   ;; CAREFUL-NEXT:  (call $import
-  ;; CAREFUL-NEXT:   (ref.cast (ref $B)
-  ;; CAREFUL-NEXT:    (local.tee $1
-  ;; CAREFUL-NEXT:     (select (result (ref $A))
-  ;; CAREFUL-NEXT:      (local.get $0)
-  ;; CAREFUL-NEXT:      (struct.new_default $B)
-  ;; CAREFUL-NEXT:      (global.get $global)
-  ;; CAREFUL-NEXT:     )
+  ;; CAREFUL-NEXT:   (local.tee $2
+  ;; CAREFUL-NEXT:    (select (result (ref $B))
+  ;; CAREFUL-NEXT:     (local.get $1)
+  ;; CAREFUL-NEXT:     (struct.new_default $B)
+  ;; CAREFUL-NEXT:     (global.get $global)
   ;; CAREFUL-NEXT:    )
   ;; CAREFUL-NEXT:   )
   ;; CAREFUL-NEXT:  )
   ;; CAREFUL-NEXT:  (call $import
-  ;; CAREFUL-NEXT:   (ref.cast (ref $B)
-  ;; CAREFUL-NEXT:    (local.get $1)
-  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (local.get $2)
   ;; CAREFUL-NEXT:  )
   ;; CAREFUL-NEXT:  (call $import
-  ;; CAREFUL-NEXT:   (ref.cast (ref $B)
-  ;; CAREFUL-NEXT:    (local.get $0)
-  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (local.get $1)
   ;; CAREFUL-NEXT:  )
   ;; CAREFUL-NEXT: )
   (func $refinable (param $ref (ref $A))
+    ;; Note that this large function ends up optimized in CAREFUL mode, as we
+    ;; keep the results of the optimizations that we run for comparison purposes.
+
     (local $x (ref $A))
     ;; The refined version of this function will not have the cast, since
     ;; optimizations manage to remove it using the more refined type.