6 files changed, 231 insertions, 8 deletions
diff --git a/src/ir/linear-execution.h b/src/ir/linear-execution.h
index c88d0e444..cee039b37 100644
--- a/src/ir/linear-execution.h
+++ b/src/ir/linear-execution.h
@@ -17,9 +17,9 @@
 #ifndef wasm_ir_linear_execution_h
 #define wasm_ir_linear_execution_h
 
-#include <ir/properties.h>
-#include <wasm-traversal.h>
-#include <wasm.h>
+#include "ir/properties.h"
+#include "wasm-traversal.h"
+#include "wasm.h"
 
 namespace wasm {
 
@@ -47,6 +47,17 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> {
   static void scan(SubType* self, Expression** currp) {
     Expression* curr = *currp;
 
+    auto handleCall = [&](bool isReturn) {
+      // Control is nonlinear if we return, or if EH is enabled or may be.
+      if (isReturn || !self->getModule() ||
+          self->getModule()->features.hasExceptionHandling()) {
+        self->pushTask(SubType::doNoteNonLinear, currp);
+      }
+
+      // Scan the children normally.
+      PostWalker<SubType, VisitorType>::scan(self, currp);
+    };
+
     switch (curr->_id) {
       case Expression::Id::InvalidId:
         WASM_UNREACHABLE("bad id");
@@ -97,6 +108,14 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> {
         self->maybePushTask(SubType::scan, &curr->cast<Return>()->value);
         break;
       }
+      case Expression::Id::CallId: {
+        handleCall(curr->cast<Call>()->isReturn);
+        return;
+      }
+      case Expression::Id::CallRefId: {
+        handleCall(curr->cast<CallRef>()->isReturn);
+        return;
+      }
       case Expression::Id::TryId: {
         self->pushTask(SubType::doVisitTry, currp);
         self->pushTask(SubType::doNoteNonLinear, currp);
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp
index a042ba573..ff2141fb4 100644
--- a/src/passes/SimplifyLocals.cpp
+++ b/src/passes/SimplifyLocals.cpp
@@ -997,9 +997,9 @@ struct SimplifyLocals
     // will inhibit us creating an if return value.
     struct EquivalentOptimizer
       : public LinearExecutionWalker<EquivalentOptimizer> {
+
       std::vector<Index>* numLocalGets;
       bool removeEquivalentSets;
-      Module* module;
       PassOptions passOptions;
 
       bool anotherCycle = false;
@@ -1016,6 +1016,8 @@ struct SimplifyLocals
       }
 
       void visitLocalSet(LocalSet* curr) {
+        auto* module = this->getModule();
+
         // Remove trivial copies, even through a tee
         auto* value =
           Properties::getFallthrough(curr->value, passOptions, *module);
@@ -1123,11 +1125,10 @@ struct SimplifyLocals
     };
 
     EquivalentOptimizer eqOpter;
-    eqOpter.module = this->getModule();
     eqOpter.passOptions = this->getPassOptions();
     eqOpter.numLocalGets = &getCounter.num;
     eqOpter.removeEquivalentSets = allowStructure;
-    eqOpter.walkFunction(func);
+    eqOpter.walkFunctionInModule(func, this->getModule());
     if (eqOpter.refinalize) {
       ReFinalize().walkFunctionInModule(func, this->getModule());
     }
diff --git a/test/lit/passes/gufa-tnh.wast b/test/lit/passes/gufa-tnh.wast
index 79d524cf0..e1dee4773 100644
--- a/test/lit/passes/gufa-tnh.wast
+++ b/test/lit/passes/gufa-tnh.wast
@@ -1953,3 +1953,62 @@
     )
   )
 )
+
+;; Control flow around calls.
+(module
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (type $A (struct ))
+  (type $A (struct))
+
+  ;; CHECK:      (type $B (sub $A (struct )))
+  (type $B (sub $A (struct)))
+
+  ;; CHECK:      (type $ref?|$A|_=>_none (func (param (ref null $A))))
+
+  ;; CHECK:      (import "a" "b" (func $import-throw (type $none_=>_none)))
+  (import "a" "b" (func $import-throw))
+
+  ;; CHECK:      (export "a" (func $caller))
+
+  ;; CHECK:      (func $called (type $ref?|$A|_=>_none) (param $0 (ref null $A))
+  ;; CHECK-NEXT:  (call $import-throw)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast $B
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $called (param $0 (ref null $A))
+    ;; This function calls an import, and later casts. We cannot use those casts
+    ;; to infer anything in the callers, since the import might throw (in which
+    ;; case we'd never reach the cast).
+    (call $import-throw)
+    (drop
+      (ref.cast $B
+        (local.get $0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $caller (type $none_=>_none)
+  ;; CHECK-NEXT:  (call $called
+  ;; CHECK-NEXT:   (struct.new_default $B)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (call $called
+  ;; CHECK-NEXT:   (struct.new_default $A)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $caller (export "a")
+    ;; This call sends a $B which will be cast to $B (assuming the import does
+    ;; not trap), so nothing should happen here.
+    (call $called
+      (struct.new $B)
+    )
+    ;; This call sends an $A, which would fail the cast if it were reached. But
+    ;; it might not, so we do nothing here.
+    (call $called
+      (struct.new $A)
+    )
+  )
+)
diff --git a/test/lit/passes/optimize-casts-noeh.wast b/test/lit/passes/optimize-casts-noeh.wast
new file mode 100644
index 000000000..46d92953d
--- /dev/null
+++ b/test/lit/passes/optimize-casts-noeh.wast
@@ -0,0 +1,69 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; RUN: wasm-opt %s --optimize-casts --enable-reference-types --enable-gc --enable-tail-call -S -o - | filecheck %s
+
+(module
+  ;; CHECK:      (type $A (struct ))
+  (type $A (struct))
+
+  ;; CHECK:      (func $yes-past-call (type $ref|struct|_=>_none) (param $x (ref struct))
+  ;; CHECK-NEXT:  (local $1 (ref $A))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (local.tee $1
+  ;; CHECK-NEXT:    (ref.cast $A
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (call $none)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (local.get $1)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $yes-past-call (param $x (ref struct))
+    (drop
+      (ref.cast $A
+        (local.get $x)
+      )
+    )
+    ;; The call in the middle does not stop us from helping the last get, since
+    ;; EH is not enabled. The last get will flip from $x to a new tee of the
+    ;; cast.
+    (call $none)
+    (drop
+      (local.get $x)
+    )
+  )
+
+  ;; CHECK:      (func $not-past-return_call (type $ref|struct|_=>_none) (param $x (ref struct))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast $A
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (return_call $none)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (local.get $x)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $not-past-return_call (param $x (ref struct))
+    (drop
+      (ref.cast $A
+        (local.get $x)
+      )
+    )
+    ;; The call_return in the middle stops us from helping the last get. We
+    ;; could still optimize in this case, however, with more precision (since
+    ;; after we branch out it doesn't matter what we have below).
+    (return_call $none)
+    (drop
+      (local.get $x)
+    )
+  )
+
+  ;; CHECK:      (func $none (type $none_=>_none)
+  ;; CHECK-NEXT:  (nop)
+  ;; CHECK-NEXT: )
+  (func $none
+    ;; Helper for the above.
+  )
+)
diff --git a/test/lit/passes/optimize-casts.wast b/test/lit/passes/optimize-casts.wast
index 4dd839d97..83cde8a4b 100644
--- a/test/lit/passes/optimize-casts.wast
+++ b/test/lit/passes/optimize-casts.wast
@@ -8,9 +8,13 @@
   ;; CHECK:      (type $B (sub $A (struct )))
   (type $B (struct_subtype $A))
 
+  ;; CHECK:      (type $void (func))
+
   ;; CHECK:      (type $D (array (mut i32)))
   (type $D (array (mut i32)))
 
+  (type $void (func))
+
   ;; CHECK:      (global $a (mut i32) (i32.const 0))
   (global $a (mut i32) (i32.const 0))
 
@@ -173,6 +177,65 @@
     )
   )
 
+  ;; CHECK:      (func $not-past-call (type $ref|struct|_=>_none) (param $x (ref struct))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast $A
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (call $get)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (local.get $x)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $not-past-call (param $x (ref struct))
+    (drop
+      (ref.cast $A
+        (local.get $x)
+      )
+    )
+    ;; The call in the middle stops us from helping the last get, since a call
+    ;; might branch out. TODO we could still optimize in this case, with more
+    ;; precision (since if we branch out it doesn't matter what we have below).
+    (drop
+      (call $get)
+    )
+    (drop
+      (local.get $x)
+    )
+  )
+
+  ;; CHECK:      (func $not-past-call_ref (type $ref|struct|_=>_none) (param $x (ref struct))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast $A
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (call_ref $void
+  ;; CHECK-NEXT:   (ref.func $void)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (local.get $x)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $not-past-call_ref (param $x (ref struct))
+    (drop
+      (ref.cast $A
+        (local.get $x)
+      )
+    )
+    ;; As in the last function, the call in the middle stops us from helping the
+    ;; last get (this time with a call_ref).
+    (call_ref $void
+      (ref.func $void)
+    )
+    (drop
+      (local.get $x)
+    )
+  )
+
   ;; CHECK:      (func $best (type $ref|struct|_=>_none) (param $x (ref struct))
   ;; CHECK-NEXT:  (local $1 (ref $A))
   ;; CHECK-NEXT:  (local $2 (ref $B))
@@ -1229,4 +1292,11 @@
     ;; Helper for the above.
     (unreachable)
   )
+
+  ;; CHECK:      (func $void (type $void)
+  ;; CHECK-NEXT:  (nop)
+  ;; CHECK-NEXT: )
+  (func $void
+    ;; Helper for the above.
+  )
 )
diff --git a/test/lit/passes/simplify-locals-gc.wast b/test/lit/passes/simplify-locals-gc.wast
index 6b2b10462..500d12b5a 100644
--- a/test/lit/passes/simplify-locals-gc.wast
+++ b/test/lit/passes/simplify-locals-gc.wast
@@ -366,7 +366,9 @@
   ;; CHECK-NEXT:   (local.get $nn-any)
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (nop)
-  ;; CHECK-NEXT:  (local.get $nn-any)
+  ;; CHECK-NEXT:  (local.tee $any
+  ;; CHECK-NEXT:   (local.get $nn-any)
+  ;; CHECK-NEXT:  )
   ;; CHECK-NEXT: )
   (func $pick-casted (param $any anyref) (result anyref)
     (local $nn-any (ref any))
@@ -382,7 +384,10 @@
     (call $use-nn-any
       (local.get $nn-any)
     )
-    ;; This copy is not needed, as they hold the same value.
+    ;; This copy is not needed, as they hold the same value. However, the call
+    ;; before us inhibits us from optimizing here. TODO: with a more precise
+    ;; analysis we could optimize this, as if the call branches out it doesn't
+    ;; matter what happens after it.
     (local.set $any
       (local.get $nn-any)
     )