12 files changed, 670 insertions, 0 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index 66f1c75c0..72bb5e9c3 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -1323,6 +1323,7 @@ opt_choices = [
     ["--simplify-locals-notee-nostructure"],
     ["--ssa"],
     ["--type-refining"],
+    ["--type-merging"],
     ["--type-ssa"],
     ["--vacuum"],
 ]
diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp
index 47931baad..31c110738 100644
--- a/src/ir/type-updating.cpp
+++ b/src/ir/type-updating.cpp
@@ -266,6 +266,10 @@ Type GlobalTypeRewriter::getTempType(Type type) {
   WASM_UNREACHABLE("bad type");
 }
 
+Type GlobalTypeRewriter::getTempTupleType(Tuple tuple) {
+  return typeBuilder.getTempTupleType(tuple);
+}
+
 namespace TypeUpdating {
 
 bool canHandleAsLocal(Type type) {
diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h
index 8da84ceb6..12e0b8b57 100644
--- a/src/ir/type-updating.h
+++ b/src/ir/type-updating.h
@@ -360,6 +360,7 @@ public:
   // so that they can use a proper temp type of the TypeBuilder while modifying
   // things.
   Type getTempType(Type type);
+  Type getTempTupleType(Tuple tuple);
 
   using SignatureUpdates = std::unordered_map<HeapType, Signature>;
 
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index ea06bebb1..e97f5e388 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -100,6 +100,7 @@ set(passes_SOURCES
   ReReloop.cpp
   TrapMode.cpp
   TypeRefining.cpp
+  TypeMerging.cpp
   TypeSSA.cpp
   SafeHeap.cpp
   SimplifyGlobals.cpp
diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp
new file mode 100644
index 000000000..94a198adf
--- /dev/null
+++ b/src/passes/TypeMerging.cpp
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Merge unneeded types: types that are not needed for validation, and have no
+// detectable runtime effect. Completely unused types are removed anyhow during
+// binary writing, so this handles the case of used types that can be merged
+// into others. Specifically we merge a type into its super, which is possible
+// when it has no extra fields, no refined fields, and no casts.
+//
+// Note that such "redundant" types may help the optimizer, so merging them can
+// have a negative effect later. For that reason this may be best run near the
+// very end of the optimization pipeline, when nothing else is expected to do
+// type-based optimizations later. However, you also do not want to merge at the
+// very end, as e.g. type merging may open up function merging opportunities.
+// One possible sequence:
+//
+//   --type-ssa -Os --type-merging -Os
+//
+// That is, running TypeSSA early makes sense, as it provides more type info.
+// Then we hope the optimizer benefits from that, and after that we merge types
+// and then optimize a final time. You can experiment with more optimization
+// passes in between.
+//
+
+#include "ir/module-utils.h"
+#include "ir/type-updating.h"
+#include "pass.h"
+#include "support/small_set.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+// We need to find all the heap types that expressions refer to directly, such
+// as the targets of casts. Those types must be preserved: even if they are
+// identical to other types, they are nominally distinguishable.
+
+// Most functions do no casts, or perhaps cast |this| and perhaps a few others.
+using ReferredTypes = SmallUnorderedSet<HeapType, 5>;
+
+struct CastFinder
+  : public PostWalker<CastFinder, UnifiedExpressionVisitor<CastFinder>> {
+  ReferredTypes referredTypes;
+
+  void visitExpression(Expression* curr) {
+    // Find all references to a heap type. The macros below set up the included
+    // .def file so that each heap type field of |curr| is inserted in our set.
+#define DELEGATE_ID curr->_id
+
+#define DELEGATE_START(id) [[maybe_unused]] auto* cast = curr->cast<id>();
+
+#define DELEGATE_FIELD_HEAPTYPE(id, field) referredTypes.insert(cast->field);
+    // All other kinds of fields are defined as empty, so they are ignored.
+#define DELEGATE_FIELD_CHILD(id, field)
+#define DELEGATE_FIELD_OPTIONAL_CHILD(id, field)
+#define DELEGATE_FIELD_INT(id, field)
+#define DELEGATE_FIELD_INT_ARRAY(id, field)
+#define DELEGATE_FIELD_LITERAL(id, field)
+#define DELEGATE_FIELD_NAME(id, field)
+#define DELEGATE_FIELD_NAME_VECTOR(id, field)
+#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field)
+#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field)
+#define DELEGATE_FIELD_SCOPE_NAME_USE_VECTOR(id, field)
+#define DELEGATE_FIELD_TYPE(id, field)
+#define DELEGATE_FIELD_ADDRESS(id, field)
+#define DELEGATE_FIELD_CHILD_VECTOR(id, field)
+
+#include "wasm-delegations-fields.def"
+  }
+};
+
+// Merges struct types into their supertypes when the two are structurally
+// identical and no expression refers to the subtype's heap type explicitly.
+struct TypeMerging : public Pass {
+  // Only modifies types.
+  bool requiresNonNullableLocalFixups() override { return false; }
+
+  Module* module;
+
+  // The types we can merge: each key is a type that will be merged away, and
+  // its value is the type that it will be merged into.
+  using TypeUpdates = std::unordered_map<HeapType, HeapType>;
+  TypeUpdates merges;
+
+  void run(Module* module_) override {
+    module = module_;
+
+    if (!module->features.hasGC()) {
+      return;
+    }
+
+    // First, find all the cast types.
+    ModuleUtils::ParallelFunctionAnalysis<ReferredTypes> analysis(
+      *module, [&](Function* func, ReferredTypes& referredTypes) {
+        if (func->imported()) {
+          return;
+        }
+
+        CastFinder finder;
+        finder.walk(func->body);
+        referredTypes = std::move(finder.referredTypes);
+      });
+
+    // Also find cast types in the module scope (not possible in the current
+    // spec, but do it to be future-proof).
+    CastFinder moduleFinder;
+    moduleFinder.walkModuleCode(module);
+
+    // Accumulate all the referredTypes.
+    auto& allReferredTypes = moduleFinder.referredTypes;
+    for (auto& [k, referredTypes] : analysis.map) {
+      for (auto type : referredTypes) {
+        allReferredTypes.insert(type);
+      }
+    }
+
+    // Find all the heap types.
+    std::vector<HeapType> types = ModuleUtils::collectHeapTypes(*module);
+
+    // TODO: There may be more opportunities after this loop. Imagine that we
+    //       decide to merge A and B into C, and there are types X and Y that
+    //       contain a nested reference to A and B respectively, then after A
+    //       and B become identical so do X and Y. The recursive case is not
+    //       trivial, however, and needs more thought.
+    for (auto type : types) {
+      if (allReferredTypes.count(type)) {
+        // This has a cast, so it is distinguishable nominally.
+        continue;
+      }
+
+      auto super = type.getSuperType();
+      if (!super) {
+        // This has no supertype, so there is nothing to merge it into.
+        continue;
+      }
+
+      // TODO: arrays
+      if (!type.isStruct()) {
+        continue;
+      }
+
+      auto& fields = type.getStruct().fields;
+      auto& superFields = super->getStruct().fields;
+      if (fields != superFields) {
+        // This adds a field, or refines one, so it differs from the super, and
+        // we cannot merge it with the super.
+        continue;
+      }
+
+      // We can merge! This is identical structurally to the super, and also not
+      // distinguishable nominally.
+      merges[type] = *super;
+    }
+
+    if (merges.empty()) {
+      return;
+    }
+
+    // We found things to optimize! Rewrite types in the module to apply those
+    // changes. First, close over the map, so if X can be merged into Y and Y
+    // into Z then we map X into Z.
+    for (auto type : types) {
+      auto start = merges.find(type);
+      if (start == merges.end()) {
+        continue;
+      }
+
+      // Follow the chain of merges to its final target.
+      auto newType = start->second;
+      for (auto next = merges.find(newType); next != merges.end();
+           next = merges.find(newType)) {
+        newType = next->second;
+      }
+      // Apply the findings to all intermediate types as well, to avoid
+      // duplicate work in later iterations. That is, all the types we saw in
+      // the above loop will all get merged into newType.
+      auto curr = type;
+      while (true) {
+        auto iter = merges.find(curr);
+        if (iter == merges.end()) {
+          break;
+        }
+        auto& currMerge = iter->second;
+        curr = currMerge;
+        currMerge = newType;
+      }
+    }
+
+    // Apply the merges.
+    class TypeInternalsUpdater : public GlobalTypeRewriter {
+      const TypeUpdates& merges;
+
+    public:
+      TypeInternalsUpdater(Module& wasm, const TypeUpdates& merges)
+        : GlobalTypeRewriter(wasm), merges(merges) {
+        // Map the types of expressions (curr->type, etc.) to their merged
+        // types.
+        mapTypes(merges);
+
+        // Update the internals of types (struct fields, signatures, etc.) to
+        // refer to the merged types.
+        update();
+      }
+
+      // Maps a reference |type| to its temp type, using the merge target (with
+      // the same nullability) if its heap type is merged; non-refs are kept.
+      Type getNewType(Type type) {
+        if (!type.isRef()) {
+          return type;
+        }
+        auto heapType = type.getHeapType();
+        auto iter = merges.find(heapType);
+        if (iter != merges.end()) {
+          return getTempType(Type(iter->second, type.getNullability()));
+        }
+        return getTempType(type);
+      }
+
+      void modifyStruct(HeapType oldType, Struct& struct_) override {
+        auto& oldFields = oldType.getStruct().fields;
+        for (Index i = 0; i < oldFields.size(); i++) {
+          auto& oldField = oldFields[i];
+          auto& newField = struct_.fields[i];
+          newField.type = getNewType(oldField.type);
+        }
+      }
+      void modifyArray(HeapType oldType, Array& array) override {
+        array.element.type = getNewType(oldType.getArray().element.type);
+      }
+      void modifySignature(HeapType oldSignatureType, Signature& sig) override {
+        auto getUpdatedTypeList = [&](Type type) {
+          std::vector<Type> vec;
+          for (auto t : type) {
+            vec.push_back(getNewType(t));
+          }
+          return getTempTupleType(vec);
+        };
+
+        auto oldSig = oldSignatureType.getSignature();
+        sig.params = getUpdatedTypeList(oldSig.params);
+        sig.results = getUpdatedTypeList(oldSig.results);
+      }
+    } rewriter(*module, merges);
+  }
+};
+
+} // anonymous namespace
+
+Pass* createTypeMergingPass() { return new TypeMerging(); }
+
+} // namespace wasm
diff --git a/src/passes/TypeSSA.cpp b/src/passes/TypeSSA.cpp
index e25795cdc..17f166a9e 100644
--- a/src/passes/TypeSSA.cpp
+++ b/src/passes/TypeSSA.cpp
@@ -44,6 +44,8 @@
 // then we do nothing atm. We could create a phi there, but in general that
 // would require multiple inheritance. TODO think more on that
 //
+// This pass works well with TypeMerging. See notes there for more.
+//
 
 #include "ir/find_all.h"
 #include "ir/module-utils.h"
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index b97d182c6..d7f2a310f 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -461,6 +461,9 @@ void PassRegistry::registerPasses() {
   registerPass("trap-mode-js",
                "replace trapping operations with js semantics",
                createTrapModeJS);
+  registerPass("type-merging",
+               "merge types to their supertypes where possible",
+               createTypeMergingPass);
   registerPass("type-ssa",
                "create new nominal types to help other optimizations",
                createTypeSSAPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 126dc5f12..1543ae794 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -148,6 +148,7 @@ Pass* createSSAifyNoMergePass();
 Pass* createTrapModeClamp();
 Pass* createTrapModeJS();
 Pass* createTypeRefiningPass();
+Pass* createTypeMergingPass();
 Pass* createTypeSSAPass();
 Pass* createUnteePass();
 Pass* createVacuumPass();
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index 0d7c053d1..a5c21af8b 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -457,6 +457,9 @@
 ;; CHECK-NEXT:   --trap-mode-js                                replace trapping operations with
 ;; CHECK-NEXT:                                                 js semantics
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --type-merging                                merge types to their supertypes
+;; CHECK-NEXT:                                                 where possible
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --type-refining                               apply more specific subtypes to
 ;; CHECK-NEXT:                                                 type fields where possible
 ;; CHECK-NEXT:
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 124cd0bbe..3073bf2f9 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -416,6 +416,9 @@
 ;; CHECK-NEXT:   --trap-mode-js                                replace trapping operations with
 ;; CHECK-NEXT:                                                 js semantics
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --type-merging                                merge types to their supertypes
+;; CHECK-NEXT:                                                 where possible
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --type-refining                               apply more specific subtypes to
 ;; CHECK-NEXT:                                                 type fields where possible
 ;; CHECK-NEXT:
diff --git a/test/lit/passes/type-merging.wast b/test/lit/passes/type-merging.wast
new file mode 100644
index 000000000..4df8d7e88
--- /dev/null
+++ b/test/lit/passes/type-merging.wast
@@ -0,0 +1,237 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; RUN: foreach %s %t wasm-opt --nominal --type-merging -all -S -o - | filecheck %s
+
+(module
+  ;; CHECK:      (type $A (struct (field i32)))
+  (type $A (struct_subtype (field i32) data))
+  (type $B (struct_subtype (field i32) $A))
+  ;; CHECK:      (type $D (struct_subtype (field i32) $A))
+
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (type $C (struct_subtype (field i32) (field f64) $A))
+  (type $C (struct_subtype (field i32) (field f64) $A))
+  (type $D (struct_subtype (field i32) $A))
+
+  ;; CHECK:      (func $foo (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a (ref null $A))
+  ;; CHECK-NEXT:  (local $b (ref null $A))
+  ;; CHECK-NEXT:  (local $c (ref null $C))
+  ;; CHECK-NEXT:  (local $d (ref null $D))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast_static $A
+  ;; CHECK-NEXT:    (local.get $a)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.cast_static $D
+  ;; CHECK-NEXT:    (local.get $a)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $foo
+    ;; $A will remain the same.
+    (local $a (ref null $A))
+    ;; $B can be merged into $A.
+    (local $b (ref null $B))
+    ;; $C cannot because it adds a field.
+    (local $c (ref null $C))
+    ;; $D cannot because it has a cast.
+    (local $d (ref null $D))
+
+    ;; A cast of $A has no effect.
+    (drop
+      (ref.cast_static $A
+        (local.get $a)
+      )
+    )
+    ;; A cast of $D prevents it from being merged.
+    (drop
+      (ref.cast_static $D
+        (local.get $a)
+      )
+    )
+  )
+)
+
+;; Multiple levels of merging.
+(module
+  ;; CHECK:      (type $A (struct (field i32)))
+  (type $A (struct_subtype (field i32) data))
+  (type $B (struct_subtype (field i32) $A))
+  (type $C (struct_subtype (field i32) $B))
+  ;; CHECK:      (type $D (struct_subtype (field i32) (field f64) $A))
+  (type $D (struct_subtype (field i32) (field f64) $A))
+  (type $E (struct_subtype (field i32) (field f64) $D))
+  (type $F (struct_subtype (field i32) (field f64) $E))
+  (type $G (struct_subtype (field i32) (field f64) $F))
+
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (func $foo (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a (ref null $A))
+  ;; CHECK-NEXT:  (local $b (ref null $A))
+  ;; CHECK-NEXT:  (local $c (ref null $A))
+  ;; CHECK-NEXT:  (local $d (ref null $D))
+  ;; CHECK-NEXT:  (local $e (ref null $D))
+  ;; CHECK-NEXT:  (local $f (ref null $D))
+  ;; CHECK-NEXT:  (local $g (ref null $D))
+  ;; CHECK-NEXT:  (nop)
+  ;; CHECK-NEXT: )
+  (func $foo
+    (local $a (ref null $A))
+    ;; $B can be merged into $A.
+    (local $b (ref null $B))
+    ;; $C can be merged into $B, so it will merge into $A.
+    (local $c (ref null $C))
+    ;; $D cannot be merged into $A as it adds a field.
+    (local $d (ref null $D))
+    ;; $E can be merged into $D.
+    (local $e (ref null $E))
+    ;; $F can be merged into $E, so it will merge into $D.
+    (local $f (ref null $F))
+    ;; $G can be merged into $F, so it will merge into $D.
+    (local $g (ref null $G))
+  )
+)
+
+;; As above but now $D is a subtype of $C, so there is a single subtype chain
+;; in which we have two "merge points" that things get merged into. The results
+;; should remain the same as before, everything merged into either $A or $D.
+(module
+  ;; CHECK:      (type $A (struct (field i32)))
+  (type $A (struct_subtype (field i32) data))
+  ;; CHECK:      (type $B (struct_subtype (field i32) $A))
+  (type $B (struct_subtype (field i32) $A))
+  ;; CHECK:      (type $C (struct_subtype (field i32) $B))
+  (type $C (struct_subtype (field i32) $B))
+  ;; CHECK:      (type $D (struct_subtype (field i32) (field f64) $C))
+  (type $D (struct_subtype (field i32) (field f64) $C)) ;; this line changed
+  (type $E (struct_subtype (field i32) (field f64) $D))
+  (type $F (struct_subtype (field i32) (field f64) $E))
+  (type $G (struct_subtype (field i32) (field f64) $F))
+
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (func $foo (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a (ref null $A))
+  ;; CHECK-NEXT:  (local $b (ref null $A))
+  ;; CHECK-NEXT:  (local $c (ref null $A))
+  ;; CHECK-NEXT:  (local $d (ref null $D))
+  ;; CHECK-NEXT:  (local $e (ref null $D))
+  ;; CHECK-NEXT:  (local $f (ref null $D))
+  ;; CHECK-NEXT:  (local $g (ref null $D))
+  ;; CHECK-NEXT:  (nop)
+  ;; CHECK-NEXT: )
+  (func $foo
+    (local $a (ref null $A))
+    (local $b (ref null $B))
+    (local $c (ref null $C))
+    (local $d (ref null $D))
+    (local $e (ref null $E))
+    (local $f (ref null $F))
+    (local $g (ref null $G))
+  )
+)
+
+(module
+  ;; CHECK:      (type $A (struct (field (ref null $A))))
+  (type $A (struct_subtype (field (ref null $A)) data))
+  (type $B (struct_subtype (field (ref null $A)) $A))
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (type $C (struct_subtype (field (ref null $A)) $A))
+  (type $C (struct_subtype (field (ref null $B)) $A))
+
+  ;; CHECK:      (func $foo (type $none_=>_none)
+  ;; CHECK-NEXT:  (local $a (ref null $A))
+  ;; CHECK-NEXT:  (local $b (ref null $A))
+  ;; CHECK-NEXT:  (local $c (ref null $C))
+  ;; CHECK-NEXT:  (nop)
+  ;; CHECK-NEXT: )
+  (func $foo
+    ;; $A will remain the same.
+    (local $a (ref null $A))
+    ;; $B can be merged into $A.
+    (local $b (ref null $B))
+    ;; $C refines the field, so it cannot be merged. However, separately, in
+    ;; the type definition of $C, its field of type $B should become $A. That
+    ;; is, $B should no longer be used anywhere.
+    (local $c (ref null $C))
+  )
+)
+
+;; Check that we refinalize properly.
+(module
+  ;; CHECK:      (type $A (struct ))
+  (type $A (struct))
+  (type $B (struct_subtype $A))
+
+  ;; CHECK:      (type $none_=>_ref?|$A| (func (result (ref null $A))))
+
+  ;; CHECK:      (func $returner (type $none_=>_ref?|$A|) (result (ref null $A))
+  ;; CHECK-NEXT:  (local $local (ref null $A))
+  ;; CHECK-NEXT:  (local.get $local)
+  ;; CHECK-NEXT: )
+  (func $returner (result (ref null $B))
+    (local $local (ref null $B))
+
+    ;; After we change the local to use type $A, we need to update the local.get's
+    ;; type as well, or we will error.
+    (local.get $local)
+  )
+)
+
+;; Test some real-world patterns, including fields to ignore, links between
+;; merged types, etc.
+;;
+;; The result here is that we will merge $A$to-merge into $A, and $D$to-merge
+;; into $D. While doing so we must update the fields and the expressions that
+;; they appear in, and not error.
+(module
+  ;; CHECK:      (type $C (struct (field (mut i32))))
+
+  ;; CHECK:      (type $D (struct_subtype (field (mut i32)) (field (mut i32)) $C))
+
+  ;; CHECK:      (type $I (array (mut (ref null $C))))
+  (type $I (array (mut (ref null $C))))
+  (type $C (struct (field (mut i32))))
+  (type $D (struct_subtype (field (mut i32)) (field (mut i32)) $C))
+  (type $E (struct_subtype (field (mut i32)) (field (mut i32)) $D))
+  (type $F (struct_subtype (field (mut i32)) (field (mut i32)) $E))
+  (type $D$to-merge (struct_subtype (field (mut i32)) (field (mut i32)) $F))
+  ;; CHECK:      (type $G (func (param (ref $C)) (result (ref $D))))
+  (type $G (func (param (ref $C)) (result (ref $D))))
+  ;; CHECK:      (type $H (struct_subtype (field (mut i32)) (field (mut i32)) (field (mut (ref null $D))) $D))
+  (type $H (struct_subtype (field (mut i32)) (field (mut i32)) (field (mut (ref null $E))) $D))
+  ;; CHECK:      (type $A (struct_subtype (field (mut i32)) (field (mut i32)) (field (mut (ref null $D))) (field (mut i64)) (field (mut (ref null $I))) $H))
+  (type $A (struct_subtype (field (mut i32)) (field (mut i32)) (field (mut (ref null $E))) (field (mut i64)) (field (mut (ref null $I))) $H))
+  (type $A$to-merge (struct_subtype (field (mut i32)) (field (mut i32)) (field (mut (ref null $E))) (field (mut i64)) (field (mut (ref null $I))) $A))
+
+  ;; CHECK:      (global $global$0 (ref $D) (struct.new $D
+  ;; CHECK-NEXT:  (i32.const 1705)
+  ;; CHECK-NEXT:  (i32.const 0)
+  ;; CHECK-NEXT: ))
+  (global $global$0 (ref $F) (struct.new $D$to-merge
+    (i32.const 1705)
+    (i32.const 0)
+  ))
+  ;; CHECK:      (func $0 (type $G) (param $0 (ref $C)) (result (ref $D))
+  ;; CHECK-NEXT:  (struct.new $A
+  ;; CHECK-NEXT:   (i32.const 1685)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (global.get $global$0)
+  ;; CHECK-NEXT:   (i64.const 0)
+  ;; CHECK-NEXT:   (array.init_static $I)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $0 (type $G) (param $0 (ref $C)) (result (ref $D))
+    (struct.new $A$to-merge
+      (i32.const 1685)
+      (i32.const 0)
+      (global.get $global$0)
+      (i64.const 0)
+      (array.init_static $I)
+    )
+  )
+)
diff --git a/test/lit/passes/type-ssa_and_merging.wast b/test/lit/passes/type-ssa_and_merging.wast
new file mode 100644
index 000000000..40322bfcb
--- /dev/null
+++ b/test/lit/passes/type-ssa_and_merging.wast
@@ -0,0 +1,147 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; RUN: foreach %s %t wasm-opt --nominal            --gufa -Os                -all -S -o - | filecheck %s --check-prefix NOP
+;; RUN: foreach %s %t wasm-opt --nominal --type-ssa --gufa -Os --type-merging -all -S -o - | filecheck %s --check-prefix YES
+
+;; Show that the combination of type-ssa and type-merging can find things that
+;; otherwise cannot be optimized. NOP will fail to optimize something that YES
+;; can.
+
+(module
+  ;; NOP:      (type $A (struct (field (mut i32))))
+  ;; YES:      (type $none_=>_i32 (func (result i32)))
+
+  ;; YES:      (type $A (struct (field (mut i32))))
+  (type $A (struct_subtype (field (mut i32)) data))
+
+  ;; NOP:      (type $none_=>_i32 (func (result i32)))
+
+  ;; NOP:      (type $ref|$A|_=>_i32 (func (param (ref $A)) (result i32)))
+
+  ;; NOP:      (import "a" "b" (func $import (result i32)))
+  ;; YES:      (type $ref|$A|_=>_none (func (param (ref $A))))
+
+  ;; YES:      (import "a" "b" (func $import (result i32)))
+  (import "a" "b" (func $import (result i32)))
+
+  ;; NOP:      (export "main1" (func $main1))
+
+  ;; NOP:      (export "main2" (func $main2))
+
+  ;; NOP:      (func $main1 (type $none_=>_i32) (; has Stack IR ;) (result i32)
+  ;; NOP-NEXT:  (call $get-a-1
+  ;; NOP-NEXT:   (struct.new $A
+  ;; NOP-NEXT:    (i32.const 42)
+  ;; NOP-NEXT:   )
+  ;; NOP-NEXT:  )
+  ;; NOP-NEXT: )
+  ;; YES:      (export "main1" (func $main1))
+
+  ;; YES:      (export "main2" (func $main2))
+
+  ;; YES:      (func $main1 (type $none_=>_i32) (result i32)
+  ;; YES-NEXT:  (call $get-a-1
+  ;; YES-NEXT:   (struct.new $A
+  ;; YES-NEXT:    (i32.const 42)
+  ;; YES-NEXT:   )
+  ;; YES-NEXT:  )
+  ;; YES-NEXT:  (i32.const 42)
+  ;; YES-NEXT: )
+  (func $main1 (export "main1") (result i32)
+    ;; YES can infer a result here, 42.
+    (call $get-a-1
+      (struct.new $A (i32.const 42))
+    )
+  )
+
+  ;; NOP:      (func $main2 (type $none_=>_i32) (; has Stack IR ;) (result i32)
+  ;; NOP-NEXT:  (call $get-a-2
+  ;; NOP-NEXT:   (struct.new $A
+  ;; NOP-NEXT:    (i32.const 1337)
+  ;; NOP-NEXT:   )
+  ;; NOP-NEXT:  )
+  ;; NOP-NEXT: )
+  ;; YES:      (func $main2 (type $none_=>_i32) (result i32)
+  ;; YES-NEXT:  (call $get-a-2
+  ;; YES-NEXT:   (struct.new $A
+  ;; YES-NEXT:    (i32.const 1337)
+  ;; YES-NEXT:   )
+  ;; YES-NEXT:  )
+  ;; YES-NEXT:  (i32.const 1337)
+  ;; YES-NEXT: )
+  (func $main2 (export "main2") (result i32)
+    ;; YES can infer a result here, 1337.
+    (call $get-a-2
+      (struct.new $A (i32.const 1337))
+    )
+  )
+
+  ;; NOP:      (func $get-a-1 (type $ref|$A|_=>_i32) (; has Stack IR ;) (param $0 (ref $A)) (result i32)
+  ;; NOP-NEXT:  (if
+  ;; NOP-NEXT:   (call $import)
+  ;; NOP-NEXT:   (return
+  ;; NOP-NEXT:    (call $get-a-1
+  ;; NOP-NEXT:     (local.get $0)
+  ;; NOP-NEXT:    )
+  ;; NOP-NEXT:   )
+  ;; NOP-NEXT:  )
+  ;; NOP-NEXT:  (struct.get $A 0
+  ;; NOP-NEXT:   (local.get $0)
+  ;; NOP-NEXT:  )
+  ;; NOP-NEXT: )
+  ;; YES:      (func $get-a-1 (type $ref|$A|_=>_none) (param $0 (ref $A))
+  ;; YES-NEXT:  (if
+  ;; YES-NEXT:   (call $import)
+  ;; YES-NEXT:   (call $get-a-1
+  ;; YES-NEXT:    (local.get $0)
+  ;; YES-NEXT:   )
+  ;; YES-NEXT:  )
+  ;; YES-NEXT: )
+  (func $get-a-1 (param $ref (ref $A)) (result i32)
+    ;; YES infers the result and applies it in the caller, so nothing is
+    ;; returned any more (but we do keep the possibly infinite recursion, which
+    ;; is necessary to avoid inlining making this testcase trivial even in NOP).
+    (if
+      (call $import)
+      (return
+        (call $get-a-1
+          (local.get $ref)
+        )
+      )
+    )
+    (struct.get $A 0 (local.get 0))
+  )
+
+  ;; NOP:      (func $get-a-2 (type $ref|$A|_=>_i32) (; has Stack IR ;) (param $0 (ref $A)) (result i32)
+  ;; NOP-NEXT:  (if
+  ;; NOP-NEXT:   (call $import)
+  ;; NOP-NEXT:   (return
+  ;; NOP-NEXT:    (call $get-a-2
+  ;; NOP-NEXT:     (local.get $0)
+  ;; NOP-NEXT:    )
+  ;; NOP-NEXT:   )
+  ;; NOP-NEXT:  )
+  ;; NOP-NEXT:  (struct.get $A 0
+  ;; NOP-NEXT:   (local.get $0)
+  ;; NOP-NEXT:  )
+  ;; NOP-NEXT: )
+  ;; YES:      (func $get-a-2 (type $ref|$A|_=>_none) (param $0 (ref $A))
+  ;; YES-NEXT:  (if
+  ;; YES-NEXT:   (call $import)
+  ;; YES-NEXT:   (call $get-a-2
+  ;; YES-NEXT:    (local.get $0)
+  ;; YES-NEXT:   )
+  ;; YES-NEXT:  )
+  ;; YES-NEXT: )
+  (func $get-a-2 (param $ref (ref $A)) (result i32)
+    ;; Parallel to the above.
+    (if
+      (call $import)
+      (return
+        (call $get-a-2
+          (local.get $ref)
+        )
+      )
+    )
+    (struct.get $A 0 (local.get 0))
+  )
+)