[Wasm GC] Use DFA minimization to merge types (#5432)

Analyze type merging candidates as states in a DFA, using a partition refining DFA minimization algorithm to more aggressively find sets of mergeable types. The new implementation can merge types that the previous iteration would have taken an arbitrary number of iterations to merge or would not have been able to merge at all. The new implementation is also careful in its treatment of private and public types and should work as-is under an open world assumption or a relaxed closed-world assumption that admits more public types. Specifically, the new code avoids merging public types into their supertypes, but still allows private subtypes to be merged into their public supertypes. The new implementation supports merging function types, which the old implementation did not. Finally, the new implementation is able to merge identical sibling types even when the analysis determines that they cannot be merged into their common supertype. Extending this capability to sibling top-level types without nontrivial supertypes is left to a follow-on PR.
author: Thomas Lively <tlively@google.com> 2023-01-18 13:46:45 -0600
committer: GitHub <noreply@github.com> 2023-01-18 19:46:45 +0000
commit: aebad47fec02af167bd1c328b8614deb42e9ff0e (patch)
tree: 597bdb02eab7ed4aa6b21e66aed49f782354a91a /src
parent: 82bd2c4e5717cbe19a1a6c34cfdd5116b13a68dc (diff)
download: binaryen-aebad47fec02af167bd1c328b8614deb42e9ff0e.tar.gz
binaryen-aebad47fec02af167bd1c328b8614deb42e9ff0e.tar.bz2
binaryen-aebad47fec02af167bd1c328b8614deb42e9ff0e.zip
3 files changed, 329 insertions, 148 deletions
diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp
index 00f313a2a..67ec4a5c0 100644
--- a/src/ir/type-updating.cpp
+++ b/src/ir/type-updating.cpp
@@ -89,7 +89,7 @@ void GlobalTypeRewriter::update() {
     }
 
     // Apply a super, if there is one
-    if (auto super = type.getSuperType()) {
+    if (auto super = getSuperType(type)) {
       if (auto it = typeIndices.find(*super); it != typeIndices.end()) {
         assert(it->second < i);
         typeBuilder[i].subTypeOf(typeBuilder[it->second]);
diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h
index 5cd8eef47..e1b5e42a9 100644
--- a/src/ir/type-updating.h
+++ b/src/ir/type-updating.h
@@ -357,6 +357,13 @@ public:
   virtual void modifyArray(HeapType oldType, Array& array) {}
   virtual void modifySignature(HeapType oldType, Signature& sig) {}
 
+  // Subclasses can override this method to modify supertypes. The new
+  // supertype, if any, must be a supertype (or the same as) the original
+  // supertype.
+  virtual std::optional<HeapType> getSuperType(HeapType oldType) {
+    return oldType.getSuperType();
+  }
+
   // Map an old type to a temp type. This can be called from the above hooks,
   // so that they can use a proper temp type of the TypeBuilder while modifying
   // things.
diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp
index aa9fcca67..365a6c213 100644
--- a/src/passes/TypeMerging.cpp
+++ b/src/passes/TypeMerging.cpp
@@ -39,23 +39,33 @@
 #include "ir/module-utils.h"
 #include "ir/type-updating.h"
 #include "pass.h"
+#include "support/dfa_minimization.h"
 #include "support/small_set.h"
+#include "support/topological_sort.h"
 #include "wasm-builder.h"
+#include "wasm-type-ordering.h"
 #include "wasm.h"
 
+#define TYPE_MERGING_DEBUG 0
+
+#if TYPE_MERGING_DEBUG
+#include "wasm-type-printing.h"
+#endif
+
 namespace wasm {
 
 namespace {
 
-// We need to find all the types that have references to them, such as casts,
-// as such types must be preserved - even if they are identical to other types,
-// they are nominally distinguishable.
+// We need to find all types that are distinguished from their supertypes by
+// some kind of cast instruction. Merging these types with their supertypes
+// would be an observable change. In contrast, types that are never used in
+// casts are never distinguished from their supertypes.
 
 // Most functions do no casts, or perhaps cast |this| and perhaps a few others.
-using ReferredTypes = SmallUnorderedSet<HeapType, 5>;
+using CastTypes = SmallUnorderedSet<HeapType, 5>;
 
 struct CastFinder : public PostWalker<CastFinder> {
-  ReferredTypes referredTypes;
+  CastTypes castTypes;
 
   // If traps never happen, then ref.cast and call_indirect can never
   // differentiate between types since they always succeed. Take advantage of
@@ -68,7 +78,7 @@ struct CastFinder : public PostWalker<CastFinder> {
 
   template<typename T> void visitCast(T* curr) {
     if (auto type = curr->getCastType(); type != Type::unreachable) {
-      referredTypes.insert(type.getHeapType());
+      castTypes.insert(type.getHeapType());
     }
   }
 
@@ -88,188 +98,352 @@ struct CastFinder : public PostWalker<CastFinder> {
 
   void visitCallIndirect(CallIndirect* curr) {
     if (!trapsNeverHappen) {
-      referredTypes.insert(curr->heapType);
+      castTypes.insert(curr->heapType);
     }
   }
 };
 
+// We are going to treat the type graph as a partitioned DFA where each type is
+// a state with transitions to its children. We will partition the DFA states so
+// that types that may be mergeable will be in the same partition and types that
+// we know are not mergeable will be in different partitions. Then we will
+// refine the partitions so that types that turn out to not be mergeable will be
+// split out into separate partitions.
 struct TypeMerging : public Pass {
+  using TypeUpdates = std::unordered_map<HeapType, HeapType>;
+
   // Only modifies types.
   bool requiresNonNullableLocalFixups() override { return false; }
 
   Module* module;
 
-  // The types we can merge. We map each such type to merge with the type we
-  // want to merge it with.
-  using TypeUpdates = std::unordered_map<HeapType, HeapType>;
-  TypeUpdates merges;
+  std::unordered_set<HeapType> privateTypes;
+  CastTypes castTypes;
 
-  void run(Module* module_) override {
-    module = module_;
+  void run(Module* module_) override;
 
-    if (!module->features.hasGC()) {
-      return;
-    }
+  CastTypes findCastTypes();
+  std::vector<HeapType> getPublicChildren(HeapType type);
+  DFA::State<HeapType> makeDFAState(HeapType type);
+  void applyMerges(const TypeUpdates& merges);
 
-    if (!getPassOptions().closedWorld) {
-      Fatal() << "TypeMerging requires --closed-world";
-    }
+  bool mayBeMergeable(HeapType sub, HeapType super);
+  bool mayBeMergeable(const Struct& a, const Struct& b);
+  bool mayBeMergeable(Array a, Array b);
+  bool mayBeMergeable(Signature a, Signature b);
+  bool mayBeMergeable(Field a, Field b);
+  bool mayBeMergeable(Type a, Type b);
+  bool mayBeMergeable(const Tuple& a, const Tuple& b);
+};
 
-    // First, find all the cast types.
+void TypeMerging::run(Module* module_) {
+  module = module_;
 
-    ModuleUtils::ParallelFunctionAnalysis<ReferredTypes> analysis(
-      *module, [&](Function* func, ReferredTypes& referredTypes) {
-        if (func->imported()) {
-          return;
-        }
+  if (!module->features.hasGC()) {
+    return;
+  }
 
-        CastFinder finder(getPassOptions());
-        finder.walk(func->body);
-        referredTypes = std::move(finder.referredTypes);
-      });
-
-    // Also find cast types in the module scope (not possible in the current
-    // spec, but do it to be future-proof).
-    CastFinder moduleFinder(getPassOptions());
-    moduleFinder.walkModuleCode(module);
-
-    // Accumulate all the referredTypes.
-    auto& allReferredTypes = moduleFinder.referredTypes;
-    for (auto& [k, referredTypes] : analysis.map) {
-      for (auto type : referredTypes) {
-        allReferredTypes.insert(type);
+  if (!getPassOptions().closedWorld) {
+    Fatal() << "TypeMerging requires --closed-world";
+  }
+
+  // First, find all the cast types and private types. We will need these to
+  // determine whether types are eligible to be merged.
+  auto privates = ModuleUtils::getPrivateHeapTypes(*module);
+  privateTypes = std::unordered_set<HeapType>(privates.begin(), privates.end());
+  castTypes = findCastTypes();
+
+  // Initial partitions are formed by grouping types with their structurally
+  // similar supertypes. Starting with the topmost types and working down the
+  // subtype trees, add each type to its supertype's partition if they are
+  // structurally compatible.
+
+  // A list of partitions with stable iterators.
+  using Partition = std::vector<DFA::State<HeapType>>;
+  using Partitions = std::list<Partition>;
+  Partitions partitions;
+
+  // Map each type to its partition in the list.
+  std::unordered_map<HeapType, Partitions::iterator> typePartitions;
+
+#if TYPE_MERGING_DEBUG
+  auto dumpPartitions = [&]() {
+    using Fallback = IndexedTypeNameGenerator<DefaultTypeNameGenerator>;
+    Fallback printPrivate(privates, "private.");
+    ModuleTypeNameGenerator<Fallback> print(*module, printPrivate);
+    size_t i = 0;
+    for (auto& partition : partitions) {
+      std::cerr << i++ << ": " << print(partition[0].val) << "\n";
+      for (size_t j = 1; j < partition.size(); ++j) {
+        std::cerr << "   " << print(partition[j].val) << "\n";
       }
+      std::cerr << "\n";
+    }
+  };
+#endif // TYPE_MERGING_DEBUG
+
+  // Ensure the type has a partition and return a reference to it.
+  auto ensurePartition = [&](HeapType type) {
+    auto [it, inserted] = typePartitions.insert({type, partitions.end()});
+    if (inserted) {
+      it->second = partitions.insert(partitions.end(), {makeDFAState(type)});
     }
+    return it->second;
+  };
+
+  // For each type, either create a new partition or add to its supertype's
+  // partition.
+  for (auto type : HeapTypeOrdering::SupertypesFirst(privates)) {
+    // We need partitions for any public children of this type since those
+    // children will participate in the DFA we're creating.
+    for (auto child : getPublicChildren(type)) {
+      ensurePartition(child);
+    }
+    auto super = type.getSuperType();
+    if (!super || !mayBeMergeable(type, *super)) {
+      // Create a new partition containing just this type.
+      ensurePartition(type);
+      continue;
+    }
+    // The current type and its supertype have the same top-level structure, so
+    // merge the current type's partition into its supertype's partition. First,
+    // find the supertype's partition. The supertype's partition may not exist
+    // yet if the supertype is public since we don't visit public types in this
+    // loop. In that case we can create a new partition for the supertype
+    // because merging private types into public supertypes is fine. (In
+    // contrast, merging public types into their supertypes is not fine.)
+    auto it = ensurePartition(*super);
+    it->push_back(makeDFAState(type));
+    typePartitions[type] = it;
+  }
 
-    // Find all the heap types.
-    std::vector<HeapType> types = ModuleUtils::collectHeapTypes(*module);
-
-    // TODO: There may be more opportunities after this loop. Imagine that we
-    //       decide to merge A and B into C, and there are types X and Y that
-    //       contain a nested reference to A and B respectively, then after A
-    //       and B become identical so do X and Y. The recursive case is not
-    //       trivial, however, and needs more thought.
-    for (auto type : types) {
-      if (allReferredTypes.count(type)) {
-        // This has a cast, so it is distinguishable nominally.
-        continue;
-      }
+#if TYPE_MERGING_DEBUG
+  std::cerr << "Initial partitions:\n";
+  dumpPartitions();
+#endif
 
-      auto super = type.getSuperType();
-      if (!super) {
-        // This has no supertype, so there is nothing to merge it into.
-        continue;
-      }
+  // Construct and refine the partitioned DFA.
+  std::vector<Partition> dfa(std::make_move_iterator(partitions.begin()),
+                             std::make_move_iterator(partitions.end()));
+  auto refinedPartitions = DFA::refinePartitions(dfa);
 
-      if (type.isStruct()) {
-        auto& fields = type.getStruct().fields;
-        auto& superFields = super->getStruct().fields;
-        if (fields == superFields) {
-          // We can merge! This is identical structurally to the super, and also
-          // not distinguishable nominally.
-          merges[type] = *super;
-        }
-      } else if (type.isArray()) {
-        auto element = type.getArray().element;
-        auto superElement = super->getArray().element;
-        if (element == superElement) {
-          merges[type] = *super;
-        }
-      }
-    }
+  // The types we can merge mapped to the type we are merging them into.
+  TypeUpdates merges;
 
-    if (merges.empty()) {
-      return;
+  // Merge each refined partition into a single type.
+  for (const auto& partition : refinedPartitions) {
+    for (size_t i = 1; i < partition.size(); ++i) {
+      merges[partition[i]] = partition[0];
     }
+  }
 
-    // We found things to optimize! Rewrite types in the module to apply those
-    // changes.
+  applyMerges(merges);
+}
 
-    // First, close over the map, so if X can be merged into Y and Y into Z then
-    // we map X into Z.
-    for (auto type : types) {
-      if (!merges.count(type)) {
-        continue;
+CastTypes TypeMerging::findCastTypes() {
+  ModuleUtils::ParallelFunctionAnalysis<CastTypes> analysis(
+    *module, [&](Function* func, CastTypes& castTypes) {
+      if (func->imported()) {
+        return;
       }
 
-      auto newType = merges[type];
-      while (merges.count(newType)) {
-        newType = merges[newType];
-      }
-      // Apply the findings to all intermediate types as well, to avoid
-      // duplicate work in later iterations. That is, all the types we saw in
-      // the above loop will all get merged into newType.
-      auto curr = type;
-      while (1) {
-        auto iter = merges.find(curr);
-        if (iter == merges.end()) {
-          break;
-        }
-        auto& currMerge = iter->second;
-        curr = currMerge;
-        currMerge = newType;
-      }
+      CastFinder finder(getPassOptions());
+      finder.walk(func->body);
+      castTypes = std::move(finder.castTypes);
+    });
+
+  // Also find cast types in the module scope (not possible in the current
+  // spec, but do it to be future-proof).
+  CastFinder moduleFinder(getPassOptions());
+  moduleFinder.walkModuleCode(module);
+
+  // Accumulate all the castTypes.
+  auto& allCastTypes = moduleFinder.castTypes;
+  for (auto& [k, castTypes] : analysis.map) {
+    for (auto type : castTypes) {
+      allCastTypes.insert(type);
+    }
+  }
+  return allCastTypes;
+}
+
+std::vector<HeapType> TypeMerging::getPublicChildren(HeapType type) {
+  std::vector<HeapType> publicChildren;
+  for (auto child : type.getHeapTypeChildren()) {
+    if (!child.isBasic() && !privateTypes.count(child)) {
+      publicChildren.push_back(child);
+    }
+  }
+  return publicChildren;
+}
+
+DFA::State<HeapType> TypeMerging::makeDFAState(HeapType type) {
+  std::vector<HeapType> succs;
+  for (auto child : type.getHeapTypeChildren()) {
+    // Both private and public heap type children participate in the DFA and are
+    // eligible to be successors.
+    if (!child.isBasic()) {
+      succs.push_back(child);
     }
+  }
+  return {type, std::move(succs)};
+}
+
+void TypeMerging::applyMerges(const TypeUpdates& merges) {
+  if (merges.empty()) {
+    return;
+  }
 
-    // Apply the merges.
+  // We found things to optimize! Rewrite types in the module to apply those
+  // changes.
 
-    class TypeInternalsUpdater : public GlobalTypeRewriter {
-      const TypeUpdates& merges;
+  class TypeInternalsUpdater : public GlobalTypeRewriter {
+    const TypeUpdates& merges;
 
-      std::unordered_map<HeapType, Signature> newSignatures;
+    std::unordered_map<HeapType, Signature> newSignatures;
 
-    public:
-      TypeInternalsUpdater(Module& wasm, const TypeUpdates& merges)
-        : GlobalTypeRewriter(wasm), merges(merges) {
+  public:
+    TypeInternalsUpdater(Module& wasm, const TypeUpdates& merges)
+      : GlobalTypeRewriter(wasm), merges(merges) {
 
-        // Map the types of expressions (curr->type, etc.) to their merged
-        // types.
-        mapTypes(merges);
+      // Map the types of expressions (curr->type, etc.) to their merged
+      // types.
+      mapTypes(merges);
 
-        // Update the internals of types (struct fields, signatures, etc.) to
-        // refer to the merged types.
-        update();
+      // Update the internals of types (struct fields, signatures, etc.) to
+      // refer to the merged types.
+      update();
+    }
+
+    Type getNewType(Type type) {
+      if (!type.isRef()) {
+        return type;
+      }
+      auto heapType = type.getHeapType();
+      auto iter = merges.find(heapType);
+      if (iter != merges.end()) {
+        return getTempType(Type(iter->second, type.getNullability()));
       }
+      return getTempType(type);
+    }
 
-      Type getNewType(Type type) {
-        if (!type.isRef()) {
-          return type;
-        }
-        auto heapType = type.getHeapType();
-        auto iter = merges.find(heapType);
-        if (iter != merges.end()) {
-          return getTempType(Type(iter->second, type.getNullability()));
-        }
-        return getTempType(type);
+    void modifyStruct(HeapType oldType, Struct& struct_) override {
+      auto& oldFields = oldType.getStruct().fields;
+      for (Index i = 0; i < oldFields.size(); i++) {
+        auto& oldField = oldFields[i];
+        auto& newField = struct_.fields[i];
+        newField.type = getNewType(oldField.type);
       }
+    }
+    void modifyArray(HeapType oldType, Array& array) override {
+      array.element.type = getNewType(oldType.getArray().element.type);
+    }
+    void modifySignature(HeapType oldSignatureType, Signature& sig) override {
+      auto getUpdatedTypeList = [&](Type type) {
+        std::vector<Type> vec;
+        for (auto t : type) {
+          vec.push_back(getNewType(t));
+        }
+        return getTempTupleType(vec);
+      };
 
-      void modifyStruct(HeapType oldType, Struct& struct_) override {
-        auto& oldFields = oldType.getStruct().fields;
-        for (Index i = 0; i < oldFields.size(); i++) {
-          auto& oldField = oldFields[i];
-          auto& newField = struct_.fields[i];
-          newField.type = getNewType(oldField.type);
+      auto oldSig = oldSignatureType.getSignature();
+      sig.params = getUpdatedTypeList(oldSig.params);
+      sig.results = getUpdatedTypeList(oldSig.results);
+    }
+    std::optional<HeapType> getSuperType(HeapType oldType) override {
+      auto super = oldType.getSuperType();
+      if (super) {
+        if (auto it = merges.find(*super); it != merges.end()) {
+          return it->second;
         }
       }
-      void modifyArray(HeapType oldType, Array& array) override {
-        array.element.type = getNewType(oldType.getArray().element.type);
-      }
-      void modifySignature(HeapType oldSignatureType, Signature& sig) override {
-        auto getUpdatedTypeList = [&](Type type) {
-          std::vector<Type> vec;
-          for (auto t : type) {
-            vec.push_back(getNewType(t));
-          }
-          return getTempTupleType(vec);
-        };
-
-        auto oldSig = oldSignatureType.getSignature();
-        sig.params = getUpdatedTypeList(oldSig.params);
-        sig.results = getUpdatedTypeList(oldSig.results);
-      }
-    } rewriter(*module, merges);
+      return super;
+    }
+  } rewriter(*module, merges);
+}
+
+bool TypeMerging::mayBeMergeable(HeapType sub, HeapType super) {
+  // If the type is distinguishable from its supertype or public, we cannot
+  // merge it.
+  if (castTypes.count(sub) || !privateTypes.count(sub)) {
+    return false;
   }
-};
+  // Check whether `sub` and `super` have the same top-level structure,
+  // including the position and identity of any children that are not included
+  // as transitions in the DFA, i.e. any children that are not nontrivial
+  // references.
+  if (sub.isStruct() && super.isStruct()) {
+    return mayBeMergeable(sub.getStruct(), super.getStruct());
+  }
+  if (sub.isArray() && super.isArray()) {
+    return mayBeMergeable(sub.getArray(), super.getArray());
+  }
+  if (sub.isSignature() && super.isSignature()) {
+    return mayBeMergeable(sub.getSignature(), super.getSignature());
+  }
+  return false;
+}
+
+bool TypeMerging::mayBeMergeable(const Struct& a, const Struct& b) {
+  if (a.fields.size() != b.fields.size()) {
+    return false;
+  }
+  for (size_t i = 0; i < a.fields.size(); ++i) {
+    if (!mayBeMergeable(a.fields[i], b.fields[i])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool TypeMerging::mayBeMergeable(Array a, Array b) {
+  return mayBeMergeable(a.element, b.element);
+}
+
+bool TypeMerging::mayBeMergeable(Signature a, Signature b) {
+  return mayBeMergeable(a.params, b.params) &&
+         mayBeMergeable(a.results, b.results);
+}
+
+bool TypeMerging::mayBeMergeable(Field a, Field b) {
+  return a.packedType == b.packedType && mayBeMergeable(a.type, b.type);
+}
+
+bool TypeMerging::mayBeMergeable(Type a, Type b) {
+  if (a == b) {
+    return true;
+  }
+  if (a.isTuple() && b.isTuple()) {
+    return mayBeMergeable(a.getTuple(), b.getTuple());
+  }
+  // The only thing allowed to differ is the non-basic heap type child, since we
+  // don't know before running the DFA partition refinement whether different
+  // heap type children will end up being merged. Children that won't be merged
+  // will end up being differentiated by the partition refinement.
+  if (!a.isRef() || !b.isRef()) {
+    return false;
+  }
+  if (a.getHeapType().isBasic() || b.getHeapType().isBasic()) {
+    return false;
+  }
+  if (a.getNullability() != b.getNullability()) {
+    return false;
+  }
+  return true;
+}
+
+bool TypeMerging::mayBeMergeable(const Tuple& a, const Tuple& b) {
+  if (a.types.size() != b.types.size()) {
+    return false;
+  }
+  for (size_t i = 0; i < a.types.size(); ++i) {
+    if (!mayBeMergeable(a.types[i], b.types[i])) {
+      return false;
+    }
+  }
+  return true;
+}
 
 } // anonymous namespace
author	Thomas Lively <tlively@google.com>	2023-01-18 13:46:45 -0600
committer	GitHub <noreply@github.com>	2023-01-18 19:46:45 +0000
commit	aebad47fec02af167bd1c328b8614deb42e9ff0e (patch)
tree	597bdb02eab7ed4aa6b21e66aed49f782354a91a /src
parent	82bd2c4e5717cbe19a1a6c34cfdd5116b13a68dc (diff)
download	binaryen-aebad47fec02af167bd1c328b8614deb42e9ff0e.tar.gz binaryen-aebad47fec02af167bd1c328b8614deb42e9ff0e.tar.bz2 binaryen-aebad47fec02af167bd1c328b8614deb42e9ff0e.zip