[Wasm GC] Merge root types with identical shapes (#5438)

The TypeMerging optimization usually tries to merge types into their supertypes, but it can also merge sibling types together without merging them into their common supertype. Implement this kind of sibling merging for root types that have no nontrivial supertypes. Root types were previously never merged. The implementation requires hashing and comparing the top-level structure of the root types so that root types with the same structure can be assigned to the same partition for DFA minimization.
author: Thomas Lively <tlively@google.com> 2023-01-19 11:34:28 -0600
committer: GitHub <noreply@github.com> 2023-01-19 17:34:28 +0000
commit: 0d6d317d44d13240dfc9427f8f32c1299449287b (patch)
tree: 52be1b94b7d58f18433597995468aa8522d0111b /src
parent: aebad47fec02af167bd1c328b8614deb42e9ff0e (diff)
download: binaryen-0d6d317d44d13240dfc9427f8f32c1299449287b.tar.gz
binaryen-0d6d317d44d13240dfc9427f8f32c1299449287b.tar.bz2
binaryen-0d6d317d44d13240dfc9427f8f32c1299449287b.zip
1 files changed, 161 insertions, 50 deletions
diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp
index 365a6c213..6f39d586f 100644
--- a/src/passes/TypeMerging.cpp
+++ b/src/passes/TypeMerging.cpp
@@ -126,14 +126,35 @@ struct TypeMerging : public Pass {
   std::vector<HeapType> getPublicChildren(HeapType type);
   DFA::State<HeapType> makeDFAState(HeapType type);
   void applyMerges(const TypeUpdates& merges);
+};
 
-  bool mayBeMergeable(HeapType sub, HeapType super);
-  bool mayBeMergeable(const Struct& a, const Struct& b);
-  bool mayBeMergeable(Array a, Array b);
-  bool mayBeMergeable(Signature a, Signature b);
-  bool mayBeMergeable(Field a, Field b);
-  bool mayBeMergeable(Type a, Type b);
-  bool mayBeMergeable(const Tuple& a, const Tuple& b);
+// Hash and equality-compare HeapTypes based on their top-level structure (i.e.
+// "shape"), ignoring nontrivial heap type children that will not be
+// differentiated between until we run the DFA partition refinement.
+bool shapeEq(HeapType a, HeapType b);
+bool shapeEq(const Struct& a, const Struct& b);
+bool shapeEq(Array a, Array b);
+bool shapeEq(Signature a, Signature b);
+bool shapeEq(Field a, Field b);
+bool shapeEq(Type a, Type b);
+bool shapeEq(const Tuple& a, const Tuple& b);
+
+size_t shapeHash(HeapType a);
+size_t shapeHash(const Struct& a);
+size_t shapeHash(Array a);
+size_t shapeHash(Signature a);
+size_t shapeHash(Field a);
+size_t shapeHash(Type a);
+size_t shapeHash(const Tuple& a);
+
+struct ShapeEq {
+  bool operator()(const HeapType& a, const HeapType& b) const {
+    return shapeEq(a, b);
+  }
+};
+
+struct ShapeHash {
+  size_t operator()(const HeapType& type) const { return shapeHash(type); }
 };
 
 void TypeMerging::run(Module* module_) {
@@ -166,11 +187,15 @@ void TypeMerging::run(Module* module_) {
   // Map each type to its partition in the list.
   std::unordered_map<HeapType, Partitions::iterator> typePartitions;
 
+  // Map the top-level structures of root types to their partitions in the list.
+  std::unordered_map<HeapType, Partitions::iterator, ShapeHash, ShapeEq>
+    shapePartitions;
+
 #if TYPE_MERGING_DEBUG
+  using Fallback = IndexedTypeNameGenerator<DefaultTypeNameGenerator>;
+  Fallback printPrivate(privates, "private.");
+  ModuleTypeNameGenerator<Fallback> print(*module, printPrivate);
   auto dumpPartitions = [&]() {
-    using Fallback = IndexedTypeNameGenerator<DefaultTypeNameGenerator>;
-    Fallback printPrivate(privates, "private.");
-    ModuleTypeNameGenerator<Fallback> print(*module, printPrivate);
     size_t i = 0;
     for (auto& partition : partitions) {
       std::cerr << i++ << ": " << print(partition[0].val) << "\n";
@@ -182,8 +207,11 @@ void TypeMerging::run(Module* module_) {
   };
 #endif // TYPE_MERGING_DEBUG
 
-  // Ensure the type has a partition and return a reference to it.
-  auto ensurePartition = [&](HeapType type) {
+  // Ensure the type has a partition and return a reference to it. Since we
+  // merge up the type tree and visit supertypes first, the partition usually
+  // already exists. The exception is when the supertype is public, in which
+  // case we might not have created a partition for it yet.
+  auto ensurePartition = [&](HeapType type) -> Partitions::iterator {
     auto [it, inserted] = typePartitions.insert({type, partitions.end()});
     if (inserted) {
       it->second = partitions.insert(partitions.end(), {makeDFAState(type)});
@@ -191,6 +219,16 @@ void TypeMerging::run(Module* module_) {
     return it->second;
   };
 
+  // Similar to the above, but look up or create a partition associated with the
+  // type's top-level shape rather than its identity.
+  auto ensureShapePartition = [&](HeapType type) -> Partitions::iterator {
+    auto [it, inserted] = shapePartitions.insert({type, partitions.end()});
+    if (inserted) {
+      it->second = partitions.insert(partitions.end(), Partition{});
+    }
+    return it->second;
+  };
+
   // For each type, either create a new partition or add to its supertype's
   // partition.
   for (auto type : HeapTypeOrdering::SupertypesFirst(privates)) {
@@ -199,19 +237,31 @@ void TypeMerging::run(Module* module_) {
     for (auto child : getPublicChildren(type)) {
       ensurePartition(child);
     }
+    // If the type is distinguished by the module or public, we cannot merge it,
+    // so create a new partition for it.
+    if (castTypes.count(type) || !privateTypes.count(type)) {
+      ensurePartition(type);
+      continue;
+    }
     auto super = type.getSuperType();
-    if (!super || !mayBeMergeable(type, *super)) {
-      // Create a new partition containing just this type.
+    // If there is no supertype to merge with, then we can still merge with
+    // other root types with the same structure. Find and add to the partition
+    // with other such types.
+    if (!super) {
+      auto it = ensureShapePartition(type);
+      it->push_back(makeDFAState(type));
+      typePartitions[type] = it;
+      continue;
+    }
+    // If this type refines its supertype in some way, then we cannot merge it.
+    // Create a new partition for it.
+    if (!shapeEq(type, *super)) {
       ensurePartition(type);
       continue;
     }
-    // The current type and its supertype have the same top-level structure, so
-    // merge the current type's partition into its supertype's partition. First,
-    // find the supertype's partition. The supertype's partition may not exist
-    // yet if the supertype is public since we don't visit public types in this
-    // loop. In that case we can create a new partition for the supertype
-    // because merging private types into public supertypes is fine. (In
-    // contrast, merging public types into their supertypes is not fine.)
+    // The current type and its supertype have the same top-level structure and
+    // are not distinguished, so add the current type to its supertype's
+    // partition.
     auto it = ensurePartition(*super);
     it->push_back(makeDFAState(type));
     typePartitions[type] = it;
@@ -363,59 +413,91 @@ void TypeMerging::applyMerges(const TypeUpdates& merges) {
   } rewriter(*module, merges);
 }
 
-bool TypeMerging::mayBeMergeable(HeapType sub, HeapType super) {
-  // If the type is distinguishable from its supertype or public, we cannot
-  // merge it.
-  if (castTypes.count(sub) || !privateTypes.count(sub)) {
-    return false;
+bool shapeEq(HeapType a, HeapType b) {
+  // Check whether `a` and `b` have the same top-level structure, including the
+  // position and identity of any children that are not included as transitions
+  // in the DFA, i.e. any children that are not nontrivial references.
+  if (a.isStruct() && b.isStruct()) {
+    return shapeEq(a.getStruct(), b.getStruct());
   }
-  // Check whether `sub` and `super` have the same top-level structure,
-  // including the position and identity of any children that are not included
-  // as transitions in the DFA, i.e. any children that are not nontrivial
-  // references.
-  if (sub.isStruct() && super.isStruct()) {
-    return mayBeMergeable(sub.getStruct(), super.getStruct());
+  if (a.isArray() && b.isArray()) {
+    return shapeEq(a.getArray(), b.getArray());
   }
-  if (sub.isArray() && super.isArray()) {
-    return mayBeMergeable(sub.getArray(), super.getArray());
-  }
-  if (sub.isSignature() && super.isSignature()) {
-    return mayBeMergeable(sub.getSignature(), super.getSignature());
+  if (a.isSignature() && b.isSignature()) {
+    return shapeEq(a.getSignature(), b.getSignature());
   }
   return false;
 }
 
-bool TypeMerging::mayBeMergeable(const Struct& a, const Struct& b) {
+size_t shapeHash(HeapType a) {
+  size_t digest;
+  if (a.isStruct()) {
+    digest = hash(0);
+    hash_combine(digest, shapeHash(a.getStruct()));
+  } else if (a.isArray()) {
+    digest = hash(1);
+    hash_combine(digest, shapeHash(a.getArray()));
+  } else if (a.isSignature()) {
+    digest = hash(2);
+    hash_combine(digest, shapeHash(a.getSignature()));
+  } else {
+    WASM_UNREACHABLE("unexpected kind");
+  }
+  return digest;
+}
+
+bool shapeEq(const Struct& a, const Struct& b) {
   if (a.fields.size() != b.fields.size()) {
     return false;
   }
   for (size_t i = 0; i < a.fields.size(); ++i) {
-    if (!mayBeMergeable(a.fields[i], b.fields[i])) {
+    if (!shapeEq(a.fields[i], b.fields[i])) {
       return false;
     }
   }
   return true;
 }
 
-bool TypeMerging::mayBeMergeable(Array a, Array b) {
-  return mayBeMergeable(a.element, b.element);
+size_t shapeHash(const Struct& a) {
+  size_t digest = hash(a.fields.size());
+  for (size_t i = 0; i < a.fields.size(); ++i) {
+    hash_combine(digest, shapeHash(a.fields[i]));
+  }
+  return digest;
+}
+
+bool shapeEq(Array a, Array b) { return shapeEq(a.element, b.element); }
+
+size_t shapeHash(Array a) { return shapeHash(a.element); }
+
+bool shapeEq(Signature a, Signature b) {
+  return shapeEq(a.params, b.params) && shapeEq(a.results, b.results);
+}
+
+size_t shapeHash(Signature a) {
+  auto digest = shapeHash(a.params);
+  hash_combine(digest, shapeHash(a.results));
+  return digest;
 }
 
-bool TypeMerging::mayBeMergeable(Signature a, Signature b) {
-  return mayBeMergeable(a.params, b.params) &&
-         mayBeMergeable(a.results, b.results);
+bool shapeEq(Field a, Field b) {
+  return a.packedType == b.packedType && a.mutable_ == b.mutable_ &&
+         shapeEq(a.type, b.type);
 }
 
-bool TypeMerging::mayBeMergeable(Field a, Field b) {
-  return a.packedType == b.packedType && mayBeMergeable(a.type, b.type);
+size_t shapeHash(Field a) {
+  auto digest = hash((int)a.packedType);
+  rehash(digest, (int)a.mutable_);
+  hash_combine(digest, shapeHash(a.type));
+  return digest;
 }
 
-bool TypeMerging::mayBeMergeable(Type a, Type b) {
+bool shapeEq(Type a, Type b) {
   if (a == b) {
     return true;
   }
   if (a.isTuple() && b.isTuple()) {
-    return mayBeMergeable(a.getTuple(), b.getTuple());
+    return shapeEq(a.getTuple(), b.getTuple());
   }
   // The only thing allowed to differ is the non-basic heap type child, since we
   // don't know before running the DFA partition refinement whether different
@@ -433,18 +515,47 @@ bool TypeMerging::mayBeMergeable(Type a, Type b) {
   return true;
 }
 
-bool TypeMerging::mayBeMergeable(const Tuple& a, const Tuple& b) {
+size_t shapeHash(Type a) {
+  if (a.isTuple()) {
+    auto digest = hash(0);
+    hash_combine(digest, shapeHash(a.getTuple()));
+    return digest;
+  }
+  auto digest = hash(1);
+  if (!a.isRef()) {
+    rehash(digest, 2);
+    return digest;
+  }
+  if (a.getHeapType().isBasic()) {
+    rehash(digest, 3);
+    rehash(digest, a.getHeapType().getID());
+    return digest;
+  }
+  rehash(digest, 4);
+  rehash(digest, (int)a.getNullability());
+  return digest;
+}
+
+bool shapeEq(const Tuple& a, const Tuple& b) {
   if (a.types.size() != b.types.size()) {
     return false;
   }
   for (size_t i = 0; i < a.types.size(); ++i) {
-    if (!mayBeMergeable(a.types[i], b.types[i])) {
+    if (!shapeEq(a.types[i], b.types[i])) {
       return false;
     }
   }
   return true;
 }
 
+size_t shapeHash(const Tuple& a) {
+  auto digest = hash(a.types.size());
+  for (auto type : a.types) {
+    hash_combine(digest, shapeHash(type));
+  }
+  return digest;
+}
+
 } // anonymous namespace
 
 Pass* createTypeMergingPass() { return new TypeMerging(); }
author	Thomas Lively <tlively@google.com>	2023-01-19 11:34:28 -0600
committer	GitHub <noreply@github.com>	2023-01-19 17:34:28 +0000
commit	0d6d317d44d13240dfc9427f8f32c1299449287b (patch)
tree	52be1b94b7d58f18433597995468aa8522d0111b /src
parent	aebad47fec02af167bd1c328b8614deb42e9ff0e (diff)
download	binaryen-0d6d317d44d13240dfc9427f8f32c1299449287b.tar.gz binaryen-0d6d317d44d13240dfc9427f8f32c1299449287b.tar.bz2 binaryen-0d6d317d44d13240dfc9427f8f32c1299449287b.zip