Topological sorting of types in isorecursive output (#4492)

Generally we try to order types by decreasing use count so that frequently used types get smaller indices. For the equirecursive and nominal systems, there are no contraints on the ordering of types, so we just have to sort them according to their use counts. For the isorecursive type system, however, there are a number of ordering constraints that have to be met for the type section to be valid. First, types in the same recursion group must be adjacent so they can be grouped together. Second, groups must be ordered topologically so that they only refer to types in themselves or prior groups. Update type ordering to produce a valid isorecursive output by performing a topological sort on the recursion groups. While performing the sort, prefer to visit and finish processing the most used groups first as a heuristic to improve the final ordering. Do not reorder types within groups, since doing so would change type identity and could affect the external interface of the module. Leave that reordering to an optimization pass (not yet implemented) that users can explicitly opt in to.
author: Thomas Lively <7121787+tlively@users.noreply.github.com> 2022-02-02 13:24:02 -0800
committer: GitHub <noreply@github.com> 2022-02-02 13:24:02 -0800
commit: 5387c4ec48ba753e44f3b1d92fd43ac366b10b5c (patch)
tree: 8708ba238bffdc87846bc7690fef6051c3285db4 /src/wasm
parent: 348131b0a383d2946f18923f7acfd963089f4f5d (diff)
download: binaryen-5387c4ec48ba753e44f3b1d92fd43ac366b10b5c.tar.gz
binaryen-5387c4ec48ba753e44f3b1d92fd43ac366b10b5c.tar.bz2
binaryen-5387c4ec48ba753e44f3b1d92fd43ac366b10b5c.zip
1 files changed, 36 insertions, 34 deletions
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index d05af96a7..89592fc37 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -1403,12 +1403,20 @@ bool HeapType::isSubType(HeapType left, HeapType right) {
   return SubTyper().isSubType(left, right);
 }
 
-std::vector<HeapType> HeapType::getHeapTypeChildren() {
+std::vector<HeapType> HeapType::getHeapTypeChildren() const {
   HeapTypeChildCollector collector;
-  collector.walkRoot(this);
+  collector.walkRoot(const_cast<HeapType*>(this));
   return collector.children;
 }
 
+std::vector<HeapType> HeapType::getReferencedHeapTypes() const {
+  auto types = getHeapTypeChildren();
+  if (auto super = getSuperType()) {
+    types.push_back(*super);
+  }
+  return types;
+}
+
 HeapType HeapType::getLeastUpperBound(HeapType a, HeapType b) {
   return TypeBounder().getLeastUpperBound(a, b);
 }
@@ -1567,7 +1575,8 @@ bool SubTyper::isSubType(HeapType a, HeapType b) {
     // Basic HeapTypes are never subtypes of compound HeapTypes.
     return false;
   }
-  if (typeSystem == TypeSystem::Nominal) {
+  if (typeSystem == TypeSystem::Nominal ||
+      typeSystem == TypeSystem::Isorecursive) {
     // Subtyping must be declared in a nominal system, not derived from
     // structure, so we will not recurse. TODO: optimize this search with some
     // form of caching.
@@ -2402,7 +2411,12 @@ size_t RecGroupHasher::hash(HeapType type) const {
   // an index into a rec group. Only take the rec group identity into account if
   // the child is not a member of the top-level group because in that case the
   // group may not be canonicalized yet.
-  size_t digest = wasm::hash(type.getRecGroupIndex());
+  size_t digest = wasm::hash(type.isBasic());
+  if (type.isBasic()) {
+    wasm::rehash(digest, type.getID());
+    return digest;
+  }
+  wasm::rehash(digest, type.getRecGroupIndex());
   auto currGroup = type.getRecGroup();
   if (currGroup != group) {
     wasm::rehash(digest, currGroup.getID());
@@ -2525,6 +2539,9 @@ bool RecGroupEquator::eq(HeapType a, HeapType b) const {
   // be canonicalized, explicitly check whether `a` and `b` are in the
   // respective recursion groups of the respective top-level groups we are
   // comparing, in which case the structure is still equivalent.
+  if (a.isBasic() || b.isBasic()) {
+    return a == b;
+  }
   if (a.getRecGroupIndex() != b.getRecGroupIndex()) {
     return false;
   }
@@ -3526,22 +3543,9 @@ void canonicalizeEquirecursive(CanonicalizationState& state) {
     info->supertype = nullptr;
   }
 
-#if TIME_CANONICALIZATION
-  auto start = std::chrono::steady_clock::now();
-#endif
-
   // Canonicalize the shape of the type definition graph.
   ShapeCanonicalizer minimized(state.results);
   state.update(minimized.replacements);
-
-#if TIME_CANONICALIZATION
-  auto end = std::chrono::steady_clock::now();
-  std::cerr << "Shape canonicalization: "
-            << std::chrono::duration_cast<std::chrono::milliseconds>(end -
-                                                                     start)
-                 .count()
-            << " ms\n";
-#endif
 }
 
 std::optional<TypeBuilder::Error>
@@ -3597,10 +3601,6 @@ canonicalizeNominal(CanonicalizationState& state) {
   // Nominal types do not require separate canonicalization, so just validate
   // that their subtyping is correct.
 
-#if TIME_CANONICALIZATION
-  auto start = std::chrono::steady_clock::now();
-#endif
-
   // Ensure there are no cycles in the subtype graph. This is the classic DFA
   // algorithm for detecting cycles, but in the form of a simple loop because
   // each node (type) has at most one child (supertype).
@@ -3627,14 +3627,6 @@ canonicalizeNominal(CanonicalizationState& state) {
     return {*error};
   }
 
-#if TIME_CANONICALIZATION
-  auto end = std::chrono::steady_clock::now();
-  std::cerr << "Validating subtyping took "
-            << std::chrono::duration_cast<std::chrono::milliseconds>(end -
-                                                                     start)
-                 .count()
-            << " ms\n";
-#endif
   return {};
 }
 
@@ -3802,6 +3794,15 @@ TypeBuilder::BuildResult TypeBuilder::build() {
   state.dump();
 #endif
 
+#if TIME_CANONICALIZATION
+  using instant_t = std::chrono::time_point<std::chrono::steady_clock>;
+  auto getMillis = [&](instant_t start, instant_t end) {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
+      .count();
+  };
+  auto start = std::chrono::steady_clock::now();
+#endif
+
   switch (typeSystem) {
     case TypeSystem::Equirecursive:
       canonicalizeEquirecursive(state);
@@ -3819,18 +3820,19 @@ TypeBuilder::BuildResult TypeBuilder::build() {
   }
 
 #if TIME_CANONICALIZATION
-  auto start = std::chrono::steady_clock::now();
+  auto afterStructureCanonicalization = std::chrono::steady_clock::now();
 #endif
 
   globallyCanonicalize(state);
 
 #if TIME_CANONICALIZATION
   auto end = std::chrono::steady_clock::now();
-  std::cerr << "Global canonicalization took "
-            << std::chrono::duration_cast<std::chrono::milliseconds>(end -
-                                                                     start)
-                 .count()
+  std::cerr << "Total canonicalization time was " << getMillis(start, end)
             << " ms\n";
+  std::cerr << "Structure canonicalization took "
+            << getMillis(start, afterStructureCanonicalization) << " ms\n";
+  std::cerr << "Global canonicalization took "
+            << getMillis(afterStructureCanonicalization, end) << " ms\n";
 #endif
 
   // Note built signature types. See comment in `HeapType::HeapType(Signature)`.
author	Thomas Lively <7121787+tlively@users.noreply.github.com>	2022-02-02 13:24:02 -0800
committer	GitHub <noreply@github.com>	2022-02-02 13:24:02 -0800
commit	5387c4ec48ba753e44f3b1d92fd43ac366b10b5c (patch)
tree	8708ba238bffdc87846bc7690fef6051c3285db4 /src/wasm
parent	348131b0a383d2946f18923f7acfd963089f4f5d (diff)
download	binaryen-5387c4ec48ba753e44f3b1d92fd43ac366b10b5c.tar.gz binaryen-5387c4ec48ba753e44f3b1d92fd43ac366b10b5c.tar.bz2 binaryen-5387c4ec48ba753e44f3b1d92fd43ac366b10b5c.zip