Fix merging of unrelated types in TypeMerging (#5897)

Previously it was possible that the supertype merging phase would merge unrelated types when DFA minimization would split a common supertype out of a partition, leaving unrelated types behind in the same partition. Fix the problem by post-processing the partitions in the supertype merging phase to split any partitions that contain unrelated types. Fixes #5877.
author: Thomas Lively <tlively@google.com> 2023-08-23 23:55:45 -0500
committer: GitHub <noreply@github.com> 2023-08-23 21:55:45 -0700
commit: da8937a71e908fecfc5722594dadd3c7ec6e80be (patch)
tree: 258780a8802a3362bc6d13bb8b7909b9dd948261
parent: d4207b10ca8038fdb9c11d6dfe1be2b7e84a1266 (diff)
download: binaryen-da8937a71e908fecfc5722594dadd3c7ec6e80be.tar.gz
binaryen-da8937a71e908fecfc5722594dadd3c7ec6e80be.tar.bz2
binaryen-da8937a71e908fecfc5722594dadd3c7ec6e80be.zip
2 files changed, 143 insertions, 7 deletions
diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp
index 808076f85..ea1fa7916 100644
--- a/src/passes/TypeMerging.cpp
+++ b/src/passes/TypeMerging.cpp
@@ -152,6 +152,11 @@ struct TypeMerging : public Pass {
   enum MergeKind { Supertypes, Siblings };
   bool merge(MergeKind kind);
 
+  // Split a partition into potentially multiple partitions for each
+  // disconnected group of types it contains.
+  std::vector<std::vector<HeapType>>
+  splitSupertypePartition(const std::vector<HeapType>&);
+
   CastTypes findCastTypes();
   std::vector<HeapType> getPublicChildren(HeapType type);
   DFA::State<HeapType> makeDFAState(HeapType type);
@@ -259,6 +264,7 @@ bool TypeMerging::merge(MergeKind kind) {
       std::cerr << "\n";
     }
   };
+
 #endif // TYPE_MERGING_DEBUG
 
   // Map each type to its partition in the list.
@@ -361,16 +367,46 @@ bool TypeMerging::merge(MergeKind kind) {
   auto refinedPartitions = DFA::refinePartitions(dfa);
 
 #if TYPE_MERGING_DEBUG
+  auto dumpRefinedPartitions = [&]() {
+    size_t i = 0;
+    for (auto& partition : refinedPartitions) {
+      std::cerr << i++ << ": " << print(partition[0]) << "\n";
+      for (size_t j = 1; j < partition.size(); ++j) {
+        std::cerr << "   " << print(partition[j]) << "\n";
+      }
+      std::cerr << "\n";
+    }
+  };
   std::cerr << "Refined partitions:\n";
-  size_t i = 0;
-  for (auto& partition : refinedPartitions) {
-    std::cerr << i++ << ": " << print(partition[0]) << "\n";
-    for (size_t j = 1; j < partition.size(); ++j) {
-      std::cerr << "   " << print(partition[j]) << "\n";
+  dumpRefinedPartitions();
+#endif
+
+  if (kind == Supertypes) {
+    // It's possible that a partition has been split such that a common ancestor
+    // ended up in one of the new partitions, leaving unrelated types grouped
+    // together in the other new partition. Since we are only supposed to be
+    // merging types into their supertypes, merging such unrelated types would
+    // be unsafe. Post-process the refined partitions to manually split any
+    // partitions containing unrelated types.
+    //
+    // Normally splitting partitions like this would require re-running DFA
+    // minimization afterward, but in this case it is not possible that the
+    // manual splits cause types in any other partition to become
+    // differentiatable. A type and its subtype cannot differ by referring to
+    // different, unrelated types in the same position because then they would
+    // not be in a valid subtype relationship.
+    std::vector<std::vector<HeapType>> newPartitions;
+    for (const auto& partitionTypes : refinedPartitions) {
+      auto split = splitSupertypePartition(partitionTypes);
+      newPartitions.insert(newPartitions.end(), split.begin(), split.end());
     }
-    std::cerr << "\n";
-  }
+    refinedPartitions = newPartitions;
+
+#if TYPE_MERGING_DEBUG
+    std::cerr << "Manually split partitions:\n";
+    dumpRefinedPartitions();
 #endif
+  }
 
   // Merge each refined partition into a single type. We should only merge into
   // supertypes or siblings because if we try to merge into a subtype then we
@@ -408,6 +444,32 @@ bool TypeMerging::merge(MergeKind kind) {
   return merged;
 }
 
+std::vector<std::vector<HeapType>>
+TypeMerging::splitSupertypePartition(const std::vector<HeapType>& types) {
+  if (types.size() == 1) {
+    // Cannot split a partition containing just one type.
+    return {types};
+  }
+  std::unordered_set<HeapType> includedTypes(types.begin(), types.end());
+  std::vector<std::vector<HeapType>> partitions;
+  std::unordered_map<HeapType, Index> partitionIndices;
+  for (auto type : MergeableSupertypesFirst(*this, types)) {
+    auto super = type.getSuperType();
+    if (super && includedTypes.count(*super)) {
+      // We must already have a partition for the supertype we can add to.
+      auto index = partitionIndices.at(*super);
+      partitions[index].push_back(type);
+      partitionIndices[type] = index;
+    } else {
+      // This is a new root type. Create a new partition.
+      auto index = partitions.size();
+      partitions.push_back({type});
+      partitionIndices[type] = index;
+    }
+  }
+  return partitions;
+}
+
 CastTypes TypeMerging::findCastTypes() {
   ModuleUtils::ParallelFunctionAnalysis<CastTypes> analysis(
     *module, [&](Function* func, CastTypes& castTypes) {
diff --git a/test/lit/passes/type-merging.wast b/test/lit/passes/type-merging.wast
index f7cae2a42..b3cc45bd2 100644
--- a/test/lit/passes/type-merging.wast
+++ b/test/lit/passes/type-merging.wast
@@ -953,6 +953,80 @@
  )
 )
 
+;; Regression test for a bug where we were over-aggressive in merging during the
+;; supertype phase. Types A, B, C, D1, and D2 will all start out in the same
+;; supertype merging partition, but partition refinement will show that A, B,
+;; and C are distinct. We previously continued to merge D1 and D2, but that is
+;; incorrect, as such a merge will either make g1 or g2 invalid below. The fix
+;; was to manually split partitions that end up containing separate type trees.
+(module
+ ;; CHECK:      (rec
+ ;; CHECK-NEXT:  (type $I (struct (field anyref)))
+ (type $I (sub    (struct (field anyref))))
+ ;; CHECK:       (type $J (sub $I (struct (field eqref))))
+ (type $J (sub $I (struct (field eqref))))
+ ;; CHECK:       (type $K (sub $J (struct (field i31ref))))
+ (type $K (sub $J (struct (field i31ref))))
+ (rec
+  ;; CHECK:       (type $A (struct (field (ref null $A)) (field (ref null $I))))
+  (type $A  (sub    (struct (ref null $A) (ref null $I))))
+  ;; CHECK:       (type $C (sub $A (struct (field (ref null $A)) (field (ref null $K)))))
+
+  ;; CHECK:       (type $D2 (sub $C (struct (field (ref null $B)) (field (ref null $K)))))
+
+  ;; CHECK:       (type $B (sub $A (struct (field (ref null $B)) (field (ref null $J)))))
+  (type $B  (sub $A (struct (ref null $B) (ref null $J))))
+  (type $C  (sub $A (struct (ref null $A) (ref null $K))))
+  ;; CHECK:       (type $D1 (sub $B (struct (field (ref null $B)) (field (ref null $K)))))
+  (type $D1 (sub $B (struct (ref null $B) (ref null $K))))
+  (type $D2 (sub $C (struct (ref null $B) (ref null $K))))
+ )
+
+ ;; CHECK:      (global $g1 (ref $B) (struct.new_default $D1))
+ (global $g1 (ref $B) (struct.new_default $D1))
+ ;; CHECK:      (global $g2 (ref $C) (struct.new_default $D2))
+ (global $g2 (ref $C) (struct.new_default $D2))
+)
+
+;; Same as above, but with some additional types that can be merged.
+(module
+ ;; CHECK:      (rec
+ ;; CHECK-NEXT:  (type $I (struct (field anyref)))
+ (type $I (sub    (struct (field anyref))))
+ ;; CHECK:       (type $J (sub $I (struct (field eqref))))
+ (type $J (sub $I (struct (field eqref))))
+ ;; CHECK:       (type $K (sub $J (struct (field i31ref))))
+ (type $K (sub $J (struct (field i31ref))))
+ (rec
+  ;; CHECK:       (type $A (struct (field (ref null $A)) (field (ref null $I))))
+  (type $A  (sub     (struct (ref null $A) (ref null $I))))
+  (type $A' (sub $A  (struct (ref null $A) (ref null $I))))
+  ;; CHECK:       (type $C (sub $A (struct (field (ref null $A)) (field (ref null $K)))))
+
+  ;; CHECK:       (type $D2 (sub $C (struct (field (ref null $B)) (field (ref null $K)))))
+
+  ;; CHECK:       (type $B (sub $A (struct (field (ref null $B)) (field (ref null $J)))))
+  (type $B  (sub $A' (struct (ref null $B) (ref null $J))))
+  (type $B' (sub $B  (struct (ref null $B) (ref null $J))))
+  (type $C  (sub $A' (struct (ref null $A) (ref null $K))))
+  (type $C' (sub $C  (struct (ref null $A) (ref null $K))))
+  ;; CHECK:       (type $D1 (sub $B (struct (field (ref null $B)) (field (ref null $K)))))
+  (type $D1 (sub $B' (struct (ref null $B) (ref null $K))))
+  (type $D1' (sub $D1 (struct (ref null $B) (ref null $K))))
+  (type $D2 (sub $C' (struct (ref null $B) (ref null $K))))
+  (type $D2' (sub $D2 (struct (ref null $B) (ref null $K))))
+ )
+
+ ;; CHECK:      (global $g1 (ref $B) (struct.new_default $D1))
+ (global $g1 (ref $B') (struct.new_default $D1'))
+ ;; CHECK:      (global $g2 (ref $C) (struct.new_default $D2))
+ (global $g2 (ref $C') (struct.new_default $D2'))
+)
+
+ (global $g1 (ref $B) (struct.new_default $D1))
+ (global $g2 (ref $C) (struct.new_default $D2))
+)
+
 ;; Check that a ref.test inhibits merging (ref.cast is already checked above).
 (module
   ;; CHECK:      (rec
author	Thomas Lively <tlively@google.com>	2023-08-23 23:55:45 -0500
committer	GitHub <noreply@github.com>	2023-08-23 21:55:45 -0700
commit	da8937a71e908fecfc5722594dadd3c7ec6e80be (patch)
tree	258780a8802a3362bc6d13bb8b7909b9dd948261
parent	d4207b10ca8038fdb9c11d6dfe1be2b7e84a1266 (diff)
download	binaryen-da8937a71e908fecfc5722594dadd3c7ec6e80be.tar.gz binaryen-da8937a71e908fecfc5722594dadd3c7ec6e80be.tar.bz2 binaryen-da8937a71e908fecfc5722594dadd3c7ec6e80be.zip