diff options
-rwxr-xr-x | scripts/fuzz_opt.py | 4 | ||||
-rw-r--r-- | src/passes/TypeMerging.cpp | 199 | ||||
-rw-r--r-- | test/lit/passes/type-merging.wast | 195 |
3 files changed, 309 insertions, 89 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 3608e339e..653ea30b6 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1422,9 +1422,7 @@ opt_choices = [ ("--simplify-locals-notee-nostructure",), ("--ssa",), ("--type-refining",), - # TypeMerging is blocked by - # https://github.com/WebAssembly/binaryen/issues/5556 - # ("--type-merging",), + ("--type-merging",), ("--type-ssa",), ("--vacuum",), ] diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp index d3652d010..04bc4b850 100644 --- a/src/passes/TypeMerging.cpp +++ b/src/passes/TypeMerging.cpp @@ -56,6 +56,10 @@ namespace wasm { namespace { +// Stop merging after a while to avoid spending too long on pathological +// modules. +constexpr int MAX_ITERATIONS = 20; + // We need to find all types that are distinguished from their supertypes by // some kind of cast instruction. Merging these types with their supertypes // would be an observable change. In contrast, types that are never used in @@ -110,20 +114,65 @@ struct CastFinder : public PostWalker<CastFinder> { // refine the partitions so that types that turn out to not be mergeable will be // split out into separate partitions. struct TypeMerging : public Pass { + // A list of partitions with stable iterators. + using Partition = std::vector<DFA::State<HeapType>>; + using Partitions = std::list<Partition>; + // Only modifies types. bool requiresNonNullableLocalFixups() override { return false; } Module* module; + // All private original types. std::unordered_set<HeapType> privateTypes; + + // Types that are distinguished by cast instructions. CastTypes castTypes; + // The list of remaining types that have not been merged into other types. + // Candidates for further merging. + std::vector<HeapType> mergeable; + + // Map the original types to the types they will be merged into, if any. + TypeMapper::TypeUpdates merges; + HeapType getMerged(HeapType type) { + for (auto it = merges.find(type); it != merges.end(); + it = merges.find(type)) { + type = it->second; + } + return type; + } + void run(Module* module_) override; + // We will do two different kinds of merging: First, we will merge types into + // their identical supertypes, and after that we will will merge types into + // their identical siblings in the type hierarchy. Doing both kinds of merges + // in a single step would be unsound because a type might be merged into its + // parent's sibling without being merged with its parent. + enum MergeKind { Supertypes, Siblings }; + bool merge(MergeKind kind); + CastTypes findCastTypes(); std::vector<HeapType> getPublicChildren(HeapType type); DFA::State<HeapType> makeDFAState(HeapType type); - void applyMerges(const TypeMapper::TypeUpdates& merges); + void applyMerges(); +}; + +struct MergeableSupertypesFirst + : HeapTypeOrdering::SupertypesFirstBase<MergeableSupertypesFirst> { + TypeMerging& merging; + + template<typename T> + MergeableSupertypesFirst(TypeMerging& merging, const T& types) + : SupertypesFirstBase(types), merging(merging) {} + + std::optional<HeapType> getSuperType(HeapType type) { + if (auto super = type.getSuperType()) { + return merging.getMerged(*super); + } + return std::nullopt; + } }; // Hash and equality-compare HeapTypes based on their top-level structure (i.e. @@ -168,42 +217,37 @@ void TypeMerging::run(Module* module_) { // First, find all the cast types and private types. We will need these to // determine whether types are eligible to be merged. - auto privates = ModuleUtils::getPrivateHeapTypes(*module); - privateTypes = std::unordered_set<HeapType>(privates.begin(), privates.end()); + mergeable = ModuleUtils::getPrivateHeapTypes(*module); + privateTypes = + std::unordered_set<HeapType>(mergeable.begin(), mergeable.end()); castTypes = findCastTypes(); - // Initial partitions are formed by grouping types with their structurally - // similar supertypes. Starting with the topmost types and working down the - // subtype trees, add each type to its supertype's partition if they are - // structurally compatible. - - // A list of partitions with stable iterators. - using Partition = std::vector<DFA::State<HeapType>>; - using Partitions = std::list<Partition>; - Partitions partitions; + // Merging supertypes or siblings can unlock more sibling merging + // opportunities, but merging siblings can never unlock more supertype merging + // opportunities, so it suffices to merge supertypes once followed by repeated + // merging of siblings. + // + // Merging can unlock more sibling merging opportunities because two identical + // types cannot be merged until their respective identical parents have been + // merged in a previous step, making them siblings. + merge(Supertypes); + for (int i = 0; i < MAX_ITERATIONS; ++i) { + if (!merge(Siblings)) { + break; + } + } - // Map each type to its partition in the list. - std::unordered_map<HeapType, Partitions::iterator> typePartitions; + applyMerges(); +} - // Map optional supertypes and the top-level structures of their refined - // children to partitions so that different children that refine the supertype - // in the same way can be assigned to the same partition and potentially - // merged. - // TODO: This is not fully general because it still prevents types from being - // merged if they are identical subtypes of two other types that end up being - // merged. Fixing this would require 1) merging such input partitions and - // re-running DFA minimization until convergence or 2) treating supertypes as - // special transitions in the DFA and augmenting Valmari-Lehtinen DFA - // minimization so that these special transitions cannot be used to split a - // partition if they are self-transitions. - std::unordered_map< - std::optional<HeapType>, - std::unordered_map<HeapType, Partitions::iterator, ShapeHash, ShapeEq>> - shapePartitions; +bool TypeMerging::merge(MergeKind kind) { + // Initial partitions are formed by grouping types with their structurally + // similar supertypes or siblings, according to the `kind`. + Partitions partitions; #if TYPE_MERGING_DEBUG using Fallback = IndexedTypeNameGenerator<DefaultTypeNameGenerator>; - Fallback printPrivate(privates, "private."); + Fallback printPrivate(ModuleUtils::getPrivateHeapTypes(*module), "private."); ModuleTypeNameGenerator<Fallback> print(*module, printPrivate); auto dumpPartitions = [&]() { size_t i = 0; @@ -217,6 +261,17 @@ void TypeMerging::run(Module* module_) { }; #endif // TYPE_MERGING_DEBUG + // Map each type to its partition in the list. + std::unordered_map<HeapType, Partitions::iterator> typePartitions; + + // Map the supertypes and top-level structures of each type to partitions so + // that siblings that refine the supertype in the same way can be assigned to + // the same partition and potentially merged. + std::unordered_map< + std::optional<HeapType>, + std::unordered_map<HeapType, Partitions::iterator, ShapeHash, ShapeEq>> + shapePartitions; + // Ensure the type has a partition and return a reference to it. Since we // merge up the type tree and visit supertypes first, the partition usually // already exists. The exception is when the supertype is public, in which @@ -232,8 +287,12 @@ void TypeMerging::run(Module* module_) { // Similar to the above, but look up or create a partition associated with the // type's supertype and top-level shape rather than its identity. auto ensureShapePartition = [&](HeapType type) -> Partitions::iterator { + auto super = type.getSuperType(); + if (super) { + super = getMerged(*super); + } auto [it, inserted] = - shapePartitions[type.getSuperType()].insert({type, partitions.end()}); + shapePartitions[super].insert({type, partitions.end()}); if (inserted) { it->second = partitions.insert(partitions.end(), Partition{}); } @@ -242,7 +301,7 @@ void TypeMerging::run(Module* module_) { // For each type, either create a new partition or add to its supertype's // partition. - for (auto type : HeapTypeOrdering::SupertypesFirst(privates)) { + for (auto type : MergeableSupertypesFirst(*this, mergeable)) { // We need partitions for any public children of this type since those // children will participate in the DFA we're creating. for (auto child : getPublicChildren(type)) { @@ -254,22 +313,32 @@ void TypeMerging::run(Module* module_) { ensurePartition(type); continue; } - // If there is no supertype to merge with or if this type refines its - // supertype, then we can still potentially merge it with sibling types with - // the same structure. Find and add to the partition with other such types. - auto super = type.getSuperType(); - if (!super || !shapeEq(type, *super)) { - auto it = ensureShapePartition(type); - it->push_back(makeDFAState(type)); - typePartitions[type] = it; - continue; + + switch (kind) { + case Supertypes: { + auto super = type.getSuperType(); + if (super && shapeEq(type, *super)) { + // The current type and its supertype have the same top-level + // structure and are not distinguished, so add the current type to its + // supertype's partition. + auto it = ensurePartition(*super); + it->push_back(makeDFAState(type)); + typePartitions[type] = it; + } else { + // Otherwise, create a new partition for this type. + ensurePartition(type); + } + break; + } + case Siblings: { + // Find or create a partition for this type's siblings of the same + // shape. + auto it = ensureShapePartition(type); + it->push_back(makeDFAState(type)); + typePartitions[type] = it; + break; + } } - // The current type and its supertype have the same top-level structure and - // are not distinguished, so add the current type to its supertype's - // partition. - auto it = ensurePartition(*super); - it->push_back(makeDFAState(type)); - typePartitions[type] = it; } #if TYPE_MERGING_DEBUG @@ -282,23 +351,38 @@ void TypeMerging::run(Module* module_) { std::make_move_iterator(partitions.end())); auto refinedPartitions = DFA::refinePartitions(dfa); - // The types we can merge mapped to the type we are merging them into. - TypeMapper::TypeUpdates merges; +#if TYPE_MERGING_DEBUG + std::cerr << "Refined partitions:\n"; + size_t i = 0; + for (auto& partition : refinedPartitions) { + std::cerr << i++ << ": " << print(partition[0]) << "\n"; + for (size_t j = 1; j < partition.size(); ++j) { + std::cerr << " " << print(partition[j]) << "\n"; + } + std::cerr << "\n"; + } +#endif // Merge each refined partition into a single type. We should only merge into // supertypes or siblings because if we try to merge into a subtype then we - // will accidentally set that subtype to be its own supertype. + // will accidentally set that subtype to be its own supertype. Also keep track + // of the remaining types. + std::vector<HeapType> newMergeable; + bool merged = false; for (const auto& partition : refinedPartitions) { - auto target = *HeapTypeOrdering::SupertypesFirst(partition).begin(); + auto target = *MergeableSupertypesFirst(*this, partition).begin(); + newMergeable.push_back(target); for (auto type : partition) { if (type != target) { merges[type] = target; + merged = true; } } } + mergeable = std::move(newMergeable); #if TYPE_MERGING_DEBUG - std::cerr << "Merges):\n"; + std::cerr << "Merges:\n"; std::unordered_map<HeapType, std::vector<HeapType>> mergees; for (auto& [mergee, target] : merges) { mergees[target].push_back(mergee); @@ -312,7 +396,7 @@ void TypeMerging::run(Module* module_) { } #endif // TYPE_MERGING_DEBUG - applyMerges(merges); + return merged; } CastTypes TypeMerging::findCastTypes() { @@ -358,17 +442,22 @@ DFA::State<HeapType> TypeMerging::makeDFAState(HeapType type) { // Both private and public heap type children participate in the DFA and are // eligible to be successors. if (!child.isBasic()) { - succs.push_back(child); + succs.push_back(getMerged(child)); } } return {type, std::move(succs)}; } -void TypeMerging::applyMerges(const TypeMapper::TypeUpdates& merges) { +void TypeMerging::applyMerges() { if (merges.empty()) { return; } + // Flatten merges, which might be an arbitrary tree at this point. + for (auto [type, _] : merges) { + merges[type] = getMerged(type); + } + // We found things to optimize! Rewrite types in the module to apply those // changes. TypeMapper(*module, merges).map(); diff --git a/test/lit/passes/type-merging.wast b/test/lit/passes/type-merging.wast index ef62c669c..4fd8ff75c 100644 --- a/test/lit/passes/type-merging.wast +++ b/test/lit/passes/type-merging.wast @@ -232,10 +232,10 @@ (module (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (struct (field (ref null $A)))) (type $A (struct (ref null $X))) (type $B (struct_subtype (ref null $Y) $A)) - ;; CHECK: (rec - ;; CHECK-NEXT: (type $X (struct (field (ref null $X)))) (type $X (struct (ref null $A))) (type $Y (struct_subtype (ref null $B) $X)) ) @@ -243,10 +243,10 @@ ;; CHECK: (type $none_=>_none (func)) ;; CHECK: (func $foo (type $none_=>_none) - ;; CHECK-NEXT: (local $a (ref null $X)) - ;; CHECK-NEXT: (local $b (ref null $X)) - ;; CHECK-NEXT: (local $x (ref null $X)) - ;; CHECK-NEXT: (local $y (ref null $X)) + ;; CHECK-NEXT: (local $a (ref null $A)) + ;; CHECK-NEXT: (local $b (ref null $A)) + ;; CHECK-NEXT: (local $x (ref null $A)) + ;; CHECK-NEXT: (local $y (ref null $A)) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $foo @@ -261,26 +261,54 @@ (module (rec - (type $A (struct (ref null $X))) - (type $B (struct_subtype (ref null $Y) $A)) + (type $A (struct (ref null $X) i32)) ;; CHECK: (rec - ;; CHECK-NEXT: (type $X (struct (field (ref null $X)))) - (type $X (struct (ref null $A))) - (type $Y (struct_subtype (ref null $B) $X)) + ;; CHECK-NEXT: (type $Y (struct (field (ref null $B)) (field f32))) + + ;; CHECK: (type $B (struct (field (ref null $Y)) (field i32))) + (type $B (struct (ref null $Y) i32)) + (type $X (struct (ref null $A) f32)) + (type $Y (struct (ref null $B) f32)) + ) + ;; CHECK: (type $none_=>_none (func)) + + ;; CHECK: (func $foo (type $none_=>_none) + ;; CHECK-NEXT: (local $a (ref null $B)) + ;; CHECK-NEXT: (local $b (ref null $B)) + ;; CHECK-NEXT: (local $x (ref null $Y)) + ;; CHECK-NEXT: (local $y (ref null $Y)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $foo + ;; As above with the differentiated chains, but now the types are top-level + ;; siblings instead of subtypes + (local $a (ref null $A)) + (local $b (ref null $B)) + (local $x (ref null $X)) + (local $y (ref null $Y)) ) +) +(module + (rec + (type $A (struct (ref null $X))) + ;; CHECK: (rec + ;; CHECK-NEXT: (type $B (struct (field (ref null $B)))) + (type $B (struct (ref null $Y))) + (type $X (struct (ref null $A))) + (type $Y (struct (ref null $B))) + ) ;; CHECK: (type $none_=>_none (func)) ;; CHECK: (func $foo (type $none_=>_none) - ;; CHECK-NEXT: (local $a (ref null $X)) - ;; CHECK-NEXT: (local $b (ref null $X)) - ;; CHECK-NEXT: (local $x (ref null $X)) - ;; CHECK-NEXT: (local $y (ref null $X)) + ;; CHECK-NEXT: (local $a (ref null $B)) + ;; CHECK-NEXT: (local $b (ref null $B)) + ;; CHECK-NEXT: (local $x (ref null $B)) + ;; CHECK-NEXT: (local $y (ref null $B)) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $foo - ;; As above, but now the A->B and X->Y chains are not differentiated by the - ;; i32 and f32, so all four types can be merged into a single type. + ;; As above, but with all the types merging into a single type. (local $a (ref null $A)) (local $b (ref null $B)) (local $x (ref null $X)) @@ -467,7 +495,7 @@ ;; CHECK-NEXT: ) (func $foo ;; B and C cannot be merged into A because they refine A's field, but B and - ;; C can still be merged with each other even though they are siblings. + ;; C can still be merged with each other. (local $a (ref null $A)) (local $b (ref null $B)) (local $c (ref null $C)) @@ -479,8 +507,8 @@ ;; CHECK: (rec ;; CHECK-NEXT: (type $A (struct (field anyref))) (type $A (struct anyref)) - ;; CHECK: (type $B (struct_subtype (field eqref) $A)) (type $B (struct_subtype eqref $A)) + ;; CHECK: (type $C (struct_subtype (field eqref) $A)) (type $C (struct_subtype eqref $A)) ) @@ -488,8 +516,8 @@ ;; CHECK: (func $foo (type $none_=>_none) ;; CHECK-NEXT: (local $a (ref null $A)) - ;; CHECK-NEXT: (local $b (ref null $B)) - ;; CHECK-NEXT: (local $c (ref null $B)) + ;; CHECK-NEXT: (local $b (ref null $C)) + ;; CHECK-NEXT: (local $c (ref null $C)) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $foo @@ -508,10 +536,8 @@ (type $A (struct anyref)) (type $B (struct_subtype anyref $A)) (type $C (struct_subtype anyref $A)) - ;; CHECK: (type $E (struct_subtype (field eqref) $A)) - - ;; CHECK: (type $D (struct_subtype (field eqref) $A)) (type $D (struct_subtype eqref $B)) + ;; CHECK: (type $E (struct_subtype (field eqref) $A)) (type $E (struct_subtype eqref $C)) ) @@ -521,14 +547,13 @@ ;; CHECK-NEXT: (local $a (ref null $A)) ;; CHECK-NEXT: (local $b (ref null $A)) ;; CHECK-NEXT: (local $c (ref null $A)) - ;; CHECK-NEXT: (local $d (ref null $D)) + ;; CHECK-NEXT: (local $d (ref null $E)) ;; CHECK-NEXT: (local $e (ref null $E)) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $foo ;; D and E should be mergeable because they have identical shapes and will - ;; be siblings after B and C get merged, but we don't support this case yet. - ;; TODO: support this. + ;; be siblings after B and C get merged. (local $a (ref null $A)) (local $b (ref null $B)) (local $c (ref null $C)) @@ -537,6 +562,60 @@ ) ) +;; Check that we fully optimize a type graph that requires multiple iterations +;; of supertype and sibling merging. +(module + (rec + ;; These will get merged in the initial supertype merging stage. + ;; CHECK: (rec + ;; CHECK-NEXT: (type $B' (struct (field (ref $A)))) + + ;; CHECK: (type $C (struct_subtype (field (ref $A)) (field i32) $B')) + + ;; CHECK: (type $D' (struct_subtype (field (ref $A)) (field i32) (field i32) $C)) + + ;; CHECK: (type $A (struct )) + (type $A (struct)) + (type $A' (struct_subtype $A)) + + ;; These siblings will be merged only after $a and $a' are merged. + (type $B (struct (ref $A))) + (type $B' (struct (ref $A'))) + + ;; These will get merged only after $b and $b' are merged. + (type $C (struct_subtype (ref $A) i32 $B)) + (type $C' (struct_subtype (ref $A') i32 $B')) + + ;; These will get merged only after $c and $c' are merged. + (type $D (struct_subtype (ref $A) i32 i32 $C)) + (type $D' (struct_subtype (ref $A') i32 i32 $C')) + ) + + ;; CHECK: (type $none_=>_none (func)) + + ;; CHECK: (func $foo (type $none_=>_none) + ;; CHECK-NEXT: (local $a (ref null $A)) + ;; CHECK-NEXT: (local $a' (ref null $A)) + ;; CHECK-NEXT: (local $b (ref null $B')) + ;; CHECK-NEXT: (local $b' (ref null $B')) + ;; CHECK-NEXT: (local $c (ref null $C)) + ;; CHECK-NEXT: (local $c' (ref null $C)) + ;; CHECK-NEXT: (local $d (ref null $D')) + ;; CHECK-NEXT: (local $d' (ref null $D')) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $foo + (local $a (ref null $A)) + (local $a' (ref null $A')) + (local $b (ref null $B)) + (local $b' (ref null $B')) + (local $c (ref null $C)) + (local $c' (ref null $C')) + (local $d (ref null $D)) + (local $d' (ref null $D')) + ) +) + ;; Check that we refinalize properly. (module ;; CHECK: (rec @@ -758,18 +837,19 @@ ;; incorrect. (module (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $X (struct (field (ref $A)))) + + ;; CHECK: (type $A (struct )) (type $A (struct)) (type $B (struct_subtype $A)) - ;; CHECK: (rec - ;; CHECK-NEXT: (type $X (struct (field (ref $A')))) (type $X (struct (ref $B))) - ;; CHECK: (type $A' (struct )) (type $A' (struct)) ) ;; CHECK: (type $none_=>_none (func)) ;; CHECK: (func $foo (type $none_=>_none) - ;; CHECK-NEXT: (local $b (ref null $A')) + ;; CHECK-NEXT: (local $b (ref null $A)) ;; CHECK-NEXT: (local $x (ref null $X)) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) @@ -779,6 +859,59 @@ ) ) +;; Regression test for bug where we unsoundly merged supertypes and siblings in +;; a single step. +(module + (rec + ;; $x and $y are structurally identical, but won't be merged because there is + ;; a cast to $y. + ;; CHECK: (rec + ;; CHECK-NEXT: (type $b (struct (field (ref null $x)))) + + ;; CHECK: (type $b1 (struct_subtype (field (ref null $y)) $b)) + + ;; CHECK: (type $x (struct (field anyref))) + (type $x (struct anyref)) + ;; CHECK: (type $y (struct_subtype (field anyref) $x)) + (type $y (struct_subtype anyref $x)) + + ;; If we did vertical and horizontal merges at the same time, these three + ;; types would be put into the same initial partition and $b1 would be merged + ;; into $a. This would be incorrect because then $b1 would no longer be a + ;; subtype of $b. + ;; CHECK: (type $a (struct (field (ref null $y)))) + (type $a (struct (ref null $y))) + (type $b (struct (ref null $x))) + (type $b1 (struct_subtype (ref null $y) $b)) + ) + + ;; CHECK: (type $none_=>_ref|$b| (func (result (ref $b)))) + + ;; CHECK: (func $test (type $none_=>_ref|$b|) (result (ref $b)) + ;; CHECK-NEXT: (local $0 (ref null $a)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.test $y + ;; CHECK-NEXT: (struct.new_default $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.new_default $b1) + ;; CHECK-NEXT: ) + (func $test (result (ref $b)) + ;; Use $a to prevent it from being dropped completely. + (local (ref null $a)) + + ;; Cast to prevent $x and $y from being merged. + (drop + (ref.test $y + (struct.new_default $x) + ) + ) + + ;; If $b1 were merged with $a, this would cause a validation failure. + (struct.new_default $b1) + ) +) + ;; Check that a ref.test inhibits merging (ref.cast is already checked above). (module ;; CHECK: (rec |