diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2021-02-24 18:57:30 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-24 18:57:30 -0800 |
commit | d1d631d5f354ef3470c15003118d1c750b557fe2 (patch) | |
tree | c7bff357069abbebd516425ba347c968469dac9e /src | |
parent | 95ccf32f3e74bf52d9db161cffc27637f3957820 (diff) | |
download | binaryen-d1d631d5f354ef3470c15003118d1c750b557fe2.tar.gz binaryen-d1d631d5f354ef3470c15003118d1c750b557fe2.tar.bz2 binaryen-d1d631d5f354ef3470c15003118d1c750b557fe2.zip |
Support building recursive types (#3602)
Updates TypeBuilder to support recursive types. Recursive types are particularly
problematic because under the current scheme it is necessary to canonicalize the
uses of a type's immediate children before canonicalizing the type itself to
avoid leaking non-canonical, temporary types out of the TypeBuilder and into the
global type stores. In the case of recursive types, it is not possible to do
this because of their cyclic nature. In principle this could be overcome by
hashing recursive types based on their structure rather than their contents, but
that would be complicated. Instead, this PR takes the shortcut of not
canonicalizing self-referential HeapTypes at all, but rather moving them out of the
TypeBuilder and into the global type store without changing their addresses or
needing to update any of their use sites. This breaks all cycles and makes it
possible to canonicalize the other types correctly.
Note that this PR only adds support for building recursive types. Doing almost
anything with the types, such as printing, comparing, or emitting them will
certainly lead to infinite recursions. A follow up PR will update all these
operations to work correctly with recursive types.
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm/wasm-type.cpp | 183 |
1 files changed, 112 insertions, 71 deletions
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index de98ffcdb..b920dd587 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -1089,25 +1089,30 @@ std::ostream& operator<<(std::ostream& os, HeapTypeInfo info) { struct TypeBuilder::Impl { TypeStore typeStore; - HeapTypeStore heapTypeStore; struct Entry { - // HeapTypeInfo has no default constructor, so pick an arbitrary default. - HeapTypeInfo info = Signature(); + std::unique_ptr<HeapTypeInfo> info; bool initialized = false; + Entry() { + // We need to eagerly allocate the HeapTypeInfo so we have a TypeID to use + // to refer to it before it is initialized. Arbitrarily choose a default + // value. + info = std::make_unique<HeapTypeInfo>(Signature()); + } void set(HeapTypeInfo&& hti) { - info = hti; + *info = hti; initialized = true; } - HeapType get() { return HeapType(TypeID(&info)); } + HeapType get() { return HeapType(TypeID(info.get())); } }; std::vector<Entry> entries; + + Impl(size_t n) : entries(n) {} }; TypeBuilder::TypeBuilder(size_t n) { - impl = std::make_unique<TypeBuilder::Impl>(); - impl->entries.resize(n); + impl = std::make_unique<TypeBuilder::Impl>(n); } TypeBuilder::~TypeBuilder() = default; @@ -1175,15 +1180,18 @@ struct Canonicalizer { // The work list of Types and HeapTypes remaining to be scanned. std::vector<Item> scanList; - // Maps Type and HeapType IDs to the IDs of their ancestor Types and HeapTypes + // Maps Type and HeapType IDs to the IDs of their child Types and HeapTypes // in the type graph. Only considers compound Types and HeapTypes. - std::unordered_map<TypeID, std::unordered_set<TypeID>> ancestors; + std::unordered_map<TypeID, std::unordered_set<TypeID>> children; // Maps each temporary Type and HeapType to the locations where they will have // to be replaced with canonical Types and HeapTypes. std::unordered_map<Type, std::vector<Type*>> typeLocations; std::unordered_map<HeapType, std::vector<HeapType*>> heapTypeLocations; + // These heap types will not participate in canonicalization. + std::unordered_set<TypeID> selfReferentialHeapTypes; + // Maps Types and HeapTypes backed by the TypeBuilder's Stores to globally // canonical Types and HeapTypes. std::unordered_map<Type, Type> canonicalTypes; @@ -1196,7 +1204,7 @@ struct Canonicalizer { template<typename T1, typename T2> void noteChild(T1 parent, T2* child); void scanHeapType(HeapType* ht); void scanType(Type* child); - void makeAncestorsFixedPoint(); + void findSelfReferentialHeapTypes(); std::vector<Item> getOrderedItems(); // Replaces the pointee Type or HeapType of `type` with its globally canonical @@ -1206,6 +1214,10 @@ struct Canonicalizer { void canonicalize(T* type, std::unordered_map<T, T>& canonicals); }; +// Used to extend the lifetime of self-referential HeapTypes so they don't need +// to be canonicalized. +std::vector<std::unique_ptr<HeapTypeInfo>> noncanonicalHeapTypes; + // Traverse the type graph rooted at the initialized HeapTypeInfos in reverse // postorder, replacing in place all Types and HeapTypes backed by the // TypeBuilder's Stores with equivalent globally canonicalized Types and @@ -1224,8 +1236,7 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) { // Traverse the type graph reachable from the heap types, calculating // reachability and collecting a list of types and heap types that need to be - // canonicalized. We must scan in depth-first order so that we can do a - // postorder traversal later. + // canonicalized. while (scanList.size() != 0) { auto item = scanList.back(); scanList.pop_back(); @@ -1239,17 +1250,11 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) { } } - // Check for recursive types and heap types. TODO: pre-canonicalize these into - // their minimal finite representations. - makeAncestorsFixedPoint(); - for (auto& kv : ancestors) { - if (kv.second.count(kv.first) != 0) { - WASM_UNREACHABLE("TODO: support recursive types"); - } - } + findSelfReferentialHeapTypes(); - // Visit the types and heap types in reverse postorder, replacing them with - // their canonicalized versions. + // Visit the use sites of Types and HeapTypes, replacing them with their + // canonicalized versions in an order that guarantees no temporary types will + // be leaked into the global stores. for (auto it : getOrderedItems()) { switch (it.kind) { case Item::TypeKind: @@ -1260,12 +1265,21 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) { break; } } + + // Now that all other Types and HeapTypes have been canonicalized, move + // self-referential HeapTypes to the global store so that they will outlive + // the TypeBuilder without their IDs changing. + for (auto& entry : builder.impl->entries) { + if (selfReferentialHeapTypes.count(entry.get().getID())) { + noncanonicalHeapTypes.emplace_back(std::move(entry.info)); + } + } } template<typename T1, typename T2> void Canonicalizer::noteChild(T1 parent, T2* child) { if (child->isCompound()) { - ancestors[child->getID()].insert(parent.getID()); + children[parent.getID()].insert(child->getID()); scanList.push_back(child); } } @@ -1319,85 +1333,112 @@ void Canonicalizer::scanType(Type* type) { } } -void Canonicalizer::makeAncestorsFixedPoint() { - // Naively calculate the transitive closure of the reachability graph. +void Canonicalizer::findSelfReferentialHeapTypes() { + // Calculate the fixed point of the reachability relation. + auto fixedPoint = children; bool changed; do { changed = false; - for (auto& entry : ancestors) { + for (auto& entry : fixedPoint) { auto& succs = entry.second; - std::unordered_set<TypeID> nextAncestors; + std::unordered_set<TypeID> newSuccs; for (auto& other : succs) { - auto& otherAncestors = ancestors[other]; - nextAncestors.insert(otherAncestors.begin(), otherAncestors.end()); + auto& otherSuccs = fixedPoint[other]; + newSuccs.insert(otherSuccs.begin(), otherSuccs.end()); } size_t oldSize = succs.size(); - succs.insert(nextAncestors.begin(), nextAncestors.end()); + succs.insert(newSuccs.begin(), newSuccs.end()); if (succs.size() != oldSize) { changed = true; } } } while (changed); + + // Find HeapTypes that reach themselves + for (auto& entry : builder.impl->entries) { + TypeID id = entry.get().getID(); + auto it = fixedPoint.find(id); + if (it != fixedPoint.end() && it->second.count(id)) { + selfReferentialHeapTypes.insert(id); + } + } } std::vector<Canonicalizer::Item> Canonicalizer::getOrderedItems() { - // Topologically sort the Types and HeapTypes so that all children are - // canonicalized before their parents. + // In order to canonicalize Types and HeapTypes without leaking any temporary + // types into the global type store, we need to canonicalize children before + // parents. In the case of recursive types, this is not possible due to cycles + // in the parent-child relation. In principle that can be overcome by + // canonicalizing type structures rather than type contents, but for now we + // instead cut corners and break the cycles by skipping canonicalization of + // self-referential HeapTypes. This works because the path from any Type or + // HeapType to itself in the parent-child relation must go through some + // self-referential HeapType; it is not possible to have a cycle composed only + // of Types, for example. In effect this means that we have a nominal type + // system for self-referential HeapTypes and a structural type system for + // everything else. Eventually we will have to implement something more + // consistent, but this is good enough for getting prototype toolchains up and + // running. + + // TODO: None of this is particularly optimized. Benchmark to see if this is a + // significant bottleneck and investigate using better data structures and + // algorithms. + + // Remove self-referential HeapTypes to cut cycles. + auto childrenDAG = children; + for (TypeID id : selfReferentialHeapTypes) { + childrenDAG.erase(id); + for (auto& kv : childrenDAG) { + kv.second.erase(id); + } + } + + // Collect the remaining types that will be sorted. + std::unordered_set<TypeID> toSort; + for (auto& entry : builder.impl->entries) { + TypeID id = entry.get().getID(); + if (!selfReferentialHeapTypes.count(id)) { + toSort.insert(id); + } + } + for (auto& kv : childrenDAG) { + toSort.insert(kv.first); + for (auto& child : kv.second) { + toSort.insert(child); + } + } + + // Topologically sort so that children come before their parents. std::vector<TypeID> sorted; std::unordered_set<TypeID> seen; - - // Topologically sort so that all parents are pushed before their children. - // This sort will be reversed later to have children before their parents. - std::function<void(TypeID)> visit = [&](TypeID i) { - if (seen.count(i)) { + std::function<void(TypeID)> visit = [&](TypeID id) { + if (seen.count(id)) { return; } - // Push ancestors of the current type before pushing the current type. - auto it = ancestors.find(i); - if (it != ancestors.end()) { - for (auto ancestor : it->second) { - visit(ancestor); + // Push children of the current type before pushing the current type. + auto it = childrenDAG.find(id); + if (it != childrenDAG.end()) { + for (auto child : it->second) { + visit(child); } } - seen.insert(i); - sorted.push_back(i); + seen.insert(id); + sorted.push_back(id); }; - - // Collect the items to be sorted, including all the result HeapTypes and any - // type that participates in the ancestor relation. - std::unordered_set<TypeID> allIDs; - for (auto ht : results) { - allIDs.insert(ht.getID()); - } - for (auto& kv : ancestors) { - allIDs.insert(kv.first); - for (auto& id : kv.second) { - allIDs.insert(id); - } - } - - // Perform the sort. - for (TypeID i : allIDs) { + for (TypeID i : toSort) { visit(i); } - // Swap order to have all children before their parents. - std::reverse(sorted.begin(), sorted.end()); - - // Create a list of Items to canonicalize in place according to the - // topological ordering of types and heap types. + // Expand the ordered types into ordered type use sites. std::vector<Item> items; for (TypeID id : sorted) { // IDs may be Types or HeapTypes, so just try both. - auto typeIt = typeLocations.find(Type(id)); - if (typeIt != typeLocations.end()) { - for (Type* loc : typeIt->second) { + if (typeLocations.count(Type(id))) { + for (Type* loc : typeLocations[Type(id)]) { items.emplace_back(loc); } } else { - auto heapTypeIt = heapTypeLocations.find(HeapType(id)); - assert(heapTypeIt != heapTypeLocations.end()); - for (HeapType* loc : heapTypeIt->second) { + for (HeapType* loc : heapTypeLocations[HeapType(id)]) { items.emplace_back(loc); } } |