summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2021-02-26 11:03:10 -0800
committerGitHub <noreply@github.com>2021-02-26 11:03:10 -0800
commitcafe85d6d8d5a7497ebf40b8474052be0885e58f (patch)
treec38932e394bff7b2ad87660a55e985a76c4ace0b /src
parentb3f67918fe34e7de76d1f5565bd3f07f4f7eb12f (diff)
downloadbinaryen-cafe85d6d8d5a7497ebf40b8474052be0885e58f.tar.gz
binaryen-cafe85d6d8d5a7497ebf40b8474052be0885e58f.tar.bz2
binaryen-cafe85d6d8d5a7497ebf40b8474052be0885e58f.zip
Use Tarjan's SCC alg. to find self-referential types (#3621)
Previously we computed the fixed point of the parent-child relation to identify self-referential HeapTypes in the TypeBuilder canonicalizer. That algorithm was O(|V|^3) in the worst case and took over five seconds to find the self-referential HeapTypes in an example program with just 1134 HeapTypes, probably due to high allocation traffic from the std::unordered_map and std::unordered_sets used to implement the parent-child graph's adjacency list. This PR replaces that algorithm with Tarjan's strongly connected component algorithm, which runs in O(|V|+|E|) and finds the self-referential HeapTypes in the mentioned example program in under 30 ms. All strongly connected components of more than one element in the HeapType parent-child graph correspond to sets of mutually recursive HeapTypes that are therefore self-referential. The only other self-referential HeapTypes are those that are not mutually recursive with any other HeapTypes, but these are trivial to find because they must be their own direct children.
Diffstat (limited to 'src')
-rw-r--r--src/wasm/wasm-type.cpp120
1 files changed, 90 insertions, 30 deletions
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index a2132d09a..1000e8b92 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -1301,7 +1301,7 @@ struct Canonicalizer {
std::unordered_map<HeapType, std::vector<HeapType*>> heapTypeLocations;
// These heap types will not participate in canonicalization.
- std::unordered_set<TypeID> selfReferentialHeapTypes;
+ std::unordered_set<HeapType> selfReferentialHeapTypes;
// Maps Types and HeapTypes backed by the TypeBuilder's Stores to globally
// canonical Types and HeapTypes.
@@ -1381,7 +1381,7 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
// self-referential HeapTypes to the global store so that they will outlive
// the TypeBuilder without their IDs changing.
for (auto& entry : builder.impl->entries) {
- if (selfReferentialHeapTypes.count(entry.get().getID())) {
+ if (selfReferentialHeapTypes.count(entry.get())) {
noncanonicalHeapTypes.emplace_back(std::move(entry.info));
}
}
@@ -1445,33 +1445,93 @@ void Canonicalizer::scanType(Type* type) {
}
void Canonicalizer::findSelfReferentialHeapTypes() {
- // Calculate the fixed point of the reachability relation.
- auto fixedPoint = children;
- bool changed;
- do {
- changed = false;
- for (auto& entry : fixedPoint) {
- auto& succs = entry.second;
- std::unordered_set<TypeID> newSuccs;
- for (auto& other : succs) {
- auto& otherSuccs = fixedPoint[other];
- newSuccs.insert(otherSuccs.begin(), otherSuccs.end());
+ // Use Tarjan's strongly connected components algorithm on the parent-child
+ // graph to find self-referential types in O(|V|+|E|) time. Each HeapType in a
+ // strongly connected component with multiple elements must be
+ // self-referential because it is mutually recursive with all other HeapTypes
+ // in that strongly connected component. HeapTypes in strongly connected
+ // components of size one may also be self-referential, but it is trivial to
+ // find these because they must be their own direct children. See
+ // https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm.
+
+ // Get the HeapType children of a HeapType, skipping all intermediate Types.
+ auto getChildren = [&](HeapType type) {
+ std::unordered_set<HeapType> results;
+ std::function<void(TypeID, bool)> visit = [&](TypeID id, bool isRoot) {
+ if (isRoot || typeLocations.count(Type(id))) {
+ auto it = children.find(id);
+ if (it != children.end()) {
+ for (TypeID child : it->second) {
+ visit(child, false);
+ }
+ }
+ } else {
+ results.insert(HeapType(id));
+ }
+ };
+ visit(type.getID(), true);
+ return results;
+ };
+
+ // The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only
+ // popped off once they are committed to a strongly connected component.
+ // HeapTypes stay on the stack after they are visited iff they have a back
+ // edge to a HeapType earlier in the stack.
+ std::vector<HeapType> stack;
+ std::unordered_set<HeapType> stackElems;
+ // Indices assigned to each HeapType in the order they are visited.
+ std::unordered_map<HeapType, size_t> indices;
+ // The smallest index of the HeapTypes reachable from each HeapType through
+ // its subtree.
+ std::unordered_map<HeapType, size_t> minReachable;
+
+ std::function<void(HeapType)> visit = [&](HeapType curr) {
+ size_t index = indices.size();
+ indices[curr] = index;
+ minReachable[curr] = index;
+ stack.push_back(curr);
+ stackElems.insert(curr);
+
+ for (HeapType child : getChildren(curr)) {
+ if (!indices.count(child)) {
+ // Child has not been visited yet; visit it.
+ visit(child);
+ minReachable[curr] = std::min(minReachable[curr], minReachable[child]);
+ } else if (stackElems.count(child)) {
+ // Child was already visited but not committed to a strongly connected
+ // component.
+ minReachable[curr] = std::min(minReachable[curr], indices[child]);
}
- size_t oldSize = succs.size();
- succs.insert(newSuccs.begin(), newSuccs.end());
- if (succs.size() != oldSize) {
- changed = true;
+ // We cannot differentiate self-referential types with strongly connected
+ // component size one from non-self-referential types just by looking at
+ // the strongly connected components. If a type is directly
+ // self-referential, mark it here so we don't miss it later.
+ if (child == curr) {
+ selfReferentialHeapTypes.insert(child);
}
}
- } while (changed);
- // Find HeapTypes that reach themselves
- for (auto& entry : builder.impl->entries) {
- TypeID id = entry.get().getID();
- auto it = fixedPoint.find(id);
- if (it != fixedPoint.end() && it->second.count(id)) {
- selfReferentialHeapTypes.insert(id);
+ if (minReachable[curr] == indices[curr]) {
+ // Curr doesn't have any back edges to an earlier element, so it is the
+ // root of the strongly connected component including itself and anything
+ // after it still on the stack. If this strongly connected component has
+ // more than one element, they are all self referential HeapTypes.
+ // Self-referential types with SCC size one were already accounted for.
+ if (stack.back() != curr) {
+ selfReferentialHeapTypes.insert(curr);
+ while (stack.back() != curr) {
+ selfReferentialHeapTypes.insert(stack.back());
+ stackElems.erase(stack.back());
+ stack.pop_back();
+ }
+ }
+ stack.pop_back();
+ stackElems.erase(curr);
}
+ };
+
+ for (auto& entry : builder.impl->entries) {
+ visit(entry.get());
}
}
@@ -1497,19 +1557,19 @@ std::vector<Canonicalizer::Item> Canonicalizer::getOrderedItems() {
// Remove self-referential HeapTypes to cut cycles.
auto childrenDAG = children;
- for (TypeID id : selfReferentialHeapTypes) {
- childrenDAG.erase(id);
+ for (HeapType type : selfReferentialHeapTypes) {
+ childrenDAG.erase(type.getID());
for (auto& kv : childrenDAG) {
- kv.second.erase(id);
+ kv.second.erase(type.getID());
}
}
// Collect the remaining types that will be sorted.
std::unordered_set<TypeID> toSort;
for (auto& entry : builder.impl->entries) {
- TypeID id = entry.get().getID();
- if (!selfReferentialHeapTypes.count(id)) {
- toSort.insert(id);
+ HeapType curr = entry.get();
+ if (!selfReferentialHeapTypes.count(curr)) {
+ toSort.insert(curr.getID());
}
}
for (auto& kv : childrenDAG) {