diff options
author | Alon Zakai <azakai@google.com> | 2024-09-05 11:00:36 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-05 11:00:36 -0700 |
commit | 7562a6b7da0ba517a46db1f1792677f3ebbf2a27 (patch) | |
tree | 897f11766fb7a4d7a295469e44aab87b0702867c /src/ir | |
parent | ad6a124969a9c6874484f0982c10d6f507388b20 (diff) | |
download | binaryen-7562a6b7da0ba517a46db1f1792677f3ebbf2a27.tar.gz binaryen-7562a6b7da0ba517a46db1f1792677f3ebbf2a27.tar.bz2 binaryen-7562a6b7da0ba517a46db1f1792677f3ebbf2a27.zip |
[NFC] Add a lazy mode to LocalGraph (#6895)
LocalGraph by default will compute all the local.sets that can be read from all
local.gets. However, many passes only query a small amount of those. To
avoid wasted work, add a lazy mode that only computes sets when asked about
a get.
This is then used in a single place, LoopInvariantCodeMotion, which becomes
18% faster.
Diffstat (limited to 'src/ir')
-rw-r--r-- | src/ir/LocalGraph.cpp | 162 | ||||
-rw-r--r-- | src/ir/local-graph.h | 101 |
2 files changed, 207 insertions, 56 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp index ae0c8db5d..fa7c976a4 100644 --- a/src/ir/LocalGraph.cpp +++ b/src/ir/LocalGraph.cpp @@ -41,10 +41,8 @@ struct Info { // flow helper class. flows the gets to their sets -struct LocalGraph::LocalGraphFlower - : public CFGWalker<LocalGraph::LocalGraphFlower, - Visitor<LocalGraph::LocalGraphFlower>, - Info> { +struct LocalGraphFlower + : public CFGWalker<LocalGraphFlower, Visitor<LocalGraphFlower>, Info> { LocalGraph::GetSetsMap& getSetsMap; LocalGraph::Locations& locations; Function* func; @@ -90,10 +88,6 @@ struct LocalGraph::LocalGraphFlower self->locations[curr] = currp; } - // The below class-level items (currentIteration, FlowBlock, etc.) would more - // properly belong inside flow(), as they are only needed there, but flow() is - // split up into two parts in service of a future user of only part of flow(). - // Each time we flow a get (or set of gets) to find its sets, we mark a // different iteration number. This lets us memoize the current iteration on // blocks as we pass them, allowing us to quickly skip them in that iteration @@ -180,7 +174,7 @@ struct LocalGraph::LocalGraphFlower assert(entryFlowBlock != nullptr); } - // Flow all the data. + // Flow all the data. This is done in eager (i.e., non-lazy) mode. void flow() { prepareFlowBlocks(); @@ -301,22 +295,111 @@ struct LocalGraph::LocalGraphFlower // Bump the current iteration for the next time we are called. currentIteration++; } + + // When the LocalGraph is in lazy mode we do not compute all of getSetsMap + // initially, but instead fill in these data structures that let us do so + // later for individual gets. Specifically we need to find the location of a + // local.get in the CFG. + struct BlockLocation { + // The basic block an item is in. + FlowBlock* block = nullptr; + // The index in that block that the item is at. + Index index; + }; + std::unordered_map<LocalGet*, BlockLocation> getLocations; + + // Set up getLocations using the flow blocks, so that we are ready to handle + // later lazy requests for the sets of particular gets. This is done in lazy + // mode. + void prepareLaziness() { + prepareFlowBlocks(); + + for (auto& block : flowBlocks) { + const auto& actions = block.actions; + for (Index i = 0; i < actions.size(); i++) { + if (auto* get = actions[i]->dynCast<LocalGet>()) { + getLocations[get] = BlockLocation{&block, i}; + } + } + } + } + + // Flow a specific get. This is done in lazy mode. + void flowGet(LocalGet* get) { + auto index = get->index; + + // Regardless of what we do below, ensure an entry for this get, so that we + // know we computed it. + auto& sets = getSetsMap[get]; + + auto [block, blockIndex] = getLocations[get]; + if (!block) { + // We did not find location info for this get, which means it is + // unreachable. + return; + } + + // We must have the get at that location. + assert(blockIndex < block->actions.size()); + assert(block->actions[blockIndex] == get); + + if (!hasSet[index]) { + // As in flow(), when there is no local.set for an index we can just mark + // the default value as the only writer. + sets.insert(nullptr); + return; + } + + // Go backwards in this flow block, from the get. If we see other gets that + // have not been computed then we can accumulate them as well, as the + // results we compute apply to them too. + std::vector<LocalGet*> gets = {get}; + while (blockIndex > 0) { + blockIndex--; + auto* curr = block->actions[blockIndex]; + if (auto* otherGet = curr->dynCast<LocalGet>()) { + if (otherGet->index == index) { + // This is another get of the same index. If we've already computed + // it, then we can just use that, as they must have the same sets. + auto iter = getSetsMap.find(otherGet); + if (iter != getSetsMap.end()) { + auto& otherSets = iter->second; + for (auto* get : gets) { + getSetsMap[get] = otherSets; + } + return; + } + + // This is a get of the same index, but which has not been computed. + // It will have the same sets as us. + gets.push_back(otherGet); + } + } else { + // This is a set. + auto* set = curr->cast<LocalSet>(); + if (set->index == index) { + // This is the only set writing to our gets. + for (auto* get : gets) { + getSetsMap[get].insert(set); + } + return; + } + } + } + + // We must do an inter-block flow. + flowBackFromStartOfBlock(block, index, gets); + } }; // LocalGraph implementation -LocalGraph::LocalGraph(Function* func, Module* module) : func(func) { +LocalGraph::LocalGraph(Function* func, Module* module) + : LocalGraphBase(func, module) { // See comment on the declaration of this field for why we use a raw - // allocation. Note that since we just call flow() and delete it, this is not - // really needed, but it sets the stage for a later PR that will do other work - // here (related to the splitting up of flow() that is mentioned earlier). - flower = - std::make_unique<LocalGraphFlower>(getSetsMap, locations, func, module); - - flower->flow(); - - // We will never use it again. - flower.reset(); + // allocation. + LocalGraphFlower flower(getSetsMap, locations, func, module); + flower.flow(); #ifdef LOCAL_GRAPH_DEBUG std::cout << "LocalGraph::dump\n"; @@ -330,13 +413,6 @@ LocalGraph::LocalGraph(Function* func, Module* module) : func(func) { #endif } -LocalGraph::~LocalGraph() { - // We must declare a destructor here in the cpp file, even though it is empty - // and pointless, due to some C++ issue with our having a unique_ptr to a - // forward-declared class (LocalGraphFlower). - // https://stackoverflow.com/questions/13414652/forward-declaration-with-unique-ptr#comment110005453_13414884 -} - bool LocalGraph::equivalent(LocalGet* a, LocalGet* b) { auto& aSets = getSets(a); auto& bSets = getSets(b); @@ -421,4 +497,36 @@ void LocalGraph::computeSSAIndexes() { bool LocalGraph::isSSA(Index x) { return SSAIndexes.count(x); } +// LazyLocalGraph + +LazyLocalGraph::LazyLocalGraph(Function* func, Module* module) + : LocalGraphBase(func, module) { + flower = + std::make_unique<LocalGraphFlower>(getSetsMap, locations, func, module); + + flower->prepareLaziness(); + +#ifdef LOCAL_GRAPH_DEBUG + std::cout << "LazyLocalGraph::dump\n"; + for (auto& [get, sets] : getSetsMap) { + std::cout << "GET\n" << get << " is influenced by\n"; + for (auto* set : sets) { + std::cout << set << '\n'; + } + } + std::cout << "total locations: " << locations.size() << '\n'; +#endif +} + +LazyLocalGraph::~LazyLocalGraph() { + // We must declare a destructor here in the cpp file, even though it is empty + // and pointless, due to some C++ issue with our having a unique_ptr to a + // forward-declared class (LocalGraphFlower). + // https://stackoverflow.com/questions/13414652/forward-declaration-with-unique-ptr#comment110005453_13414884 +} + +void LazyLocalGraph::computeGetSets(LocalGet* get) const { + flower->flowGet(get); +} + } // namespace wasm diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h index 17cc6ff31..1c36fc1dc 100644 --- a/src/ir/local-graph.h +++ b/src/ir/local-graph.h @@ -38,12 +38,56 @@ namespace wasm { // debugging etc.; and it has no downside for optimization, since unreachable // code will be removed anyhow). // -struct LocalGraph { +// There are two options here, the normal LocalGraph which is eager and computes +// everything up front, which is faster if most things end up needed, and a lazy +// one which computes on demand, which can be much faster if we only need a +// small subset of queries. +// + +// Base class for both LocalGraph and LazyLocalGraph (not meant for direct use). +struct LocalGraphBase { +protected: // If a module is passed in, it is used to find which features are needed in // the computation (for example, if exception handling is disabled, then we // can generate a simpler CFG, as calls cannot throw). + LocalGraphBase(Function* func, Module* module = nullptr) + : func(func), module(module) {} + +public: + // A set of sets, returned from the query about which sets can be read from a + // get. Typically only one or two apply there, so this is a small set. + using Sets = SmallSet<LocalSet*, 2>; + + // Where each get and set is. We compute this while doing the main computation + // and make it accessible for users, for easy replacing of things without + // extra work. + using Locations = std::map<Expression*, Expression**>; + Locations locations; + + // Sets of gets or sets, that are influenced, returned from get*Influences(). + using SetInfluences = std::unordered_set<LocalGet*>; + using GetInfluences = std::unordered_set<LocalSet*>; + + // Defined publicly as other utilities need similar data layouts. + using GetSetsMap = std::unordered_map<LocalGet*, Sets>; + +protected: + Function* func; + Module* module; + + std::set<Index> SSAIndexes; + + // A map of each get to the sets relevant to it. This is mutable so that + // getSets() can be const in LazyLocalGraph (which does memoization, see + // below). + mutable GetSetsMap getSetsMap; + + std::unordered_map<LocalSet*, SetInfluences> setInfluences; + std::unordered_map<LocalGet*, GetInfluences> getInfluences; +}; + +struct LocalGraph : public LocalGraphBase { LocalGraph(Function* func, Module* module = nullptr); - ~LocalGraph(); // Get the sets relevant for a local.get. // @@ -52,10 +96,12 @@ struct LocalGraph { // for a param. // // Often there is a single set, or a phi or two items, so we use a small set. - using Sets = SmallSet<LocalSet*, 2>; const Sets& getSets(LocalGet* get) const { auto iter = getSetsMap.find(get); if (iter == getSetsMap.end()) { + // A missing entry means there is nothing there (and we saved a little + // space by not putting something there). + // // Use a canonical constant empty set to avoid allocation. static const Sets empty; return empty; @@ -63,12 +109,6 @@ struct LocalGraph { return iter->second; } - // Where each get and set is. We compute this while doing the main computation - // and make it accessible for users, for easy replacing of things without - // extra work. - using Locations = std::map<Expression*, Expression**>; - Locations locations; - // Checks if two gets are equivalent, that is, definitely have the same // value. bool equivalent(LocalGet* a, LocalGet* b); @@ -84,9 +124,6 @@ struct LocalGraph { computeGetInfluences(); } - using SetInfluences = std::unordered_set<LocalGet*>; - using GetInfluences = std::unordered_set<LocalSet*>; - const SetInfluences& getSetInfluences(LocalSet* set) const { auto iter = setInfluences.find(set); if (iter == setInfluences.end()) { @@ -130,28 +167,34 @@ struct LocalGraph { void computeSSAIndexes(); bool isSSA(Index x); +}; - // Defined publicly as other utilities need similar data layouts. - using GetSetsMap = std::unordered_map<LocalGet*, Sets>; +// The internal implementation of the flow analysis used to compute things. This +// must be declared in the header so that LazyLocalGraph can declare a unique +// ptr to it, below. +struct LocalGraphFlower; -private: - Function* func; - std::set<Index> SSAIndexes; +struct LazyLocalGraph : public LocalGraphBase { + LazyLocalGraph(Function* func, Module* module = nullptr); + ~LazyLocalGraph(); - // A map of each get to the sets relevant to it. This is mutable so that - // getSets() can be const. - mutable GetSetsMap getSetsMap; + const Sets& getSets(LocalGet* get) const { + auto iter = getSetsMap.find(get); + if (iter == getSetsMap.end()) { + // A missing entry means we did not do the computation yet. Do it now. + computeGetSets(get); + iter = getSetsMap.find(get); + assert(iter != getSetsMap.end()); + } + return iter->second; + } - // The internal implementation of the flow analysis used to compute - // getSetsMap. - struct LocalGraphFlower; - // This could be a unique_ptr, but the forward declaration is not compatible - // with that. It could alternatively be a shared_ptr, but that runs into what - // seems to be a false positive of clang's (but not gcc's) UBSan. - std::unique_ptr<LocalGraphFlower> flower; +private: + // Compute the sets for a get and store them on getSetsMap. + void computeGetSets(LocalGet* get) const; - std::unordered_map<LocalSet*, SetInfluences> setInfluences; - std::unordered_map<LocalGet*, GetInfluences> getInfluences; + // This remains alive as long as we are, so that we can compute things lazily. + std::unique_ptr<LocalGraphFlower> flower; }; } // namespace wasm |