diff options
author | Alon Zakai <azakai@google.com> | 2024-09-09 11:12:46 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-09 11:12:46 -0700 |
commit | 2525389276699787f8a22a71f19275909e2bb40d (patch) | |
tree | dcba2b2b4cfd72ca09d8050ebb4dcf14811a5ed6 /src | |
parent | 7e117284029bfbfc2b638caa53335e1b2c53490e (diff) | |
download | binaryen-2525389276699787f8a22a71f19275909e2bb40d.tar.gz binaryen-2525389276699787f8a22a71f19275909e2bb40d.tar.bz2 binaryen-2525389276699787f8a22a71f19275909e2bb40d.zip |
[NFC] LazyLocalGraph: Add getSetInfluences() (#6909)
This new API on lazy local graphs allows us to use laziness in another place,
StackIR opts. This makes writing the binary (which includes StackIR opts, when
those are enabled), 10% faster.
Diffstat (limited to 'src')
-rw-r--r-- | src/ir/LocalGraph.cpp | 68 | ||||
-rw-r--r-- | src/ir/local-graph.h | 41 | ||||
-rw-r--r-- | src/passes/MergeLocals.cpp | 8 | ||||
-rw-r--r-- | src/wasm/wasm-stack-opts.cpp | 16 |
4 files changed, 107 insertions, 26 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp index fa7c976a4..11965040c 100644 --- a/src/ir/LocalGraph.cpp +++ b/src/ir/LocalGraph.cpp @@ -308,26 +308,41 @@ struct LocalGraphFlower }; std::unordered_map<LocalGet*, BlockLocation> getLocations; - // Set up getLocations using the flow blocks, so that we are ready to handle - // later lazy requests for the sets of particular gets. This is done in lazy - // mode. + // In lazy mode we also need to categorize gets and sets by their index. + std::vector<std::vector<LocalGet*>> getsByIndex; + std::vector<std::vector<LocalSet*>> setsByIndex; + + // Prepare for all later lazy work. void prepareLaziness() { prepareFlowBlocks(); + // Set up getLocations, getsByIndex, and setsByIndex. + auto numLocals = func->getNumLocals(); + getsByIndex.resize(numLocals); + setsByIndex.resize(numLocals); + for (auto& block : flowBlocks) { const auto& actions = block.actions; for (Index i = 0; i < actions.size(); i++) { if (auto* get = actions[i]->dynCast<LocalGet>()) { getLocations[get] = BlockLocation{&block, i}; + getsByIndex[get->index].push_back(get); + } else if (auto* set = actions[i]->dynCast<LocalSet>()) { + setsByIndex[set->index].push_back(set); + } else { + WASM_UNREACHABLE("bad action"); } } } } - // Flow a specific get. This is done in lazy mode. - void flowGet(LocalGet* get) { + // Flow a specific get to its sets. This is done in lazy mode. + void computeGetSets(LocalGet* get) { auto index = get->index; + // We must never repeat work. + assert(!getSetsMap.count(get)); + // Regardless of what we do below, ensure an entry for this get, so that we // know we computed it. auto& sets = getSetsMap[get]; @@ -390,6 +405,43 @@ struct LocalGraphFlower // We must do an inter-block flow. flowBackFromStartOfBlock(block, index, gets); } + + void computeSetInfluences(LocalSet* set, + LocalGraphBase::SetInfluencesMap& setInfluences) { + auto index = set->index; + + // We must never repeat work. + assert(!setInfluences.count(set)); + + // In theory we could flow the set forward, but to keep things simple we + // reuse the logic for flowing gets backwards: We flow all the gets of the + // set's index, thus fully computing that index and all its sets, including + // this one. This is not 100% lazy, but still avoids extra work by never + // doing work for local indexes we don't care about. + for (auto* get : getsByIndex[index]) { + // Don't repeat work. + if (!getSetsMap.count(get)) { + computeGetSets(get); + } + } + + // Ensure empty entries for each set of this index, to mark them as + // computed. + for (auto* set : setsByIndex[index]) { + setInfluences[set]; + } + + // Also ensure |set| itself, that we were originally asked about. It may be + // in unreachable code, which means it is not listed in setsByIndex. + setInfluences[set]; + + // Apply the info from the gets to the sets. + for (auto* get : getsByIndex[index]) { + for (auto* set : getSetsMap[get]) { + setInfluences[set].insert(get); + } + } + } }; // LocalGraph implementation @@ -526,7 +578,11 @@ LazyLocalGraph::~LazyLocalGraph() { } void LazyLocalGraph::computeGetSets(LocalGet* get) const { - flower->flowGet(get); + flower->computeGetSets(get); +} + +void LazyLocalGraph::computeSetInfluences(LocalSet* set) const { + flower->computeSetInfluences(set, setInfluences); } } // namespace wasm diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h index 1c36fc1dc..7fe9adedc 100644 --- a/src/ir/local-graph.h +++ b/src/ir/local-graph.h @@ -64,26 +64,21 @@ public: using Locations = std::map<Expression*, Expression**>; Locations locations; + // A map of each get to the sets relevant to it (i.e., that it can read from). + using GetSetsMap = std::unordered_map<LocalGet*, Sets>; + // Sets of gets or sets, that are influenced, returned from get*Influences(). using SetInfluences = std::unordered_set<LocalGet*>; using GetInfluences = std::unordered_set<LocalSet*>; - // Defined publicly as other utilities need similar data layouts. - using GetSetsMap = std::unordered_map<LocalGet*, Sets>; + using SetInfluencesMap = std::unordered_map<LocalSet*, SetInfluences>; + using GetInfluencesMap = std::unordered_map<LocalGet*, GetInfluences>; protected: Function* func; Module* module; std::set<Index> SSAIndexes; - - // A map of each get to the sets relevant to it. This is mutable so that - // getSets() can be const in LazyLocalGraph (which does memoization, see - // below). - mutable GetSetsMap getSetsMap; - - std::unordered_map<LocalSet*, SetInfluences> setInfluences; - std::unordered_map<LocalGet*, GetInfluences> getInfluences; }; struct LocalGraph : public LocalGraphBase { @@ -167,6 +162,12 @@ struct LocalGraph : public LocalGraphBase { void computeSSAIndexes(); bool isSSA(Index x); + +private: + GetSetsMap getSetsMap; + + SetInfluencesMap setInfluences; + GetInfluencesMap getInfluences; }; // The internal implementation of the flow analysis used to compute things. This @@ -178,20 +179,38 @@ struct LazyLocalGraph : public LocalGraphBase { LazyLocalGraph(Function* func, Module* module = nullptr); ~LazyLocalGraph(); + // Similar APIs as in LocalGraph, but lazy versions. Each of them does a + // lookup, and if there is a missing entry then we did not do the computation + // yet, and then we do it and memoize it. const Sets& getSets(LocalGet* get) const { auto iter = getSetsMap.find(get); if (iter == getSetsMap.end()) { - // A missing entry means we did not do the computation yet. Do it now. computeGetSets(get); iter = getSetsMap.find(get); assert(iter != getSetsMap.end()); } return iter->second; } + const SetInfluences& getSetInfluences(LocalSet* set) const { + auto iter = setInfluences.find(set); + if (iter == setInfluences.end()) { + computeSetInfluences(set); + iter = setInfluences.find(set); + assert(iter != setInfluences.end()); + } + return iter->second; + } private: + // These data structures are mutable so that we can memoize. + mutable GetSetsMap getSetsMap; + + mutable SetInfluencesMap setInfluences; + // Compute the sets for a get and store them on getSetsMap. void computeGetSets(LocalGet* get) const; + // Compute influences for a set and store them on setInfluences. + void computeSetInfluences(LocalSet* set) const; // This remains alive as long as we are, so that we can compute things lazily. std::unique_ptr<LocalGraphFlower> flower; diff --git a/src/passes/MergeLocals.cpp b/src/passes/MergeLocals.cpp index 9402e0669..b669c20d3 100644 --- a/src/passes/MergeLocals.cpp +++ b/src/passes/MergeLocals.cpp @@ -105,10 +105,16 @@ struct MergeLocals if (copies.empty()) { return; } - // compute all dependencies auto* func = getFunction(); + + // Compute the local graph. Note that we *cannot* do this lazily, as we want + // to read from the original state of the function while we are doing + // changes on it. That is, using an eager graph makes a snapshot of the + // initial state, which is what we want. If we can avoid that, this pass can + // be sped up by around 25%. LocalGraph preGraph(func, getModule()); preGraph.computeSetInfluences(); + // optimize each copy std::unordered_map<LocalSet*, LocalSet*> optimizedToCopy, optimizedToTrivial; diff --git a/src/wasm/wasm-stack-opts.cpp b/src/wasm/wasm-stack-opts.cpp index a39b82b98..4559705bf 100644 --- a/src/wasm/wasm-stack-opts.cpp +++ b/src/wasm/wasm-stack-opts.cpp @@ -131,14 +131,14 @@ void StackIROptimizer::vacuum() { // no control flow branching out, we can remove both the set // and the get. void StackIROptimizer::local2Stack() { - // We use the localGraph to tell us if a get-set pair is indeed - // a set that is read by that get, and only that get. Note that we run - // this on the Binaryen IR, so we are assuming that no previous opt - // has changed the interaction of local operations. - // TODO: we can do this a lot faster, as we just care about linear - // control flow. - LocalGraph localGraph(func); - localGraph.computeSetInfluences(); + // We use the localGraph to tell us if a get-set pair is indeed a set that is + // read by that get, and only that get. Note that we run this on Binaryen IR, + // so we are assuming that no previous opt has changed the interaction of + // local operations. + // + // We use a lazy graph here as we only query in the rare case when we find a + // set/get pair that looks optimizable. + LazyLocalGraph localGraph(func); // The binary writing of StringWTF16Get and StringSliceWTF is optimized to use // fewer scratch locals when their operands are already LocalGets. To avoid // interfering with that optimization, we have to avoid removing such |