diff options
author | Alon Zakai <azakai@google.com> | 2023-10-24 10:37:28 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-24 17:37:28 +0000 |
commit | 92c8a4682367170485295da6744b3a59fbb8d3ac (patch) | |
tree | d8e59f512bf37d49feea2d37f17a0877fc01ce11 /src/ir/LocalGraph.cpp | |
parent | cce277fc649d7406f446f52778286192367a35ad (diff) | |
download | binaryen-92c8a4682367170485295da6744b3a59fbb8d3ac.tar.gz binaryen-92c8a4682367170485295da6744b3a59fbb8d3ac.tar.bz2 binaryen-92c8a4682367170485295da6744b3a59fbb8d3ac.zip |
[NFC] LocalGraph: Optimize params with no sets (#6046)
If a local index has no sets, then all gets of that index read from the entry block
(a param, or a zero for a local). This is actually a common case, where a param
has no other set, and so it is worth optimizing, which this PR does by avoiding
any flowing operation at all for that index: we just skip and write the entry block
as the source of information for such gets.
#6042 on precompute-propagate goes from 3 minutes to 2 seconds with this (!).
But that testcase is rather special in that it is a huge function with many, many
gets in it, so the overhead we remove is very noticeable there.
Diffstat (limited to 'src/ir/LocalGraph.cpp')
-rw-r--r-- | src/ir/LocalGraph.cpp | 35 |
1 files changed, 34 insertions, 1 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp index 9bdb6765f..7865bc404 100644 --- a/src/ir/LocalGraph.cpp +++ b/src/ir/LocalGraph.cpp @@ -106,6 +106,10 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { auto numLocals = func->getNumLocals(); std::vector<FlowBlock*> work; + // Track if we have unreachable code anywhere, as if we do that may inhibit + // certain optimizations below. + bool hasUnreachable = false; + // Convert input blocks (basicBlocks) into more efficient flow blocks to // improve memory access. std::vector<FlowBlock> flowBlocks; @@ -114,9 +118,19 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { // Init mapping between basicblocks and flowBlocks std::unordered_map<BasicBlock*, FlowBlock*> basicToFlowMap; for (Index i = 0; i < basicBlocks.size(); ++i) { - basicToFlowMap[basicBlocks[i].get()] = &flowBlocks[i]; + auto* block = basicBlocks[i].get(); + basicToFlowMap[block] = &flowBlocks[i]; + // Check for unreachable code. Note we ignore the entry block (index 0) as + // that is always reached when we are called. + if (i != 0 && block->in.empty()) { + hasUnreachable = true; + } } + // We note which local indexes have local.sets, as that can help us + // optimize later (if there are none at all). + std::vector<bool> hasSet(numLocals, false); + const size_t NULL_ITERATION = -1; FlowBlock* entryFlowBlock = nullptr; @@ -140,6 +154,7 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { flowBlock.lastSets.reserve(block->contents.lastSets.size()); for (auto set : block->contents.lastSets) { flowBlock.lastSets.emplace_back(set); + hasSet[set.first] = true; } } assert(entryFlowBlock != nullptr); @@ -185,6 +200,24 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { if (gets.empty()) { continue; } + if (!hasUnreachable && !hasSet[index]) { + // This local index has no sets, so we know all gets will end up + // reaching the entry block. Do that here as an optimization to avoid + // flowing through the (potentially very many) blocks in the function. + // + // Note that we must check for unreachable code in this function, as + // if there is any then we would not be precise: in that case, the + // gets may either have the entry value, or no value at all. It would + // be safe to mark the entry value in that case anyhow (as it only + // matters in unreachable code), but to keep the IR consistent and to + // avoid confusion when debugging, simply do not optimize if + // there is anything unreachable (which will not happen normally, as + // DCE should run before passes that use this utility). + for (auto* get : gets) { + getSetses[get].insert(nullptr); + } + continue; + } work.push_back(&block); // Note that we may need to revisit the later parts of this initial // block, if we are in a loop, so don't mark it as seen. |