summaryrefslogtreecommitdiff
path: root/src/ir/LocalGraph.cpp
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2023-10-24 10:37:28 -0700
committerGitHub <noreply@github.com>2023-10-24 17:37:28 +0000
commit92c8a4682367170485295da6744b3a59fbb8d3ac (patch)
treed8e59f512bf37d49feea2d37f17a0877fc01ce11 /src/ir/LocalGraph.cpp
parentcce277fc649d7406f446f52778286192367a35ad (diff)
downloadbinaryen-92c8a4682367170485295da6744b3a59fbb8d3ac.tar.gz
binaryen-92c8a4682367170485295da6744b3a59fbb8d3ac.tar.bz2
binaryen-92c8a4682367170485295da6744b3a59fbb8d3ac.zip
[NFC] LocalGraph: Optimize params with no sets (#6046)
If a local index has no sets, then all gets of that index read from the entry block (a param, or a zero for a local). This is actually a common case, where a param has no other set, and so it is worth optimizing, which this PR does by avoiding any flowing operation at all for that index: we just skip and write the entry block as the source of information for such gets. #6042 on precompute-propagate goes from 3 minutes to 2 seconds with this (!). But that testcase is rather special in that it is a huge function with many, many gets in it, so the overhead we remove is very noticeable there.
Diffstat (limited to 'src/ir/LocalGraph.cpp')
-rw-r--r--src/ir/LocalGraph.cpp35
1 files changed, 34 insertions, 1 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp
index 9bdb6765f..7865bc404 100644
--- a/src/ir/LocalGraph.cpp
+++ b/src/ir/LocalGraph.cpp
@@ -106,6 +106,10 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
auto numLocals = func->getNumLocals();
std::vector<FlowBlock*> work;
+ // Track if we have unreachable code anywhere, as if we do that may inhibit
+ // certain optimizations below.
+ bool hasUnreachable = false;
+
// Convert input blocks (basicBlocks) into more efficient flow blocks to
// improve memory access.
std::vector<FlowBlock> flowBlocks;
@@ -114,9 +118,19 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
// Init mapping between basicblocks and flowBlocks
std::unordered_map<BasicBlock*, FlowBlock*> basicToFlowMap;
for (Index i = 0; i < basicBlocks.size(); ++i) {
- basicToFlowMap[basicBlocks[i].get()] = &flowBlocks[i];
+ auto* block = basicBlocks[i].get();
+ basicToFlowMap[block] = &flowBlocks[i];
+ // Check for unreachable code. Note we ignore the entry block (index 0) as
+ // that is always reached when we are called.
+ if (i != 0 && block->in.empty()) {
+ hasUnreachable = true;
+ }
}
+ // We note which local indexes have local.sets, as that can help us
+ // optimize later (if there are none at all).
+ std::vector<bool> hasSet(numLocals, false);
+
const size_t NULL_ITERATION = -1;
FlowBlock* entryFlowBlock = nullptr;
@@ -140,6 +154,7 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
flowBlock.lastSets.reserve(block->contents.lastSets.size());
for (auto set : block->contents.lastSets) {
flowBlock.lastSets.emplace_back(set);
+ hasSet[set.first] = true;
}
}
assert(entryFlowBlock != nullptr);
@@ -185,6 +200,24 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
if (gets.empty()) {
continue;
}
+ if (!hasUnreachable && !hasSet[index]) {
+ // This local index has no sets, so we know all gets will end up
+ // reaching the entry block. Do that here as an optimization to avoid
+ // flowing through the (potentially very many) blocks in the function.
+ //
+ // Note that we must check for unreachable code in this function, as
+ // if there is any then we would not be precise: in that case, the
+ // gets may either have the entry value, or no value at all. It would
+ // be safe to mark the entry value in that case anyhow (as it only
+ // matters in unreachable code), but to keep the IR consistent and to
+ // avoid confusion when debugging, simply do not optimize if
+ // there is anything unreachable (which will not happen normally, as
+ // DCE should run before passes that use this utility).
+ for (auto* get : gets) {
+ getSetses[get].insert(nullptr);
+ }
+ continue;
+ }
work.push_back(&block);
// Note that we may need to revisit the later parts of this initial
// block, if we are in a loop, so don't mark it as seen.