diff options
author | Alon Zakai <azakai@google.com> | 2024-05-15 09:42:28 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-15 09:42:28 -0700 |
commit | 8a5dc1880d962a7c31a7a219720be343a0866e5c (patch) | |
tree | b76322419dac9600599be135a6558d661aafb709 /src | |
parent | 940f4570cb13db7f7b381cbe35ba546708ed7556 (diff) | |
download | binaryen-8a5dc1880d962a7c31a7a219720be343a0866e5c.tar.gz binaryen-8a5dc1880d962a7c31a7a219720be343a0866e5c.tar.bz2 binaryen-8a5dc1880d962a7c31a7a219720be343a0866e5c.zip |
LocalCSE: Fix regression from #6587 by accumulating generativity (#6591)
#6587 was incorrect: It checked generativity early in an incremental manner, but
it did not accumulate that information as we do with hashes. As a result we
could end up optimizing something with a generative child, and sadly we lacked
testing for that case.
This adds incremental generativity computation alongside hashes. It also splits
out this check from isRelevant.
Also add a test for nested effects (as opposed to generativity), but that already
worked before this PR (as we compute effects and invalidation as we go, already).
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/LocalCSE.cpp | 72 |
1 files changed, 44 insertions, 28 deletions
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 63704382e..78f722c7a 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -217,16 +217,18 @@ struct Scanner // original expression that we request from. HashedExprs activeExprs; - // Stack of hash values of all active expressions. We store these so that we - // do not end up recomputing hashes of children in an N^2 manner. - SmallVector<size_t, 10> activeHashes; + // Stack of information of all active expressions. We store hash values and + // possibility (as computed by isPossible), which we compute incrementally so + // as to avoid N^2 work (which could happen if we recomputed children). + using HashPossibility = std::pair<size_t, bool>; + SmallVector<HashPossibility, 10> activeIncrementalInfo; static void doNoteNonLinear(Scanner* self, Expression** currp) { // We are starting a new basic block. Forget all the currently-hashed // expressions, as we no longer want to make connections to anything from // another block. self->activeExprs.clear(); - self->activeHashes.clear(); + self->activeIncrementalInfo.clear(); // Note that we do not clear requestInfos - that is information we will use // later in the Applier class. That is, we've cleared all the active // information, leaving the things we need later. @@ -245,19 +247,24 @@ struct Scanner // that are not isRelevant() (if they are the children of a relevant thing). auto numChildren = Properties::getNumChildren(curr); auto hash = ExpressionAnalyzer::shallowHash(curr); + auto possible = isPossible(curr); for (Index i = 0; i < numChildren; i++) { - if (activeHashes.empty()) { + if (activeIncrementalInfo.empty()) { // The child was in another block, so this expression cannot be // optimized. return; } - hash_combine(hash, activeHashes.back()); - activeHashes.pop_back(); + auto [currHash, currPossible] = activeIncrementalInfo.back(); + activeIncrementalInfo.pop_back(); + hash_combine(hash, currHash); + if (!currPossible) { + possible = false; + } } - activeHashes.push_back(hash); + activeIncrementalInfo.emplace_back(hash, possible); - // Check if this is something relevant for optimization. - if (!isRelevant(curr)) { + // Check if this is something possible and also relevant for optimization. + if (!possible || !isRelevant(curr)) { return; } @@ -330,6 +337,32 @@ struct Scanner return false; } + // If the size is at least 3, then if we have two of them we have 6, + // and so adding one set+one get and removing one of the items itself + // is not detrimental, and may be beneficial. + // TODO: investigate size 2 + auto size = Measurer::measure(curr); + if (options.shrinkLevel > 0 && size >= 3) { + return true; + } + + // If we focus on speed, any reduction in cost is beneficial, as the + // cost of a get is essentially free. However, we need to balance that with + // the fact that the VM will also do CSE/GVN itself, so minor improvements + // are not worthwhile, so skip things of size 1 (like a global.get). + if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) { + return true; + } + + return false; + } + + // Some things are not possible, and also prevent their parents from being + // possible as well. This is different from isRelevant in that relevance is + // considered for the entire expression, including children - e.g., is the + // total size big enough - while isPossible checks conditions that prevent + // using an expression at all. + bool isPossible(Expression* curr) { // We will fully compute effects later, but consider shallow effects at this // early time to ignore things that cannot be optimized later, because we // use a greedy algorithm. Specifically, imagine we see this: @@ -364,24 +397,7 @@ struct Scanner return false; } - // If the size is at least 3, then if we have two of them we have 6, - // and so adding one set+one get and removing one of the items itself - // is not detrimental, and may be beneficial. - // TODO: investigate size 2 - auto size = Measurer::measure(curr); - if (options.shrinkLevel > 0 && size >= 3) { - return true; - } - - // If we focus on speed, any reduction in cost is beneficial, as the - // cost of a get is essentially free. However, we need to balance that with - // the fact that the VM will also do CSE/GVN itself, so minor improvements - // are not worthwhile, so skip things of size 1 (like a global.get). - if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) { - return true; - } - - return false; + return true; } }; |