diff options
author | Alon Zakai <azakai@google.com> | 2024-05-15 09:42:28 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-15 09:42:28 -0700 |
commit | 8a5dc1880d962a7c31a7a219720be343a0866e5c (patch) | |
tree | b76322419dac9600599be135a6558d661aafb709 | |
parent | 940f4570cb13db7f7b381cbe35ba546708ed7556 (diff) | |
download | binaryen-8a5dc1880d962a7c31a7a219720be343a0866e5c.tar.gz binaryen-8a5dc1880d962a7c31a7a219720be343a0866e5c.tar.bz2 binaryen-8a5dc1880d962a7c31a7a219720be343a0866e5c.zip |
LocalCSE: Fix regression from #6587 by accumulating generativity (#6591)
#6587 was incorrect: It checked generativity early in an incremental manner, but
it did not accumulate that information as we do with hashes. As a result we
could end up optimizing something with a generative child, and sadly we lacked
testing for that case.
This adds incremental generativity computation alongside hashes. It also splits
out this check from isRelevant.
Also add a test for nested effects (as opposed to generativity), but that already
worked before this PR (as we compute effects and invalidation as we go, already).
-rw-r--r-- | src/passes/LocalCSE.cpp | 72 | ||||
-rw-r--r-- | test/lit/passes/local-cse.wast | 36 | ||||
-rw-r--r-- | test/lit/passes/local-cse_all-features.wast | 38 |
3 files changed, 113 insertions, 33 deletions
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 63704382e..78f722c7a 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -217,16 +217,18 @@ struct Scanner // original expression that we request from. HashedExprs activeExprs; - // Stack of hash values of all active expressions. We store these so that we - // do not end up recomputing hashes of children in an N^2 manner. - SmallVector<size_t, 10> activeHashes; + // Stack of information of all active expressions. We store hash values and + // possibility (as computed by isPossible), which we compute incrementally so + // as to avoid N^2 work (which could happen if we recomputed children). + using HashPossibility = std::pair<size_t, bool>; + SmallVector<HashPossibility, 10> activeIncrementalInfo; static void doNoteNonLinear(Scanner* self, Expression** currp) { // We are starting a new basic block. Forget all the currently-hashed // expressions, as we no longer want to make connections to anything from // another block. self->activeExprs.clear(); - self->activeHashes.clear(); + self->activeIncrementalInfo.clear(); // Note that we do not clear requestInfos - that is information we will use // later in the Applier class. That is, we've cleared all the active // information, leaving the things we need later. @@ -245,19 +247,24 @@ struct Scanner // that are not isRelevant() (if they are the children of a relevant thing). auto numChildren = Properties::getNumChildren(curr); auto hash = ExpressionAnalyzer::shallowHash(curr); + auto possible = isPossible(curr); for (Index i = 0; i < numChildren; i++) { - if (activeHashes.empty()) { + if (activeIncrementalInfo.empty()) { // The child was in another block, so this expression cannot be // optimized. return; } - hash_combine(hash, activeHashes.back()); - activeHashes.pop_back(); + auto [currHash, currPossible] = activeIncrementalInfo.back(); + activeIncrementalInfo.pop_back(); + hash_combine(hash, currHash); + if (!currPossible) { + possible = false; + } } - activeHashes.push_back(hash); + activeIncrementalInfo.emplace_back(hash, possible); - // Check if this is something relevant for optimization. - if (!isRelevant(curr)) { + // Check if this is something possible and also relevant for optimization. + if (!possible || !isRelevant(curr)) { return; } @@ -330,6 +337,32 @@ struct Scanner return false; } + // If the size is at least 3, then if we have two of them we have 6, + // and so adding one set+one get and removing one of the items itself + // is not detrimental, and may be beneficial. + // TODO: investigate size 2 + auto size = Measurer::measure(curr); + if (options.shrinkLevel > 0 && size >= 3) { + return true; + } + + // If we focus on speed, any reduction in cost is beneficial, as the + // cost of a get is essentially free. However, we need to balance that with + // the fact that the VM will also do CSE/GVN itself, so minor improvements + // are not worthwhile, so skip things of size 1 (like a global.get). + if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) { + return true; + } + + return false; + } + + // Some things are not possible, and also prevent their parents from being + // possible as well. This is different from isRelevant in that relevance is + // considered for the entire expression, including children - e.g., is the + // total size big enough - while isPossible checks conditions that prevent + // using an expression at all. + bool isPossible(Expression* curr) { // We will fully compute effects later, but consider shallow effects at this // early time to ignore things that cannot be optimized later, because we // use a greedy algorithm. Specifically, imagine we see this: @@ -364,24 +397,7 @@ struct Scanner return false; } - // If the size is at least 3, then if we have two of them we have 6, - // and so adding one set+one get and removing one of the items itself - // is not detrimental, and may be beneficial. - // TODO: investigate size 2 - auto size = Measurer::measure(curr); - if (options.shrinkLevel > 0 && size >= 3) { - return true; - } - - // If we focus on speed, any reduction in cost is beneficial, as the - // cost of a get is essentially free. However, we need to balance that with - // the fact that the VM will also do CSE/GVN itself, so minor improvements - // are not worthwhile, so skip things of size 1 (like a global.get). - if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) { - return true; - } - - return false; + return true; } }; diff --git a/test/lit/passes/local-cse.wast b/test/lit/passes/local-cse.wast index 8b0f51a7c..4f6156dba 100644 --- a/test/lit/passes/local-cse.wast +++ b/test/lit/passes/local-cse.wast @@ -11,7 +11,9 @@ ;; CHECK: (type $2 (func (param i32))) - ;; CHECK: (type $3 (func (result i64))) + ;; CHECK: (type $3 (func (result i32))) + + ;; CHECK: (type $4 (func (result i64))) ;; CHECK: (memory $0 100 100) @@ -356,6 +358,38 @@ ) ) + ;; CHECK: (func $nested-calls (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (call $nested-calls) + ;; CHECK-NEXT: (call $nested-calls) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (call $nested-calls) + ;; CHECK-NEXT: (call $nested-calls) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $nested-calls (result i32) + ;; Operations that include nested effects are ignored. + (drop + (i32.add + (call $nested-calls) + (call $nested-calls) + ) + ) + (drop + (i32.add + (call $nested-calls) + (call $nested-calls) + ) + ) + (unreachable) + ) + ;; CHECK: (func $many-sets (result i64) ;; CHECK-NEXT: (local $temp i64) ;; CHECK-NEXT: (local $1 i64) diff --git a/test/lit/passes/local-cse_all-features.wast b/test/lit/passes/local-cse_all-features.wast index d943b940f..368936ae9 100644 --- a/test/lit/passes/local-cse_all-features.wast +++ b/test/lit/passes/local-cse_all-features.wast @@ -67,13 +67,13 @@ ;; CHECK: (type $2 (func (param (ref $A)))) - ;; CHECK: (type $3 (func (param (ref null $A)))) + ;; CHECK: (type $3 (func)) - ;; CHECK: (type $4 (func)) + ;; CHECK: (type $4 (func (param (ref null $A)))) ;; CHECK: (type $5 (func (param (ref null $B) (ref $A)))) - ;; CHECK: (func $struct-gets-nullable (type $3) (param $ref (ref null $A)) + ;; CHECK: (func $struct-gets-nullable (type $4) (param $ref (ref null $A)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (local.tee $1 @@ -182,7 +182,7 @@ ) ) - ;; CHECK: (func $creations (type $4) + ;; CHECK: (func $creations (type $3) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (struct.new $A ;; CHECK-NEXT: (i32.const 1) @@ -233,6 +233,36 @@ ) ) + ;; CHECK: (func $nested-generativity (type $3) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (struct.new_default $A) + ;; CHECK-NEXT: (struct.new_default $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (struct.new_default $A) + ;; CHECK-NEXT: (struct.new_default $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested-generativity + ;; Operations that include nested generativity are ignored. + (drop + (ref.eq + (struct.new_default $A) + (struct.new_default $A) + ) + ) + (drop + (ref.eq + (struct.new_default $A) + (struct.new_default $A) + ) + ) + ) + ;; CHECK: (func $structs-and-arrays-do-not-alias (type $5) (param $array (ref null $B)) (param $struct (ref $A)) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (array.set $B |