diff options
author | Alon Zakai <azakai@google.com> | 2023-08-08 11:10:33 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-08 11:10:33 -0700 |
commit | f1e92f99867b646aebd8a4f9b35c4972301e6469 (patch) | |
tree | aaac08ab6c8b824c3e86086fae92a75ce89360f7 | |
parent | d26d2146c3f58116d4ae7751e3bbfffeb1455412 (diff) | |
download | binaryen-f1e92f99867b646aebd8a4f9b35c4972301e6469.tar.gz binaryen-f1e92f99867b646aebd8a4f9b35c4972301e6469.tar.bz2 binaryen-f1e92f99867b646aebd8a4f9b35c4972301e6469.zip |
LinearExecutionTraversal: Add an option to connect adjacent code, use in SimplifyLocals (#5860)
This addresses most of the minor regression from the correctness fix in #5857.
That PR makes us consider calls as branching, but in some cases it is ok to
ignore that branching (see the comment in the code here), which this PR allows as
an option.
This undoes one test change from that PR, showing it undoes the regression for
SimplifyLocals. More tests are added to cover this specifically as well.
-rw-r--r-- | src/ir/linear-execution.h | 47 | ||||
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 5 | ||||
-rw-r--r-- | test/lit/passes/Oz.wast | 6 | ||||
-rw-r--r-- | test/lit/passes/inlining-optimizing_optimize-level=3.wast | 20 | ||||
-rw-r--r-- | test/lit/passes/simplify-locals-eh.wast | 95 | ||||
-rw-r--r-- | test/lit/passes/simplify-locals-gc.wast | 9 | ||||
-rw-r--r-- | test/lit/passes/type-ssa_and_merging.wast | 4 | ||||
-rw-r--r-- | test/wasm2js/i64-ctz.2asm.js | 1 | ||||
-rw-r--r-- | test/wasm2js/unary-ops.2asm.js | 1 |
9 files changed, 163 insertions, 25 deletions
diff --git a/src/ir/linear-execution.h b/src/ir/linear-execution.h index cee039b37..b7020f7fb 100644 --- a/src/ir/linear-execution.h +++ b/src/ir/linear-execution.h @@ -44,14 +44,49 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> { self->noteNonLinear(*currp); } + // Optionally, we can connect adjacent basic blocks. "Adjacent" here means + // that the first branches to the second, and that there is no other code in + // between them. As a result, the first dominates the second, but it might not + // reach it. + // + // For example, a call may branch if exceptions are enabled, but if this + // option is flipped on then we will *not* call doNoteNonLinear on the call: + // + // ..A.. + // call(); + // ..B.. + // + // As a result, we'd consider A and B to be together. Another example is an + // if: + // + // ..A.. + // if + // ..B.. + // else + // ..C.. + // end + // + // Here we will connect A and B, but *not* A and C (there is code in between) + // or B and C (they do not branch to each other). + // + // As the if case shows, this can be useful for cases where we want to look at + // dominated blocks with their dominator, but it only handles the trivial + // adjacent cases of such dominance. Passes should generally uses a full CFG + // and dominator tree for this, but this option does help some very common + // cases (calls, if without an else) and it has very low overhead (we still + // only do a simple postorder walk on the IR, no CFG is constructed, etc.). + bool connectAdjacentBlocks = false; + static void scan(SubType* self, Expression** currp) { Expression* curr = *currp; auto handleCall = [&](bool isReturn) { - // Control is nonlinear if we return, or if EH is enabled or may be. - if (isReturn || !self->getModule() || - self->getModule()->features.hasExceptionHandling()) { - self->pushTask(SubType::doNoteNonLinear, currp); + if (!self->connectAdjacentBlocks) { + // Control is nonlinear if we return, or if EH is enabled or may be. + if (isReturn || !self->getModule() || + self->getModule()->features.hasExceptionHandling()) { + self->pushTask(SubType::doNoteNonLinear, currp); + } } // Scan the children normally. @@ -78,7 +113,9 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> { self->maybePushTask(SubType::scan, &curr->cast<If>()->ifFalse); self->pushTask(SubType::doNoteNonLinear, currp); self->pushTask(SubType::scan, &curr->cast<If>()->ifTrue); - self->pushTask(SubType::doNoteNonLinear, currp); + if (!self->connectAdjacentBlocks) { + self->pushTask(SubType::doNoteNonLinear, currp); + } self->pushTask(SubType::scan, &curr->cast<If>()->condition); break; } diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index ff2141fb4..fb46c79ec 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -998,6 +998,11 @@ struct SimplifyLocals struct EquivalentOptimizer : public LinearExecutionWalker<EquivalentOptimizer> { + // It is ok to look at adjacent blocks together, as if a later part of a + // block is not reached that is fine - changes we make there would not be + // reached in that case. + bool connectAdjacentBlocks = true; + std::vector<Index>* numLocalGets; bool removeEquivalentSets; PassOptions passOptions; diff --git a/test/lit/passes/Oz.wast b/test/lit/passes/Oz.wast index 56b27c047..3a6ac1f7d 100644 --- a/test/lit/passes/Oz.wast +++ b/test/lit/passes/Oz.wast @@ -195,11 +195,9 @@ ) ;; CHECK: (func $12 (type $i32_i32_=>_i32) (; has Stack IR ;) (param $0 i32) (param $1 i32) (result i32) ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.tee $1 - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: (return - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 0) diff --git a/test/lit/passes/inlining-optimizing_optimize-level=3.wast b/test/lit/passes/inlining-optimizing_optimize-level=3.wast index 29ee11b46..c6c4f389c 100644 --- a/test/lit/passes/inlining-optimizing_optimize-level=3.wast +++ b/test/lit/passes/inlining-optimizing_optimize-level=3.wast @@ -6818,7 +6818,7 @@ ;; CHECK-NEXT: (i32.mul ;; CHECK-NEXT: (i32.shr_s ;; CHECK-NEXT: (i32.sub - ;; CHECK-NEXT: (local.get $20) + ;; CHECK-NEXT: (local.get $8) ;; CHECK-NEXT: (local.get $5) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 2) @@ -8423,9 +8423,7 @@ ;; CHECK-NEXT: (i32.and ;; CHECK-NEXT: (local.tee $9 ;; CHECK-NEXT: (i32.ne - ;; CHECK-NEXT: (local.tee $7 - ;; CHECK-NEXT: (local.get $6) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $6) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -8438,6 +8436,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $8 ;; CHECK-NEXT: (local.get $5) ;; CHECK-NEXT: ) @@ -8478,8 +8479,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $8 - ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $8 + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (br_if $__rjti$20 @@ -8502,7 +8508,7 @@ ;; CHECK-NEXT: (block $__rjti$07 ;; CHECK-NEXT: (br_if $__rjti$07 ;; CHECK-NEXT: (i32.le_u - ;; CHECK-NEXT: (local.get $9) + ;; CHECK-NEXT: (local.get $7) ;; CHECK-NEXT: (i32.const 3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) diff --git a/test/lit/passes/simplify-locals-eh.wast b/test/lit/passes/simplify-locals-eh.wast index 38b9d1af7..3b00857b6 100644 --- a/test/lit/passes/simplify-locals-eh.wast +++ b/test/lit/passes/simplify-locals-eh.wast @@ -151,4 +151,99 @@ ) ) ) + + ;; CHECK: (func $equivalent-set-removal-call (type $i32_=>_none) (param $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $equivalent-set-removal-call + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $equivalent-set-removal-call (param $0 i32) + (local $1 i32) + (local.set $1 (local.get $0)) + (drop (local.get $0)) + (drop (local.get $1)) + ;; Even with EH enabled we can look past the call and optimize the final 1 + ;; to a 0, since they contain the same (and while the call might branch, + ;; such a branch does not cause a problem here, as if we branch we just + ;; don't reach the change later down). + (call $equivalent-set-removal-call + (i32.const 2) + ) + (drop (local.get $0)) + (drop (local.get $1)) + ) + + ;; CHECK: (func $equivalent-set-removal-if (type $i32_i32_=>_none) (param $p i32) (param $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $p) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $equivalent-set-removal-if (param $p i32) (param $0 i32) + (local $1 i32) + (local.set $1 (local.get $0)) + (drop (local.get $0)) + ;; This local.get of 1 can be of 0. + (drop (local.get $1)) + (if + (local.get $p) + (block + ;; We also optimize in this block, which is adjacent to the code before + ;; us. It is valid to optimize the 1 to a 0 here, as it is dominated by + ;; the code earlier. + (drop (local.get $0)) + (drop (local.get $1)) + ) + (block + ;; We could also optimize here, but atm just look at code adjacent to + ;; its dominator. TODO + (drop (local.get $0)) + (drop (local.get $1)) + ) + ) + ;; As in the else, this could be optimized. TODO + (drop (local.get $0)) + (drop (local.get $1)) + ) ) diff --git a/test/lit/passes/simplify-locals-gc.wast b/test/lit/passes/simplify-locals-gc.wast index 500d12b5a..6b2b10462 100644 --- a/test/lit/passes/simplify-locals-gc.wast +++ b/test/lit/passes/simplify-locals-gc.wast @@ -366,9 +366,7 @@ ;; CHECK-NEXT: (local.get $nn-any) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (nop) - ;; CHECK-NEXT: (local.tee $any - ;; CHECK-NEXT: (local.get $nn-any) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $nn-any) ;; CHECK-NEXT: ) (func $pick-casted (param $any anyref) (result anyref) (local $nn-any (ref any)) @@ -384,10 +382,7 @@ (call $use-nn-any (local.get $nn-any) ) - ;; This copy is not needed, as they hold the same value. However, the call - ;; before us inhibits us from optimizing here. TODO: with a more precise - ;; analysis we could optimize this, as if the call branches out it doesn't - ;; matter what happens after it. + ;; This copy is not needed, as they hold the same value. (local.set $any (local.get $nn-any) ) diff --git a/test/lit/passes/type-ssa_and_merging.wast b/test/lit/passes/type-ssa_and_merging.wast index 19c6bf882..9bab2bc27 100644 --- a/test/lit/passes/type-ssa_and_merging.wast +++ b/test/lit/passes/type-ssa_and_merging.wast @@ -22,6 +22,10 @@ ;; NOP: (type $none_=>_i32 (func (result i32))) ;; NOP: (import "a" "b" (func $import (type $none_=>_i32) (result i32))) + ;; YES: (type $A$2 (sub $A (struct ))) + + ;; YES: (type $A$1 (sub $A (struct ))) + ;; YES: (import "a" "b" (func $import (type $none_=>_i32) (result i32))) (import "a" "b" (func $import (result i32))) diff --git a/test/wasm2js/i64-ctz.2asm.js b/test/wasm2js/i64-ctz.2asm.js index e12cea00b..5bd8cfc9b 100644 --- a/test/wasm2js/i64-ctz.2asm.js +++ b/test/wasm2js/i64-ctz.2asm.js @@ -132,7 +132,6 @@ function asmFunc(imports) { var i64toi32_i32$0 = 0, i64toi32_i32$3 = 0, i64toi32_i32$5 = 0, i64toi32_i32$4 = 0, i64toi32_i32$2 = 0, i64toi32_i32$1 = 0, $10 = 0, $5$hi = 0, $8$hi = 0; i64toi32_i32$0 = var$0$hi; if (!!(var$0 | i64toi32_i32$0 | 0)) { - i64toi32_i32$0 = var$0$hi; i64toi32_i32$2 = var$0; i64toi32_i32$1 = -1; i64toi32_i32$3 = -1; diff --git a/test/wasm2js/unary-ops.2asm.js b/test/wasm2js/unary-ops.2asm.js index 154118048..90b030e45 100644 --- a/test/wasm2js/unary-ops.2asm.js +++ b/test/wasm2js/unary-ops.2asm.js @@ -391,7 +391,6 @@ function asmFunc(imports) { var i64toi32_i32$0 = 0, i64toi32_i32$3 = 0, i64toi32_i32$5 = 0, i64toi32_i32$4 = 0, i64toi32_i32$2 = 0, i64toi32_i32$1 = 0, $10 = 0, $5$hi = 0, $8$hi = 0; i64toi32_i32$0 = var$0$hi; if (!!(var$0 | i64toi32_i32$0 | 0)) { - i64toi32_i32$0 = var$0$hi; i64toi32_i32$2 = var$0; i64toi32_i32$1 = -1; i64toi32_i32$3 = -1; |