summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2023-08-08 11:10:33 -0700
committerGitHub <noreply@github.com>2023-08-08 11:10:33 -0700
commitf1e92f99867b646aebd8a4f9b35c4972301e6469 (patch)
treeaaac08ab6c8b824c3e86086fae92a75ce89360f7
parentd26d2146c3f58116d4ae7751e3bbfffeb1455412 (diff)
downloadbinaryen-f1e92f99867b646aebd8a4f9b35c4972301e6469.tar.gz
binaryen-f1e92f99867b646aebd8a4f9b35c4972301e6469.tar.bz2
binaryen-f1e92f99867b646aebd8a4f9b35c4972301e6469.zip
LinearExecutionTraversal: Add an option to connect adjacent code, use in SimplifyLocals (#5860)
This addresses most of the minor regression from the correctness fix in #5857. That PR makes us consider calls as branching, but in some cases it is ok to ignore that branching (see the comment in the code here), which this PR allows as an option. This undoes one test change from that PR, showing it undoes the regression for SimplifyLocals. More tests are added to cover this specifically as well.
-rw-r--r--src/ir/linear-execution.h47
-rw-r--r--src/passes/SimplifyLocals.cpp5
-rw-r--r--test/lit/passes/Oz.wast6
-rw-r--r--test/lit/passes/inlining-optimizing_optimize-level=3.wast20
-rw-r--r--test/lit/passes/simplify-locals-eh.wast95
-rw-r--r--test/lit/passes/simplify-locals-gc.wast9
-rw-r--r--test/lit/passes/type-ssa_and_merging.wast4
-rw-r--r--test/wasm2js/i64-ctz.2asm.js1
-rw-r--r--test/wasm2js/unary-ops.2asm.js1
9 files changed, 163 insertions, 25 deletions
diff --git a/src/ir/linear-execution.h b/src/ir/linear-execution.h
index cee039b37..b7020f7fb 100644
--- a/src/ir/linear-execution.h
+++ b/src/ir/linear-execution.h
@@ -44,14 +44,49 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> {
self->noteNonLinear(*currp);
}
+ // Optionally, we can connect adjacent basic blocks. "Adjacent" here means
+ // that the first branches to the second, and that there is no other code in
+ // between them. As a result, the first dominates the second, but it might not
+ // reach it.
+ //
+ // For example, a call may branch if exceptions are enabled, but if this
+ // option is flipped on then we will *not* call doNoteNonLinear on the call:
+ //
+ // ..A..
+ // call();
+ // ..B..
+ //
+ // As a result, we'd consider A and B to be together. Another example is an
+ // if:
+ //
+ // ..A..
+ // if
+ // ..B..
+ // else
+ // ..C..
+ // end
+ //
+ // Here we will connect A and B, but *not* A and C (there is code in between)
+ // or B and C (they do not branch to each other).
+ //
+ // As the if case shows, this can be useful for cases where we want to look at
+ // dominated blocks with their dominator, but it only handles the trivial
+ // adjacent cases of such dominance. Passes should generally uses a full CFG
+ // and dominator tree for this, but this option does help some very common
+ // cases (calls, if without an else) and it has very low overhead (we still
+ // only do a simple postorder walk on the IR, no CFG is constructed, etc.).
+ bool connectAdjacentBlocks = false;
+
static void scan(SubType* self, Expression** currp) {
Expression* curr = *currp;
auto handleCall = [&](bool isReturn) {
- // Control is nonlinear if we return, or if EH is enabled or may be.
- if (isReturn || !self->getModule() ||
- self->getModule()->features.hasExceptionHandling()) {
- self->pushTask(SubType::doNoteNonLinear, currp);
+ if (!self->connectAdjacentBlocks) {
+ // Control is nonlinear if we return, or if EH is enabled or may be.
+ if (isReturn || !self->getModule() ||
+ self->getModule()->features.hasExceptionHandling()) {
+ self->pushTask(SubType::doNoteNonLinear, currp);
+ }
}
// Scan the children normally.
@@ -78,7 +113,9 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> {
self->maybePushTask(SubType::scan, &curr->cast<If>()->ifFalse);
self->pushTask(SubType::doNoteNonLinear, currp);
self->pushTask(SubType::scan, &curr->cast<If>()->ifTrue);
- self->pushTask(SubType::doNoteNonLinear, currp);
+ if (!self->connectAdjacentBlocks) {
+ self->pushTask(SubType::doNoteNonLinear, currp);
+ }
self->pushTask(SubType::scan, &curr->cast<If>()->condition);
break;
}
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp
index ff2141fb4..fb46c79ec 100644
--- a/src/passes/SimplifyLocals.cpp
+++ b/src/passes/SimplifyLocals.cpp
@@ -998,6 +998,11 @@ struct SimplifyLocals
struct EquivalentOptimizer
: public LinearExecutionWalker<EquivalentOptimizer> {
+ // It is ok to look at adjacent blocks together, as if a later part of a
+ // block is not reached that is fine - changes we make there would not be
+ // reached in that case.
+ bool connectAdjacentBlocks = true;
+
std::vector<Index>* numLocalGets;
bool removeEquivalentSets;
PassOptions passOptions;
diff --git a/test/lit/passes/Oz.wast b/test/lit/passes/Oz.wast
index 56b27c047..3a6ac1f7d 100644
--- a/test/lit/passes/Oz.wast
+++ b/test/lit/passes/Oz.wast
@@ -195,11 +195,9 @@
)
;; CHECK: (func $12 (type $i32_i32_=>_i32) (; has Stack IR ;) (param $0 i32) (param $1 i32) (result i32)
;; CHECK-NEXT: (if
- ;; CHECK-NEXT: (local.tee $1
- ;; CHECK-NEXT: (local.get $0)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 0)
diff --git a/test/lit/passes/inlining-optimizing_optimize-level=3.wast b/test/lit/passes/inlining-optimizing_optimize-level=3.wast
index 29ee11b46..c6c4f389c 100644
--- a/test/lit/passes/inlining-optimizing_optimize-level=3.wast
+++ b/test/lit/passes/inlining-optimizing_optimize-level=3.wast
@@ -6818,7 +6818,7 @@
;; CHECK-NEXT: (i32.mul
;; CHECK-NEXT: (i32.shr_s
;; CHECK-NEXT: (i32.sub
- ;; CHECK-NEXT: (local.get $20)
+ ;; CHECK-NEXT: (local.get $8)
;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 2)
@@ -8423,9 +8423,7 @@
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (local.tee $9
;; CHECK-NEXT: (i32.ne
- ;; CHECK-NEXT: (local.tee $7
- ;; CHECK-NEXT: (local.get $6)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $6)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -8438,6 +8436,9 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (local.set $7
+ ;; CHECK-NEXT: (local.get $6)
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $8
;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: )
@@ -8478,8 +8479,13 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (local.set $7
+ ;; CHECK-NEXT: (local.get $6)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $8
+ ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (br_if $__rjti$20
@@ -8502,7 +8508,7 @@
;; CHECK-NEXT: (block $__rjti$07
;; CHECK-NEXT: (br_if $__rjti$07
;; CHECK-NEXT: (i32.le_u
- ;; CHECK-NEXT: (local.get $9)
+ ;; CHECK-NEXT: (local.get $7)
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
diff --git a/test/lit/passes/simplify-locals-eh.wast b/test/lit/passes/simplify-locals-eh.wast
index 38b9d1af7..3b00857b6 100644
--- a/test/lit/passes/simplify-locals-eh.wast
+++ b/test/lit/passes/simplify-locals-eh.wast
@@ -151,4 +151,99 @@
)
)
)
+
+ ;; CHECK: (func $equivalent-set-removal-call (type $i32_=>_none) (param $0 i32)
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: (call $equivalent-set-removal-call
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $equivalent-set-removal-call (param $0 i32)
+ (local $1 i32)
+ (local.set $1 (local.get $0))
+ (drop (local.get $0))
+ (drop (local.get $1))
+ ;; Even with EH enabled we can look past the call and optimize the final 1
+ ;; to a 0, since they contain the same (and while the call might branch,
+ ;; such a branch does not cause a problem here, as if we branch we just
+ ;; don't reach the change later down).
+ (call $equivalent-set-removal-call
+ (i32.const 2)
+ )
+ (drop (local.get $0))
+ (drop (local.get $1))
+ )
+
+ ;; CHECK: (func $equivalent-set-removal-if (type $i32_i32_=>_none) (param $p i32) (param $0 i32)
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $1
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (if
+ ;; CHECK-NEXT: (local.get $p)
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $equivalent-set-removal-if (param $p i32) (param $0 i32)
+ (local $1 i32)
+ (local.set $1 (local.get $0))
+ (drop (local.get $0))
+ ;; This local.get of 1 can be of 0.
+ (drop (local.get $1))
+ (if
+ (local.get $p)
+ (block
+ ;; We also optimize in this block, which is adjacent to the code before
+ ;; us. It is valid to optimize the 1 to a 0 here, as it is dominated by
+ ;; the code earlier.
+ (drop (local.get $0))
+ (drop (local.get $1))
+ )
+ (block
+ ;; We could also optimize here, but atm just look at code adjacent to
+ ;; its dominator. TODO
+ (drop (local.get $0))
+ (drop (local.get $1))
+ )
+ )
+ ;; As in the else, this could be optimized. TODO
+ (drop (local.get $0))
+ (drop (local.get $1))
+ )
)
diff --git a/test/lit/passes/simplify-locals-gc.wast b/test/lit/passes/simplify-locals-gc.wast
index 500d12b5a..6b2b10462 100644
--- a/test/lit/passes/simplify-locals-gc.wast
+++ b/test/lit/passes/simplify-locals-gc.wast
@@ -366,9 +366,7 @@
;; CHECK-NEXT: (local.get $nn-any)
;; CHECK-NEXT: )
;; CHECK-NEXT: (nop)
- ;; CHECK-NEXT: (local.tee $any
- ;; CHECK-NEXT: (local.get $nn-any)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $nn-any)
;; CHECK-NEXT: )
(func $pick-casted (param $any anyref) (result anyref)
(local $nn-any (ref any))
@@ -384,10 +382,7 @@
(call $use-nn-any
(local.get $nn-any)
)
- ;; This copy is not needed, as they hold the same value. However, the call
- ;; before us inhibits us from optimizing here. TODO: with a more precise
- ;; analysis we could optimize this, as if the call branches out it doesn't
- ;; matter what happens after it.
+ ;; This copy is not needed, as they hold the same value.
(local.set $any
(local.get $nn-any)
)
diff --git a/test/lit/passes/type-ssa_and_merging.wast b/test/lit/passes/type-ssa_and_merging.wast
index 19c6bf882..9bab2bc27 100644
--- a/test/lit/passes/type-ssa_and_merging.wast
+++ b/test/lit/passes/type-ssa_and_merging.wast
@@ -22,6 +22,10 @@
;; NOP: (type $none_=>_i32 (func (result i32)))
;; NOP: (import "a" "b" (func $import (type $none_=>_i32) (result i32)))
+ ;; YES: (type $A$2 (sub $A (struct )))
+
+ ;; YES: (type $A$1 (sub $A (struct )))
+
;; YES: (import "a" "b" (func $import (type $none_=>_i32) (result i32)))
(import "a" "b" (func $import (result i32)))
diff --git a/test/wasm2js/i64-ctz.2asm.js b/test/wasm2js/i64-ctz.2asm.js
index e12cea00b..5bd8cfc9b 100644
--- a/test/wasm2js/i64-ctz.2asm.js
+++ b/test/wasm2js/i64-ctz.2asm.js
@@ -132,7 +132,6 @@ function asmFunc(imports) {
var i64toi32_i32$0 = 0, i64toi32_i32$3 = 0, i64toi32_i32$5 = 0, i64toi32_i32$4 = 0, i64toi32_i32$2 = 0, i64toi32_i32$1 = 0, $10 = 0, $5$hi = 0, $8$hi = 0;
i64toi32_i32$0 = var$0$hi;
if (!!(var$0 | i64toi32_i32$0 | 0)) {
- i64toi32_i32$0 = var$0$hi;
i64toi32_i32$2 = var$0;
i64toi32_i32$1 = -1;
i64toi32_i32$3 = -1;
diff --git a/test/wasm2js/unary-ops.2asm.js b/test/wasm2js/unary-ops.2asm.js
index 154118048..90b030e45 100644
--- a/test/wasm2js/unary-ops.2asm.js
+++ b/test/wasm2js/unary-ops.2asm.js
@@ -391,7 +391,6 @@ function asmFunc(imports) {
var i64toi32_i32$0 = 0, i64toi32_i32$3 = 0, i64toi32_i32$5 = 0, i64toi32_i32$4 = 0, i64toi32_i32$2 = 0, i64toi32_i32$1 = 0, $10 = 0, $5$hi = 0, $8$hi = 0;
i64toi32_i32$0 = var$0$hi;
if (!!(var$0 | i64toi32_i32$0 | 0)) {
- i64toi32_i32$0 = var$0$hi;
i64toi32_i32$2 = var$0;
i64toi32_i32$1 = -1;
i64toi32_i32$3 = -1;