diff options
-rw-r--r-- | src/ir/linear-execution.h | 47 | ||||
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 5 | ||||
-rw-r--r-- | test/lit/passes/Oz.wast | 6 | ||||
-rw-r--r-- | test/lit/passes/inlining-optimizing_optimize-level=3.wast | 20 | ||||
-rw-r--r-- | test/lit/passes/simplify-locals-eh.wast | 95 | ||||
-rw-r--r-- | test/lit/passes/simplify-locals-gc.wast | 9 | ||||
-rw-r--r-- | test/lit/passes/type-ssa_and_merging.wast | 4 | ||||
-rw-r--r-- | test/wasm2js/i64-ctz.2asm.js | 1 | ||||
-rw-r--r-- | test/wasm2js/unary-ops.2asm.js | 1 |
9 files changed, 163 insertions, 25 deletions
diff --git a/src/ir/linear-execution.h b/src/ir/linear-execution.h index cee039b37..b7020f7fb 100644 --- a/src/ir/linear-execution.h +++ b/src/ir/linear-execution.h @@ -44,14 +44,49 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> { self->noteNonLinear(*currp); } + // Optionally, we can connect adjacent basic blocks. "Adjacent" here means + // that the first branches to the second, and that there is no other code in + // between them. As a result, the first dominates the second, but it might not + // reach it. + // + // For example, a call may branch if exceptions are enabled, but if this + // option is flipped on then we will *not* call doNoteNonLinear on the call: + // + // ..A.. + // call(); + // ..B.. + // + // As a result, we'd consider A and B to be together. Another example is an + // if: + // + // ..A.. + // if + // ..B.. + // else + // ..C.. + // end + // + // Here we will connect A and B, but *not* A and C (there is code in between) + // or B and C (they do not branch to each other). + // + // As the if case shows, this can be useful for cases where we want to look at + // dominated blocks with their dominator, but it only handles the trivial + // adjacent cases of such dominance. Passes should generally uses a full CFG + // and dominator tree for this, but this option does help some very common + // cases (calls, if without an else) and it has very low overhead (we still + // only do a simple postorder walk on the IR, no CFG is constructed, etc.). + bool connectAdjacentBlocks = false; + static void scan(SubType* self, Expression** currp) { Expression* curr = *currp; auto handleCall = [&](bool isReturn) { - // Control is nonlinear if we return, or if EH is enabled or may be. - if (isReturn || !self->getModule() || - self->getModule()->features.hasExceptionHandling()) { - self->pushTask(SubType::doNoteNonLinear, currp); + if (!self->connectAdjacentBlocks) { + // Control is nonlinear if we return, or if EH is enabled or may be. + if (isReturn || !self->getModule() || + self->getModule()->features.hasExceptionHandling()) { + self->pushTask(SubType::doNoteNonLinear, currp); + } } // Scan the children normally. @@ -78,7 +113,9 @@ struct LinearExecutionWalker : public PostWalker<SubType, VisitorType> { self->maybePushTask(SubType::scan, &curr->cast<If>()->ifFalse); self->pushTask(SubType::doNoteNonLinear, currp); self->pushTask(SubType::scan, &curr->cast<If>()->ifTrue); - self->pushTask(SubType::doNoteNonLinear, currp); + if (!self->connectAdjacentBlocks) { + self->pushTask(SubType::doNoteNonLinear, currp); + } self->pushTask(SubType::scan, &curr->cast<If>()->condition); break; } diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index ff2141fb4..fb46c79ec 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -998,6 +998,11 @@ struct SimplifyLocals struct EquivalentOptimizer : public LinearExecutionWalker<EquivalentOptimizer> { + // It is ok to look at adjacent blocks together, as if a later part of a + // block is not reached that is fine - changes we make there would not be + // reached in that case. + bool connectAdjacentBlocks = true; + std::vector<Index>* numLocalGets; bool removeEquivalentSets; PassOptions passOptions; diff --git a/test/lit/passes/Oz.wast b/test/lit/passes/Oz.wast index 56b27c047..3a6ac1f7d 100644 --- a/test/lit/passes/Oz.wast +++ b/test/lit/passes/Oz.wast @@ -195,11 +195,9 @@ ) ;; CHECK: (func $12 (type $i32_i32_=>_i32) (; has Stack IR ;) (param $0 i32) (param $1 i32) (result i32) ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.tee $1 - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: (return - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 0) diff --git a/test/lit/passes/inlining-optimizing_optimize-level=3.wast b/test/lit/passes/inlining-optimizing_optimize-level=3.wast index 29ee11b46..c6c4f389c 100644 --- a/test/lit/passes/inlining-optimizing_optimize-level=3.wast +++ b/test/lit/passes/inlining-optimizing_optimize-level=3.wast @@ -6818,7 +6818,7 @@ ;; CHECK-NEXT: (i32.mul ;; CHECK-NEXT: (i32.shr_s ;; CHECK-NEXT: (i32.sub - ;; CHECK-NEXT: (local.get $20) + ;; CHECK-NEXT: (local.get $8) ;; CHECK-NEXT: (local.get $5) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 2) @@ -8423,9 +8423,7 @@ ;; CHECK-NEXT: (i32.and ;; CHECK-NEXT: (local.tee $9 ;; CHECK-NEXT: (i32.ne - ;; CHECK-NEXT: (local.tee $7 - ;; CHECK-NEXT: (local.get $6) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $6) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -8438,6 +8436,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $8 ;; CHECK-NEXT: (local.get $5) ;; CHECK-NEXT: ) @@ -8478,8 +8479,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $8 - ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $8 + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (br_if $__rjti$20 @@ -8502,7 +8508,7 @@ ;; CHECK-NEXT: (block $__rjti$07 ;; CHECK-NEXT: (br_if $__rjti$07 ;; CHECK-NEXT: (i32.le_u - ;; CHECK-NEXT: (local.get $9) + ;; CHECK-NEXT: (local.get $7) ;; CHECK-NEXT: (i32.const 3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) diff --git a/test/lit/passes/simplify-locals-eh.wast b/test/lit/passes/simplify-locals-eh.wast index 38b9d1af7..3b00857b6 100644 --- a/test/lit/passes/simplify-locals-eh.wast +++ b/test/lit/passes/simplify-locals-eh.wast @@ -151,4 +151,99 @@ ) ) ) + + ;; CHECK: (func $equivalent-set-removal-call (type $i32_=>_none) (param $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $equivalent-set-removal-call + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $equivalent-set-removal-call (param $0 i32) + (local $1 i32) + (local.set $1 (local.get $0)) + (drop (local.get $0)) + (drop (local.get $1)) + ;; Even with EH enabled we can look past the call and optimize the final 1 + ;; to a 0, since they contain the same (and while the call might branch, + ;; such a branch does not cause a problem here, as if we branch we just + ;; don't reach the change later down). + (call $equivalent-set-removal-call + (i32.const 2) + ) + (drop (local.get $0)) + (drop (local.get $1)) + ) + + ;; CHECK: (func $equivalent-set-removal-if (type $i32_i32_=>_none) (param $p i32) (param $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $p) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $equivalent-set-removal-if (param $p i32) (param $0 i32) + (local $1 i32) + (local.set $1 (local.get $0)) + (drop (local.get $0)) + ;; This local.get of 1 can be of 0. + (drop (local.get $1)) + (if + (local.get $p) + (block + ;; We also optimize in this block, which is adjacent to the code before + ;; us. It is valid to optimize the 1 to a 0 here, as it is dominated by + ;; the code earlier. + (drop (local.get $0)) + (drop (local.get $1)) + ) + (block + ;; We could also optimize here, but atm just look at code adjacent to + ;; its dominator. TODO + (drop (local.get $0)) + (drop (local.get $1)) + ) + ) + ;; As in the else, this could be optimized. TODO + (drop (local.get $0)) + (drop (local.get $1)) + ) ) diff --git a/test/lit/passes/simplify-locals-gc.wast b/test/lit/passes/simplify-locals-gc.wast index 500d12b5a..6b2b10462 100644 --- a/test/lit/passes/simplify-locals-gc.wast +++ b/test/lit/passes/simplify-locals-gc.wast @@ -366,9 +366,7 @@ ;; CHECK-NEXT: (local.get $nn-any) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (nop) - ;; CHECK-NEXT: (local.tee $any - ;; CHECK-NEXT: (local.get $nn-any) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $nn-any) ;; CHECK-NEXT: ) (func $pick-casted (param $any anyref) (result anyref) (local $nn-any (ref any)) @@ -384,10 +382,7 @@ (call $use-nn-any (local.get $nn-any) ) - ;; This copy is not needed, as they hold the same value. However, the call - ;; before us inhibits us from optimizing here. TODO: with a more precise - ;; analysis we could optimize this, as if the call branches out it doesn't - ;; matter what happens after it. + ;; This copy is not needed, as they hold the same value. (local.set $any (local.get $nn-any) ) diff --git a/test/lit/passes/type-ssa_and_merging.wast b/test/lit/passes/type-ssa_and_merging.wast index 19c6bf882..9bab2bc27 100644 --- a/test/lit/passes/type-ssa_and_merging.wast +++ b/test/lit/passes/type-ssa_and_merging.wast @@ -22,6 +22,10 @@ ;; NOP: (type $none_=>_i32 (func (result i32))) ;; NOP: (import "a" "b" (func $import (type $none_=>_i32) (result i32))) + ;; YES: (type $A$2 (sub $A (struct ))) + + ;; YES: (type $A$1 (sub $A (struct ))) + ;; YES: (import "a" "b" (func $import (type $none_=>_i32) (result i32))) (import "a" "b" (func $import (result i32))) diff --git a/test/wasm2js/i64-ctz.2asm.js b/test/wasm2js/i64-ctz.2asm.js index e12cea00b..5bd8cfc9b 100644 --- a/test/wasm2js/i64-ctz.2asm.js +++ b/test/wasm2js/i64-ctz.2asm.js @@ -132,7 +132,6 @@ function asmFunc(imports) { var i64toi32_i32$0 = 0, i64toi32_i32$3 = 0, i64toi32_i32$5 = 0, i64toi32_i32$4 = 0, i64toi32_i32$2 = 0, i64toi32_i32$1 = 0, $10 = 0, $5$hi = 0, $8$hi = 0; i64toi32_i32$0 = var$0$hi; if (!!(var$0 | i64toi32_i32$0 | 0)) { - i64toi32_i32$0 = var$0$hi; i64toi32_i32$2 = var$0; i64toi32_i32$1 = -1; i64toi32_i32$3 = -1; diff --git a/test/wasm2js/unary-ops.2asm.js b/test/wasm2js/unary-ops.2asm.js index 154118048..90b030e45 100644 --- a/test/wasm2js/unary-ops.2asm.js +++ b/test/wasm2js/unary-ops.2asm.js @@ -391,7 +391,6 @@ function asmFunc(imports) { var i64toi32_i32$0 = 0, i64toi32_i32$3 = 0, i64toi32_i32$5 = 0, i64toi32_i32$4 = 0, i64toi32_i32$2 = 0, i64toi32_i32$1 = 0, $10 = 0, $5$hi = 0, $8$hi = 0; i64toi32_i32$0 = var$0$hi; if (!!(var$0 | i64toi32_i32$0 | 0)) { - i64toi32_i32$0 = var$0$hi; i64toi32_i32$2 = var$0; i64toi32_i32$1 = -1; i64toi32_i32$3 = -1; |