summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2019-07-11 08:55:00 -0700
committerGitHub <noreply@github.com>2019-07-11 08:55:00 -0700
commit03f3528f2f216378537988d22c66e4e22d2ddda3 (patch)
treea414c2a8e16213ea5a10dc4b0c5280ffcde5bbd5
parent1a9b0e166a747fefbf0502318e6c2cc27669f3a1 (diff)
downloadbinaryen-03f3528f2f216378537988d22c66e4e22d2ddda3.tar.gz
binaryen-03f3528f2f216378537988d22c66e4e22d2ddda3.tar.bz2
binaryen-03f3528f2f216378537988d22c66e4e22d2ddda3.zip
Optimize if of br_if (#2216)
An if whose body is a br_if can be turned into a br_if of a combined condition (if side effects allow it). The naive size in bytes is identical between the patterns, but the select may avoid a hardware branch, and also the select may be further optimized. On the benchmark suite this helps every single benchmark, but by quite small amounts (e.g. 100 bytes on sqlite, which is 1MB). This was noticed in emscripten-core/emscripten#8941
-rw-r--r--src/asm2wasm.h1
-rw-r--r--src/passes/RemoveUnusedBrs.cpp67
-rw-r--r--test/emcc_hello_world.fromasm17
-rw-r--r--test/emcc_hello_world.fromasm.clamp17
-rw-r--r--test/emcc_hello_world.fromasm.imprecise17
-rw-r--r--test/memorygrowth.fromasm13
-rw-r--r--test/memorygrowth.fromasm.clamp13
-rw-r--r--test/memorygrowth.fromasm.imprecise13
-rw-r--r--test/passes/bysyncify_optimize-level=1.txt11
-rw-r--r--test/passes/remove-unused-brs.txt51
-rw-r--r--test/passes/remove-unused-brs.wast48
-rw-r--r--test/unit.fromasm24
-rw-r--r--test/unit.fromasm.clamp24
-rw-r--r--test/unit.fromasm.imprecise24
14 files changed, 250 insertions, 90 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index 015c43053..d4b4f8674 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -1688,6 +1688,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
// autodrop can add some garbage
passRunner.add("vacuum");
passRunner.add("remove-unused-brs");
+ passRunner.add("vacuum");
passRunner.add("remove-unused-names");
passRunner.add("merge-blocks");
passRunner.add("optimize-instructions");
diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp
index 1e67ecec3..7f3a9965d 100644
--- a/src/passes/RemoveUnusedBrs.cpp
+++ b/src/passes/RemoveUnusedBrs.cpp
@@ -21,6 +21,7 @@
#include <ir/branch-utils.h>
#include <ir/cost.h>
#include <ir/effects.h>
+#include <ir/literal-utils.h>
#include <ir/utils.h>
#include <parsing.h>
#include <pass.h>
@@ -49,6 +50,23 @@ static bool canTurnIfIntoBrIf(Expression* ifCondition,
return !EffectAnalyzer(options, ifCondition).invalidates(value);
}
+// Check if it is not worth it to run code unconditionally. This
+// assumes we are trying to run two expressions where previously
+// only one of the two might have executed. We assume here that
+// executing both is good for code size.
+static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions,
+ Expression* one,
+ Expression* two) {
+ // If we care mostly about code size, just do it for that reason.
+ if (passOptions.shrinkLevel) {
+ return false;
+ }
+ // Consider the cost of executing all the code unconditionally.
+ const auto TOO_MUCH = 7;
+ auto total = CostAnalyzer(one).cost + CostAnalyzer(two).cost;
+ return total >= TOO_MUCH;
+}
+
struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
bool isFunctionParallel() override { return true; }
@@ -283,12 +301,40 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
void visitIf(If* curr) {
if (!curr->ifFalse) {
- // if without an else. try to reduce if (condition) br => br_if
- // (condition)
- Break* br = curr->ifTrue->dynCast<Break>();
- if (br && !br->condition) { // TODO: if there is a condition, join them
+ // if without an else. try to reduce
+ // if (condition) br => br_if (condition)
+ if (Break* br = curr->ifTrue->dynCast<Break>()) {
if (canTurnIfIntoBrIf(curr->condition, br->value, getPassOptions())) {
- br->condition = curr->condition;
+ if (!br->condition) {
+ br->condition = curr->condition;
+ } else {
+ // In this case we can replace
+ // if (condition1) br_if (condition2)
+ // =>
+ // br_if select (condition1) (condition2) (i32.const 0)
+ // In other words, we replace an if (3 bytes) with a select and a
+ // zero (also 3 bytes). The size is unchanged, but the select may
+ // be further optimizable, and if select does not branch we also
+ // avoid one branch.
+ // If running the br's condition unconditionally is too expensive,
+ // give up.
+ auto* zero = LiteralUtils::makeZero(i32, *getModule());
+ if (tooCostlyToRunUnconditionally(
+ getPassOptions(), br->condition, zero)) {
+ return;
+ }
+ // Of course we can't do this if the br's condition has side
+ // effects, as we would then execute those unconditionally.
+ if (EffectAnalyzer(getPassOptions(), br->condition)
+ .hasSideEffects()) {
+ return;
+ }
+ Builder builder(*getModule());
+ // Note that we use the br's condition as the select condition.
+ // That keeps the order of the two conditions as it was originally.
+ br->condition =
+ builder.makeSelect(br->condition, curr->condition, zero);
+ }
br->finalize();
replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br));
anotherCycle = true;
@@ -871,14 +917,9 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
// This is always helpful for code size, but can be a tradeoff with
// performance as we run both code paths. So when shrinking we always
// try to do this, but otherwise must consider more carefully.
- if (!passOptions.shrinkLevel) {
- // Consider the cost of executing all the code unconditionally
- const auto MAX_COST = 7;
- auto total =
- CostAnalyzer(iff->ifTrue).cost + CostAnalyzer(iff->ifFalse).cost;
- if (total >= MAX_COST) {
- return nullptr;
- }
+ if (tooCostlyToRunUnconditionally(
+ passOptions, iff->ifTrue, iff->ifFalse)) {
+ return nullptr;
}
// Check if side effects allow this.
EffectAnalyzer condition(passOptions, iff->condition);
diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm
index a9b17068e..25cbe7176 100644
--- a/test/emcc_hello_world.fromasm
+++ b/test/emcc_hello_world.fromasm
@@ -10327,13 +10327,18 @@
)
)
(block
- (if
- (local.tee $3
- (i32.load
- (i32.const 616)
+ (br_if $label$break$L279
+ (select
+ (i32.eqz
+ (i32.eqz
+ (local.tee $3
+ (i32.load
+ (i32.const 616)
+ )
+ )
+ )
)
- )
- (br_if $label$break$L279
+ (i32.const 0)
(i32.or
(i32.le_u
(local.get $12)
diff --git a/test/emcc_hello_world.fromasm.clamp b/test/emcc_hello_world.fromasm.clamp
index 212e5e8d3..8feba2827 100644
--- a/test/emcc_hello_world.fromasm.clamp
+++ b/test/emcc_hello_world.fromasm.clamp
@@ -10377,13 +10377,18 @@
)
)
(block
- (if
- (local.tee $3
- (i32.load
- (i32.const 616)
+ (br_if $label$break$L279
+ (select
+ (i32.eqz
+ (i32.eqz
+ (local.tee $3
+ (i32.load
+ (i32.const 616)
+ )
+ )
+ )
)
- )
- (br_if $label$break$L279
+ (i32.const 0)
(i32.or
(i32.le_u
(local.get $12)
diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise
index f6fe8aca1..8bb8c257b 100644
--- a/test/emcc_hello_world.fromasm.imprecise
+++ b/test/emcc_hello_world.fromasm.imprecise
@@ -10223,13 +10223,18 @@
)
)
(block
- (if
- (local.tee $3
- (i32.load
- (i32.const 616)
+ (br_if $label$break$L279
+ (select
+ (i32.eqz
+ (i32.eqz
+ (local.tee $3
+ (i32.load
+ (i32.const 616)
+ )
+ )
+ )
)
- )
- (br_if $label$break$L279
+ (i32.const 0)
(i32.or
(i32.le_u
(local.get $12)
diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm
index 13231d482..f1858e724 100644
--- a/test/memorygrowth.fromasm
+++ b/test/memorygrowth.fromasm
@@ -3152,13 +3152,14 @@
)
)
(block
- (if
- (local.tee $12
- (i32.load
- (i32.const 1648)
+ (br_if $do-once33
+ (select
+ (local.tee $12
+ (i32.load
+ (i32.const 1648)
+ )
)
- )
- (br_if $do-once33
+ (i32.const 0)
(i32.or
(i32.le_u
(local.get $8)
diff --git a/test/memorygrowth.fromasm.clamp b/test/memorygrowth.fromasm.clamp
index 13231d482..f1858e724 100644
--- a/test/memorygrowth.fromasm.clamp
+++ b/test/memorygrowth.fromasm.clamp
@@ -3152,13 +3152,14 @@
)
)
(block
- (if
- (local.tee $12
- (i32.load
- (i32.const 1648)
+ (br_if $do-once33
+ (select
+ (local.tee $12
+ (i32.load
+ (i32.const 1648)
+ )
)
- )
- (br_if $do-once33
+ (i32.const 0)
(i32.or
(i32.le_u
(local.get $8)
diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise
index d6011b1c7..61c455725 100644
--- a/test/memorygrowth.fromasm.imprecise
+++ b/test/memorygrowth.fromasm.imprecise
@@ -3150,13 +3150,14 @@
)
)
(block
- (if
- (local.tee $12
- (i32.load
- (i32.const 1648)
+ (br_if $do-once33
+ (select
+ (local.tee $12
+ (i32.load
+ (i32.const 1648)
+ )
)
- )
- (br_if $do-once33
+ (i32.const 0)
(i32.or
(i32.le_u
(local.get $8)
diff --git a/test/passes/bysyncify_optimize-level=1.txt b/test/passes/bysyncify_optimize-level=1.txt
index 7b1093964..b2bc87f85 100644
--- a/test/passes/bysyncify_optimize-level=1.txt
+++ b/test/passes/bysyncify_optimize-level=1.txt
@@ -1244,11 +1244,12 @@
)
)
)
- (if
- (i32.eqz
- (global.get $__bysyncify_state)
- )
- (br_if $l
+ (br_if $l
+ (select
+ (i32.eqz
+ (global.get $__bysyncify_state)
+ )
+ (i32.const 0)
(local.get $0)
)
)
diff --git a/test/passes/remove-unused-brs.txt b/test/passes/remove-unused-brs.txt
index 944828d5c..790d943c3 100644
--- a/test/passes/remove-unused-brs.txt
+++ b/test/passes/remove-unused-brs.txt
@@ -2431,4 +2431,55 @@
)
)
)
+ (func $if_br_if (; 115 ;) (type $1)
+ (local $0 i32)
+ (block $label$1
+ (br_if $label$1
+ (select
+ (local.tee $0
+ (i32.const 1024)
+ )
+ (i32.const 0)
+ (i32.eqz
+ (i32.const -4)
+ )
+ )
+ )
+ (br_if $label$1
+ (select
+ (i32.const 1025)
+ (i32.const 0)
+ (i32.eqz
+ (i32.const -5)
+ )
+ )
+ )
+ (br_if $label$1
+ (select
+ (local.tee $0
+ (i32.const 1025)
+ )
+ (i32.const 0)
+ (i32.eqz
+ (i32.const -6)
+ )
+ )
+ )
+ (if
+ (i32.const 1026)
+ (br_if $label$1
+ (local.tee $0
+ (i32.const -7)
+ )
+ )
+ )
+ (i32.store
+ (i32.const 1024)
+ (i32.add
+ (local.get $0)
+ (i32.const 1)
+ )
+ )
+ )
+ )
)
diff --git a/test/passes/remove-unused-brs.wast b/test/passes/remove-unused-brs.wast
index 15a56b2d9..c20e352f0 100644
--- a/test/passes/remove-unused-brs.wast
+++ b/test/passes/remove-unused-brs.wast
@@ -2039,5 +2039,53 @@
)
)
)
+ (func $if_br_if
+ (local $0 i32)
+ (block $label$1
+ (if
+ (local.tee $0 ;; note side effect; it's ok
+ (i32.const 1024)
+ )
+ (br_if $label$1
+ (i32.eqz
+ (i32.const -4)
+ )
+ )
+ )
+ (if
+ (i32.const 1025)
+ (br_if $label$1
+ (i32.eqz
+ (i32.const -5)
+ )
+ )
+ )
+ (if
+ (local.tee $0 ;; note side effect; it's ok
+ (i32.const 1025)
+ )
+ (br_if $label$1
+ (i32.eqz
+ (i32.const -6)
+ )
+ )
+ )
+ (if
+ (i32.const 1026)
+ (br_if $label$1
+ (local.tee $0 ;; but here it is *not* ok
+ (i32.const -7)
+ )
+ )
+ )
+ (i32.store
+ (i32.const 1024)
+ (i32.add
+ (local.get $0)
+ (i32.const 1)
+ )
+ )
+ )
+ )
)
diff --git a/test/unit.fromasm b/test/unit.fromasm
index b6065c905..748be4b8e 100644
--- a/test/unit.fromasm
+++ b/test/unit.fromasm
@@ -705,19 +705,17 @@
)
)
(func $loophi (; 45 ;) (; has Stack IR ;) (param $0 i32) (param $1 i32)
- (local $2 i32)
(loop $while-in
(block $while-out
(call $loophi
(local.get $0)
(i32.const 0)
)
- (if
- (local.tee $2
+ (br_if $while-out
+ (select
+ (local.get $0)
+ (i32.const 0)
(local.get $0)
- )
- (br_if $while-out
- (local.get $2)
)
)
(br_if $while-in
@@ -747,9 +745,10 @@
(local.set $2
(local.get $0)
)
- (if
- (call $return_int)
- (br_if $label$break$L7
+ (br_if $label$break$L7
+ (select
+ (call $return_int)
+ (i32.const 0)
(local.get $2)
)
)
@@ -779,9 +778,10 @@
(local.set $1
(local.get $0)
)
- (if
- (call $return_int)
- (br_if $label$break$L7
+ (br_if $label$break$L7
+ (select
+ (call $return_int)
+ (i32.const 0)
(local.get $1)
)
)
diff --git a/test/unit.fromasm.clamp b/test/unit.fromasm.clamp
index 1a905fee5..0b57f2077 100644
--- a/test/unit.fromasm.clamp
+++ b/test/unit.fromasm.clamp
@@ -753,19 +753,17 @@
)
)
(func $loophi (; 46 ;) (; has Stack IR ;) (param $0 i32) (param $1 i32)
- (local $2 i32)
(loop $while-in
(block $while-out
(call $loophi
(local.get $0)
(i32.const 0)
)
- (if
- (local.tee $2
+ (br_if $while-out
+ (select
+ (local.get $0)
+ (i32.const 0)
(local.get $0)
- )
- (br_if $while-out
- (local.get $2)
)
)
(br_if $while-in
@@ -795,9 +793,10 @@
(local.set $2
(local.get $0)
)
- (if
- (call $return_int)
- (br_if $label$break$L7
+ (br_if $label$break$L7
+ (select
+ (call $return_int)
+ (i32.const 0)
(local.get $2)
)
)
@@ -827,9 +826,10 @@
(local.set $1
(local.get $0)
)
- (if
- (call $return_int)
- (br_if $label$break$L7
+ (br_if $label$break$L7
+ (select
+ (call $return_int)
+ (i32.const 0)
(local.get $1)
)
)
diff --git a/test/unit.fromasm.imprecise b/test/unit.fromasm.imprecise
index bc2546b6e..f9fc294c5 100644
--- a/test/unit.fromasm.imprecise
+++ b/test/unit.fromasm.imprecise
@@ -693,19 +693,17 @@
)
)
(func $loophi (; 44 ;) (; has Stack IR ;) (param $0 i32) (param $1 i32)
- (local $2 i32)
(loop $while-in
(block $while-out
(call $loophi
(local.get $0)
(i32.const 0)
)
- (if
- (local.tee $2
+ (br_if $while-out
+ (select
+ (local.get $0)
+ (i32.const 0)
(local.get $0)
- )
- (br_if $while-out
- (local.get $2)
)
)
(br_if $while-in
@@ -735,9 +733,10 @@
(local.set $2
(local.get $0)
)
- (if
- (call $return_int)
- (br_if $label$break$L7
+ (br_if $label$break$L7
+ (select
+ (call $return_int)
+ (i32.const 0)
(local.get $2)
)
)
@@ -767,9 +766,10 @@
(local.set $1
(local.get $0)
)
- (if
- (call $return_int)
- (br_if $label$break$L7
+ (br_if $label$break$L7
+ (select
+ (call $return_int)
+ (i32.const 0)
(local.get $1)
)
)