summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2023-10-03 16:39:12 -0700
committerGitHub <noreply@github.com>2023-10-03 16:39:12 -0700
commitb2e096d79c36daa2cbfb7dc3db31af76e9f45cc8 (patch)
tree52f962128c9fe6c1b8cc8b847e5af40f33117c40 /src
parent24779b2a3fe5e5c7cc6b1da3661d346cd9c129ae (diff)
downloadbinaryen-b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8.tar.gz
binaryen-b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8.tar.bz2
binaryen-b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8.zip
RemoveUnusedBrs: Allow less unconditional work and in particular division (#5989)
Fixes #5983: The testcase from there is used here in a new testcase remove-unused-brs_levels in which we check if we are willing to unconditionally do a division operation. Turning an if with an arm that does a division into a select, which always does the division, is almost 5x slower, so we should probably be extremely careful about doing that. I took some measurements and have some suggestions for changes in this PR: * Raise the cost of div/rem to what I measure on my machine, which is 5x slower than an add, or worse. * For some reason we added the if arms rather than take the max of them, so fix that. This does not help the issue, but was confusing. * Adjust TooCostlyToRunUnconditionally in the pass from 9 to 8 (this helps balance the last point). * Use half that value when not optimizing for size. That is, we allow only 4 extra unconditional work normally, and 8 in -Os, and when -Oz then we allow any extra amount. Aside from the new testcases, some existing ones changed. They all appear to change in a reasonable way, to me. We should perhaps go even further than this, and not even run a division unconditionally in -Os, but I wasn't sure it makes sense to go that far as other benchmarks may be affected. For now, this makes the benchmark in #5983 run at full speed in -O3 or -Os, and it remains slow in -Oz. The modified version of the benchmark that only divides in the if (no other operations) is still fast in -O3, but it become slow in -Os as we do turn that if into a select (but again, I didn't want to go that far as to overfit on that one benchmark).
Diffstat (limited to 'src')
-rw-r--r--src/ir/cost.h4
-rw-r--r--src/passes/RemoveUnusedBrs.cpp62
2 files changed, 46 insertions, 20 deletions
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 73d027788..efc50eb1b 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -263,7 +263,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
case DivUInt32:
case RemSInt32:
case RemUInt32:
- ret = curr->right->is<Const>() ? 2 : 3;
+ ret = curr->right->is<Const>() ? 5 : 6;
break;
case AndInt32:
case OrInt32:
@@ -284,7 +284,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
case DivUInt64:
case RemSInt64:
case RemUInt64:
- ret = curr->right->is<Const>() ? 3 : 4;
+ ret = curr->right->is<Const>() ? 7 : 8;
break;
case AndInt64:
case OrInt64:
diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp
index fca5b7db4..576f2fe91 100644
--- a/src/passes/RemoveUnusedBrs.cpp
+++ b/src/passes/RemoveUnusedBrs.cpp
@@ -80,38 +80,64 @@ static bool canTurnIfIntoBrIf(Expression* ifCondition,
return !EffectAnalyzer(options, wasm, ifCondition).invalidates(value);
}
-// This leads to similar choices as LLVM does.
-// See https://github.com/WebAssembly/binaryen/pull/4228
-// It can be tuned more later.
-const Index TooCostlyToRunUnconditionally = 9;
+// This leads to similar choices as LLVM does in some cases, by balancing the
+// extra work of code that is run unconditionally with the speedup from not
+// branching to decide whether to run it or not.
+// See:
+// * https://github.com/WebAssembly/binaryen/pull/4228
+// * https://github.com/WebAssembly/binaryen/issues/5983
+const Index TooCostlyToRunUnconditionally = 8;
static_assert(TooCostlyToRunUnconditionally < CostAnalyzer::Unacceptable,
"We never run code unconditionally if it has unacceptable cost");
-// Check if it is not worth it to run code unconditionally. This
-// assumes we are trying to run two expressions where previously
-// only one of the two might have executed. We assume here that
-// executing both is good for code size.
static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions,
- Expression* one,
- Expression* two) {
- // If we care mostly about code size, just do it for that reason.
- if (passOptions.shrinkLevel) {
- return false;
+ Index cost) {
+ if (passOptions.shrinkLevel == 0) {
+ // We are focused on speed. Any extra cost is risky, but allow a small
+ // amount.
+ return cost > TooCostlyToRunUnconditionally / 2;
+ } else if (passOptions.shrinkLevel == 1) {
+ // We are optimizing for size in a balanced manner. Allow some extra
+ // overhead here.
+ return cost >= TooCostlyToRunUnconditionally;
+ } else {
+ // We should have already decided what to do if shrink_level=2 and not
+ // gotten here, and other values are invalid.
+ WASM_UNREACHABLE("bad shrink level");
}
- // Consider the cost of executing all the code unconditionally.
- auto total = CostAnalyzer(one).cost + CostAnalyzer(two).cost;
- return total >= TooCostlyToRunUnconditionally;
}
// As above, but a single expression that we are considering moving to a place
// where it executes unconditionally.
static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions,
Expression* curr) {
- if (passOptions.shrinkLevel) {
+ // If we care entirely about code size, just do it for that reason (early
+ // exit to avoid work).
+ if (passOptions.shrinkLevel >= 2) {
+ return false;
+ }
+ auto cost = CostAnalyzer(curr).cost;
+ return tooCostlyToRunUnconditionally(passOptions, cost);
+}
+
+// Check if it is not worth it to run code unconditionally. This
+// assumes we are trying to run two expressions where previously
+// only one of the two might have executed. We assume here that
+// executing both is good for code size.
+static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions,
+ Expression* one,
+ Expression* two) {
+ // If we care entirely about code size, just do it for that reason (early
+ // exit to avoid work).
+ if (passOptions.shrinkLevel >= 2) {
return false;
}
- return CostAnalyzer(curr).cost >= TooCostlyToRunUnconditionally;
+
+ // Consider the cost of executing all the code unconditionally, which adds
+ // either the cost of running one or two, so the maximum is the worst case.
+ auto max = std::max(CostAnalyzer(one).cost, CostAnalyzer(two).cost);
+ return tooCostlyToRunUnconditionally(passOptions, max);
}
struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {