summaryrefslogtreecommitdiff
path: root/test/lit/passes
diff options
context:
space:
mode:
author: Alon Zakai <azakai@google.com> 2023-10-03 16:39:12 -0700
committer: GitHub <noreply@github.com> 2023-10-03 16:39:12 -0700
commit: b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8 (patch)
tree: 52f962128c9fe6c1b8cc8b847e5af40f33117c40 /test/lit/passes
parent: 24779b2a3fe5e5c7cc6b1da3661d346cd9c129ae (diff)
download: binaryen-b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8.tar.gz
binaryen-b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8.tar.bz2
binaryen-b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8.zip
RemoveUnusedBrs: Allow less unconditional work and in particular division (#5989)
Fixes #5983: The testcase from there is used here in a new testcase, remove-unused-brs_levels, in which we check whether we are willing to unconditionally do a division operation. Turning an if with an arm that does a division into a select, which always does the division, is almost 5x slower, so we should probably be extremely careful about doing that. I took some measurements and have some suggestions for changes in this PR: * Raise the cost of div/rem to what I measure on my machine, which is 5x slower than an add, or worse. * For some reason we added the if arms rather than taking the max of them, so fix that. This does not help the issue, but was confusing. * Adjust TooCostlyToRunUnconditionally in the pass from 9 to 8 (this helps balance the last point). * Use half that value when not optimizing for size. That is, we allow only 4 units of extra unconditional work normally, 8 in -Os, and in -Oz we allow any extra amount. Aside from the new testcases, some existing ones changed. They all appear to change in a reasonable way, to me. We should perhaps go even further than this, and not even run a division unconditionally in -Os, but I wasn't sure it makes sense to go that far as other benchmarks may be affected. For now, this makes the benchmark in #5983 run at full speed in -O3 or -Os, and it remains slow in -Oz. The modified version of the benchmark that only divides in the if (no other operations) is still fast in -O3, but it becomes slow in -Os as we do turn that if into a select (but again, I didn't want to go so far as to overfit on that one benchmark).
Diffstat (limited to 'test/lit/passes')
-rw-r--r-- test/lit/passes/remove-unused-brs-gc.wast 22
-rw-r--r-- test/lit/passes/remove-unused-brs.wast 10
-rw-r--r-- test/lit/passes/remove-unused-brs_levels.wast 129
3 files changed, 142 insertions, 19 deletions
diff --git a/test/lit/passes/remove-unused-brs-gc.wast b/test/lit/passes/remove-unused-brs-gc.wast
index 53100cc91..7a620193e 100644
--- a/test/lit/passes/remove-unused-brs-gc.wast
+++ b/test/lit/passes/remove-unused-brs-gc.wast
@@ -655,22 +655,20 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (select (result (ref null $struct))
- ;; CHECK-NEXT: (block (result (ref null $struct))
- ;; CHECK-NEXT: (block $something (result (ref null $struct))
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (block (result nullref)
- ;; CHECK-NEXT: (br_on_non_null $something
- ;; CHECK-NEXT: (local.get $struct)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (ref.null none)
+ ;; CHECK-NEXT: (if (result (ref null $struct))
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (block $something (result (ref null $struct))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result nullref)
+ ;; CHECK-NEXT: (br_on_non_null $something
+ ;; CHECK-NEXT: (local.get $struct)
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (ref.null none)
- ;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
@@ -716,6 +714,8 @@
)
)
)
+ ;; We do not selectify here because the amount of work in the if is
+ ;; significant (there is a cast and a branch).
(drop
(if (result anyref)
(local.get $x)
diff --git a/test/lit/passes/remove-unused-brs.wast b/test/lit/passes/remove-unused-brs.wast
index 82963f792..93cf4cbd2 100644
--- a/test/lit/passes/remove-unused-brs.wast
+++ b/test/lit/passes/remove-unused-brs.wast
@@ -31,10 +31,7 @@
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: (i32.lt_u
;; CHECK-NEXT: (i32.sub
- ;; CHECK-NEXT: (i32.or
- ;; CHECK-NEXT: (local.get $0)
- ;; CHECK-NEXT: (i32.const 32)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (i32.const 97)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 6)
@@ -60,10 +57,7 @@
(i32.const 1)
(i32.lt_u
(i32.sub
- (i32.or
- (local.get $0)
- (i32.const 32)
- )
+ (local.get $0)
(i32.const 97)
)
(i32.const 6)
diff --git a/test/lit/passes/remove-unused-brs_levels.wast b/test/lit/passes/remove-unused-brs_levels.wast
new file mode 100644
index 000000000..f4737627e
--- /dev/null
+++ b/test/lit/passes/remove-unused-brs_levels.wast
@@ -0,0 +1,129 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=0 -o - | filecheck %s --check-prefix=SHRINK_0
+;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=1 -o - | filecheck %s --check-prefix=SHRINK_1
+;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=2 -o - | filecheck %s --check-prefix=SHRINK_2
+
+
+(module
+ ;; SHRINK_0: (func $selectify-division (type $0) (param $x i32) (result i32)
+ ;; SHRINK_0-NEXT: (if (result i32)
+ ;; SHRINK_0-NEXT: (i32.eq
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: (i32.const 53498923)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (i32.div_s
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: (i32.const 13)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_1: (func $selectify-division (type $0) (param $x i32) (result i32)
+ ;; SHRINK_1-NEXT: (select
+ ;; SHRINK_1-NEXT: (i32.div_s
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: (i32.const 13)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: (i32.eq
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: (i32.const 53498923)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_2: (func $selectify-division (type $0) (param $x i32) (result i32)
+ ;; SHRINK_2-NEXT: (select
+ ;; SHRINK_2-NEXT: (i32.div_s
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: (i32.const 13)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: (i32.eq
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: (i32.const 53498923)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ (func $selectify-division (param $x i32) (result i32)
+ ;; See #5983: this if, if turned into a select, becomes almost 5x slower.
+ ;; We only want to selectify here when the shrink level is 1 or 2.
+ (if (result i32)
+ (i32.eq
+ (local.get $x)
+ (i32.const 53498923)
+ )
+ (i32.div_s
+ (local.get $x)
+ (i32.const 13)
+ )
+ (local.get $x)
+ )
+ )
+
+ ;; SHRINK_0: (func $selectify-division2 (type $0) (param $x i32) (result i32)
+ ;; SHRINK_0-NEXT: (if (result i32)
+ ;; SHRINK_0-NEXT: (i32.eq
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: (i32.const 53498923)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (i32.div_s
+ ;; SHRINK_0-NEXT: (i32.div_s
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: (i32.const 13)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (i32.const 13)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_1: (func $selectify-division2 (type $0) (param $x i32) (result i32)
+ ;; SHRINK_1-NEXT: (if (result i32)
+ ;; SHRINK_1-NEXT: (i32.eq
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: (i32.const 53498923)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (i32.div_s
+ ;; SHRINK_1-NEXT: (i32.div_s
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: (i32.const 13)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (i32.const 13)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_2: (func $selectify-division2 (type $0) (param $x i32) (result i32)
+ ;; SHRINK_2-NEXT: (select
+ ;; SHRINK_2-NEXT: (i32.div_s
+ ;; SHRINK_2-NEXT: (i32.div_s
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: (i32.const 13)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: (i32.const 13)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: (i32.eq
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: (i32.const 53498923)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ (func $selectify-division2 (param $x i32) (result i32)
+ ;; As above, but now only with a shrink level of 2 should we selectify, as
+ ;; there are two divisions.
+ (if (result i32)
+ (i32.eq
+ (local.get $x)
+ (i32.const 53498923)
+ )
+ (i32.div_s
+ (i32.div_s
+ (local.get $x)
+ (i32.const 13)
+ )
+ (i32.const 13)
+ )
+ (local.get $x)
+ )
+ )
+)