diff options
author | Alon Zakai <alonzakai@gmail.com> | 2019-03-01 10:28:07 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-01 10:28:07 -0800 |
commit | 689fe405a3417fbfd59456035add6f6f53149f35 (patch) | |
tree | d6f1dcaf0cbb85eb3ae830f68a46c9a6627d1562 /test/passes/optimize-added-constants_low-memory-unused.txt | |
parent | f59c3033e678ced61bc8c78e8ac9fbee31ef0210 (diff) | |
download | binaryen-689fe405a3417fbfd59456035add6f6f53149f35.tar.gz binaryen-689fe405a3417fbfd59456035add6f6f53149f35.tar.bz2 binaryen-689fe405a3417fbfd59456035add6f6f53149f35.zip |
Consistently optimize small added constants into load/store offsets (#1924)
See #1919 - we did not do this consistently before.
This adds a lowMemoryUnused option to PassOptions. It can be passed on the command line with --low-memory-unused. If it is enabled, we run the new optimize-added-constants pass, which does the real work here and replaces older code in post-emscripten.
Aside from running at the proper time (unlike the old pass, see #1919), the new pass also has a -propagate mode, which can perform transformations like this:
y = x + 10
[..]
load(y)
[..]
load(y)
=>
y = x + 10
[..]
load(x, offset=10)
[..]
load(x, offset=10)
That is, it can propagate such offsets to the loads/stores. This pattern is common in big interpreter loops, where the pointers are offsets into a big struct of state.
The pass does this propagation by using a new feature of LocalGraph, which can verify which locals are in SSA mode. Binaryen IR is not SSA (intentionally, since it's a later IR), but if a local only has a single set for all gets, that means that local is in such a state, and can be optimized. The tricky thing is that all locals are initialized to zero, so there are at minimum two sets. But if we verify that the real set dominates all the gets, then the zero initialization cannot reach them, and we are safe.
This PR also makes safe-heap aware of lowMemoryUnused. If that option is set, we check for not just an access of address 0, but an access anywhere in the range 0-1023.
This makes zlib 5% faster, with either the wasm backend or asm2wasm. It also makes it 0.5% smaller. It also helps sqlite (1.5% faster) and lua (1% faster).
Diffstat (limited to 'test/passes/optimize-added-constants_low-memory-unused.txt')
-rw-r--r-- | test/passes/optimize-added-constants_low-memory-unused.txt | 383 |
1 files changed, 383 insertions, 0 deletions
diff --git a/test/passes/optimize-added-constants_low-memory-unused.txt b/test/passes/optimize-added-constants_low-memory-unused.txt new file mode 100644 index 000000000..81f2ba216 --- /dev/null +++ b/test/passes/optimize-added-constants_low-memory-unused.txt @@ -0,0 +1,383 @@ +(module + (type $0 (func)) + (type $1 (func (param i32))) + (type $2 (func (param i32) (result i32))) + (func $consts (; 0 ;) (type $0) + (drop + (i32.load + (i32.const 0) + ) + ) + (drop + (i32.load + (i32.const 1) + ) + ) + (drop + (i32.load + (i32.const 1023) + ) + ) + (drop + (i32.load + (i32.const 1024) + ) + ) + (drop + (i32.load + (i32.const 0) + ) + ) + (drop + (i32.load + (i32.const 1) + ) + ) + (drop + (i32.load + (i32.const 1023) + ) + ) + (drop + (i32.load + (i32.const 1024) + ) + ) + (drop + (i32.load + (i32.const 1023) + ) + ) + (drop + (i32.load + (i32.const 1024) + ) + ) + (i32.store + (i32.const 1) + (i32.const 1) + ) + ) + (func $offsets (; 1 ;) (type $1) (param $x i32) + (drop + (i32.load offset=1 + (local.get $x) + ) + ) + (drop + (i32.load offset=8 + (local.get $x) + ) + ) + (drop + (i32.load offset=1023 + (local.get $x) + ) + ) + (drop + (i32.load + (i32.add + (local.get $x) + (i32.const 1024) + ) + ) + ) + (drop + (i32.load + (i32.add + (local.get $x) + (i32.const 2048) + ) + ) + ) + (drop + (i32.load offset=4 + (local.get $x) + ) + ) + ) + (func $load-off-2 (; 2 ;) (type $2) (param $0 i32) (result i32) + (i32.store + (i32.const 6) + (local.get $0) + ) + (i32.store + (i32.const 6) + (local.get $0) + ) + (i32.store offset=7 + (local.get $0) + (local.get $0) + ) + (i32.store offset=9 + (local.get $0) + (local.get $0) + ) + (i32.store offset=2 + (i32.add + (i32.const -11) + (local.get $0) + ) + (local.get $0) + ) + (i32.store offset=2 + (i32.add + (local.get $0) + (i32.const -13) + ) + (local.get $0) + ) + (i32.store offset=19 + (i32.const -15) + (local.get $0) + ) + (i32.store offset=21 + (i32.const -21) + (local.get $0) + ) + (i32.store + (i32.const 25) + (local.get $0) 
+ ) + (i32.store + (i32.const -23) + (local.get $0) + ) + (drop + (i32.load + (i32.const 8) + ) + ) + (drop + (i32.load + (i32.const 8) + ) + ) + (drop + (i32.load offset=8 + (local.get $0) + ) + ) + (drop + (i32.load + (i32.const 10) + ) + ) + (i32.load offset=12 + (local.get $0) + ) + ) + (func $offset-constant (; 3 ;) (type $0) + (drop + (i32.load + (i32.const 10) + ) + ) + (drop + (i32.load + (i32.const 10) + ) + ) + (drop + (i32.load + (i32.const 20) + ) + ) + (drop + (i32.load + (i32.const 1024) + ) + ) + (drop + (i32.load + (i32.const 1023) + ) + ) + (drop + (i32.load + (i32.const 1023) + ) + ) + (drop + (i32.load + (i32.const 100511) + ) + ) + ) + (func $offset-propagate-param (; 4 ;) (type $1) (param $x i32) + (local $y i32) + (local.set $x + (i32.add + (local.get $y) + (i32.const 1) + ) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func $offset-propagate (; 5 ;) (type $0) + (local $x i32) + (local $y i32) + (local.set $x + (i32.add + (local.get $y) + (i32.const 1) + ) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func $offset-propagate2 (; 6 ;) (type $0) + (local $x i32) + (local $y i32) + (local.set $x + (i32.add + (local.get $y) + (i32.add + (i32.const 1) + (i32.const 2) + ) + ) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func $offset-propagate3 (; 7 ;) (type $0) + (local $x i32) + (local $y i32) + (local.set $x + (i32.add + (i32.const 1) + (local.get $y) + ) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func $offset-propagate4 (; 8 ;) (type $0) + (local $x i32) + (local $y i32) + (local.set $y + (i32.const -1) + ) + (local.set $x + (i32.add + (i32.const 1) + (local.get $y) + ) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func $offset-propagate5 (; 9 ;) (type $1) (param $z i32) + (local $x i32) + (local $y i32) + (if + (local.get $z) + (local.set $y + (i32.const -1) + ) + ) + (local.set $x + (i32.add + (i32.const 1) + (local.get $y) + ) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func 
$offset-propagate6 (; 10 ;) (type $1) (param $z i32) + (local $x i32) + (local $y i32) + (local.set $y + (local.get $z) + ) + (local.set $x + (i32.add + (i32.const 1) + (local.get $y) + ) + ) + (local.set $y + (i32.const -2) + ) + (drop + (i32.load + (local.get $x) + ) + ) + ) + (func $offset-realistic (; 11 ;) (type $1) (param $ptr i32) + (local $x i32) + (local $y i32) + (local $z i32) + (local.set $x + (i32.add + (local.get $ptr) + (i32.const 8) + ) + ) + (local.set $y + (i32.add + (local.get $ptr) + (i32.const 16) + ) + ) + (local.set $z + (i32.add + (local.get $ptr) + (i32.const 24) + ) + ) + (loop $l + (call $offset-realistic + (i32.load + (local.get $x) + ) + ) + (call $offset-realistic + (i32.load + (local.get $y) + ) + ) + (call $offset-realistic + (i32.load + (local.get $y) + ) + ) + (i32.store + (local.get $z) + (i32.add + (i32.load + (local.get $z) + ) + (i32.const 1) + ) + ) + (br_if $l + (i32.load + (local.get $z) + ) + ) + ) + ) +) |