summaryrefslogtreecommitdiff
path: root/test/memorygrowth.fromasm.imprecise
diff options
context:
space:
mode:
author: Alon Zakai <alonzakai@gmail.com> 2019-03-01 10:28:07 -0800
committer: GitHub <noreply@github.com> 2019-03-01 10:28:07 -0800
commit: 689fe405a3417fbfd59456035add6f6f53149f35 (patch)
tree: d6f1dcaf0cbb85eb3ae830f68a46c9a6627d1562 /test/memorygrowth.fromasm.imprecise
parent: f59c3033e678ced61bc8c78e8ac9fbee31ef0210 (diff)
downloadbinaryen-689fe405a3417fbfd59456035add6f6f53149f35.tar.gz
binaryen-689fe405a3417fbfd59456035add6f6f53149f35.tar.bz2
binaryen-689fe405a3417fbfd59456035add6f6f53149f35.zip
Consistently optimize small added constants into load/store offsets (#1924)
See #1919 - we did not do this consistently before.

This adds a lowMemoryUnused option to PassOptions. It can be passed on the command line with --low-memory-unused. If enabled, we run the new optimize-added-constants pass, which does the real work here, replacing older code in post-emscripten.

Aside from running at the proper time (unlike the old pass, see #1919), this also has a -propagate mode, which can do stuff like this:

    y = x + 10
    [..]
    load(y)
    [..]
    load(y)
  =>
    y = x + 10
    [..]
    load(x, offset=10)
    [..]
    load(x, offset=10)

That is, it can propagate such offsets to the loads/stores. This pattern is common in big interpreter loops, where the pointers are offsets into a big struct of state.

The pass does this propagation by using a new feature of LocalGraph, which can verify which locals are in SSA mode. Binaryen IR is not SSA (intentionally, since it's a later IR), but if a local only has a single set for all gets, that means that local is in such a state, and can be optimized. The tricky thing is that all locals are initialized to zero, so there are at minimum two sets. But if we verify that the real set dominates all the gets, then the zero initialization cannot reach them, and we are safe.

This PR also makes safe-heap aware of lowMemoryUnused. If so, we check for not just an access of 0, but the range 0-1023.

This makes zlib 5% faster, with either the wasm backend or asm2wasm. It also makes it 0.5% smaller. Also helps sqlite (1.5% faster) and lua (1% faster).
Diffstat (limited to 'test/memorygrowth.fromasm.imprecise')
-rw-r--r--test/memorygrowth.fromasm.imprecise209
1 files changed, 85 insertions, 124 deletions
diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise
index 075614f71..2ffcd0c51 100644
--- a/test/memorygrowth.fromasm.imprecise
+++ b/test/memorygrowth.fromasm.imprecise
@@ -7685,9 +7685,7 @@
(local $13 i32)
(local $14 i32)
(local $15 i32)
- (local $16 i32)
- (local $17 i32)
- (local.set $12
+ (local.set $7
(global.get $r)
)
(global.set $r
@@ -7696,29 +7694,25 @@
(i32.const 48)
)
)
- (local.set $9
+ (local.set $11
(i32.add
- (local.get $12)
+ (local.get $7)
(i32.const 16)
)
)
+ (local.set $9
+ (local.get $7)
+ )
(i32.store
(local.tee $3
(i32.add
- (local.tee $10
- (local.get $12)
- )
+ (local.get $7)
(i32.const 32)
)
)
- (local.tee $11
- (i32.load
- (local.tee $7
- (i32.add
- (local.get $0)
- (i32.const 28)
- )
- )
+ (local.tee $10
+ (i32.load offset=28
+ (local.get $0)
)
)
)
@@ -7726,15 +7720,10 @@
(local.get $3)
(local.tee $8
(i32.sub
- (i32.load
- (local.tee $13
- (i32.add
- (local.get $0)
- (i32.const 20)
- )
- )
+ (i32.load offset=20
+ (local.get $0)
)
- (local.get $11)
+ (local.get $10)
)
)
)
@@ -7752,7 +7741,7 @@
(i32.const 60)
)
)
- (local.set $11
+ (local.set $10
(i32.add
(local.get $0)
(i32.const 44)
@@ -7786,24 +7775,24 @@
(local.get $0)
)
(i32.store
- (local.get $10)
+ (local.get $9)
(i32.load
(local.get $1)
)
)
(i32.store offset=4
- (local.get $10)
+ (local.get $9)
(local.get $5)
)
(i32.store offset=8
- (local.get $10)
+ (local.get $9)
(local.get $3)
)
(local.set $8
(call $Pa
(call $ya
(i32.const 146)
- (local.get $10)
+ (local.get $9)
)
)
)
@@ -7813,24 +7802,24 @@
(local.get $8)
)
(block (result i32)
- (i32.store
- (local.get $9)
+ (i32.store offset=16
+ (local.get $7)
(i32.load
(local.get $1)
)
)
(i32.store offset=4
- (local.get $9)
+ (local.get $11)
(local.get $5)
)
(i32.store offset=8
- (local.get $9)
+ (local.get $11)
(local.get $3)
)
(call $Pa
(call $ya
(i32.const 146)
- (local.get $9)
+ (local.get $11)
)
)
)
@@ -7850,10 +7839,10 @@
(i32.const 0)
)
(block
- (local.set $15
+ (local.set $13
(local.get $5)
)
- (local.set $16
+ (local.set $14
(local.get $3)
)
(local.set $1
@@ -7871,23 +7860,23 @@
(if
(i32.gt_u
(local.get $6)
- (local.tee $14
+ (local.tee $12
(i32.load offset=4
(local.get $5)
)
)
)
(block
- (i32.store
- (local.get $7)
+ (i32.store offset=28
+ (local.get $0)
(local.tee $4
(i32.load
- (local.get $11)
+ (local.get $10)
)
)
)
- (i32.store
- (local.get $13)
+ (i32.store offset=20
+ (local.get $0)
(local.get $4)
)
(local.set $4
@@ -7898,7 +7887,7 @@
(local.set $6
(i32.sub
(local.get $6)
- (local.get $14)
+ (local.get $12)
)
)
(local.set $5
@@ -7921,12 +7910,12 @@
(i32.const 2)
)
(block
- (i32.store
- (local.get $7)
+ (i32.store offset=28
+ (local.get $0)
(i32.add
(local.get $6)
- (i32.load
- (local.get $7)
+ (i32.load offset=28
+ (local.get $0)
)
)
)
@@ -7936,7 +7925,7 @@
)
)
(local.set $4
- (local.get $14)
+ (local.get $12)
)
)
)
@@ -7963,7 +7952,7 @@
)
)
(global.set $r
- (local.get $12)
+ (local.get $7)
)
(if (result i32)
(i32.eq
@@ -7976,7 +7965,7 @@
(i32.add
(local.tee $4
(i32.load
- (local.get $11)
+ (local.get $10)
)
)
(i32.load offset=48
@@ -7984,12 +7973,12 @@
)
)
)
- (i32.store
- (local.get $7)
+ (i32.store offset=28
+ (local.get $0)
(local.get $4)
)
- (i32.store
- (local.get $13)
+ (i32.store offset=20
+ (local.get $0)
(local.get $4)
)
(local.get $2)
@@ -8004,12 +7993,12 @@
(local.get $0)
(i32.const 0)
)
- (i32.store
- (local.get $7)
+ (i32.store offset=28
+ (local.get $0)
(i32.const 0)
)
- (i32.store
- (local.get $13)
+ (i32.store offset=20
+ (local.get $0)
(i32.const 0)
)
(i32.store
@@ -8026,16 +8015,16 @@
(i32.sub
(local.get $2)
(i32.load offset=4
- (local.get $15)
+ (local.get $13)
)
)
(i32.eq
- (local.get $16)
+ (local.get $14)
(i32.const 2)
)
)
)
- (local.get $17)
+ (local.get $15)
)
)
)
@@ -8569,26 +8558,13 @@
(local $1 i32)
(local $2 i32)
(local $3 i32)
- (local $4 i32)
- (local $5 i32)
- (local $6 i32)
(if
(i32.gt_u
- (i32.load
- (local.tee $3
- (i32.add
- (local.get $0)
- (i32.const 20)
- )
- )
+ (i32.load offset=20
+ (local.get $0)
)
- (i32.load
- (local.tee $4
- (i32.add
- (local.get $0)
- (i32.const 28)
- )
- )
+ (i32.load offset=28
+ (local.get $0)
)
)
(block
@@ -8609,8 +8585,8 @@
)
)
(if
- (i32.load
- (local.get $3)
+ (i32.load offset=20
+ (local.get $0)
)
(local.set $1
(i32.const 3)
@@ -8642,14 +8618,9 @@
)
)
)
- (local.tee $6
- (i32.load
- (local.tee $5
- (i32.add
- (local.get $0)
- (i32.const 8)
- )
- )
+ (local.tee $3
+ (i32.load offset=8
+ (local.get $0)
)
)
)
@@ -8658,7 +8629,7 @@
(local.get $0)
(i32.sub
(local.get $2)
- (local.get $6)
+ (local.get $3)
)
(i32.const 1)
(i32.add
@@ -8677,16 +8648,16 @@
(local.get $0)
(i32.const 0)
)
- (i32.store
- (local.get $4)
+ (i32.store offset=28
+ (local.get $0)
(i32.const 0)
)
- (i32.store
- (local.get $3)
+ (i32.store offset=20
+ (local.get $0)
(i32.const 0)
)
- (i32.store
- (local.get $5)
+ (i32.store offset=8
+ (local.get $0)
(i32.const 0)
)
(i32.store
@@ -9006,22 +8977,22 @@
(i32.add
(select
(i32.div_u
- (local.tee $2
+ (local.tee $1
(call $Wa
- (local.tee $1
- (local.tee $3
+ (local.tee $3
+ (local.tee $2
(call $Za)
)
)
(local.get $0)
)
)
- (local.get $3)
+ (local.get $2)
)
(i32.const 1)
(i32.ne
(local.get $1)
- (local.get $2)
+ (local.get $3)
)
)
(i32.const -1)
@@ -9040,13 +9011,8 @@
(if
(i32.lt_u
(local.tee $1
- (i32.load
- (local.tee $2
- (i32.add
- (local.get $0)
- (i32.const 20)
- )
- )
+ (i32.load offset=20
+ (local.get $0)
)
)
(i32.load offset=16
@@ -9054,8 +9020,8 @@
)
)
(block
- (i32.store
- (local.get $2)
+ (i32.store offset=20
+ (local.get $0)
(i32.add
(local.get $1)
(i32.const 1)
@@ -9156,8 +9122,7 @@
)
(func $Ua (; 26 ;) (; has Stack IR ;) (param $0 i32) (param $1 i32) (param $2 i32) (result i32)
(local $3 i32)
- (local $4 i32)
- (local.set $4
+ (local.set $3
(global.get $r)
)
(global.set $r
@@ -9167,9 +9132,7 @@
)
)
(i32.store
- (local.tee $3
- (local.get $4)
- )
+ (local.get $3)
(i32.load offset=60
(local.get $0)
)
@@ -9184,11 +9147,9 @@
)
(i32.store offset=12
(local.get $3)
- (local.tee $0
- (i32.add
- (local.get $3)
- (i32.const 20)
- )
+ (i32.add
+ (local.get $3)
+ (i32.const 20)
)
)
(i32.store offset=16
@@ -9207,19 +9168,19 @@
(i32.const 0)
)
(block (result i32)
- (i32.store
- (local.get $0)
+ (i32.store offset=20
+ (local.get $3)
(i32.const -1)
)
(i32.const -1)
)
- (i32.load
- (local.get $0)
+ (i32.load offset=20
+ (local.get $3)
)
)
)
(global.set $r
- (local.get $4)
+ (local.get $3)
)
(local.get $0)
)