summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2023-11-08 07:54:03 -0800
committerGitHub <noreply@github.com>2023-11-08 07:54:03 -0800
commit9627c8360d179c2cae168f8bca3bf1b7216c34a8 (patch)
tree77f4eeb54f47b59f44d90e0c206dbbf40600f3b3
parent3640f9c992746867c5cf4cead11077e36a51eb7c (diff)
downloadbinaryen-9627c8360d179c2cae168f8bca3bf1b7216c34a8.tar.gz
binaryen-9627c8360d179c2cae168f8bca3bf1b7216c34a8.tar.bz2
binaryen-9627c8360d179c2cae168f8bca3bf1b7216c34a8.zip
LocalCSE: Do not optimize small things like global.get (#6087)
LocalCSE is nice for large expressions, but for small things it has always been of unclear benefit since VMs also do GVN/CSE anyhow. So we are likely not speeding anything up, but hopefully we are reducing code size at least. Doing LocalCSE on something small like a global.get is very possibly going to increase code size, however (since we add a tee, and since the local gets are of similar size to global gets - depends on LUB sizes). On real-world Java code that overhead is noticeable, so this PR makes us more careful, and we skip things of size 1 (no children).
-rw-r--r--src/passes/LocalCSE.cpp9
-rw-r--r--test/lit/passes/inlining-optimizing_optimize-level=3.wast68
-rw-r--r--test/lit/passes/local-cse.wast15
3 files changed, 45 insertions, 47 deletions
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp
index 9e92093e1..52346030c 100644
--- a/src/passes/LocalCSE.cpp
+++ b/src/passes/LocalCSE.cpp
@@ -334,13 +334,16 @@ struct Scanner
// and so adding one set+one get and removing one of the items itself
// is not detrimental, and may be beneficial.
// TODO: investigate size 2
- if (options.shrinkLevel > 0 && Measurer::measure(curr) >= 3) {
+ auto size = Measurer::measure(curr);
+ if (options.shrinkLevel > 0 && size >= 3) {
return true;
}
// If we focus on speed, any reduction in cost is beneficial, as the
- // cost of a get is essentially free.
- if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0) {
+ // cost of a get is essentially free. However, we need to balance that with
+ // the fact that the VM will also do CSE/GVN itself, so minor improvements
+ // are not worthwhile, so skip things of size 1 (like a global.get).
+ if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) {
return true;
}
diff --git a/test/lit/passes/inlining-optimizing_optimize-level=3.wast b/test/lit/passes/inlining-optimizing_optimize-level=3.wast
index ee891c3cd..fd646d5b4 100644
--- a/test/lit/passes/inlining-optimizing_optimize-level=3.wast
+++ b/test/lit/passes/inlining-optimizing_optimize-level=3.wast
@@ -299,13 +299,11 @@
;; CHECK-NEXT: (local $12 i32)
;; CHECK-NEXT: (local $13 i32)
;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (local.tee $4
- ;; CHECK-NEXT: (global.get $STACKTOP)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $STACKTOP)
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $STACKTOP
;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $4)
+ ;; CHECK-NEXT: (global.get $STACKTOP)
;; CHECK-NEXT: (i32.const 16)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -316,11 +314,12 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $abort)
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $6
+ ;; CHECK-NEXT: (global.get $STACKTOP)
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $STACKTOP
;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.tee $4
- ;; CHECK-NEXT: (global.get $STACKTOP)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $STACKTOP)
;; CHECK-NEXT: (i32.const 16)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -332,7 +331,7 @@
;; CHECK-NEXT: (call $abort)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
- ;; CHECK-NEXT: (local.get $4)
+ ;; CHECK-NEXT: (local.get $6)
;; CHECK-NEXT: (local.get $8)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $0
@@ -340,11 +339,12 @@
;; CHECK-NEXT: (i32.const 8)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $1
+ ;; CHECK-NEXT: (global.get $STACKTOP)
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $STACKTOP
;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.tee $1
- ;; CHECK-NEXT: (global.get $STACKTOP)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $STACKTOP)
;; CHECK-NEXT: (i32.const 224)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -361,13 +361,13 @@
;; CHECK-NEXT: (i32.const 120)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
+ ;; CHECK-NEXT: (local.set $4
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (i32.const 136)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
+ ;; CHECK-NEXT: (local.set $5
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (local.tee $3
;; CHECK-NEXT: (local.tee $7
@@ -393,14 +393,14 @@
;; CHECK-NEXT: (i32.const 4)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $6)
+ ;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.get $4)
+ ;; CHECK-NEXT: (local.get $6)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
@@ -444,7 +444,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: (if
;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.tee $6
+ ;; CHECK-NEXT: (local.tee $5
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (i32.const 48)
@@ -473,7 +473,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
;; CHECK-NEXT: (local.get $9)
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (local.get $4)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
;; CHECK-NEXT: (local.tee $12
@@ -482,7 +482,7 @@
;; CHECK-NEXT: (i32.const 28)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (local.get $4)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
;; CHECK-NEXT: (local.tee $11
@@ -491,10 +491,10 @@
;; CHECK-NEXT: (i32.const 20)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (local.get $4)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
- ;; CHECK-NEXT: (local.get $6)
+ ;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: (i32.const 80)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
@@ -505,7 +505,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (local.get $4)
;; CHECK-NEXT: (i32.const 80)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -547,7 +547,7 @@
;; CHECK-NEXT: (local.get $10)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
- ;; CHECK-NEXT: (local.get $6)
+ ;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store
@@ -586,7 +586,7 @@
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $STACKTOP
- ;; CHECK-NEXT: (local.get $4)
+ ;; CHECK-NEXT: (local.get $6)
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $STACKTOP
;; CHECK-NEXT: (local.get $8)
@@ -4080,13 +4080,11 @@
;; CHECK-NEXT: (local $44 i32)
;; CHECK-NEXT: (local $45 i32)
;; CHECK-NEXT: (local.set $13
- ;; CHECK-NEXT: (local.tee $5
- ;; CHECK-NEXT: (global.get $STACKTOP)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $STACKTOP)
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $STACKTOP
;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (global.get $STACKTOP)
;; CHECK-NEXT: (i32.const 624)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -5785,21 +5783,19 @@
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: (f64.store
- ;; CHECK-NEXT: (local.tee $5
- ;; CHECK-NEXT: (global.get $tempDoublePtr)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $tempDoublePtr)
;; CHECK-NEXT: (local.get $14)
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (global.get $tempDoublePtr)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $30
;; CHECK-NEXT: (if (result i32)
;; CHECK-NEXT: (i32.lt_s
;; CHECK-NEXT: (i32.load offset=4
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (global.get $tempDoublePtr)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
@@ -5844,14 +5840,12 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (f64.store
- ;; CHECK-NEXT: (local.tee $5
- ;; CHECK-NEXT: (global.get $tempDoublePtr)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $tempDoublePtr)
;; CHECK-NEXT: (local.get $14)
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (global.get $tempDoublePtr)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $7
@@ -5859,7 +5853,7 @@
;; CHECK-NEXT: (i32.lt_u
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (i32.load offset=4
- ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: (global.get $tempDoublePtr)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 2146435072)
;; CHECK-NEXT: )
diff --git a/test/lit/passes/local-cse.wast b/test/lit/passes/local-cse.wast
index a5e90adf1..c0e4c9b59 100644
--- a/test/lit/passes/local-cse.wast
+++ b/test/lit/passes/local-cse.wast
@@ -447,20 +447,17 @@
(global $other-glob (mut i32) (i32.const 1))
;; CHECK: (func $global
- ;; CHECK-NEXT: (local $0 i32)
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.tee $0
- ;; CHECK-NEXT: (global.get $glob)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $glob)
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (global.get $glob)
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $other-glob
;; CHECK-NEXT: (i32.const 100)
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (global.get $glob)
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $glob
;; CHECK-NEXT: (i32.const 200)
@@ -470,7 +467,11 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $global
- ;; We should optimize redundant global.get operations.
+ ;; We should not optimize redundant global.get operations: they are of size
+ ;; 1 (no children), and so we may end up increasing code size here for
+ ;; unclear benefit. The benefit is unclear since VMs already do GVN/CSE
+ ;; themselves, and so we focus on things of size 2 and above, where we
+ ;; definitely reduce code size at least.
(drop (global.get $glob))
(drop (global.get $glob))
;; We can do it past a write to another global