diff options
author | Alon Zakai <azakai@google.com> | 2022-09-09 08:53:25 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-09 08:53:25 -0700 |
commit | 1e8eb596f82e438216b51972f90e10b4fc13b96a (patch) | |
tree | bc67784520f16d70eea19e89cb467a5f2c8d34d5 /test/lit/passes/optimize-instructions.wast | |
parent | d4d33b1e175c962548347c59339783c11d5d1a23 (diff) | |
download | binaryen-1e8eb596f82e438216b51972f90e10b4fc13b96a.tar.gz binaryen-1e8eb596f82e438216b51972f90e10b4fc13b96a.tar.bz2 binaryen-1e8eb596f82e438216b51972f90e10b4fc13b96a.zip |
OptimizeInstructions: Optimize comparisons with an added offset (#5025)
E.g.
x + C1 > C2 ==> x > (C2-C1)
We do need to be careful of overflows in either the add on the left or
the proposed subtract on the right. In the latter case, we can at least do
x + C1 > C2 ==> x + (C1-C2) > 0
Helps #5008 (but more patterns remain).
Found by the superoptimizer #4994. This was the top suggestion for Java and Dart.
Diffstat (limited to 'test/lit/passes/optimize-instructions.wast')
-rw-r--r-- | test/lit/passes/optimize-instructions.wast | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast
index d8c72bb65..728b195de 100644
--- a/test/lit/passes/optimize-instructions.wast
+++ b/test/lit/passes/optimize-instructions.wast
@@ -15111,4 +15111,361 @@
       )
     )
   )
+
+  ;; CHECK:      (func $gt_u-added-constant (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.gt_u
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 6)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ne
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ne
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const -1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gt_u-added-constant (param $x i32)
+    ;; x + C1 > C2  =>  x > (C2-C1), iff x+C1 and C2-C1 don't over/underflow
+    (drop
+      (i32.gt_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+    ;; We can optimize even if the constants are equal.
+    (drop
+      (i32.gt_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 5)
+      )
+    )
+    ;; x + C1 > C2  =>  x + (C1-C2) > 0, iff x+C1 and C1-C2 don't over/underflow
+    ;; After doing that, further optimizations are possible here.
+    (drop
+      (i32.gt_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 6)
+        )
+        (i32.const 5)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $gt_u-added-constant-no (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.gt_u
+  ;; CHECK-NEXT:    (i32.add
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 5)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 11)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.gt_u
+  ;; CHECK-NEXT:    (i32.sub
+  ;; CHECK-NEXT:     (i32.shr_u
+  ;; CHECK-NEXT:      (local.get $x)
+  ;; CHECK-NEXT:      (i32.const 1)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const -2147483648)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 11)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.gt_u
+  ;; CHECK-NEXT:    (i32.sub
+  ;; CHECK-NEXT:     (i32.shr_u
+  ;; CHECK-NEXT:      (local.get $x)
+  ;; CHECK-NEXT:      (i32.const 1)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 11)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gt_u-added-constant-no (param $x i32)
+    ;; As above, but without the shr_u, x can use all 32 bits, so the add may
+    ;; overflow, and we cannot optimize.
+    (drop
+      (i32.gt_u
+        (i32.add
+          (local.get $x)
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+    ;; With the added constant too big, it might overflow, and we cannot
+    ;; optimize.
+    (drop
+      (i32.gt_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 0x80000000)
+        )
+        (i32.const 11)
+      )
+    )
+    (drop
+      (i32.gt_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 0xffffffff)
+        )
+        (i32.const 11)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $ge_u-added-constant (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ge_u
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 6)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 1)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 1)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $ge_u-added-constant (param $x i32)
+    ;; As above, but with ge rather than gt. We can optimize here.
+    (drop
+      (i32.ge_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+    (drop
+      (i32.ge_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 5)
+      )
+    )
+    (drop
+      (i32.ge_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 6)
+        )
+        (i32.const 5)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $ge_u-added-constant-no (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ge_u
+  ;; CHECK-NEXT:    (i32.add
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 5)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 11)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ge_u
+  ;; CHECK-NEXT:    (i32.sub
+  ;; CHECK-NEXT:     (i32.shr_u
+  ;; CHECK-NEXT:      (local.get $x)
+  ;; CHECK-NEXT:      (i32.const 1)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const -2147483648)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 11)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ge_u
+  ;; CHECK-NEXT:    (i32.sub
+  ;; CHECK-NEXT:     (i32.shr_u
+  ;; CHECK-NEXT:      (local.get $x)
+  ;; CHECK-NEXT:      (i32.const 1)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 11)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $ge_u-added-constant-no (param $x i32)
+    ;; As above, but with ge rather than gt. We cannot optimize here.
+    (drop
+      (i32.ge_u
+        (i32.add
+          (local.get $x)
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+    (drop
+      (i32.ge_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 0x80000000)
+        )
+        (i32.const 11)
+      )
+    )
+    (drop
+      (i32.ge_u
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 0xffffffff)
+        )
+        (i32.const 11)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $eq-added-constant (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.eq
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 6)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $eq-added-constant (param $x i32)
+    ;; As above, but with eq rather than gt. We can optimize here.
+    (drop
+      (i32.eq
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $ne-added-constant (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.ne
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 6)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $ne-added-constant (param $x i32)
+    ;; As above, but with ne rather than gt. We can optimize here.
+    (drop
+      (i32.ne
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $lt-added-constant (param $x i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.lt_s
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (local.get $x)
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 6)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $lt-added-constant (param $x i32)
+    ;; As above, but with lt_s rather than gt_u. We can optimize here.
+    (drop
+      (i32.lt_s
+        (i32.add
+          (i32.shr_u
+            (local.get $x)
+            (i32.const 1)
+          )
+          (i32.const 5)
+        )
+        (i32.const 11)
+      )
+    )
+  )
 )