summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author	Alon Zakai <azakai@google.com>	2022-09-09 08:53:25 -0700
committer	GitHub <noreply@github.com>	2022-09-09 08:53:25 -0700
commit	1e8eb596f82e438216b51972f90e10b4fc13b96a (patch)
tree	bc67784520f16d70eea19e89cb467a5f2c8d34d5
parent	d4d33b1e175c962548347c59339783c11d5d1a23 (diff)
download	binaryen-1e8eb596f82e438216b51972f90e10b4fc13b96a.tar.gz
	binaryen-1e8eb596f82e438216b51972f90e10b4fc13b96a.tar.bz2
	binaryen-1e8eb596f82e438216b51972f90e10b4fc13b96a.zip
OptimizeInstructions: Optimize comparisons with an added offset (#5025)
E.g. x + C1 > C2 ==> x > (C2-C1). We do need to be careful of overflows in either the add on the left or the proposed subtract on the right. In the latter case, we can at least do x + C1 > C2 ==> x + (C1-C2) > 0. Helps #5008 (but more patterns remain). Found by the superoptimizer (#4994). This was the top suggestion for Java and Dart.
-rw-r--r--	src/passes/OptimizeInstructions.cpp	100
-rw-r--r--	test/lit/passes/optimize-instructions.wast	357
2 files changed, 438 insertions, 19 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 74bfca899..7f0b28574 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -51,6 +51,13 @@
namespace wasm {
+static Index getBitsForType(Type type) {
+ if (!type.isNumber()) {
+ return -1;
+ }
+ return type.getByteSize() * 8;
+}
+
// Useful information about locals
struct LocalInfo {
static const Index kUnknown = Index(-1);
@@ -123,20 +130,6 @@ struct LocalScanner : PostWalker<LocalScanner> {
// define this for the templated getMaxBits method. we know nothing here yet
// about locals, so return the maxes
Index getMaxBitsForLocal(LocalGet* get) { return getBitsForType(get->type); }
-
- Index getBitsForType(Type type) {
- if (!type.isBasic()) {
- return -1;
- }
- switch (type.getBasic()) {
- case Type::i32:
- return 32;
- case Type::i64:
- return 64;
- default:
- return -1;
- }
- }
};
namespace {
@@ -495,6 +488,8 @@ struct OptimizeInstructions
}
{
// unsigned(x) >= 0 => i32(1)
+ // TODO: Use getDroppedChildrenAndAppend() here, so we can optimize even
+ // if pure.
Const* c;
Expression* x;
if (matches(curr, binary(GeU, pure(&x), ival(&c))) &&
@@ -1474,6 +1469,13 @@ struct OptimizeInstructions
}
}
+ // Appends a result after the dropped children, if we need them.
+ Expression* getDroppedChildrenAndAppend(Expression* curr,
+ Expression* result) {
+ return wasm::getDroppedChildrenAndAppend(
+ curr, *getModule(), getPassOptions(), result);
+ }
+
void visitRefEq(RefEq* curr) {
// The types may prove that the same reference cannot appear on both sides.
auto leftType = curr->left->type;
@@ -1494,8 +1496,7 @@ struct OptimizeInstructions
// reference can appear on both sides.
auto* result =
Builder(*getModule()).makeConst(Literal::makeZero(Type::i32));
- replaceCurrent(getDroppedChildrenAndAppend(
- curr, *getModule(), getPassOptions(), result));
+ replaceCurrent(getDroppedChildrenAndAppend(curr, result));
return;
}
@@ -1516,8 +1517,7 @@ struct OptimizeInstructions
if (areConsecutiveInputsEqualAndFoldable(curr->left, curr->right)) {
auto* result =
Builder(*getModule()).makeConst(Literal::makeOne(Type::i32));
- replaceCurrent(getDroppedChildrenAndAppend(
- curr, *getModule(), getPassOptions(), result));
+ replaceCurrent(getDroppedChildrenAndAppend(curr, result));
return;
}
@@ -3382,7 +3382,7 @@ private:
binary(DivSInt64, any(), i64(std::numeric_limits<int64_t>::min())))) {
curr->op = EqInt64;
curr->type = Type::i32;
- return Builder(*getModule()).makeUnary(ExtendUInt32, curr);
+ return builder.makeUnary(ExtendUInt32, curr);
}
// (unsigned)x < 0 ==> i32(0)
if (matches(curr, binary(LtU, pure(&left), ival(0)))) {
@@ -3567,9 +3567,71 @@ private:
return left;
}
}
+ // x + C1 > C2 ==> x > (C2-C1) if no overflowing, C2 >= C1
+ // x + C1 > C2 ==> x + (C1-C2) > 0 if no overflowing, C2 < C1
+ // And similarly for other relational operations on integers.
+ if (curr->isRelational()) {
+ Binary* add;
+ Const* c1;
+ Const* c2;
+ if ((matches(curr,
+ binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) ||
+ matches(curr,
+ binary(binary(&add, Add, any(), ival(&c1)), ival(&c2)))) &&
+ !canOverflow(add)) {
+ if (c2->value.geU(c1->value).getInteger()) {
+ // This is the first line above, we turn into x > (C2-C1)
+ c2->value = c2->value.sub(c1->value);
+ curr->left = add->left;
+ return curr;
+ }
+ // This is the second line above, we turn into x + (C1-C2) > 0. Other
+ // optimizations can often kick in later. However, we must rule out the
+ // case where C2 is already 0 (as then we would not actually change
+ // anything, and we could infinite loop).
+ auto zero = Literal::makeZero(c2->type);
+ if (c2->value != zero) {
+ c1->value = c1->value.sub(c2->value);
+ c2->value = zero;
+ return curr;
+ }
+ }
+ }
return nullptr;
}
+ // Returns true if the given binary operation can overflow. If we can't be
+ // sure either way, we return true, assuming the worst.
+ bool canOverflow(Binary* binary) {
+ using namespace Abstract;
+
+ // If we know nothing about a limit on the amount of bits on either side,
+ // give up.
+ auto typeMaxBits = getBitsForType(binary->type);
+ auto leftMaxBits = Bits::getMaxBits(binary->left, this);
+ auto rightMaxBits = Bits::getMaxBits(binary->right, this);
+ if (std::max(leftMaxBits, rightMaxBits) == typeMaxBits) {
+ return true;
+ }
+
+ if (binary->op == getBinary(binary->type, Add)) {
+ // Proof this cannot overflow:
+ //
+ // left + right < 2^leftMaxBits + 2^rightMaxBits (1)
+ // <= 2^(typeMaxBits-1) + 2^(typeMaxBits-1) (2)
+ // = 2^typeMaxBits (3)
+ //
+ // (1) By the definition of the max bits (e.g. an int32 has 32 max bits,
+ // and its max value is 2^32 - 1, which is < 2^32).
+ // (2) By the above checks and early returns.
+ // (3) 2^x + 2^x === 2*2^x === 2^(x+1)
+ return false;
+ }
+
+ // TODO subtraction etc.
+ return true;
+ }
+
// Folding two expressions into one with similar operations and
// constants on RHSs
Expression* optimizeDoubletonWithConstantOnRight(Binary* curr) {
diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast
index d8c72bb65..728b195de 100644
--- a/test/lit/passes/optimize-instructions.wast
+++ b/test/lit/passes/optimize-instructions.wast
@@ -15111,4 +15111,361 @@
)
)
)
+
+ ;; CHECK: (func $gt_u-added-constant (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.gt_u
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 6)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ne
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ne
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $gt_u-added-constant (param $x i32)
+ ;; x + C1 > C2 => x > (C2-C1), iff x+C1 and C2-C1 don't over/underflow
+ (drop
+ (i32.gt_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ ;; We can optimize even if the constants are equal.
+ (drop
+ (i32.gt_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 5)
+ )
+ )
+ ;; x + C1 > C2 => x + (C1-C2) > 0, iff x+C1 and C1-C2 don't over/underflow
+ ;; After doing that, further optimizations are possible here.
+ (drop
+ (i32.gt_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 6)
+ )
+ (i32.const 5)
+ )
+ )
+ )
+
+ ;; CHECK: (func $gt_u-added-constant-no (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.gt_u
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 5)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.gt_u
+ ;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -2147483648)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.gt_u
+ ;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $gt_u-added-constant-no (param $x i32)
+ ;; As above, but without the shr_u, A is big enough for a possible overflow,
+ ;; and we cannot optimize.
+ (drop
+ (i32.gt_u
+ (i32.add
+ (local.get $x)
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ ;; With the added constant too big, it might overflow, and we cannot
+ ;; optimize.
+ (drop
+ (i32.gt_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 0x80000000)
+ )
+ (i32.const 11)
+ )
+ )
+ (drop
+ (i32.gt_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 0xffffffff)
+ )
+ (i32.const 11)
+ )
+ )
+ )
+
+ ;; CHECK: (func $ge_u-added-constant (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ge_u
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 6)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $ge_u-added-constant (param $x i32)
+ ;; As above, but with ge rather than gt. We can optimize here.
+ (drop
+ (i32.ge_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ (drop
+ (i32.ge_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 5)
+ )
+ )
+ (drop
+ (i32.ge_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 6)
+ )
+ (i32.const 5)
+ )
+ )
+ )
+
+ ;; CHECK: (func $ge_u-added-constant-no (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ge_u
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 5)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ge_u
+ ;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -2147483648)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ge_u
+ ;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $ge_u-added-constant-no (param $x i32)
+ ;; As above, but with ge rather than gt. We cannot optimize here.
+ (drop
+ (i32.ge_u
+ (i32.add
+ (local.get $x)
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ (drop
+ (i32.ge_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 0x80000000)
+ )
+ (i32.const 11)
+ )
+ )
+ (drop
+ (i32.ge_u
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 0xffffffff)
+ )
+ (i32.const 11)
+ )
+ )
+ )
+
+ ;; CHECK: (func $eq-added-constant (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.eq
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 6)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $eq-added-constant (param $x i32)
+ ;; As above, but with eq rather than gt. We can optimize here.
+ (drop
+ (i32.eq
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ )
+
+ ;; CHECK: (func $ne-added-constant (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.ne
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 6)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $ne-added-constant (param $x i32)
+ ;; As above, but with ne rather than gt. We can optimize here.
+ (drop
+ (i32.ne
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ )
+
+ ;; CHECK: (func $lt-added-constant (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.lt_u
+ ;; CHECK-NEXT: (i32.shr_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 6)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $lt-added-constant (param $x i32)
+ ;; As above, but with lt_s rather than gt_u. We can optimize here.
+ (drop
+ (i32.lt_s
+ (i32.add
+ (i32.shr_u
+ (local.get $x)
+ (i32.const 1)
+ )
+ (i32.const 5)
+ )
+ (i32.const 11)
+ )
+ )
+ )
)