2 files changed, 398 insertions, 14 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 69a3a1fdb..ec714c1e1 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -58,6 +58,22 @@ static Index getBitsForType(Type type) {
   return type.getByteSize() * 8;
 }
 
+static bool isSignedOp(BinaryOp op) {
+  switch (op) {
+    case LtSInt32:
+    case LeSInt32:
+    case GtSInt32:
+    case GeSInt32:
+    case LtSInt64:
+    case LeSInt64:
+    case GtSInt64:
+    case GeSInt64:
+      return true;
+    default:
+      return false;
+  }
+}
+
 // Useful information about locals
 struct LocalInfo {
   static const Index kUnknown = Index(-1);
@@ -4070,28 +4086,97 @@ private:
       // x + C1 > C2   ==>  x + (C1-C2) > 0  if no overflowing, C2 <  C1
       // And similarly for other relational operations on integers with a "+"
       // on the left.
+      // TODO: support - and not just +
       {
         Binary* add;
         Const* c1;
         Const* c2;
-        if ((matches(curr,
-                     binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) ||
-             matches(curr,
-                     binary(binary(&add, Add, any(), ival(&c1)), ival(&c2)))) &&
+        if (matches(curr,
+                    binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) &&
             !canOverflow(add)) {
-          if (c2->value.geU(c1->value).getInteger()) {
-            // This is the first line above, we turn into x > (C2-C1)
-            c2->value = c2->value.sub(c1->value);
+          // We want to subtract C2-C1 or C1-C2. When doing so, we must avoid an
+          // overflow in that subtraction (so that we keep all the math here
+          // properly linear in the mathematical sense). Overflows that concern
+          // us include an underflow with unsigned values (e.g. 10 - 20, which
+          // flips the result to a large positive number), and a sign bit
+          // overflow for signed values (e.g. 0x80000000 - 1 = 0x7fffffff flips
+          // from a negative number, -1, to a positive one). We also need to be
+          // careful of signed handling of 0x80000000, for whom 0 - 0x80000000
+          // is equal to 0x80000000, leading to
+          //   x + 0x80000000 > 0                ;; always false
+          // (apply the rule)
+          //   x > 0 - 0x80000000 = 0x80000000   ;; depends on x
+          // The general principle in all of this is that when we go from
+          //   (a)    x + C1 > C2
+          // to
+          //   (b)    x      > (C2-C1)
+          // then we want to adjust both sides in the same (linear) manner. That
+          // is, we can write the latter as
+          //   (b')   x + 0  > (C2-C1)
+          // Comparing (a) and (b'), we want the constants to change in a
+          // consistent way: C1 changes to 0, and C2 changes to C2-C1. Both
+          // transformations should decrease the value, which is violated in all
+          // the overflows described above:
+          //   * Unsigned overflow: C1=20, C2=10, then C1 decreases but C2-C1
+          //     is larger than C2.
+          //   * Sign flip: C1=1, C2=0x80000000, then C1 decreases but C2-C1 is
+          //     is larger than C2.
+          //   * C1=0x80000000, C2=0, then C1 increases while C2-C1 stays the
+          //     same.
+          // In the first and second case we can apply the other rule using
+          // C1-C2 rather than C2-C1. The third case, however, doesn't even work
+          // that way.
+          auto C1 = c1->value;
+          auto C2 = c2->value;
+          auto C1SubC2 = C1.sub(C2);
+          auto C2SubC1 = C2.sub(C1);
+          auto zero = Literal::makeZero(add->type);
+          auto doC1SubC2 = false;
+          auto doC2SubC1 = false;
+          // Ignore the case of C1 or C2 being zero, as then C2-C1 or C1-C2
+          // does not change anything (and we don't want the optimizer to think
+          // we improved anything, or we could infinite loop on the mirage of
+          // progress).
+          if (C1 != zero && C2 != zero) {
+            if (isSignedOp(curr->op)) {
+              if (C2SubC1.leS(C2).getInteger() && zero.leS(C1).getInteger()) {
+                // C2=>C2-C1 and C1=>0 both decrease, which means we can do the
+                // rule
+                //   (a)    x + C1   > C2
+                //   (b')   x (+ 0)  > (C2-C1)
+                // That is, subtracting C1 from both sides is ok; the constants
+                // on both sides change in the same manner.
+                doC2SubC1 = true;
+              } else if (C1SubC2.leS(C1).getInteger() &&
+                         zero.leS(C2).getInteger()) {
+                // N.B. this code path is not tested atm as other optimizations
+                // will canonicalize x + C into x - C, and so we would need to
+                // implement the TODO above on subtraction and not only support
+                // addition here.
+                doC1SubC2 = true;
+              }
+            } else {
+              // Unsigned.
+              if (C2SubC1.leU(C2).getInteger() && zero.leU(C1).getInteger()) {
+                doC2SubC1 = true;
+              } else if (C1SubC2.leU(C1).getInteger() &&
+                         zero.leU(C2).getInteger()) {
+                doC1SubC2 = true;
+              }
+              // For unsigned, one of the cases must work out, as there are no
+              // corner cases with the sign bit.
+              assert(doC2SubC1 || doC1SubC2);
+            }
+          }
+          if (doC2SubC1) {
+            // This is the first line above, we turn into x > (C2-C1).
+            c2->value = C2SubC1;
             curr->left = add->left;
             return curr;
           }
-          // This is the second line above, we turn into x + (C1-C2) > 0. Other
-          // optimizations can often kick in later. However, we must rule out
-          // the case where C2 is already 0 (as then we would not actually
-          // change anything, and we could infinite loop).
-          auto zero = Literal::makeZero(c2->type);
-          if (c2->value != zero) {
-            c1->value = c1->value.sub(c2->value);
+          // This is the second line above, we turn into x + (C1-C2) > 0.
+          if (doC1SubC2) {
+            c1->value = C1SubC2;
             c2->value = zero;
             return curr;
           }
diff --git a/test/lit/passes/optimize-instructions-mvp.wast b/test/lit/passes/optimize-instructions-mvp.wast
index 8a86e259f..db864087d 100644
--- a/test/lit/passes/optimize-instructions-mvp.wast
+++ b/test/lit/passes/optimize-instructions-mvp.wast
@@ -16594,4 +16594,303 @@
       )
     )
   )
+
+  ;; CHECK:      (func $skip-added-constants-overflow (result i32)
+  ;; CHECK-NEXT:  (i32.ge_s
+  ;; CHECK-NEXT:   (i32.add
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (i32.load
+  ;; CHECK-NEXT:      (i32.const 0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const -2147483648)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-overflow (result i32)
+    ;; If we subtracted 0x80000000 - 1 we'd get something that changes sign if
+    ;; the comparison is signed (as the sign bit is no longer set). To avoid
+    ;; that we skip optimizing cases where subtracting the constants might lead
+    ;; to an overflow.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 1)
+      )
+      (i32.const 0x80000000)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-overflow-unsigned (result i32)
+  ;; CHECK-NEXT:  (i32.ge_u
+  ;; CHECK-NEXT:   (i32.shr_u
+  ;; CHECK-NEXT:    (i32.load
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 2147483647)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-overflow-unsigned (result i32)
+    ;; As above, but unsigned. This is ok for us to optimize.
+    (i32.ge_u
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 1)
+      )
+      (i32.const 0x80000000)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-zero (result i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.shr_u
+  ;; CHECK-NEXT:    (i32.load
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (i32.const 1)
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-zero (result i32)
+    ;; A zero in either constant means we should not optimize using an added
+    ;; constant. However, other optimizations kick in here, as adding zero does
+    ;; nothing, and we end up with [max 31 bits] >=_s MIN_INT which is true.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 0)
+      )
+      (i32.const 0x80000000)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-zero-a (result i32)
+  ;; CHECK-NEXT:  (i32.ge_s
+  ;; CHECK-NEXT:   (i32.sub
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (i32.load
+  ;; CHECK-NEXT:      (i32.const 0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const -2147483648)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-zero-a (result i32)
+    ;; A zero in the last constant means we should not optimize.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 0x80000000)
+      )
+      (i32.const 0)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-zero-b (result i32)
+  ;; CHECK-NEXT:  (i32.ge_u
+  ;; CHECK-NEXT:   (i32.add
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (i32.load
+  ;; CHECK-NEXT:      (i32.const 0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-zero-b (result i32)
+    ;; Parallel case to the above, with a zero in the added constant. We do not
+    ;; optimize.
+    (i32.ge_u
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 1)
+      )
+      (i32.const 0)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-negative (result i32)
+  ;; CHECK-NEXT:  (i32.ge_s
+  ;; CHECK-NEXT:   (i32.sub
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (i32.load
+  ;; CHECK-NEXT:      (i32.const 0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const -20)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-negative (result i32)
+    ;; Reasonable negative constants can be optimized. But the add is
+    ;; canoncalized into a sub, and atm we do not optimize such added constants.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const -10)
+      )
+      (i32.const -20)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-negative-flip (result i32)
+  ;; CHECK-NEXT:  (i32.ge_s
+  ;; CHECK-NEXT:   (i32.sub
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (i32.load
+  ;; CHECK-NEXT:      (i32.const 0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const -10)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-negative-flip (result i32)
+    ;; As above, but flipped. The add is canoncalized into a sub, and atm we do
+    ;; not optimize such added constants.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const -20)
+      )
+      (i32.const -10)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-mix (result i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.shr_u
+  ;; CHECK-NEXT:    (i32.load
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (i32.const 1)
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-mix (result i32)
+    ;; A case of one negative and one positive constant. Here we have
+    ;; [max 31 bits] + 10 >=_s -20 which is always true.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 10)
+      )
+      (i32.const -20)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-mix-flip (result i32)
+  ;; CHECK-NEXT:  (i32.ge_s
+  ;; CHECK-NEXT:   (i32.sub
+  ;; CHECK-NEXT:    (i32.shr_u
+  ;; CHECK-NEXT:     (i32.load
+  ;; CHECK-NEXT:      (i32.const 0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (i32.const 1)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-mix-flip (result i32)
+    ;; As above, but with sign flipped. The add is canoncalized into a sub, and
+    ;; atm we do not optimize such added constants.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const -20)
+      )
+      (i32.const 10)
+    )
+  )
+
+  ;; CHECK:      (func $skip-added-constants-mix-flip-other (result i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.shr_u
+  ;; CHECK-NEXT:    (i32.load
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (i32.const 1)
+  ;; CHECK-NEXT: )
+  (func $skip-added-constants-mix-flip-other (result i32)
+    ;; As above, but with the sign the same while the absolute values are
+    ;; flipped. Here we have [max 31 bits] + 20 >=_s -10 which is always true.
+    (i32.ge_s
+      (i32.add
+        (i32.shr_u
+          (i32.load
+            (i32.const 0)
+          )
+          (i32.const 1)
+        )
+        (i32.const 20)
+      )
+      (i32.const -10)
+    )
+  )
 )