summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/OptimizeInstructions.cpp127
-rw-r--r--test/lit/passes/optimize-instructions.wast221
2 files changed, 292 insertions, 56 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 672484c92..74bfca899 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -853,15 +853,14 @@ struct OptimizeInstructions
}
}
{
- // i32.wrap_i64(i64.extend_i32_s(x)) => x
+ // i32.wrap_i64 can be removed if the operations inside it do not
+ // actually require 64 bits, e.g.:
+ //
// i32.wrap_i64(i64.extend_i32_u(x)) => x
- Unary* inner;
- Expression* x;
- if (matches(curr,
- unary(WrapInt64, unary(&inner, ExtendSInt32, any(&x)))) ||
- matches(curr,
- unary(WrapInt64, unary(&inner, ExtendUInt32, any(&x))))) {
- return replaceCurrent(x);
+ if (matches(curr, unary(WrapInt64, any()))) {
+ if (auto* ret = optimizeWrappedResult(curr)) {
+ return replaceCurrent(ret);
+ }
}
}
{
@@ -2681,6 +2680,118 @@ private:
builder.makeConst(Literal::makeFromInt64(constant, walked->type)));
}
+  // Given an i32.wrap_i64 operation, see if we can remove it. If all the
+  // things being operated on behave the same with or without wrapping, then
+  // we don't need to go to 64 bits at all, e.g.:
+ //
+ // int32_t(int64_t(x)) => x (extend, then wrap)
+ // int32_t(int64_t(x) + int64_t(10)) => x + int32_t(10) (also add)
+ //
+ Expression* optimizeWrappedResult(Unary* wrap) {
+ assert(wrap->op == WrapInt64);
+
+ // Core processing logic. This goes through the children, in one of two
+ // modes:
+ // * Scan: Find if there is anything we can't handle. Sets |canOptimize|
+ // with what it finds.
+ // * Optimize: Given we can handle everything, update things.
+ enum Mode { Scan, Optimize };
+ bool canOptimize = true;
+ auto processChildren = [&](Mode mode) {
+ // Use a simple stack as we go through the children. We use ** as we need
+ // to replace children for some optimizations.
+ SmallVector<Expression**, 2> stack;
+ stack.emplace_back(&wrap->value);
+
+ while (!stack.empty() && canOptimize) {
+ auto* currp = stack.back();
+ stack.pop_back();
+ auto* curr = *currp;
+ if (curr->type == Type::unreachable) {
+ // Leave unreachability for other passes.
+ canOptimize = false;
+ return;
+ } else if (auto* c = curr->dynCast<Const>()) {
+          // An i64 const can be handled by just turning it into an i32.
+ if (mode == Optimize) {
+ c->value = Literal(int32_t(c->value.getInteger()));
+ c->type = Type::i32;
+ }
+ } else if (auto* unary = curr->dynCast<Unary>()) {
+ switch (unary->op) {
+ case ExtendSInt32:
+ case ExtendUInt32: {
+ // Note that there is nothing to push to the stack here: the child
+ // is 32-bit already, so we can stop looking. We just need to skip
+ // the extend operation.
+ if (mode == Optimize) {
+ *currp = unary->value;
+ }
+ break;
+ }
+ default: {
+ // TODO: handle more cases here and below,
+ // https://github.com/WebAssembly/binaryen/issues/5004
+ canOptimize = false;
+ return;
+ }
+ }
+ } else if (auto* binary = curr->dynCast<Binary>()) {
+ // Turn the binary into a 32-bit one, if we can.
+ switch (binary->op) {
+ case AddInt64:
+ case SubInt64:
+ case MulInt64: {
+ // We can optimize these.
+ break;
+ }
+ default: {
+ canOptimize = false;
+ return;
+ }
+ }
+ if (mode == Optimize) {
+ switch (binary->op) {
+ case AddInt64: {
+ binary->op = AddInt32;
+ break;
+ }
+ case SubInt64: {
+ binary->op = SubInt32;
+ break;
+ }
+ case MulInt64: {
+ binary->op = MulInt32;
+ break;
+ }
+ default: {
+ WASM_UNREACHABLE("bad op");
+ }
+ }
+ // All things we can optimize change the type to i32.
+ binary->type = Type::i32;
+ }
+ stack.push_back(&binary->left);
+ stack.push_back(&binary->right);
+ } else {
+ // Anything else makes us give up.
+ canOptimize = false;
+ return;
+ }
+ }
+ };
+
+ processChildren(Scan);
+ if (!canOptimize) {
+ return nullptr;
+ }
+
+ // Optimize, and return the optimized results (in which we no longer need
+ // the wrap operation itself).
+ processChildren(Optimize);
+ return wrap->value;
+ }
+
// expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2,
// and expensive | cheap can be turned into cheap ? 1 : expensive,
// so that we can avoid one expensive computation, if it has no side effects.
diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast
index c0c19395d..d8c72bb65 100644
--- a/test/lit/passes/optimize-instructions.wast
+++ b/test/lit/passes/optimize-instructions.wast
@@ -1199,32 +1199,32 @@
(func $store16-and-65534
(i32.store16 (i32.const 11) (i32.and (i32.const -4) (i32.const 65534)))
)
- ;; CHECK: (func $store8-wrap
+ ;; CHECK: (func $store8-wrap (param $x i64)
;; CHECK-NEXT: (i64.store8
;; CHECK-NEXT: (i32.const 11)
- ;; CHECK-NEXT: (i64.const 1)
+ ;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- (func $store8-wrap
- (i32.store8 (i32.const 11) (i32.wrap_i64 (i64.const 1)))
+ (func $store8-wrap (param $x i64)
+ (i32.store8 (i32.const 11) (i32.wrap_i64 (local.get $x)))
)
- ;; CHECK: (func $store16-wrap
+ ;; CHECK: (func $store16-wrap (param $x i64)
;; CHECK-NEXT: (i64.store16
;; CHECK-NEXT: (i32.const 11)
- ;; CHECK-NEXT: (i64.const 2)
+ ;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- (func $store16-wrap
- (i32.store16 (i32.const 11) (i32.wrap_i64 (i64.const 2)))
+ (func $store16-wrap (param $x i64)
+ (i32.store16 (i32.const 11) (i32.wrap_i64 (local.get $x)))
)
- ;; CHECK: (func $store-wrap
+ ;; CHECK: (func $store-wrap (param $x i64)
;; CHECK-NEXT: (i64.store32
;; CHECK-NEXT: (i32.const 11)
- ;; CHECK-NEXT: (i64.const 3)
+ ;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- (func $store-wrap
- (i32.store (i32.const 11) (i32.wrap_i64 (i64.const 3)))
+ (func $store-wrap (param $x i64)
+ (i32.store (i32.const 11) (i32.wrap_i64 (local.get $x)))
)
;; CHECK: (func $store8-neg1
;; CHECK-NEXT: (i32.store8
@@ -3252,9 +3252,7 @@
;; CHECK: (func $sext-24-shr_u-wrap-too-big (result i32)
;; CHECK-NEXT: (i32.shr_s
;; CHECK-NEXT: (i32.and
- ;; CHECK-NEXT: (i32.wrap_i64
- ;; CHECK-NEXT: (i64.const -1)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -1)
;; CHECK-NEXT: (i32.const -16777216)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 24)
@@ -3276,9 +3274,7 @@
)
;; CHECK: (func $sext-24-shr_u-wrap (result i32)
;; CHECK-NEXT: (i32.shr_u
- ;; CHECK-NEXT: (i32.wrap_i64
- ;; CHECK-NEXT: (i64.const -1)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -1)
;; CHECK-NEXT: (i32.const 25)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -6647,15 +6643,7 @@
)
)
;; CHECK: (func $optimizeAddedConstants-filters-through-nonzero (result i32)
- ;; CHECK-NEXT: (i32.sub
- ;; CHECK-NEXT: (i32.shl
- ;; CHECK-NEXT: (i32.const -536870912)
- ;; CHECK-NEXT: (i32.wrap_i64
- ;; CHECK-NEXT: (i64.const 0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.const 31744)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -536902656)
;; CHECK-NEXT: )
(func $optimizeAddedConstants-filters-through-nonzero (result i32)
(i32.sub
@@ -6672,15 +6660,7 @@
)
)
;; CHECK: (func $optimizeAddedConstants-filters-through-nonzero-b (result i32)
- ;; CHECK-NEXT: (i32.sub
- ;; CHECK-NEXT: (i32.shl
- ;; CHECK-NEXT: (i32.const -536870912)
- ;; CHECK-NEXT: (i32.wrap_i64
- ;; CHECK-NEXT: (i64.const -1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.const 31744)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const -31744)
;; CHECK-NEXT: )
(func $optimizeAddedConstants-filters-through-nonzero-b (result i32)
(i32.sub
@@ -6850,19 +6830,12 @@
;; CHECK-NEXT: (i32.eqz
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.wrap_i64
- ;; CHECK-NEXT: (i64.const 2)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (i32.and
- ;; CHECK-NEXT: (i32.eqz
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.wrap_i64
- ;; CHECK-NEXT: (i64.const 1)
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (local.get $y)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -12783,8 +12756,8 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (i64.eqz
- ;; CHECK-NEXT: (i64.const 1)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
@@ -14986,4 +14959,156 @@
(drop (i64.extend_i32_s (i32.load16_s (local.get $x))))
(drop (i64.extend_i32_s (i32.load (local.get $x))))
)
+
+ ;; CHECK: (func $wrap-i64-to-i32-add (param $x i32) (result i32)
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 8)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-add (param $x i32) (result i32)
+ ;; Rather than extend to 64 and add there, we can do all of this in i32.
+ (i32.wrap_i64
+ (i64.add
+ (i64.extend_i32_u
+ (local.get $x)
+ )
+ (i64.const 8)
+ )
+ )
+ )
+
+ ;; CHECK: (func $wrap-i64-to-i32-sub (param $x i32) (result i32)
+ ;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (i32.const 8)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-sub (param $x i32) (result i32)
+ (i32.wrap_i64
+ (i64.sub
+ (i64.const 8)
+ (i64.extend_i32_u
+ (local.get $x)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $wrap-i64-to-i32-mul (param $x i32) (result i32)
+ ;; CHECK-NEXT: (i32.mul
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-mul (param $x i32) (result i32)
+ (i32.wrap_i64
+ (i64.mul
+ (i64.extend_i32_u
+ (local.get $x)
+ )
+ (i64.const 42)
+ )
+ )
+ )
+
+ ;; CHECK: (func $wrap-i64-to-i32-many (param $x i32) (param $y i32) (result i32)
+ ;; CHECK-NEXT: (i32.mul
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (i32.const -1)
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-many (param $x i32) (param $y i32) (result i32)
+ ;; Multiple operations that all together can be turned into i32.
+ (i32.wrap_i64
+ (i64.mul
+ (i64.add
+ (i64.extend_i32_u
+ (local.get $x)
+ )
+ (i64.sub
+ (i64.const -1)
+ (i64.extend_i32_u
+ (local.get $y)
+ )
+ )
+ )
+ (i64.const 42)
+ )
+ )
+ )
+
+ ;; CHECK: (func $wrap-i64-to-i32-div-no (param $x i32) (result i32)
+ ;; CHECK-NEXT: (i32.wrap_i64
+ ;; CHECK-NEXT: (i64.div_u
+ ;; CHECK-NEXT: (i64.extend_i32_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i64.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-div-no (param $x i32) (result i32)
+ ;; We *cannot* optimize here, as division cares about i32/i64 differences.
+ (i32.wrap_i64
+ (i64.div_s
+ (i64.extend_i32_u
+ (local.get $x)
+ )
+ (i64.const 42)
+ )
+ )
+ )
+
+ ;; CHECK: (func $wrap-i64-to-i32-tee-no (param $x i32) (result i32)
+ ;; CHECK-NEXT: (local $y i64)
+ ;; CHECK-NEXT: (i32.wrap_i64
+ ;; CHECK-NEXT: (i64.add
+ ;; CHECK-NEXT: (local.tee $y
+ ;; CHECK-NEXT: (i64.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i64.extend_i32_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-tee-no (param $x i32) (result i32)
+ ;; The local.tee stops us from optimizing atm. TODO
+ (local $y i64)
+ (i32.wrap_i64
+ (i64.add
+ (i64.extend_i32_u
+ (local.get $x)
+ )
+ (local.tee $y
+ (i64.const 42)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $wrap-i64-to-i32-local-no (param $x i64) (result i32)
+ ;; CHECK-NEXT: (i32.wrap_i64
+ ;; CHECK-NEXT: (i64.div_s
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i64.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $wrap-i64-to-i32-local-no (param $x i64) (result i32)
+ ;; We do not optimize here for now as an input ($x) is an i64. TODO
+ (i32.wrap_i64
+ (i64.div_s
+ (local.get $x)
+ (i64.const 42)
+ )
+ )
+ )
)