diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 127 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions.wast | 221 |
2 files changed, 292 insertions, 56 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 672484c92..74bfca899 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -853,15 +853,14 @@ struct OptimizeInstructions } } { - // i32.wrap_i64(i64.extend_i32_s(x)) => x + // i32.wrap_i64 can be removed if the operations inside it do not + // actually require 64 bits, e.g.: + // // i32.wrap_i64(i64.extend_i32_u(x)) => x - Unary* inner; - Expression* x; - if (matches(curr, - unary(WrapInt64, unary(&inner, ExtendSInt32, any(&x)))) || - matches(curr, - unary(WrapInt64, unary(&inner, ExtendUInt32, any(&x))))) { - return replaceCurrent(x); + if (matches(curr, unary(WrapInt64, any()))) { + if (auto* ret = optimizeWrappedResult(curr)) { + return replaceCurrent(ret); + } } } { @@ -2681,6 +2680,118 @@ private: builder.makeConst(Literal::makeFromInt64(constant, walked->type))); } + // Given an i64.wrap operation, see if we can remove it. If all the things + // being operated on behave the same with or without wrapping, then we don't + // need to go to 64 bits at all, e.g.: + // + // int32_t(int64_t(x)) => x (extend, then wrap) + // int32_t(int64_t(x) + int64_t(10)) => x + int32_t(10) (also add) + // + Expression* optimizeWrappedResult(Unary* wrap) { + assert(wrap->op == WrapInt64); + + // Core processing logic. This goes through the children, in one of two + // modes: + // * Scan: Find if there is anything we can't handle. Sets |canOptimize| + // with what it finds. + // * Optimize: Given we can handle everything, update things. + enum Mode { Scan, Optimize }; + bool canOptimize = true; + auto processChildren = [&](Mode mode) { + // Use a simple stack as we go through the children. We use ** as we need + // to replace children for some optimizations. + SmallVector<Expression**, 2> stack; + stack.emplace_back(&wrap->value); + + while (!stack.empty() && canOptimize) { + auto* currp = stack.back(); + stack.pop_back(); + auto* curr = *currp; + if (curr->type == Type::unreachable) { + // Leave unreachability for other passes. + canOptimize = false; + return; + } else if (auto* c = curr->dynCast<Const>()) { + // A i64 const can be handled by just turning it into an i32. + if (mode == Optimize) { + c->value = Literal(int32_t(c->value.getInteger())); + c->type = Type::i32; + } + } else if (auto* unary = curr->dynCast<Unary>()) { + switch (unary->op) { + case ExtendSInt32: + case ExtendUInt32: { + // Note that there is nothing to push to the stack here: the child + // is 32-bit already, so we can stop looking. We just need to skip + // the extend operation. + if (mode == Optimize) { + *currp = unary->value; + } + break; + } + default: { + // TODO: handle more cases here and below, + // https://github.com/WebAssembly/binaryen/issues/5004 + canOptimize = false; + return; + } + } + } else if (auto* binary = curr->dynCast<Binary>()) { + // Turn the binary into a 32-bit one, if we can. + switch (binary->op) { + case AddInt64: + case SubInt64: + case MulInt64: { + // We can optimize these. + break; + } + default: { + canOptimize = false; + return; + } + } + if (mode == Optimize) { + switch (binary->op) { + case AddInt64: { + binary->op = AddInt32; + break; + } + case SubInt64: { + binary->op = SubInt32; + break; + } + case MulInt64: { + binary->op = MulInt32; + break; + } + default: { + WASM_UNREACHABLE("bad op"); + } + } + // All things we can optimize change the type to i32. + binary->type = Type::i32; + } + stack.push_back(&binary->left); + stack.push_back(&binary->right); + } else { + // Anything else makes us give up. + canOptimize = false; + return; + } + } + }; + + processChildren(Scan); + if (!canOptimize) { + return nullptr; + } + + // Optimize, and return the optimized results (in which we no longer need + // the wrap operation itself). + processChildren(Optimize); + return wrap->value; + } + // expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2, // and expensive | cheap can be turned into cheap ? 1 : expensive, // so that we can avoid one expensive computation, if it has no side effects. diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast index c0c19395d..d8c72bb65 100644 --- a/test/lit/passes/optimize-instructions.wast +++ b/test/lit/passes/optimize-instructions.wast @@ -1199,32 +1199,32 @@ (func $store16-and-65534 (i32.store16 (i32.const 11) (i32.and (i32.const -4) (i32.const 65534))) ) - ;; CHECK: (func $store8-wrap + ;; CHECK: (func $store8-wrap (param $x i64) ;; CHECK-NEXT: (i64.store8 ;; CHECK-NEXT: (i32.const 11) - ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $store8-wrap - (i32.store8 (i32.const 11) (i32.wrap_i64 (i64.const 1))) + (func $store8-wrap (param $x i64) + (i32.store8 (i32.const 11) (i32.wrap_i64 (local.get $x))) ) - ;; CHECK: (func $store16-wrap + ;; CHECK: (func $store16-wrap (param $x i64) ;; CHECK-NEXT: (i64.store16 ;; CHECK-NEXT: (i32.const 11) - ;; CHECK-NEXT: (i64.const 2) + ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $store16-wrap - (i32.store16 (i32.const 11) (i32.wrap_i64 (i64.const 2))) + (func $store16-wrap (param $x i64) + (i32.store16 (i32.const 11) (i32.wrap_i64 (local.get $x))) ) - ;; CHECK: (func $store-wrap + ;; CHECK: (func $store-wrap (param $x i64) ;; CHECK-NEXT: (i64.store32 ;; CHECK-NEXT: (i32.const 11) - ;; CHECK-NEXT: (i64.const 3) + ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $store-wrap - (i32.store (i32.const 11) (i32.wrap_i64 (i64.const 3))) + (func $store-wrap (param $x i64) + (i32.store (i32.const 11) (i32.wrap_i64 (local.get $x))) ) ;; CHECK: (func $store8-neg1 ;; CHECK-NEXT: (i32.store8 @@ -3252,9 +3252,7 @@ ;; CHECK: (func $sext-24-shr_u-wrap-too-big (result i32) ;; CHECK-NEXT: (i32.shr_s ;; CHECK-NEXT: (i32.and - ;; CHECK-NEXT: (i32.wrap_i64 - ;; CHECK-NEXT: (i64.const -1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -1) ;; CHECK-NEXT: (i32.const -16777216) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 24) @@ -3276,9 +3274,7 @@ ) ;; CHECK: (func $sext-24-shr_u-wrap (result i32) ;; CHECK-NEXT: (i32.shr_u - ;; CHECK-NEXT: (i32.wrap_i64 - ;; CHECK-NEXT: (i64.const -1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -1) ;; CHECK-NEXT: (i32.const 25) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -6647,15 +6643,7 @@ ) ) ;; CHECK: (func $optimizeAddedConstants-filters-through-nonzero (result i32) - ;; CHECK-NEXT: (i32.sub - ;; CHECK-NEXT: (i32.shl - ;; CHECK-NEXT: (i32.const -536870912) - ;; CHECK-NEXT: (i32.wrap_i64 - ;; CHECK-NEXT: (i64.const 0) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 31744) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -536902656) ;; CHECK-NEXT: ) (func $optimizeAddedConstants-filters-through-nonzero (result i32) (i32.sub @@ -6672,15 +6660,7 @@ ) ) ;; CHECK: (func $optimizeAddedConstants-filters-through-nonzero-b (result i32) - ;; CHECK-NEXT: (i32.sub - ;; CHECK-NEXT: (i32.shl - ;; CHECK-NEXT: (i32.const -536870912) - ;; CHECK-NEXT: (i32.wrap_i64 - ;; CHECK-NEXT: (i64.const -1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 31744) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -31744) ;; CHECK-NEXT: ) (func $optimizeAddedConstants-filters-through-nonzero-b (result i32) (i32.sub @@ -6850,19 +6830,12 @@ ;; CHECK-NEXT: (i32.eqz ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.wrap_i64 - ;; CHECK-NEXT: (i64.const 2) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.and - ;; CHECK-NEXT: (i32.eqz - ;; CHECK-NEXT: (local.get $y) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.wrap_i64 - ;; CHECK-NEXT: (i64.const 1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $y) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -12783,8 +12756,8 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i64.eqz - ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -14986,4 +14959,156 @@ (drop (i64.extend_i32_s (i32.load16_s (local.get $x)))) (drop (i64.extend_i32_s (i32.load (local.get $x)))) ) + + ;; CHECK: (func $wrap-i64-to-i32-add (param $x i32) (result i32) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-add (param $x i32) (result i32) + ;; Rather than extend to 64 and add there, we can do all of this in i32. + (i32.wrap_i64 + (i64.add + (i64.extend_i32_u + (local.get $x) + ) + (i64.const 8) + ) + ) + ) + + ;; CHECK: (func $wrap-i64-to-i32-sub (param $x i32) (result i32) + ;; CHECK-NEXT: (i32.sub + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-sub (param $x i32) (result i32) + (i32.wrap_i64 + (i64.sub + (i64.const 8) + (i64.extend_i32_u + (local.get $x) + ) + ) + ) + ) + + ;; CHECK: (func $wrap-i64-to-i32-mul (param $x i32) (result i32) + ;; CHECK-NEXT: (i32.mul + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-mul (param $x i32) (result i32) + (i32.wrap_i64 + (i64.mul + (i64.extend_i32_u + (local.get $x) + ) + (i64.const 42) + ) + ) + ) + + ;; CHECK: (func $wrap-i64-to-i32-many (param $x i32) (param $y i32) (result i32) + ;; CHECK-NEXT: (i32.mul + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.sub + ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-many (param $x i32) (param $y i32) (result i32) + ;; Multiple operations that all together can be turned into i32. + (i32.wrap_i64 + (i64.mul + (i64.add + (i64.extend_i32_u + (local.get $x) + ) + (i64.sub + (i64.const -1) + (i64.extend_i32_u + (local.get $y) + ) + ) + ) + (i64.const 42) + ) + ) + ) + + ;; CHECK: (func $wrap-i64-to-i32-div-no (param $x i32) (result i32) + ;; CHECK-NEXT: (i32.wrap_i64 + ;; CHECK-NEXT: (i64.div_u + ;; CHECK-NEXT: (i64.extend_i32_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i64.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-div-no (param $x i32) (result i32) + ;; We *cannot* optimize here, as division cares about i32/i64 differences. + (i32.wrap_i64 + (i64.div_s + (i64.extend_i32_u + (local.get $x) + ) + (i64.const 42) + ) + ) + ) + + ;; CHECK: (func $wrap-i64-to-i32-tee-no (param $x i32) (result i32) + ;; CHECK-NEXT: (local $y i64) + ;; CHECK-NEXT: (i32.wrap_i64 + ;; CHECK-NEXT: (i64.add + ;; CHECK-NEXT: (local.tee $y + ;; CHECK-NEXT: (i64.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i64.extend_i32_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-tee-no (param $x i32) (result i32) + ;; The local.tee stops us from optimizing atm. TODO + (local $y i64) + (i32.wrap_i64 + (i64.add + (i64.extend_i32_u + (local.get $x) + ) + (local.tee $y + (i64.const 42) + ) + ) + ) + ) + + ;; CHECK: (func $wrap-i64-to-i32-local-no (param $x i64) (result i32) + ;; CHECK-NEXT: (i32.wrap_i64 + ;; CHECK-NEXT: (i64.div_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i64.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $wrap-i64-to-i32-local-no (param $x i64) (result i32) + ;; We do not optimize here for now as an input ($x) is an i64. TODO + (i32.wrap_i64 + (i64.div_s + (local.get $x) + (i64.const 42) + ) + ) + ) ) |