author     Alon Zakai <azakai@google.com>   2022-09-07 09:06:38 -0700
committer  GitHub <noreply@github.com>      2022-09-07 09:06:38 -0700
commit     90848f83b3f7bccc0ec815be7a3c0a783a134b03 (patch)
tree       5f84f8962a4a61c21b44b7c21f0de97848c77e78 /src/passes/OptimizeInstructions.cpp
parent     16f72aae58b6c856d405d6d608efd436549ea678 (diff)
Switch to i32 operations when heading to a wrap anyhow (#5022)
E.g., if we just do addition etc., then any higher bits will be wrapped away anyhow:
int32_t(int64_t(x) + int64_t(10))
=>
x + int32_t(10)
Found by the superoptimizer (#4994). This is by far the most promising suggestion it
had. Interestingly, it mainly helps Go, where it removes 20% of all Unary operations
(the extends and wraps), and Rust, where it removes 3%.
Helps #5004. This handles the common cases I see in the superoptimizer output, but
there are more that could be handled.
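For intuition on why this is valid for these operators: the low 32 bits of a 64-bit add, subtract, or multiply depend only on the low 32 bits of the operands, so wrapping the 64-bit result equals doing the whole computation in 32 bits. Below is a minimal standalone C++ sketch of that equivalence (illustrative only, not part of this patch); it models wasm's modular i32 arithmetic with unsigned math to avoid C++ signed-overflow UB:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {0, 1, -1, INT32_MAX, INT32_MIN}) {
    // Original pattern: extend x to 64 bits, add, then wrap back to 32 bits.
    int32_t wide = int32_t(uint32_t(int64_t(x) + int64_t(10)));
    // Optimized pattern: stay in 32 bits throughout. Wasm's i32.add is
    // modular, so unsigned arithmetic models it exactly.
    int32_t narrow = int32_t(uint32_t(x) + 10u);
    assert(wide == narrow); // identical low 32 bits in every case
  }
}
```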
Diffstat (limited to 'src/passes/OptimizeInstructions.cpp')
-rw-r--r--   src/passes/OptimizeInstructions.cpp   127
1 file changed, 119 insertions, 8 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 672484c92..74bfca899 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -853,15 +853,14 @@ struct OptimizeInstructions
     }
   }
   {
-    // i32.wrap_i64(i64.extend_i32_s(x)) => x
+    // i32.wrap_i64 can be removed if the operations inside it do not
+    // actually require 64 bits, e.g.:
+    //
     // i32.wrap_i64(i64.extend_i32_u(x)) => x
-    Unary* inner;
-    Expression* x;
-    if (matches(curr,
-                unary(WrapInt64, unary(&inner, ExtendSInt32, any(&x)))) ||
-        matches(curr,
-                unary(WrapInt64, unary(&inner, ExtendUInt32, any(&x))))) {
-      return replaceCurrent(x);
+    if (matches(curr, unary(WrapInt64, any()))) {
+      if (auto* ret = optimizeWrappedResult(curr)) {
+        return replaceCurrent(ret);
+      }
     }
   }
   {
@@ -2681,6 +2680,118 @@ private:
       builder.makeConst(Literal::makeFromInt64(constant, walked->type)));
   }
 
+  // Given an i64.wrap operation, see if we can remove it. If all the things
+  // being operated on behave the same with or without wrapping, then we don't
+  // need to go to 64 bits at all, e.g.:
+  //
+  //   int32_t(int64_t(x))               => x                (extend, then wrap)
+  //   int32_t(int64_t(x) + int64_t(10)) => x + int32_t(10)  (also add)
+  //
+  Expression* optimizeWrappedResult(Unary* wrap) {
+    assert(wrap->op == WrapInt64);
+
+    // Core processing logic. This goes through the children, in one of two
+    // modes:
+    //  * Scan: Find if there is anything we can't handle. Sets |canOptimize|
+    //    with what it finds.
+    //  * Optimize: Given we can handle everything, update things.
+    enum Mode { Scan, Optimize };
+    bool canOptimize = true;
+    auto processChildren = [&](Mode mode) {
+      // Use a simple stack as we go through the children. We use ** as we need
+      // to replace children for some optimizations.
+      SmallVector<Expression**, 2> stack;
+      stack.emplace_back(&wrap->value);
+
+      while (!stack.empty() && canOptimize) {
+        auto* currp = stack.back();
+        stack.pop_back();
+        auto* curr = *currp;
+        if (curr->type == Type::unreachable) {
+          // Leave unreachability for other passes.
+          canOptimize = false;
+          return;
+        } else if (auto* c = curr->dynCast<Const>()) {
+          // A i64 const can be handled by just turning it into an i32.
+          if (mode == Optimize) {
+            c->value = Literal(int32_t(c->value.getInteger()));
+            c->type = Type::i32;
+          }
+        } else if (auto* unary = curr->dynCast<Unary>()) {
+          switch (unary->op) {
+            case ExtendSInt32:
+            case ExtendUInt32: {
+              // Note that there is nothing to push to the stack here: the child
+              // is 32-bit already, so we can stop looking. We just need to skip
+              // the extend operation.
+              if (mode == Optimize) {
+                *currp = unary->value;
+              }
+              break;
+            }
+            default: {
+              // TODO: handle more cases here and below,
+              //       https://github.com/WebAssembly/binaryen/issues/5004
+              canOptimize = false;
+              return;
+            }
+          }
+        } else if (auto* binary = curr->dynCast<Binary>()) {
+          // Turn the binary into a 32-bit one, if we can.
+          switch (binary->op) {
+            case AddInt64:
+            case SubInt64:
+            case MulInt64: {
+              // We can optimize these.
+              break;
+            }
+            default: {
+              canOptimize = false;
+              return;
+            }
+          }
+          if (mode == Optimize) {
+            switch (binary->op) {
+              case AddInt64: {
+                binary->op = AddInt32;
+                break;
+              }
+              case SubInt64: {
+                binary->op = SubInt32;
+                break;
+              }
+              case MulInt64: {
+                binary->op = MulInt32;
+                break;
+              }
+              default: {
+                WASM_UNREACHABLE("bad op");
+              }
+            }
+            // All things we can optimize change the type to i32.
+            binary->type = Type::i32;
+          }
+          stack.push_back(&binary->left);
+          stack.push_back(&binary->right);
+        } else {
+          // Anything else makes us give up.
+          canOptimize = false;
+          return;
+        }
+      }
+    };
+
+    processChildren(Scan);
+    if (!canOptimize) {
+      return nullptr;
+    }
+
+    // Optimize, and return the optimized results (in which we no longer need
+    // the wrap operation itself).
+    processChildren(Optimize);
+    return wrap->value;
+  }
+
   // expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2,
   // and expensive | cheap can be turned into cheap ? 1 : expensive,
   // so that we can avoid one expensive computation, if it has no side effects.
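The set of handled operators (constants, extends, and 64-bit add/sub/mul) is deliberately conservative: operations such as shifts and divisions observe the high bits, so narrowing them would change results. A small C++ counterexample for an unsigned right shift (again illustrative, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // i32.wrap_i64(i64.shr_u(i64.extend_i32_s(x), 1)) is NOT x >> 1 in i32:
  // the shift pulls copies of the sign bit down into the low 32 bits.
  int32_t x = -2; // sign-extends to 0xFFFFFFFFFFFFFFFE
  uint32_t wrapped = uint32_t(uint64_t(int64_t(x)) >> 1); // 0xFFFFFFFF
  uint32_t narrowed = uint32_t(x) >> 1;                   // 0x7FFFFFFF
  assert(wrapped != narrowed); // naive narrowing would change the result
}
```

Division is excluded for a related reason: i32.div_s(INT32_MIN, -1) traps, while the same division done in i64 on sign-extended operands succeeds and wraps back to INT32_MIN, so narrowing could alter trapping behavior. The TODO in the patch (#5004) tracks handling more such cases with the care they need.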