diff options
author | Max Graey <maxgraey@gmail.com> | 2021-08-02 16:30:26 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-02 13:30:26 +0000 |
commit | f35f02c1ce1b3129aa83d2dddeababd414c1ca8f (patch) | |
tree | 52b70c4193f77d63615f6571458248cf055c62a8 | |
parent | 512033eed52fff82274650aca7d7374b4b305551 (diff) | |
download | binaryen-f35f02c1ce1b3129aa83d2dddeababd414c1ca8f.tar.gz binaryen-f35f02c1ce1b3129aa83d2dddeababd414c1ca8f.tar.bz2 binaryen-f35f02c1ce1b3129aa83d2dddeababd414c1ca8f.zip |
[JS] Add a new OptimizeForJS pass (#4033)
Add a new OptimizeForJS pass which contains rewriting rules specific to JavaScript.
LLVM usually lowers x != 0 && (x & (x - 1)) == 0 (isPowerOf2) to popcnt(x) == 1 which is ok for wasm and other targets but is quite expensive for JavaScript. In this PR we lower the popcnt pattern back to the isPowerOf2 pattern.
-rw-r--r-- | src/ir/abstract.h | 5 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/OptimizeForJS.cpp | 92 | ||||
-rw-r--r-- | src/passes/RemoveNonJSOps.cpp | 20 | ||||
-rw-r--r-- | src/passes/pass.cpp | 3 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | src/wasm2js.h | 5 | ||||
-rw-r--r-- | test/lit/help/optimization-opts.test | 3 | ||||
-rw-r--r-- | test/lit/passes/optimize-for-js.wast | 54 |
9 files changed, 180 insertions, 4 deletions
diff --git a/src/ir/abstract.h b/src/ir/abstract.h index 2308c47ba..18df7869e 100644 --- a/src/ir/abstract.h +++ b/src/ir/abstract.h @@ -29,6 +29,7 @@ enum Op { // Unary Abs, Neg, + Popcnt, // Binary Add, Sub, @@ -81,6 +82,8 @@ inline UnaryOp getUnary(Type type, Op op) { switch (op) { case EqZ: return EqZInt32; + case Popcnt: + return PopcntInt32; default: return InvalidUnary; } @@ -90,6 +93,8 @@ inline UnaryOp getUnary(Type type, Op op) { switch (op) { case EqZ: return EqZInt64; + case Popcnt: + return PopcntInt64; default: return InvalidUnary; } diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 783a3df87..9cc5e1f96 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -51,6 +51,7 @@ set(passes_SOURCES NoExitRuntime.cpp OptimizeAddedConstants.cpp OptimizeInstructions.cpp + OptimizeForJS.cpp PickLoadSigns.cpp Poppify.cpp PostEmscripten.cpp diff --git a/src/passes/OptimizeForJS.cpp b/src/passes/OptimizeForJS.cpp new file mode 100644 index 000000000..976a16f94 --- /dev/null +++ b/src/passes/OptimizeForJS.cpp @@ -0,0 +1,92 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <pass.h> +#include <wasm.h> + +#include "wasm-builder.h" +#include <ir/abstract.h> +#include <ir/literal-utils.h> +#include <ir/localize.h> +#include <ir/match.h> + +namespace wasm { + +struct OptimizeForJSPass : public WalkerPass<PostWalker<OptimizeForJSPass>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new OptimizeForJSPass; } + + void visitBinary(Binary* curr) { + using namespace Abstract; + using namespace Match; + { + // Rewrite popcnt(x) == 1 ==> !!x & !(x & (x - 1)) + Expression* x; + if (matches(curr, binary(Eq, unary(Popcnt, any(&x)), ival(1)))) { + rewritePopcntEqualOne(x); + } + } + } + + void rewritePopcntEqualOne(Expression* expr) { + // popcnt(x) == 1 ==> !!x & !(x & (x - 1)) + BinaryOp andOp, subOp; + UnaryOp eqzOp; + Literal litOne; + Type type = expr->type; + + switch (type.getBasic()) { + case Type::i32: + eqzOp = EqZInt32; + andOp = AndInt32; + subOp = SubInt32; + litOne = Literal::makeOne(Type::i32); + break; + + case Type::i64: + eqzOp = EqZInt64; + andOp = AndInt64; + subOp = SubInt64; + litOne = Literal::makeOne(Type::i64); + break; + + default: + return; + } + + Localizer temp(expr, getFunction(), getModule()); + Builder builder(*getModule()); + + replaceCurrent(builder.makeBinary( + AndInt32, + builder.makeUnary( + EqZInt32, + builder.makeUnary(eqzOp, builder.makeLocalGet(temp.index, type))), + builder.makeUnary( + eqzOp, + builder.makeBinary( + andOp, + builder.makeLocalGet(temp.index, type), + builder.makeBinary(subOp, + builder.makeLocalGet(temp.index, type), + builder.makeConst(litOne)))))); + } +}; + +Pass* createOptimizeForJSPass() { return new OptimizeForJSPass(); } + +} // namespace wasm diff --git a/src/passes/RemoveNonJSOps.cpp b/src/passes/RemoveNonJSOps.cpp index 7c0a0e2ce..5bbb9f701 100644 --- a/src/passes/RemoveNonJSOps.cpp +++ b/src/passes/RemoveNonJSOps.cpp @@ -252,17 +252,29 @@ struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { } void rewriteCopysign(Binary* curr) { + + // i32.copysign(x, y) => f32.reinterpret( + // (i32.reinterpret(x) & ~(1 << 31)) | + // (i32.reinterpret(y) & (1 << 31) + // ) + // + // i64.copysign(x, y) => f64.reinterpret( + // (i64.reinterpret(x) & ~(1 << 63)) | + // (i64.reinterpret(y) & (1 << 63) + // ) + Literal signBit, otherBits; UnaryOp int2float, float2int; BinaryOp bitAnd, bitOr; + switch (curr->op) { case CopySignFloat32: float2int = ReinterpretFloat32; int2float = ReinterpretInt32; bitAnd = AndInt32; bitOr = OrInt32; - signBit = Literal(uint32_t(1 << 31)); - otherBits = Literal(uint32_t(1 << 31) - 1); + signBit = Literal(uint32_t(1U << 31)); + otherBits = Literal(~uint32_t(1U << 31)); break; case CopySignFloat64: @@ -270,8 +282,8 @@ struct RemoveNonJSOpsPass : public WalkerPass<PostWalker<RemoveNonJSOpsPass>> { int2float = ReinterpretInt64; bitAnd = AndInt64; bitOr = OrInt64; - signBit = Literal(uint64_t(1) << 63); - otherBits = Literal((uint64_t(1) << 63) - 1); + signBit = Literal(uint64_t(1ULL << 63)); + otherBits = Literal(~uint64_t(1ULL << 63)); break; default: diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index d21464c2b..137a89703 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -250,6 +250,9 @@ void PassRegistry::registerPasses() { registerPass("post-emscripten", "miscellaneous optimizations for Emscripten-generated code", createPostEmscriptenPass); + registerPass("optimize-for-js", + "early optimize of the instruction combinations for js", + createOptimizeForJSPass); registerPass("precompute", "computes compile-time evaluatable expressions", createPrecomputePass); diff --git a/src/passes/passes.h b/src/passes/passes.h index 004f8f319..c58259f86 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -79,6 +79,7 @@ Pass* createNoExitRuntimePass(); Pass* createOptimizeAddedConstantsPass(); Pass* createOptimizeAddedConstantsPropagatePass(); Pass* createOptimizeInstructionsPass(); +Pass* createOptimizeForJSPass(); Pass* createOptimizeStackIRPass(); Pass* createPickLoadSignsPass(); Pass* createModAsyncifyAlwaysOnlyUnwindPass(); diff --git a/src/wasm2js.h b/src/wasm2js.h index 5d2f95e94..ad8e32647 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -341,6 +341,11 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) { // TODO: only legalize if necessary - emscripten would already do so, and // likely other toolchains. but spec test suite needs that. runner.add("legalize-js-interface"); + // Before lowering non-JS operations we can optimize some instructions which + // may simplify next passes + if (options.optimizeLevel > 0) { + runner.add("optimize-for-js"); + } // First up remove as many non-JS operations we can, including things like // 64-bit integer multiplication/division, `f32.nearest` instructions, etc. // This may inject intrinsics which use i64 so it needs to be run before the diff --git a/test/lit/help/optimization-opts.test b/test/lit/help/optimization-opts.test index 0d4ed48a8..c6d45332d 100644 --- a/test/lit/help/optimization-opts.test +++ b/test/lit/help/optimization-opts.test @@ -347,6 +347,9 @@ ;; CHECK-NEXT: load/store offsets, propagating ;; CHECK-NEXT: them across locals too ;; CHECK-NEXT: +;; CHECK-NEXT: --optimize-for-js early optimize of the +;; CHECK-NEXT: instruction combinations for js +;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-instructions optimizes instruction ;; CHECK-NEXT: combinations ;; CHECK-NEXT: diff --git a/test/lit/passes/optimize-for-js.wast b/test/lit/passes/optimize-for-js.wast new file mode 100644 index 000000000..c123011fc --- /dev/null +++ b/test/lit/passes/optimize-for-js.wast @@ -0,0 +1,54 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --optimize-for-js -all -S -o - \ +;; RUN: | filecheck %s + +(module + ;; CHECK: (func $is-power-of-2_32 (param $x i32) (result i32) + ;; CHECK-NEXT: (i32.and + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (i32.and + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.sub + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $is-power-of-2_32 (param $x i32) (result i32) + (i32.eq + (i32.popcnt (local.get $x)) + (i32.const 1) + ) + ) + ;; CHECK: (func $is-power-of-2_64 (param $x i64) (result i32) + ;; CHECK-NEXT: (i32.and + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (i64.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i64.eqz + ;; CHECK-NEXT: (i64.and + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i64.sub + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $is-power-of-2_64 (param $x i64) (result i32) + (i64.eq + (i64.popcnt (local.get $x)) + (i64.const 1) + ) + ) +) |