diff options
author | Alon Zakai <alonzakai@gmail.com> | 2017-02-16 22:11:13 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-16 22:11:13 -0800 |
commit | 0728a53fb6bf0540b9789c7bcd26e195800c5ecc (patch) | |
tree | c615e78f96e79b121e92b598335b9e361a8025d4 | |
parent | a78dddbcf2bf9f23840c7074ce16c04a8d55c3df (diff) | |
download | binaryen-0728a53fb6bf0540b9789c7bcd26e195800c5ecc.tar.gz binaryen-0728a53fb6bf0540b9789c7bcd26e195800c5ecc.tar.bz2 binaryen-0728a53fb6bf0540b9789c7bcd26e195800c5ecc.zip |
optimize linear sums (#904)
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 110 | ||||
-rw-r--r-- | src/support/bits.cpp | 2 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 9 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 9 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 134 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 224 | ||||
-rw-r--r-- | test/unit.fromasm | 32 | ||||
-rw-r--r-- | test/unit.fromasm.imprecise | 32 |
8 files changed, 459 insertions, 93 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index c374a15ac..de46f155a 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -357,6 +357,8 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, } } // note that both left and right may be consts, but then we let precompute compute the constant result + } else if (binary->op == AddInt32 || binary->op == SubInt32) { + return optimizeAddedConstants(binary); } else if (binary->op == AndInt32) { if (auto* right = binary->right->dynCast<Const>()) { if (right->type == i32) { @@ -518,6 +520,114 @@ private: return boolean; } + // find added constants in an expression tree, including multiplied/shifted, and combine them + // note that we ignore division/shift-right, as rounding makes this nonlinear, so not a valid opt + Expression* optimizeAddedConstants(Binary* binary) { + int32_t constant = 0; + std::vector<Const*> constants; + std::function<void (Expression*, int)> seek = [&](Expression* curr, int mul) { + if (auto* c = curr->dynCast<Const>()) { + auto value = c->value.geti32(); + if (value != 0) { + constant += value * mul; + constants.push_back(c); + } + } else if (auto* binary = curr->dynCast<Binary>()) { + if (binary->op == AddInt32) { + seek(binary->left, mul); + seek(binary->right, mul); + return; + } else if (binary->op == SubInt32) { + // if the left is a zero, ignore it, it's how we negate ints + auto* left = binary->left->dynCast<Const>(); + if (!left || left->value.geti32() != 0) { + seek(binary->left, mul); + } + seek(binary->right, -mul); + return; + } else if (binary->op == ShlInt32) { + if (auto* c = binary->right->dynCast<Const>()) { + seek(binary->left, mul * Pow2(c->value.geti32())); + return; + } + } else if (binary->op == MulInt32) { + if (auto* c = binary->left->dynCast<Const>()) { + seek(binary->right, mul * c->value.geti32()); + return; + } else if (auto* c = binary->right->dynCast<Const>()) { + seek(binary->left, mul * c->value.geti32()); + return; + } + } + } + }; + // find all factors + seek(binary, 1); + if (constants.size() <= 1) return nullptr; // nothing to do + // wipe out all constants, we'll replace with a single added one + for (auto* c : constants) { + c->value = Literal(int32_t(0)); + } + // remove added/subbed zeros + struct ZeroRemover : public PostWalker<ZeroRemover, Visitor<ZeroRemover>> { + // TODO: we could save the binarys and costs we drop, and reuse them later + + PassOptions& passOptions; + + ZeroRemover(PassOptions& passOptions) : passOptions(passOptions) {} + + void visitBinary(Binary* curr) { + auto* left = curr->left->dynCast<Const>(); + auto* right = curr->right->dynCast<Const>(); + if (curr->op == AddInt32) { + if (left && left->value.geti32() == 0) { + replaceCurrent(curr->right); + return; + } + if (right && right->value.geti32() == 0) { + replaceCurrent(curr->left); + return; + } + } else if (curr->op == SubInt32) { + // we must leave a left zero, as it is how we negate ints + if (right && right->value.geti32() == 0) { + replaceCurrent(curr->left); + return; + } + } else if (curr->op == ShlInt32) { + // shifting a 0 is a 0, unless the shift has side effects + if (left && left->value.geti32() == 0 && !EffectAnalyzer(passOptions, curr->right).hasSideEffects()) { + replaceCurrent(left); + return; + } + } else if (curr->op == MulInt32) { + // multiplying by zero is a zero, unless the other side has side effects + if (left && left->value.geti32() == 0 && !EffectAnalyzer(passOptions, curr->right).hasSideEffects()) { + replaceCurrent(left); + return; + } + if (right && right->value.geti32() == 0 && !EffectAnalyzer(passOptions, curr->left).hasSideEffects()) { + replaceCurrent(right); + return; + } + } + } + }; + Expression* walked = binary; + ZeroRemover(getPassOptions()).walk(walked); + if (constant == 0) return walked; // nothing more to do + if (auto* c = walked->dynCast<Const>()) { + assert(c->value.geti32() == 0); + c->value = Literal(constant); + return c; + } + Builder builder(*getModule()); + return builder.makeBinary(AddInt32, + walked, + builder.makeConst(Literal(constant)) + ); + } + // expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2, and // expensive | cheap can be turned into cheap ? 1 : expensive, // so that we can avoid one expensive computation, if it has no side effects. diff --git a/src/support/bits.cpp b/src/support/bits.cpp index aa72201f4..8f28a7fdb 100644 --- a/src/support/bits.cpp +++ b/src/support/bits.cpp @@ -114,13 +114,13 @@ uint32_t Log2(uint32_t v) { uint32_t Pow2(uint32_t v) { switch (v) { - default: WASM_UNREACHABLE(); case 0: return 1; case 1: return 2; case 2: return 4; case 3: return 8; case 4: return 16; case 5: return 32; + default: return 1 << v; } } diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index b3d40099f..5ac7af9f6 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -5206,15 +5206,12 @@ (i32.add (i32.add (get_local $8) - (i32.const 4) - ) - (i32.shl - (i32.add + (i32.shl (get_local $13) - (i32.const -1024) + (i32.const 2) ) - (i32.const 2) ) + (i32.const -4092) ) ) ) diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index 2e35d2a56..096d105b8 100644 --- a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -5143,18 +5143,15 @@ (i32.add (i32.add (get_local $8) - (i32.const 4) - ) - (i32.shl - (i32.add + (i32.shl (i32.div_s (get_local $13) (i32.const 9) ) - (i32.const -1024) + (i32.const 2) ) - (i32.const 2) ) + (i32.const -4092) ) ) ) diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index 14f29a783..a11bd0d66 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -417,18 +417,12 @@ ) ) (func $load-off-2 (type $3) (param $0 i32) (result i32) - (i32.store offset=2 - (i32.add - (i32.const 1) - (i32.const 3) - ) + (i32.store + (i32.const 6) (get_local $0) ) - (i32.store offset=2 - (i32.add - (i32.const 3) - (i32.const 1) - ) + (i32.store + (i32.const 6) (get_local $0) ) (i32.store offset=2 @@ -459,18 +453,12 @@ ) (get_local $0) ) - (i32.store offset=2 - (i32.add - (i32.const -15) - (i32.const 17) - ) + (i32.store + (i32.const 4) (get_local $0) ) - (i32.store offset=2 - (i32.add - (i32.const -21) - (i32.const 19) - ) + (i32.store + (i32.const 0) (get_local $0) ) (i32.store @@ -482,19 +470,13 @@ (get_local $0) ) (drop - (i32.load offset=2 - (i32.add - (i32.const 2) - (i32.const 4) - ) + (i32.load + (i32.const 8) ) ) (drop - (i32.load offset=2 - (i32.add - (i32.const 4) - (i32.const 2) - ) + (i32.load + (i32.const 8) ) ) (drop @@ -944,4 +926,96 @@ ) ) ) + (func $linear-sums (type $4) (param $0 i32) (param $1 i32) + (drop + (i32.add + (get_local $1) + (i32.shl + (get_local $0) + (i32.const 4) + ) + ) + ) + (drop + (i32.add + (i32.add + (get_local $1) + (i32.shl + (get_local $0) + (i32.const 3) + ) + ) + (i32.const 12) + ) + ) + (drop + (i32.const 4) + ) + (drop + (i32.const 18) + ) + (drop + (i32.const 6) + ) + (drop + (i32.const -4) + ) + (drop + (i32.const 2) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 26) + ) + (drop + (i32.const -20) + ) + (drop + (i32.const 22) + ) + (drop + (i32.add + (i32.shl + (i32.const 1) + (get_local $0) + ) + (i32.const 14) + ) + ) + (drop + (i32.add + (i32.shl + (get_local $1) + (i32.const 3) + ) + (i32.const -66) + ) + ) + (drop + (i32.const 44) + ) + (drop + (i32.add + (i32.mul + (get_local $0) + (i32.const 10) + ) + (i32.const 14) + ) + ) + (drop + (i32.add + (i32.mul + (get_local $0) + (i32.const 2) + ) + (i32.const 34) + ) + ) + (drop + (get_local $0) + ) + ) ) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index 8eec3f3cd..b9bd420da 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -1014,4 +1014,228 @@ ) ) ) + (func $linear-sums (param $0 i32) (param $1 i32) + (drop + (i32.add + (i32.add + (get_local $1) + (i32.const 16) + ) + (i32.shl + (i32.add + (get_local $0) + (i32.const -1) ;; -16, so cancels out! + ) + (i32.const 4) + ) + ) + ) + (drop + (i32.add + (i32.add + (get_local $1) + (i32.const 20) + ) + (i32.shl + (i32.add + (get_local $0) + (i32.const -1) ;; -8, so sum is +12 + ) + (i32.const 3) + ) + ) + ) + (drop + (i32.add ;; simple sum + (i32.const 1) + (i32.const 3) + ) + ) + (drop + (i32.add ;; nested sum + (i32.add + (i32.const 1) + (i32.const 3) + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.add + (i32.const 1) + (i32.const 3) + ) + (i32.sub ;; internal sub + (i32.const 5) + (i32.const 3) + ) + ) + ) + (drop + (i32.sub ;; external sub + (i32.add + (i32.const 1) + (i32.const 3) + ) + (i32.add + (i32.const 5) + (i32.const 3) + ) + ) + ) + (drop + (i32.sub ;; external sub + (i32.add + (i32.const 1) + (i32.const 3) + ) + (i32.sub ;; and also internal sub + (i32.const 5) + (i32.const 3) + ) + ) + ) + (drop + (i32.add + (i32.add + (i32.const 1) + (i32.const 3) + ) + (i32.sub ;; negating sub + (i32.const 0) + (i32.const 3) + ) + ) + ) + (drop + (i32.add + (i32.sub + (i32.const 0) + (i32.sub ;; two negating subs + (i32.const 0) + (i32.add + (i32.const 3) + (i32.const 20) + ) + ) + ) + (i32.add + (i32.const 1) + (i32.const 2) + ) + ) + ) + (drop + (i32.add + (i32.add + (i32.const 0) + (i32.sub ;; one negating sub + (i32.const 0) + (i32.add + (i32.const 3) + (i32.const 20) + ) + ) + ) + (i32.add + (i32.const 1) + (i32.const 2) + ) + ) + ) + (drop + (i32.add + (i32.shl ;; shifted value + (i32.const 1) + (i32.const 3) + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.shl ;; shifted value + (i32.const 1) + (get_local $0) ;; but not by const + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.shl ;; shifted nested value + (i32.sub + (get_local $1) + (i32.const 10) + ) + (i32.const 3) + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.mul ;; multiplied + (i32.const 10) + (i32.const 3) + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.mul ;; multiplied by nonconstant - can't recurse + (i32.const 10) + (get_local $0) + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.mul ;; nested mul + (i32.add + (i32.const 10) + (get_local $0) + ) + (i32.const 2) + ) + (i32.add + (i32.const 5) + (i32.const 9) + ) + ) + ) + (drop + (i32.add + (i32.add + (get_local $0) + (i32.const 10) ;; cancelled out with the below + ) + (i32.sub + (i32.const -5) + (i32.const 5) + ) + ) + ) + ) ) diff --git a/test/unit.fromasm b/test/unit.fromasm index d2f569684..2a6c60735 100644 --- a/test/unit.fromasm +++ b/test/unit.fromasm @@ -279,19 +279,13 @@ (f32.neg (get_local $0) ) - (i32.add - (i32.const 8) - (i32.const 1) - ) + (i32.const 9) ) ) (func $cneg (param $0 f32) (call_indirect $FUNCSIG$vf (get_local $0) - (i32.add - (i32.const 8) - (i32.const 1) - ) + (i32.const 9) ) ) (func $smallCompare (result i32) @@ -326,10 +320,7 @@ (func $cneg_nosemicolon (call_indirect $FUNCSIG$vi (i32.const 1) - (i32.add - (i32.const 16) - (i32.const 1) - ) + (i32.const 17) ) ) (func $forLoop @@ -416,14 +407,8 @@ (drop (call $lb (i32.add - (i32.add - (i32.add - (get_local $0) - (i32.const 3) - ) - (i32.const 7) - ) - (i32.const 12) + (get_local $0) + (i32.const 22) ) ) ) @@ -497,11 +482,8 @@ (i32.store (get_local $0) (i32.add - (i32.add - (get_global $n) - (i32.const 136) - ) - (i32.const 8) + (get_global $n) + (i32.const 144) ) ) (i32.const 0) diff --git a/test/unit.fromasm.imprecise b/test/unit.fromasm.imprecise index 5951d9941..10c1c025c 100644 --- a/test/unit.fromasm.imprecise +++ b/test/unit.fromasm.imprecise @@ -247,19 +247,13 @@ (f32.neg (get_local $0) ) - (i32.add - (i32.const 8) - (i32.const 1) - ) + (i32.const 9) ) ) (func $cneg (param $0 f32) (call_indirect $FUNCSIG$vf (get_local $0) - (i32.add - (i32.const 8) - (i32.const 1) - ) + (i32.const 9) ) ) (func $smallCompare (result i32) @@ -294,10 +288,7 @@ (func $cneg_nosemicolon (call_indirect $FUNCSIG$vi (i32.const 1) - (i32.add - (i32.const 16) - (i32.const 1) - ) + (i32.const 17) ) ) (func $forLoop @@ -384,14 +375,8 @@ (drop (call $lb (i32.add - (i32.add - (i32.add - (get_local $0) - (i32.const 3) - ) - (i32.const 7) - ) - (i32.const 12) + (get_local $0) + (i32.const 22) ) ) ) @@ -465,11 +450,8 @@ (i32.store (get_local $0) (i32.add - (i32.add - (get_global $n) - (i32.const 136) - ) - (i32.const 8) + (get_global $n) + (i32.const 144) ) ) (i32.const 0) |