summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/OptimizeInstructions.cpp110
-rw-r--r--src/support/bits.cpp2
-rw-r--r--test/emcc_hello_world.fromasm9
-rw-r--r--test/emcc_hello_world.fromasm.imprecise9
-rw-r--r--test/passes/optimize-instructions.txt134
-rw-r--r--test/passes/optimize-instructions.wast224
-rw-r--r--test/unit.fromasm32
-rw-r--r--test/unit.fromasm.imprecise32
8 files changed, 459 insertions, 93 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index c374a15ac..de46f155a 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -357,6 +357,8 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
}
}
// note that both left and right may be consts, but then we let precompute compute the constant result
+ } else if (binary->op == AddInt32 || binary->op == SubInt32) {
+ return optimizeAddedConstants(binary);
} else if (binary->op == AndInt32) {
if (auto* right = binary->right->dynCast<Const>()) {
if (right->type == i32) {
@@ -518,6 +520,114 @@ private:
return boolean;
}
+ // find added constants in an expression tree, including multiplied/shifted, and combine them
+ // note that we ignore division/shift-right, as rounding makes this nonlinear, so not a valid opt
+ Expression* optimizeAddedConstants(Binary* binary) {
+ int32_t constant = 0;
+ std::vector<Const*> constants;
+ std::function<void (Expression*, int)> seek = [&](Expression* curr, int mul) {
+ if (auto* c = curr->dynCast<Const>()) {
+ auto value = c->value.geti32();
+ if (value != 0) {
+ constant += value * mul;
+ constants.push_back(c);
+ }
+ } else if (auto* binary = curr->dynCast<Binary>()) {
+ if (binary->op == AddInt32) {
+ seek(binary->left, mul);
+ seek(binary->right, mul);
+ return;
+ } else if (binary->op == SubInt32) {
+ // if the left is a zero, ignore it, it's how we negate ints
+ auto* left = binary->left->dynCast<Const>();
+ if (!left || left->value.geti32() != 0) {
+ seek(binary->left, mul);
+ }
+ seek(binary->right, -mul);
+ return;
+ } else if (binary->op == ShlInt32) {
+ if (auto* c = binary->right->dynCast<Const>()) {
+ seek(binary->left, mul * Pow2(c->value.geti32()));
+ return;
+ }
+ } else if (binary->op == MulInt32) {
+ if (auto* c = binary->left->dynCast<Const>()) {
+ seek(binary->right, mul * c->value.geti32());
+ return;
+ } else if (auto* c = binary->right->dynCast<Const>()) {
+ seek(binary->left, mul * c->value.geti32());
+ return;
+ }
+ }
+ }
+ };
+ // find all factors
+ seek(binary, 1);
+ if (constants.size() <= 1) return nullptr; // nothing to do
+ // wipe out all constants, we'll replace with a single added one
+ for (auto* c : constants) {
+ c->value = Literal(int32_t(0));
+ }
+ // remove added/subbed zeros
+ struct ZeroRemover : public PostWalker<ZeroRemover, Visitor<ZeroRemover>> {
+ // TODO: we could save the binarys and costs we drop, and reuse them later
+
+ PassOptions& passOptions;
+
+ ZeroRemover(PassOptions& passOptions) : passOptions(passOptions) {}
+
+ void visitBinary(Binary* curr) {
+ auto* left = curr->left->dynCast<Const>();
+ auto* right = curr->right->dynCast<Const>();
+ if (curr->op == AddInt32) {
+ if (left && left->value.geti32() == 0) {
+ replaceCurrent(curr->right);
+ return;
+ }
+ if (right && right->value.geti32() == 0) {
+ replaceCurrent(curr->left);
+ return;
+ }
+ } else if (curr->op == SubInt32) {
+ // we must leave a left zero, as it is how we negate ints
+ if (right && right->value.geti32() == 0) {
+ replaceCurrent(curr->left);
+ return;
+ }
+ } else if (curr->op == ShlInt32) {
+ // shifting a 0 is a 0, unless the shift has side effects
+ if (left && left->value.geti32() == 0 && !EffectAnalyzer(passOptions, curr->right).hasSideEffects()) {
+ replaceCurrent(left);
+ return;
+ }
+ } else if (curr->op == MulInt32) {
+ // multiplying by zero is a zero, unless the other side has side effects
+ if (left && left->value.geti32() == 0 && !EffectAnalyzer(passOptions, curr->right).hasSideEffects()) {
+ replaceCurrent(left);
+ return;
+ }
+ if (right && right->value.geti32() == 0 && !EffectAnalyzer(passOptions, curr->left).hasSideEffects()) {
+ replaceCurrent(right);
+ return;
+ }
+ }
+ }
+ };
+ Expression* walked = binary;
+ ZeroRemover(getPassOptions()).walk(walked);
+ if (constant == 0) return walked; // nothing more to do
+ if (auto* c = walked->dynCast<Const>()) {
+ assert(c->value.geti32() == 0);
+ c->value = Literal(constant);
+ return c;
+ }
+ Builder builder(*getModule());
+ return builder.makeBinary(AddInt32,
+ walked,
+ builder.makeConst(Literal(constant))
+ );
+ }
+
// expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2, and
// expensive | cheap can be turned into cheap ? 1 : expensive,
// so that we can avoid one expensive computation, if it has no side effects.
diff --git a/src/support/bits.cpp b/src/support/bits.cpp
index aa72201f4..8f28a7fdb 100644
--- a/src/support/bits.cpp
+++ b/src/support/bits.cpp
@@ -114,13 +114,13 @@ uint32_t Log2(uint32_t v) {
uint32_t Pow2(uint32_t v) {
switch (v) {
- default: WASM_UNREACHABLE();
case 0: return 1;
case 1: return 2;
case 2: return 4;
case 3: return 8;
case 4: return 16;
case 5: return 32;
+ default: return 1 << v;
}
}
diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm
index b3d40099f..5ac7af9f6 100644
--- a/test/emcc_hello_world.fromasm
+++ b/test/emcc_hello_world.fromasm
@@ -5206,15 +5206,12 @@
(i32.add
(i32.add
(get_local $8)
- (i32.const 4)
- )
- (i32.shl
- (i32.add
+ (i32.shl
(get_local $13)
- (i32.const -1024)
+ (i32.const 2)
)
- (i32.const 2)
)
+ (i32.const -4092)
)
)
)
diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise
index 2e35d2a56..096d105b8 100644
--- a/test/emcc_hello_world.fromasm.imprecise
+++ b/test/emcc_hello_world.fromasm.imprecise
@@ -5143,18 +5143,15 @@
(i32.add
(i32.add
(get_local $8)
- (i32.const 4)
- )
- (i32.shl
- (i32.add
+ (i32.shl
(i32.div_s
(get_local $13)
(i32.const 9)
)
- (i32.const -1024)
+ (i32.const 2)
)
- (i32.const 2)
)
+ (i32.const -4092)
)
)
)
diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt
index 14f29a783..a11bd0d66 100644
--- a/test/passes/optimize-instructions.txt
+++ b/test/passes/optimize-instructions.txt
@@ -417,18 +417,12 @@
)
)
(func $load-off-2 (type $3) (param $0 i32) (result i32)
- (i32.store offset=2
- (i32.add
- (i32.const 1)
- (i32.const 3)
- )
+ (i32.store
+ (i32.const 6)
(get_local $0)
)
- (i32.store offset=2
- (i32.add
- (i32.const 3)
- (i32.const 1)
- )
+ (i32.store
+ (i32.const 6)
(get_local $0)
)
(i32.store offset=2
@@ -459,18 +453,12 @@
)
(get_local $0)
)
- (i32.store offset=2
- (i32.add
- (i32.const -15)
- (i32.const 17)
- )
+ (i32.store
+ (i32.const 4)
(get_local $0)
)
- (i32.store offset=2
- (i32.add
- (i32.const -21)
- (i32.const 19)
- )
+ (i32.store
+ (i32.const 0)
(get_local $0)
)
(i32.store
@@ -482,19 +470,13 @@
(get_local $0)
)
(drop
- (i32.load offset=2
- (i32.add
- (i32.const 2)
- (i32.const 4)
- )
+ (i32.load
+ (i32.const 8)
)
)
(drop
- (i32.load offset=2
- (i32.add
- (i32.const 4)
- (i32.const 2)
- )
+ (i32.load
+ (i32.const 8)
)
)
(drop
@@ -944,4 +926,96 @@
)
)
)
+ (func $linear-sums (type $4) (param $0 i32) (param $1 i32)
+ (drop
+ (i32.add
+ (get_local $1)
+ (i32.shl
+ (get_local $0)
+ (i32.const 4)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.add
+ (get_local $1)
+ (i32.shl
+ (get_local $0)
+ (i32.const 3)
+ )
+ )
+ (i32.const 12)
+ )
+ )
+ (drop
+ (i32.const 4)
+ )
+ (drop
+ (i32.const 18)
+ )
+ (drop
+ (i32.const 6)
+ )
+ (drop
+ (i32.const -4)
+ )
+ (drop
+ (i32.const 2)
+ )
+ (drop
+ (i32.const 1)
+ )
+ (drop
+ (i32.const 26)
+ )
+ (drop
+ (i32.const -20)
+ )
+ (drop
+ (i32.const 22)
+ )
+ (drop
+ (i32.add
+ (i32.shl
+ (i32.const 1)
+ (get_local $0)
+ )
+ (i32.const 14)
+ )
+ )
+ (drop
+ (i32.add
+ (i32.shl
+ (get_local $1)
+ (i32.const 3)
+ )
+ (i32.const -66)
+ )
+ )
+ (drop
+ (i32.const 44)
+ )
+ (drop
+ (i32.add
+ (i32.mul
+ (get_local $0)
+ (i32.const 10)
+ )
+ (i32.const 14)
+ )
+ )
+ (drop
+ (i32.add
+ (i32.mul
+ (get_local $0)
+ (i32.const 2)
+ )
+ (i32.const 34)
+ )
+ )
+ (drop
+ (get_local $0)
+ )
+ )
)
diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast
index 8eec3f3cd..b9bd420da 100644
--- a/test/passes/optimize-instructions.wast
+++ b/test/passes/optimize-instructions.wast
@@ -1014,4 +1014,228 @@
)
)
)
+ (func $linear-sums (param $0 i32) (param $1 i32)
+ (drop
+ (i32.add
+ (i32.add
+ (get_local $1)
+ (i32.const 16)
+ )
+ (i32.shl
+ (i32.add
+ (get_local $0)
+ (i32.const -1) ;; -16, so cancels out!
+ )
+ (i32.const 4)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.add
+ (get_local $1)
+ (i32.const 20)
+ )
+ (i32.shl
+ (i32.add
+ (get_local $0)
+ (i32.const -1) ;; -8, so sum is +12
+ )
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add ;; simple sum
+ (i32.const 1)
+ (i32.const 3)
+ )
+ )
+ (drop
+ (i32.add ;; nested sum
+ (i32.add
+ (i32.const 1)
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.add
+ (i32.const 1)
+ (i32.const 3)
+ )
+ (i32.sub ;; internal sub
+ (i32.const 5)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.sub ;; external sub
+ (i32.add
+ (i32.const 1)
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.sub ;; external sub
+ (i32.add
+ (i32.const 1)
+ (i32.const 3)
+ )
+ (i32.sub ;; and also internal sub
+ (i32.const 5)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.add
+ (i32.const 1)
+ (i32.const 3)
+ )
+ (i32.sub ;; negating sub
+ (i32.const 0)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.sub
+ (i32.const 0)
+ (i32.sub ;; two negating subs
+ (i32.const 0)
+ (i32.add
+ (i32.const 3)
+ (i32.const 20)
+ )
+ )
+ )
+ (i32.add
+ (i32.const 1)
+ (i32.const 2)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.add
+ (i32.const 0)
+ (i32.sub ;; one negating sub
+ (i32.const 0)
+ (i32.add
+ (i32.const 3)
+ (i32.const 20)
+ )
+ )
+ )
+ (i32.add
+ (i32.const 1)
+ (i32.const 2)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.shl ;; shifted value
+ (i32.const 1)
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.shl ;; shifted value
+ (i32.const 1)
+ (get_local $0) ;; but not by const
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.shl ;; shifted nested value
+ (i32.sub
+ (get_local $1)
+ (i32.const 10)
+ )
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.mul ;; multiplied
+ (i32.const 10)
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.mul ;; multiplied by nonconstant - can't recurse
+ (i32.const 10)
+ (get_local $0)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.mul ;; nested mul
+ (i32.add
+ (i32.const 10)
+ (get_local $0)
+ )
+ (i32.const 2)
+ )
+ (i32.add
+ (i32.const 5)
+ (i32.const 9)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.add
+ (get_local $0)
+ (i32.const 10) ;; cancelled out with the below
+ )
+ (i32.sub
+ (i32.const -5)
+ (i32.const 5)
+ )
+ )
+ )
+ )
)
diff --git a/test/unit.fromasm b/test/unit.fromasm
index d2f569684..2a6c60735 100644
--- a/test/unit.fromasm
+++ b/test/unit.fromasm
@@ -279,19 +279,13 @@
(f32.neg
(get_local $0)
)
- (i32.add
- (i32.const 8)
- (i32.const 1)
- )
+ (i32.const 9)
)
)
(func $cneg (param $0 f32)
(call_indirect $FUNCSIG$vf
(get_local $0)
- (i32.add
- (i32.const 8)
- (i32.const 1)
- )
+ (i32.const 9)
)
)
(func $smallCompare (result i32)
@@ -326,10 +320,7 @@
(func $cneg_nosemicolon
(call_indirect $FUNCSIG$vi
(i32.const 1)
- (i32.add
- (i32.const 16)
- (i32.const 1)
- )
+ (i32.const 17)
)
)
(func $forLoop
@@ -416,14 +407,8 @@
(drop
(call $lb
(i32.add
- (i32.add
- (i32.add
- (get_local $0)
- (i32.const 3)
- )
- (i32.const 7)
- )
- (i32.const 12)
+ (get_local $0)
+ (i32.const 22)
)
)
)
@@ -497,11 +482,8 @@
(i32.store
(get_local $0)
(i32.add
- (i32.add
- (get_global $n)
- (i32.const 136)
- )
- (i32.const 8)
+ (get_global $n)
+ (i32.const 144)
)
)
(i32.const 0)
diff --git a/test/unit.fromasm.imprecise b/test/unit.fromasm.imprecise
index 5951d9941..10c1c025c 100644
--- a/test/unit.fromasm.imprecise
+++ b/test/unit.fromasm.imprecise
@@ -247,19 +247,13 @@
(f32.neg
(get_local $0)
)
- (i32.add
- (i32.const 8)
- (i32.const 1)
- )
+ (i32.const 9)
)
)
(func $cneg (param $0 f32)
(call_indirect $FUNCSIG$vf
(get_local $0)
- (i32.add
- (i32.const 8)
- (i32.const 1)
- )
+ (i32.const 9)
)
)
(func $smallCompare (result i32)
@@ -294,10 +288,7 @@
(func $cneg_nosemicolon
(call_indirect $FUNCSIG$vi
(i32.const 1)
- (i32.add
- (i32.const 16)
- (i32.const 1)
- )
+ (i32.const 17)
)
)
(func $forLoop
@@ -384,14 +375,8 @@
(drop
(call $lb
(i32.add
- (i32.add
- (i32.add
- (get_local $0)
- (i32.const 3)
- )
- (i32.const 7)
- )
- (i32.const 12)
+ (get_local $0)
+ (i32.const 22)
)
)
)
@@ -465,11 +450,8 @@
(i32.store
(get_local $0)
(i32.add
- (i32.add
- (get_global $n)
- (i32.const 136)
- )
- (i32.const 8)
+ (get_global $n)
+ (i32.const 144)
)
)
(i32.const 0)