summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/OptimizeInstructions.cpp109
-rw-r--r--test/emcc_O2_hello_world.fromasm176
-rw-r--r--test/emcc_O2_hello_world.fromasm.imprecise176
-rw-r--r--test/emcc_hello_world.fromasm160
-rw-r--r--test/emcc_hello_world.fromasm.imprecise160
-rw-r--r--test/memorygrowth.fromasm176
-rw-r--r--test/memorygrowth.fromasm.imprecise176
-rw-r--r--test/passes/optimize-instructions.txt107
-rw-r--r--test/passes/optimize-instructions.wast96
-rw-r--r--test/unit.fromasm11
-rw-r--r--test/unit.fromasm.imprecise11
11 files changed, 677 insertions, 681 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index de46f155a..bb4748a97 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -263,6 +263,36 @@ static Index getSignExtBits(Expression* curr) {
return 32 - curr->cast<Binary>()->right->cast<Const>()->value.geti32();
}
+// Recognizes an "almost" sign-extend: (x << C1) >>s C2, where both shift
+// amounts are constants and C2 <= C1 (compared as unsigned values). When
+// the inner shift is larger than the outer one, the shifts can be split,
+// which is sometimes useful (e.g. if the sign-extend can then be removed).
+// Returns x on a match, and nullptr otherwise.
+// NOTE(review): the constants are compared as-is, not reduced modulo 32
+// the way wasm interprets shift amounts - confirm whether amounts >= 32
+// need special handling here.
+static Expression* getAlmostSignExt(Expression* curr) {
+  auto* shr = curr->dynCast<Binary>();
+  if (!shr || shr->op != ShrSInt32) return nullptr;
+  auto* shrAmount = shr->right->dynCast<Const>();
+  if (!shrAmount) return nullptr;
+  auto* shl = shr->left->dynCast<Binary>();
+  if (!shl || shl->op != ShlInt32) return nullptr;
+  auto* shlAmount = shl->right->dynCast<Const>();
+  if (!shlAmount) return nullptr;
+  // the outer (right) shift must not be larger than the inner (left) one
+  if (!shrAmount->value.leU(shlAmount->value).geti32()) return nullptr;
+  return shl->left;
+}
+
+// For an expression of the shape (x << C1) >>s C2, stores C1 - C2 in
+// extraShifts (the amount by which the left shift exceeds the right one,
+// 0 for a proper sign-extend) and returns the result of getSignExtBits,
+// i.e. the size of the sign-extended value as given by the outer shift.
+static Index getAlmostSignExtBits(Expression* curr, Index& extraShifts) {
+  auto* outer = curr->cast<Binary>();
+  auto leftShift = outer->left->cast<Binary>()->right->cast<Const>()->value.geti32();
+  auto rightShift = outer->right->cast<Const>()->value.geti32();
+  extraShifts = leftShift - rightShift;
+  return getSignExtBits(curr);
+}
+
// get a mask to keep only the low # of bits
static int32_t lowBitMask(int32_t bits) {
uint32_t ret = -1;
@@ -321,17 +351,18 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
std::swap(binary->left, binary->right);
}
}
- if (auto* ext = getSignExt(binary)) {
- auto bits = getSignExtBits(binary);
+ if (auto* ext = getAlmostSignExt(binary)) {
+ Index extraShifts;
+ auto bits = getAlmostSignExtBits(binary, extraShifts);
auto* load = ext->dynCast<Load>();
// pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc.
if (load && ((load->bytes == 1 && bits == 8) || (load->bytes == 2 && bits == 16))) {
load->signed_ = true;
- return load;
+ return removeAlmostSignExt(binary);
}
// if the sign-extend input cannot have a sign bit, we don't need it
- if (getMaxBits(ext) < bits) {
- return ext;
+ if (getMaxBits(ext) + extraShifts < bits) {
+ return removeAlmostSignExt(binary);
}
} else if (binary->op == EqInt32) {
if (auto* c = binary->right->dynCast<Const>()) {
@@ -359,29 +390,46 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
// note that both left and right may be consts, but then we let precompute compute the constant result
} else if (binary->op == AddInt32 || binary->op == SubInt32) {
return optimizeAddedConstants(binary);
- } else if (binary->op == AndInt32) {
- if (auto* right = binary->right->dynCast<Const>()) {
- if (right->type == i32) {
- auto mask = right->value.geti32();
- // and with -1 does nothing (common in asm.js output)
- if (mask == -1) {
- return binary->left;
+ }
+ // a bunch of operations on a constant right side can be simplified
+ if (auto* right = binary->right->dynCast<Const>()) {
+ if (binary->op == AndInt32) {
+ auto mask = right->value.geti32();
+ // and with -1 does nothing (common in asm.js output)
+ if (mask == -1) {
+ return binary->left;
+ }
+ // small loads do not need to be masked, the load itself masks
+ if (auto* load = binary->left->dynCast<Load>()) {
+ if ((load->bytes == 1 && mask == 0xff) ||
+ (load->bytes == 2 && mask == 0xffff)) {
+ load->signed_ = false;
+ return load;
}
- // small loads do not need to be masted, the load itself masks
- if (auto* load = binary->left->dynCast<Load>()) {
- if ((load->bytes == 1 && mask == 0xff) ||
- (load->bytes == 2 && mask == 0xffff)) {
- load->signed_ = false;
- return load;
+ } else if (mask == 1 && Properties::emitsBoolean(binary->left)) {
+ // (bool) & 1 does not need the outer mask
+ return binary->left;
+ }
+ }
+ // the square of some operations can be merged
+ if (auto* left = binary->left->dynCast<Binary>()) {
+ if (left->op == binary->op) {
+ if (auto* leftRight = left->right->dynCast<Const>()) {
+ if (left->op == AndInt32) {
+ leftRight->value = leftRight->value.and_(right->value);
+ return left;
+ } else if (left->op == OrInt32) {
+ leftRight->value = leftRight->value.or_(right->value);
+ return left;
+ } else if (left->op == ShlInt32 || left->op == ShrUInt32 || left->op == ShrSInt32) {
+ leftRight->value = leftRight->value.add(right->value);
+ return left;
}
- } else if (mask == 1 && Properties::emitsBoolean(binary->left)) {
- // (bool) & 1 does not need the outer mask
- return binary->left;
}
}
}
- return conditionalizeExpensiveOnBitwise(binary);
- } else if (binary->op == OrInt32) {
+ }
+ if (binary->op == AndInt32 || binary->op == OrInt32) {
return conditionalizeExpensiveOnBitwise(binary);
}
} else if (auto* unary = curr->dynCast<Unary>()) {
@@ -685,6 +733,21 @@ private:
Builder builder(*getModule());
return builder.makeBinary(AndInt32, curr, builder.makeConst(Literal(lowBitMask(bits))));
}
+
+ // Strips the sign-extend from an "almost" sign-extend, (x << C1) >>s C2.
+ // If the two shift amounts are equal this is a proper sign-extend and
+ // we just return x. Otherwise there are too many left shifts: we split
+ // them, dropping the two sign-extend shifts and leaving a single,
+ // smaller left shift by C1 - C2 (the existing shl node is reused).
+ Expression* removeAlmostSignExt(Binary* outer) {
+   auto* shl = outer->left->cast<Binary>();
+   auto* shrAmount = outer->right->cast<Const>();
+   auto* shlAmount = shl->right->cast<Const>();
+   if (shrAmount->value == shlAmount->value) {
+     // a proper sign-extend, nothing is left over
+     return shl->left;
+   }
+   // keep only the extra shifts, in the existing inner node
+   shlAmount->value = shlAmount->value.sub(shrAmount->value);
+   return shl;
+ }
};
Pass *createOptimizeInstructionsPass() {
diff --git a/test/emcc_O2_hello_world.fromasm b/test/emcc_O2_hello_world.fromasm
index 40f7d3585..5aa32979b 100644
--- a/test/emcc_O2_hello_world.fromasm
+++ b/test/emcc_O2_hello_world.fromasm
@@ -158,22 +158,19 @@
(tee_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $10
- (i32.add
- (i32.xor
- (i32.and
- (get_local $2)
- (i32.const 1)
- )
+ (tee_local $10
+ (i32.add
+ (i32.xor
+ (i32.and
+ (get_local $2)
(i32.const 1)
)
- (get_local $6)
+ (i32.const 1)
)
+ (get_local $6)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -339,83 +336,80 @@
(tee_local $11
(i32.add
(i32.shl
- (i32.shl
- (tee_local $10
- (i32.add
+ (tee_local $10
+ (i32.add
+ (i32.or
(i32.or
(i32.or
(i32.or
- (i32.or
- (tee_local $2
- (i32.and
- (i32.shr_u
- (tee_local $7
- (i32.shr_u
- (get_local $2)
- (get_local $1)
- )
- )
- (i32.const 5)
- )
- (i32.const 8)
- )
- )
- (get_local $1)
- )
- (tee_local $7
+ (tee_local $2
(i32.and
(i32.shr_u
- (tee_local $0
+ (tee_local $7
(i32.shr_u
- (get_local $7)
(get_local $2)
+ (get_local $1)
)
)
- (i32.const 2)
+ (i32.const 5)
)
- (i32.const 4)
+ (i32.const 8)
)
)
+ (get_local $1)
)
- (tee_local $0
+ (tee_local $7
(i32.and
(i32.shr_u
- (tee_local $11
+ (tee_local $0
(i32.shr_u
- (get_local $0)
(get_local $7)
+ (get_local $2)
)
)
- (i32.const 1)
+ (i32.const 2)
)
- (i32.const 2)
+ (i32.const 4)
)
)
)
- (tee_local $11
+ (tee_local $0
(i32.and
(i32.shr_u
- (tee_local $19
+ (tee_local $11
(i32.shr_u
- (get_local $11)
(get_local $0)
+ (get_local $7)
)
)
(i32.const 1)
)
- (i32.const 1)
+ (i32.const 2)
)
)
)
- (i32.shr_u
- (get_local $19)
- (get_local $11)
+ (tee_local $11
+ (i32.and
+ (i32.shr_u
+ (tee_local $19
+ (i32.shr_u
+ (get_local $11)
+ (get_local $0)
+ )
+ )
+ (i32.const 1)
+ )
+ (i32.const 1)
+ )
)
)
+ (i32.shr_u
+ (get_local $19)
+ (get_local $11)
+ )
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -539,16 +533,13 @@
(set_local $11
(i32.add
(i32.shl
- (i32.shl
- (tee_local $19
- (i32.shr_u
- (get_local $17)
- (i32.const 3)
- )
+ (tee_local $19
+ (i32.shr_u
+ (get_local $17)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -1259,16 +1250,13 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $7
- (i32.shr_u
- (get_local $1)
- (i32.const 3)
- )
+ (tee_local $7
+ (i32.shr_u
+ (get_local $1)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -2346,11 +2334,8 @@
(set_local $11
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -4370,11 +4355,8 @@
(tee_local $23
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -4535,11 +4517,8 @@
(set_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -5281,11 +5260,8 @@
(set_local $18
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -5760,11 +5736,8 @@
(tee_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $2)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -6122,11 +6095,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $7)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $7)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -7143,11 +7113,8 @@
(tee_local $7
(i32.add
(i32.shl
- (i32.shl
- (get_local $14)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $14)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -7298,11 +7265,8 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 216)
)
diff --git a/test/emcc_O2_hello_world.fromasm.imprecise b/test/emcc_O2_hello_world.fromasm.imprecise
index 901414958..39ed6731f 100644
--- a/test/emcc_O2_hello_world.fromasm.imprecise
+++ b/test/emcc_O2_hello_world.fromasm.imprecise
@@ -157,22 +157,19 @@
(tee_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $10
- (i32.add
- (i32.xor
- (i32.and
- (get_local $2)
- (i32.const 1)
- )
+ (tee_local $10
+ (i32.add
+ (i32.xor
+ (i32.and
+ (get_local $2)
(i32.const 1)
)
- (get_local $6)
+ (i32.const 1)
)
+ (get_local $6)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -338,83 +335,80 @@
(tee_local $11
(i32.add
(i32.shl
- (i32.shl
- (tee_local $10
- (i32.add
+ (tee_local $10
+ (i32.add
+ (i32.or
(i32.or
(i32.or
(i32.or
- (i32.or
- (tee_local $2
- (i32.and
- (i32.shr_u
- (tee_local $7
- (i32.shr_u
- (get_local $2)
- (get_local $1)
- )
- )
- (i32.const 5)
- )
- (i32.const 8)
- )
- )
- (get_local $1)
- )
- (tee_local $7
+ (tee_local $2
(i32.and
(i32.shr_u
- (tee_local $0
+ (tee_local $7
(i32.shr_u
- (get_local $7)
(get_local $2)
+ (get_local $1)
)
)
- (i32.const 2)
+ (i32.const 5)
)
- (i32.const 4)
+ (i32.const 8)
)
)
+ (get_local $1)
)
- (tee_local $0
+ (tee_local $7
(i32.and
(i32.shr_u
- (tee_local $11
+ (tee_local $0
(i32.shr_u
- (get_local $0)
(get_local $7)
+ (get_local $2)
)
)
- (i32.const 1)
+ (i32.const 2)
)
- (i32.const 2)
+ (i32.const 4)
)
)
)
- (tee_local $11
+ (tee_local $0
(i32.and
(i32.shr_u
- (tee_local $19
+ (tee_local $11
(i32.shr_u
- (get_local $11)
(get_local $0)
+ (get_local $7)
)
)
(i32.const 1)
)
- (i32.const 1)
+ (i32.const 2)
)
)
)
- (i32.shr_u
- (get_local $19)
- (get_local $11)
+ (tee_local $11
+ (i32.and
+ (i32.shr_u
+ (tee_local $19
+ (i32.shr_u
+ (get_local $11)
+ (get_local $0)
+ )
+ )
+ (i32.const 1)
+ )
+ (i32.const 1)
+ )
)
)
+ (i32.shr_u
+ (get_local $19)
+ (get_local $11)
+ )
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -538,16 +532,13 @@
(set_local $11
(i32.add
(i32.shl
- (i32.shl
- (tee_local $19
- (i32.shr_u
- (get_local $17)
- (i32.const 3)
- )
+ (tee_local $19
+ (i32.shr_u
+ (get_local $17)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -1258,16 +1249,13 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $7
- (i32.shr_u
- (get_local $1)
- (i32.const 3)
- )
+ (tee_local $7
+ (i32.shr_u
+ (get_local $1)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -2345,11 +2333,8 @@
(set_local $11
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -4369,11 +4354,8 @@
(tee_local $23
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -4534,11 +4516,8 @@
(set_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -5280,11 +5259,8 @@
(set_local $18
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -5759,11 +5735,8 @@
(tee_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $2)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -6121,11 +6094,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $7)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $7)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -7142,11 +7112,8 @@
(tee_local $7
(i32.add
(i32.shl
- (i32.shl
- (get_local $14)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $14)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -7297,11 +7264,8 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 216)
)
diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm
index 5ac7af9f6..71ce805ba 100644
--- a/test/emcc_hello_world.fromasm
+++ b/test/emcc_hello_world.fromasm
@@ -7967,22 +7967,19 @@
(tee_local $2
(i32.add
(i32.shl
- (i32.shl
- (tee_local $4
- (i32.add
- (i32.xor
- (i32.and
- (get_local $5)
- (i32.const 1)
- )
+ (tee_local $4
+ (i32.add
+ (i32.xor
+ (i32.and
+ (get_local $5)
(i32.const 1)
)
- (get_local $13)
+ (i32.const 1)
)
+ (get_local $13)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -8148,43 +8145,27 @@
(tee_local $10
(i32.add
(i32.shl
- (i32.shl
- (tee_local $5
- (i32.add
+ (tee_local $5
+ (i32.add
+ (i32.or
(i32.or
(i32.or
(i32.or
- (i32.or
- (tee_local $3
- (i32.and
- (i32.shr_u
- (tee_local $7
- (i32.shr_u
- (get_local $3)
- (get_local $10)
- )
- )
- (i32.const 5)
- )
- (i32.const 8)
- )
- )
- (get_local $10)
- )
(tee_local $3
(i32.and
(i32.shr_u
(tee_local $7
(i32.shr_u
- (get_local $7)
(get_local $3)
+ (get_local $10)
)
)
- (i32.const 2)
+ (i32.const 5)
)
- (i32.const 4)
+ (i32.const 8)
)
)
+ (get_local $10)
)
(tee_local $3
(i32.and
@@ -8195,9 +8176,9 @@
(get_local $3)
)
)
- (i32.const 1)
+ (i32.const 2)
)
- (i32.const 2)
+ (i32.const 4)
)
)
)
@@ -8212,19 +8193,32 @@
)
(i32.const 1)
)
- (i32.const 1)
+ (i32.const 2)
)
)
)
- (i32.shr_u
- (get_local $7)
- (get_local $3)
+ (tee_local $3
+ (i32.and
+ (i32.shr_u
+ (tee_local $7
+ (i32.shr_u
+ (get_local $7)
+ (get_local $3)
+ )
+ )
+ (i32.const 1)
+ )
+ (i32.const 1)
+ )
)
)
+ (i32.shr_u
+ (get_local $7)
+ (get_local $3)
+ )
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -8348,16 +8342,13 @@
(set_local $4
(i32.add
(i32.shl
- (i32.shl
- (tee_local $0
- (i32.shr_u
- (get_local $8)
- (i32.const 3)
- )
+ (tee_local $0
+ (i32.shr_u
+ (get_local $8)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -9059,16 +9050,13 @@
(set_local $2
(i32.add
(i32.shl
- (i32.shl
- (tee_local $0
- (i32.shr_u
- (get_local $0)
- (i32.const 3)
- )
+ (tee_local $0
+ (i32.shr_u
+ (get_local $0)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -10122,11 +10110,8 @@
(set_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -11665,11 +11650,8 @@
(tee_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -12157,11 +12139,8 @@
(set_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -12852,11 +12831,8 @@
(set_local $2
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -13308,11 +13284,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $2)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -13665,11 +13638,8 @@
(tee_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $5)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $5)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -14329,11 +14299,8 @@
(tee_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -14819,11 +14786,8 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 216)
)
diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise
index 096d105b8..58dc7b313 100644
--- a/test/emcc_hello_world.fromasm.imprecise
+++ b/test/emcc_hello_world.fromasm.imprecise
@@ -7904,22 +7904,19 @@
(tee_local $2
(i32.add
(i32.shl
- (i32.shl
- (tee_local $4
- (i32.add
- (i32.xor
- (i32.and
- (get_local $5)
- (i32.const 1)
- )
+ (tee_local $4
+ (i32.add
+ (i32.xor
+ (i32.and
+ (get_local $5)
(i32.const 1)
)
- (get_local $13)
+ (i32.const 1)
)
+ (get_local $13)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -8085,43 +8082,27 @@
(tee_local $10
(i32.add
(i32.shl
- (i32.shl
- (tee_local $5
- (i32.add
+ (tee_local $5
+ (i32.add
+ (i32.or
(i32.or
(i32.or
(i32.or
- (i32.or
- (tee_local $3
- (i32.and
- (i32.shr_u
- (tee_local $7
- (i32.shr_u
- (get_local $3)
- (get_local $10)
- )
- )
- (i32.const 5)
- )
- (i32.const 8)
- )
- )
- (get_local $10)
- )
(tee_local $3
(i32.and
(i32.shr_u
(tee_local $7
(i32.shr_u
- (get_local $7)
(get_local $3)
+ (get_local $10)
)
)
- (i32.const 2)
+ (i32.const 5)
)
- (i32.const 4)
+ (i32.const 8)
)
)
+ (get_local $10)
)
(tee_local $3
(i32.and
@@ -8132,9 +8113,9 @@
(get_local $3)
)
)
- (i32.const 1)
+ (i32.const 2)
)
- (i32.const 2)
+ (i32.const 4)
)
)
)
@@ -8149,19 +8130,32 @@
)
(i32.const 1)
)
- (i32.const 1)
+ (i32.const 2)
)
)
)
- (i32.shr_u
- (get_local $7)
- (get_local $3)
+ (tee_local $3
+ (i32.and
+ (i32.shr_u
+ (tee_local $7
+ (i32.shr_u
+ (get_local $7)
+ (get_local $3)
+ )
+ )
+ (i32.const 1)
+ )
+ (i32.const 1)
+ )
)
)
+ (i32.shr_u
+ (get_local $7)
+ (get_local $3)
+ )
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -8285,16 +8279,13 @@
(set_local $4
(i32.add
(i32.shl
- (i32.shl
- (tee_local $0
- (i32.shr_u
- (get_local $8)
- (i32.const 3)
- )
+ (tee_local $0
+ (i32.shr_u
+ (get_local $8)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -8996,16 +8987,13 @@
(set_local $2
(i32.add
(i32.shl
- (i32.shl
- (tee_local $0
- (i32.shr_u
- (get_local $0)
- (i32.const 3)
- )
+ (tee_local $0
+ (i32.shr_u
+ (get_local $0)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -10059,11 +10047,8 @@
(set_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -11602,11 +11587,8 @@
(tee_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -12094,11 +12076,8 @@
(set_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -12789,11 +12768,8 @@
(set_local $2
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -13245,11 +13221,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $2)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $2)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -13601,11 +13574,8 @@
(tee_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $5)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $5)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -14265,11 +14235,8 @@
(tee_local $0
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 216)
)
@@ -14755,11 +14722,8 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 216)
)
diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm
index b06a13f64..ef7dc1a78 100644
--- a/test/memorygrowth.fromasm
+++ b/test/memorygrowth.fromasm
@@ -170,22 +170,19 @@
(tee_local $8
(i32.add
(i32.shl
- (i32.shl
- (tee_local $0
- (i32.add
- (i32.xor
- (i32.and
- (get_local $5)
- (i32.const 1)
- )
+ (tee_local $0
+ (i32.add
+ (i32.xor
+ (i32.and
+ (get_local $5)
(i32.const 1)
)
- (get_local $0)
+ (i32.const 1)
)
+ (get_local $0)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -354,83 +351,80 @@
(tee_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $16
- (i32.add
+ (tee_local $16
+ (i32.add
+ (i32.or
(i32.or
(i32.or
(i32.or
- (i32.or
- (tee_local $6
- (i32.and
- (i32.shr_u
- (tee_local $7
- (i32.shr_u
- (get_local $6)
- (get_local $8)
- )
- )
- (i32.const 5)
- )
- (i32.const 8)
- )
- )
- (get_local $8)
- )
- (tee_local $7
+ (tee_local $6
(i32.and
(i32.shr_u
- (tee_local $9
+ (tee_local $7
(i32.shr_u
- (get_local $7)
(get_local $6)
+ (get_local $8)
)
)
- (i32.const 2)
+ (i32.const 5)
)
- (i32.const 4)
+ (i32.const 8)
)
)
+ (get_local $8)
)
- (tee_local $9
+ (tee_local $7
(i32.and
(i32.shr_u
- (tee_local $1
+ (tee_local $9
(i32.shr_u
- (get_local $9)
(get_local $7)
+ (get_local $6)
)
)
- (i32.const 1)
+ (i32.const 2)
)
- (i32.const 2)
+ (i32.const 4)
)
)
)
- (tee_local $1
+ (tee_local $9
(i32.and
(i32.shr_u
- (tee_local $12
+ (tee_local $1
(i32.shr_u
- (get_local $1)
(get_local $9)
+ (get_local $7)
)
)
(i32.const 1)
)
- (i32.const 1)
+ (i32.const 2)
)
)
)
- (i32.shr_u
- (get_local $12)
- (get_local $1)
+ (tee_local $1
+ (i32.and
+ (i32.shr_u
+ (tee_local $12
+ (i32.shr_u
+ (get_local $1)
+ (get_local $9)
+ )
+ )
+ (i32.const 1)
+ )
+ (i32.const 1)
+ )
)
)
+ (i32.shr_u
+ (get_local $12)
+ (get_local $1)
+ )
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -554,16 +548,13 @@
(set_local $4
(i32.add
(i32.shl
- (i32.shl
- (tee_local $14
- (i32.shr_u
- (get_local $34)
- (i32.const 3)
- )
+ (tee_local $14
+ (i32.shr_u
+ (get_local $34)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -1279,16 +1270,13 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $7
- (i32.shr_u
- (get_local $1)
- (i32.const 3)
- )
+ (tee_local $7
+ (i32.shr_u
+ (get_local $1)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -2411,11 +2399,8 @@
(set_local $5
(i32.add
(i32.shl
- (i32.shl
- (get_local $9)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $9)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -4094,11 +4079,8 @@
(tee_local $19
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -4601,11 +4583,8 @@
(set_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -5328,11 +5307,8 @@
(set_local $13
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -5807,11 +5783,8 @@
(tee_local $13
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -6175,11 +6148,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -6855,11 +6825,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $14)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $14)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -7351,11 +7318,8 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 1248)
)
diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise
index d0f3405d5..2a32f7528 100644
--- a/test/memorygrowth.fromasm.imprecise
+++ b/test/memorygrowth.fromasm.imprecise
@@ -169,22 +169,19 @@
(tee_local $8
(i32.add
(i32.shl
- (i32.shl
- (tee_local $0
- (i32.add
- (i32.xor
- (i32.and
- (get_local $5)
- (i32.const 1)
- )
+ (tee_local $0
+ (i32.add
+ (i32.xor
+ (i32.and
+ (get_local $5)
(i32.const 1)
)
- (get_local $0)
+ (i32.const 1)
)
+ (get_local $0)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -353,83 +350,80 @@
(tee_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $16
- (i32.add
+ (tee_local $16
+ (i32.add
+ (i32.or
(i32.or
(i32.or
(i32.or
- (i32.or
- (tee_local $6
- (i32.and
- (i32.shr_u
- (tee_local $7
- (i32.shr_u
- (get_local $6)
- (get_local $8)
- )
- )
- (i32.const 5)
- )
- (i32.const 8)
- )
- )
- (get_local $8)
- )
- (tee_local $7
+ (tee_local $6
(i32.and
(i32.shr_u
- (tee_local $9
+ (tee_local $7
(i32.shr_u
- (get_local $7)
(get_local $6)
+ (get_local $8)
)
)
- (i32.const 2)
+ (i32.const 5)
)
- (i32.const 4)
+ (i32.const 8)
)
)
+ (get_local $8)
)
- (tee_local $9
+ (tee_local $7
(i32.and
(i32.shr_u
- (tee_local $1
+ (tee_local $9
(i32.shr_u
- (get_local $9)
(get_local $7)
+ (get_local $6)
)
)
- (i32.const 1)
+ (i32.const 2)
)
- (i32.const 2)
+ (i32.const 4)
)
)
)
- (tee_local $1
+ (tee_local $9
(i32.and
(i32.shr_u
- (tee_local $12
+ (tee_local $1
(i32.shr_u
- (get_local $1)
(get_local $9)
+ (get_local $7)
)
)
(i32.const 1)
)
- (i32.const 1)
+ (i32.const 2)
)
)
)
- (i32.shr_u
- (get_local $12)
- (get_local $1)
+ (tee_local $1
+ (i32.and
+ (i32.shr_u
+ (tee_local $12
+ (i32.shr_u
+ (get_local $1)
+ (get_local $9)
+ )
+ )
+ (i32.const 1)
+ )
+ (i32.const 1)
+ )
)
)
+ (i32.shr_u
+ (get_local $12)
+ (get_local $1)
+ )
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -553,16 +547,13 @@
(set_local $4
(i32.add
(i32.shl
- (i32.shl
- (tee_local $14
- (i32.shr_u
- (get_local $34)
- (i32.const 3)
- )
+ (tee_local $14
+ (i32.shr_u
+ (get_local $34)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -1278,16 +1269,13 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (tee_local $7
- (i32.shr_u
- (get_local $1)
- (i32.const 3)
- )
+ (tee_local $7
+ (i32.shr_u
+ (get_local $1)
+ (i32.const 3)
)
- (i32.const 1)
)
- (i32.const 2)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -2410,11 +2398,8 @@
(set_local $5
(i32.add
(i32.shl
- (i32.shl
- (get_local $9)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $9)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -4093,11 +4078,8 @@
(tee_local $19
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -4600,11 +4582,8 @@
(set_local $3
(i32.add
(i32.shl
- (i32.shl
- (get_local $0)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $0)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -5327,11 +5306,8 @@
(set_local $13
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -5806,11 +5782,8 @@
(tee_local $13
(i32.add
(i32.shl
- (i32.shl
- (get_local $1)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $1)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -6174,11 +6147,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $3)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $3)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -6854,11 +6824,8 @@
(tee_local $4
(i32.add
(i32.shl
- (i32.shl
- (get_local $14)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $14)
+ (i32.const 3)
)
(i32.const 1248)
)
@@ -7350,11 +7317,8 @@
(set_local $1
(i32.add
(i32.shl
- (i32.shl
- (get_local $6)
- (i32.const 1)
- )
- (i32.const 2)
+ (get_local $6)
+ (i32.const 3)
)
(i32.const 1248)
)
diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt
index a11bd0d66..03f0f7074 100644
--- a/test/passes/optimize-instructions.txt
+++ b/test/passes/optimize-instructions.txt
@@ -704,11 +704,8 @@
(drop
(i32.shr_s
(i32.shl
- (i32.shl
- (i32.const 32)
- (i32.const 2)
- )
- (i32.const 24)
+ (i32.const 32)
+ (i32.const 26)
)
(i32.const 24)
)
@@ -722,11 +719,8 @@
(drop
(i32.shr_s
(i32.shl
- (i32.shl
- (i32.const 32)
- (i32.const 35)
- )
- (i32.const 24)
+ (i32.const 32)
+ (i32.const 59)
)
(i32.const 24)
)
@@ -828,13 +822,10 @@
(drop
(i32.shr_s
(i32.shl
- (i32.shl
- (i32.clz
- (i32.const 0)
- )
- (i32.const 3)
+ (i32.clz
+ (i32.const 0)
)
- (i32.const 24)
+ (i32.const 27)
)
(i32.const 24)
)
@@ -859,15 +850,12 @@
(drop
(i32.shr_s
(i32.shl
- (i32.shl
- (i32.wrap/i64
- (i64.clz
- (i64.const 0)
- )
+ (i32.wrap/i64
+ (i64.clz
+ (i64.const 0)
)
- (i32.const 2)
)
- (i32.const 24)
+ (i32.const 26)
)
(i32.const 24)
)
@@ -1018,4 +1006,77 @@
(get_local $0)
)
)
+ (func $almost-sign-ext (type $4) (param $0 i32) (param $0 i32)
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 100)
+ (i32.const 25)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shl
+ (i32.const 50)
+ (i32.const 1)
+ )
+ )
+ )
+ (func $squaring (type $4) (param $0 i32) (param $1 i32)
+ (drop
+ (i32.and
+ (get_local $0)
+ (i32.const 8)
+ )
+ )
+ (drop
+ (i32.and
+ (i32.and
+ (get_local $0)
+ (i32.const 11)
+ )
+ (get_local $0)
+ )
+ )
+ (drop
+ (i32.and
+ (get_local $0)
+ (i32.const 8)
+ )
+ )
+ (drop
+ (i32.or
+ (get_local $0)
+ (i32.const 203)
+ )
+ )
+ (drop
+ (i32.shl
+ (get_local $0)
+ (i32.const 211)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (get_local $0)
+ (i32.const 211)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (get_local $0)
+ (i32.const 211)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.shr_s
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ )
)
diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast
index b9bd420da..c59bb3ade 100644
--- a/test/passes/optimize-instructions.wast
+++ b/test/passes/optimize-instructions.wast
@@ -539,7 +539,7 @@
(get_local $0)
(i32.const 24)
)
- (i32.const 23) ;; different shift
+ (i32.const 23) ;; different shift, smaller
)
(i32.const 0)
)
@@ -1238,4 +1238,98 @@
)
)
)
+ (func $almost-sign-ext (param $0 i32) (param $0 i32)
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 100) ;; too big, there is a sign bit, due to the extra shift
+ (i32.const 25)
+ )
+ (i32.const 24) ;; different shift, but larger, so ok to opt if we leave a shift, in theory
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 50) ;; small enough, no sign bit
+ (i32.const 25)
+ )
+ (i32.const 24) ;; different shift, but larger, so ok to opt if we leave a shift
+ )
+ )
+ )
+ (func $squaring (param $0 i32) (param $1 i32)
+ (drop
+ (i32.and
+ (i32.and
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ (drop
+ (i32.and
+ (i32.and
+ (get_local $0)
+ (i32.const 11)
+ )
+ (get_local $0) ;; non-const, cannot optimize this!
+ )
+ )
+ (drop
+ (i32.and
+ (i32.and
+ (i32.const 11) ;; flipped order
+ (get_local $0)
+ )
+ (i32.const 200)
+ )
+ )
+ (drop
+ (i32.or
+ (i32.or
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ (drop
+ (i32.shl
+ (i32.shl
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shr_s
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.shr_u
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.shr_s ;; but do not optimize a mixture of different shifts!
+ (get_local $0)
+ (i32.const 11)
+ )
+ (i32.const 200)
+ )
+ )
+ )
)
diff --git a/test/unit.fromasm b/test/unit.fromasm
index 2a6c60735..585240a95 100644
--- a/test/unit.fromasm
+++ b/test/unit.fromasm
@@ -696,14 +696,11 @@
)
)
(call $loadSigned
- (i32.shr_s
- (i32.shl
- (i32.load16_u
- (get_local $0)
- )
- (i32.const 24)
+ (i32.shl
+ (i32.load16_s
+ (get_local $0)
)
- (i32.const 16)
+ (i32.const 8)
)
)
)
diff --git a/test/unit.fromasm.imprecise b/test/unit.fromasm.imprecise
index 10c1c025c..64fde3832 100644
--- a/test/unit.fromasm.imprecise
+++ b/test/unit.fromasm.imprecise
@@ -664,14 +664,11 @@
)
)
(call $loadSigned
- (i32.shr_s
- (i32.shl
- (i32.load16_u
- (get_local $0)
- )
- (i32.const 24)
+ (i32.shl
+ (i32.load16_s
+ (get_local $0)
)
- (i32.const 16)
+ (i32.const 8)
)
)
)