summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/passes/OptimizeInstructions.cpp 157
-rw-r--r-- test/emcc_hello_world.fromasm 118
-rw-r--r-- test/emcc_hello_world.fromasm.imprecise 118
-rw-r--r-- test/passes/optimize-instructions.txt 431
-rw-r--r-- test/passes/optimize-instructions.wast 584
-rw-r--r-- test/passes/precompute.txt 3
-rw-r--r-- test/passes/precompute.wast 6
7 files changed, 1256 insertions, 161 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 8f7bf63e7..c374a15ac 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -160,9 +160,115 @@ struct Match {
};
return ExpressionManipulator::flexibleCopy(pattern.output, wasm, copy);
}
+};
+// Utilities
-};
+// returns an upper bound on the number of low bits an integer expression can
+// use: every bit at or above the returned index is known to be zero.
+// not extremely precise (doesn't look into add operands, etc.); when nothing
+// better is known, the full width of the expression's type is returned.
+static Index getMaxBits(Expression* curr) {
+  if (auto* const_ = curr->dynCast<Const>()) {
+    switch (curr->type) {
+      case i32: return 32 - const_->value.countLeadingZeroes().geti32();
+      case i64: return 64 - const_->value.countLeadingZeroes().geti64();
+      default: WASM_UNREACHABLE();
+    }
+  } else if (auto* binary = curr->dynCast<Binary>()) {
+    switch (binary->op) {
+      // 32-bit
+      case AddInt32: case SubInt32: case MulInt32:
+      case DivSInt32: case DivUInt32: case RemSInt32:
+      case RemUInt32: case RotLInt32: case RotRInt32: return 32;
+      // and can only clear bits, so the narrower operand bounds the result
+      case AndInt32: return std::min(getMaxBits(binary->left), getMaxBits(binary->right));
+      // or and xor can set any bit that either operand may have set
+      // (e.g. 127 ^ 128 == 255), so the wider operand bounds the result
+      case OrInt32: case XorInt32: return std::max(getMaxBits(binary->left), getMaxBits(binary->right));
+      case ShlInt32: {
+        if (auto* shifts = binary->right->dynCast<Const>()) {
+          // wasm only uses the low 5 bits of an i32 shift amount; masking also
+          // keeps a negative constant from wrapping the unsigned addition
+          return std::min(Index(32), getMaxBits(binary->left) + Index(shifts->value.geti32() & 31));
+        }
+        return 32;
+      }
+      case ShrUInt32: {
+        if (auto* shift = binary->right->dynCast<Const>()) {
+          auto maxBits = getMaxBits(binary->left);
+          // use the effective (mod 32) shift amount; a shift of 35 is really a shift of 3
+          auto shifts = std::min(Index(shift->value.geti32() & 31), maxBits); // can ignore more shifts than zero us out
+          return maxBits - shifts;
+        }
+        return 32;
+      }
+      case ShrSInt32: {
+        if (auto* shift = binary->right->dynCast<Const>()) {
+          auto maxBits = getMaxBits(binary->left);
+          // the sign bit may be set, and a signed shift copies it downwards
+          if (maxBits == 32) return 32;
+          // use the effective (mod 32) shift amount, as for shr_u
+          auto shifts = std::min(Index(shift->value.geti32() & 31), maxBits); // can ignore more shifts than zero us out
+          return maxBits - shifts;
+        }
+        return 32;
+      }
+      // 64-bit TODO
+      // comparisons
+      case EqInt32: case NeInt32: case LtSInt32:
+      case LtUInt32: case LeSInt32: case LeUInt32:
+      case GtSInt32: case GtUInt32: case GeSInt32:
+      case GeUInt32:
+      case EqInt64: case NeInt64: case LtSInt64:
+      case LtUInt64: case LeSInt64: case LeUInt64:
+      case GtSInt64: case GtUInt64: case GeSInt64:
+      case GeUInt64:
+      case EqFloat32: case NeFloat32:
+      case LtFloat32: case LeFloat32: case GtFloat32: case GeFloat32:
+      case EqFloat64: case NeFloat64:
+      case LtFloat64: case LeFloat64: case GtFloat64: case GeFloat64: return 1;
+      default: {}
+    }
+  } else if (auto* unary = curr->dynCast<Unary>()) {
+    switch (unary->op) {
+      // these can return the full bit width itself (e.g. clz(0) == 32), and
+      // representing 32 takes 6 bits, 64 takes 7
+      case ClzInt32: case CtzInt32: case PopcntInt32: return 6;
+      case ClzInt64: case CtzInt64: case PopcntInt64: return 7;
+      case EqZInt32: case EqZInt64: return 1;
+      case WrapInt64: return std::min(Index(32), getMaxBits(unary->value));
+      default: {}
+    }
+  }
+  // nothing more precise is known, fall back to the width of the type
+  switch (curr->type) {
+    case i32: return 32;
+    case i64: return 64;
+    case unreachable: return 64; // not interesting, but don't crash
+    default: WASM_UNREACHABLE();
+  }
+}
+
+// Recognizes the sign-extend idiom (x << C) >>_s C, with the same constant C
+// on both shifts. Returns x if curr has that shape, otherwise nullptr.
+static Expression* getSignExt(Expression* curr) {
+  // the outer operation must be a signed shift right by a constant
+  auto* shr = curr->dynCast<Binary>();
+  if (!shr || shr->op != ShrSInt32) return nullptr;
+  auto* shrAmount = shr->right->dynCast<Const>();
+  if (!shrAmount) return nullptr;
+  // and what it shifts must be a shift left by a constant
+  auto* shl = shr->left->dynCast<Binary>();
+  if (!shl || shl->op != ShlInt32) return nullptr;
+  auto* shlAmount = shl->right->dynCast<Const>();
+  if (!shlAmount) return nullptr;
+  // both shifts must be by the same amount for this to be a sign-extend
+  return shrAmount->value == shlAmount->value ? shl->left : nullptr;
+}
+
+// gets the size, in bits, of the sign-extended value. curr must be a binary
+// whose right operand is an i32 constant shift amount (the shape that
+// getSignExt matches); the casts assert otherwise.
+static Index getSignExtBits(Expression* curr) {
+  // wasm only uses the low 5 bits of an i32 shift amount, so reduce the
+  // constant to its effective value first; without that, constants outside
+  // 0..31 would give 0 or wrap around to a huge unsigned size
+  return 32 - Index(curr->cast<Binary>()->right->cast<Const>()->value.geti32() & 31);
+}
+
+// get a mask to keep only the low # of bits.
+// bits >= 32 keeps everything (all ones), bits <= 0 keeps nothing (zero).
+static int32_t lowBitMask(int32_t bits) {
+  uint32_t ret = -1;
+  if (bits >= 32) return ret;
+  // shifting a 32-bit value by 32 or more is undefined, so handle the empty
+  // mask explicitly
+  if (bits <= 0) return 0;
+  return ret >> (32 - bits);
+}
// Main pass class
struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, UnifiedExpressionVisitor<OptimizeInstructions>>> {
@@ -215,32 +321,42 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
std::swap(binary->left, binary->right);
}
}
- // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc.
- if (binary->op == BinaryOp::ShrSInt32 && binary->right->is<Const>()) {
- auto shifts = binary->right->cast<Const>()->value.geti32();
- if (shifts == 24 || shifts == 16) {
- auto* left = binary->left->dynCast<Binary>();
- if (left && left->op == ShlInt32 && left->right->is<Const>() && left->right->cast<Const>()->value.geti32() == shifts) {
- auto* load = left->left->dynCast<Load>();
- if (load && ((load->bytes == 1 && shifts == 24) || (load->bytes == 2 && shifts == 16))) {
- load->signed_ = true;
- return load;
- }
- }
+ if (auto* ext = getSignExt(binary)) {
+ auto bits = getSignExtBits(binary);
+ auto* load = ext->dynCast<Load>();
+ // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc.
+ if (load && ((load->bytes == 1 && bits == 8) || (load->bytes == 2 && bits == 16))) {
+ load->signed_ = true;
+ return load;
+ }
+ // if the sign-extend input cannot have a sign bit, we don't need it
+ if (getMaxBits(ext) < bits) {
+ return ext;
}
} else if (binary->op == EqInt32) {
if (auto* c = binary->right->dynCast<Const>()) {
+ if (auto* ext = getSignExt(binary->left)) {
+ // we are comparing a sign extend to a constant, which means we can use a cheaper zext
+ auto bits = getSignExtBits(binary->left);
+ binary->left = makeZeroExt(ext, bits);
+ // the const we compare to only needs the relevant bits
+ c->value = c->value.and_(Literal(lowBitMask(bits)));
+ return binary;
+ }
if (c->value.geti32() == 0) {
// equal 0 => eqz
return Builder(*getModule()).makeUnary(EqZInt32, binary->left);
}
- }
- if (auto* c = binary->left->dynCast<Const>()) {
- if (c->value.geti32() == 0) {
- // equal 0 => eqz
- return Builder(*getModule()).makeUnary(EqZInt32, binary->right);
+ } else if (auto* left = getSignExt(binary->left)) {
+ if (auto* right = getSignExt(binary->right)) {
+ // we are comparing two sign-exts, so we may as well replace both with cheaper zexts
+ auto bits = getSignExtBits(binary->left);
+ binary->left = makeZeroExt(left, bits);
+ binary->right = makeZeroExt(right, bits);
+ return binary;
}
}
+ // note that both left and right may be consts, but then we let precompute compute the constant result
} else if (binary->op == AndInt32) {
if (auto* right = binary->right->dynCast<Const>()) {
if (right->type == i32) {
@@ -454,6 +570,11 @@ private:
offset = 0;
}
}
+
+ // Builds the zero-extend of curr's low `bits` bits, expressed as an i32.and
+ // of curr with a constant mask from lowBitMask(bits).
+ Expression* makeZeroExt(Expression* curr, int32_t bits) {
+   Builder builder(*getModule());
+   auto* mask = builder.makeConst(Literal(lowBitMask(bits)));
+   return builder.makeBinary(AndInt32, curr, mask);
+ }
};
Pass *createOptimizeInstructionsPass() {
diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm
index 87cf3f385..b3d40099f 100644
--- a/test/emcc_hello_world.fromasm
+++ b/test/emcc_hello_world.fromasm
@@ -328,7 +328,7 @@
)
)
(if
- (i32.load8_s
+ (i32.load8_u
(get_local $0)
)
(block
@@ -1471,7 +1471,7 @@
)
(if
(i32.ne
- (i32.load8_s
+ (i32.load8_u
(i32.add
(get_local $0)
(tee_local $6
@@ -1867,15 +1867,12 @@
(loop $while-in
(if
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $2)
)
- (i32.shr_s
- (i32.shl
- (get_local $4)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $4)
+ (i32.const 255)
)
)
(block
@@ -1944,20 +1941,17 @@
)
(if
(i32.ne
- (i32.load8_s
+ (i32.load8_u
(get_local $2)
)
- (i32.shr_s
- (i32.shl
- (tee_local $1
- (i32.and
- (get_local $1)
- (i32.const 255)
- )
+ (i32.and
+ (tee_local $1
+ (i32.and
+ (get_local $1)
+ (i32.const 255)
)
- (i32.const 24)
)
- (i32.const 24)
+ (i32.const 255)
)
)
(block
@@ -2038,15 +2032,12 @@
(loop $while-in5
(br_if $label$break$L8
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $2)
)
- (i32.shr_s
- (i32.shl
- (get_local $1)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $1)
+ (i32.const 255)
)
)
)
@@ -2513,16 +2504,13 @@
)
(br_if $__rjti$9
(i32.eqz
- (i32.shr_s
- (i32.shl
- (tee_local $7
- (i32.load8_s
- (get_local $5)
- )
+ (i32.and
+ (tee_local $7
+ (i32.load8_s
+ (get_local $5)
)
- (i32.const 24)
)
- (i32.const 24)
+ (i32.const 255)
)
)
)
@@ -2577,7 +2565,7 @@
(loop $while-in
(br_if $label$break$L12
(i32.ne
- (i32.load8_s offset=1
+ (i32.load8_u offset=1
(get_local $6)
)
(i32.const 37)
@@ -2591,7 +2579,7 @@
)
(br_if $while-in
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(tee_local $6
(i32.add
(get_local $6)
@@ -2683,7 +2671,7 @@
(get_local $10)
(tee_local $11
(i32.eq
- (i32.load8_s offset=2
+ (i32.load8_u offset=2
(get_local $6)
)
(i32.const 36)
@@ -2828,12 +2816,9 @@
(block $do-once5
(if
(i32.eq
- (i32.shr_s
- (i32.shl
- (get_local $6)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $6)
+ (i32.const 255)
)
(i32.const 42)
)
@@ -2861,7 +2846,7 @@
)
(br_if $__rjti$0
(i32.ne
- (i32.load8_s offset=2
+ (i32.load8_u offset=2
(get_local $10)
)
(i32.const 36)
@@ -3093,7 +3078,7 @@
(set_local $6
(if i32
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $10)
)
(i32.const 46)
@@ -3101,21 +3086,18 @@
(block i32
(if
(i32.ne
- (i32.shr_s
- (i32.shl
- (tee_local $8
- (i32.load8_s
- (tee_local $6
- (i32.add
- (get_local $10)
- (i32.const 1)
- )
+ (i32.and
+ (tee_local $8
+ (i32.load8_s
+ (tee_local $6
+ (i32.add
+ (get_local $10)
+ (i32.const 1)
)
)
)
- (i32.const 24)
)
- (i32.const 24)
+ (i32.const 255)
)
(i32.const 42)
)
@@ -3214,7 +3196,7 @@
)
(if
(i32.eq
- (i32.load8_s offset=3
+ (i32.load8_u offset=3
(get_local $10)
)
(i32.const 36)
@@ -3384,12 +3366,9 @@
)
(if
(i32.eqz
- (i32.shr_s
- (i32.shl
- (get_local $13)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $13)
+ (i32.const 255)
)
)
(block
@@ -3409,12 +3388,9 @@
(block $__rjti$2
(if
(i32.eq
- (i32.shr_s
- (i32.shl
- (get_local $13)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $13)
+ (i32.const 255)
)
(i32.const 19)
)
@@ -4249,7 +4225,7 @@
)
(if f64
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $9)
)
(i32.const 45)
@@ -5312,7 +5288,7 @@
(block
(br_if $do-once83
(i32.ne
- (i32.load8_s
+ (i32.load8_u
(get_local $31)
)
(i32.const 45)
diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise
index 2cd23ba1d..2e35d2a56 100644
--- a/test/emcc_hello_world.fromasm.imprecise
+++ b/test/emcc_hello_world.fromasm.imprecise
@@ -325,7 +325,7 @@
)
)
(if
- (i32.load8_s
+ (i32.load8_u
(get_local $0)
)
(block
@@ -1468,7 +1468,7 @@
)
(if
(i32.ne
- (i32.load8_s
+ (i32.load8_u
(i32.add
(get_local $0)
(tee_local $6
@@ -1864,15 +1864,12 @@
(loop $while-in
(if
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $2)
)
- (i32.shr_s
- (i32.shl
- (get_local $4)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $4)
+ (i32.const 255)
)
)
(block
@@ -1941,20 +1938,17 @@
)
(if
(i32.ne
- (i32.load8_s
+ (i32.load8_u
(get_local $2)
)
- (i32.shr_s
- (i32.shl
- (tee_local $1
- (i32.and
- (get_local $1)
- (i32.const 255)
- )
+ (i32.and
+ (tee_local $1
+ (i32.and
+ (get_local $1)
+ (i32.const 255)
)
- (i32.const 24)
)
- (i32.const 24)
+ (i32.const 255)
)
)
(block
@@ -2035,15 +2029,12 @@
(loop $while-in5
(br_if $label$break$L8
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $2)
)
- (i32.shr_s
- (i32.shl
- (get_local $1)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $1)
+ (i32.const 255)
)
)
)
@@ -2456,16 +2447,13 @@
)
(br_if $__rjti$9
(i32.eqz
- (i32.shr_s
- (i32.shl
- (tee_local $7
- (i32.load8_s
- (get_local $5)
- )
+ (i32.and
+ (tee_local $7
+ (i32.load8_s
+ (get_local $5)
)
- (i32.const 24)
)
- (i32.const 24)
+ (i32.const 255)
)
)
)
@@ -2520,7 +2508,7 @@
(loop $while-in
(br_if $label$break$L12
(i32.ne
- (i32.load8_s offset=1
+ (i32.load8_u offset=1
(get_local $6)
)
(i32.const 37)
@@ -2534,7 +2522,7 @@
)
(br_if $while-in
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(tee_local $6
(i32.add
(get_local $6)
@@ -2626,7 +2614,7 @@
(get_local $10)
(tee_local $11
(i32.eq
- (i32.load8_s offset=2
+ (i32.load8_u offset=2
(get_local $6)
)
(i32.const 36)
@@ -2771,12 +2759,9 @@
(block $do-once5
(if
(i32.eq
- (i32.shr_s
- (i32.shl
- (get_local $6)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $6)
+ (i32.const 255)
)
(i32.const 42)
)
@@ -2804,7 +2789,7 @@
)
(br_if $__rjti$0
(i32.ne
- (i32.load8_s offset=2
+ (i32.load8_u offset=2
(get_local $10)
)
(i32.const 36)
@@ -3036,7 +3021,7 @@
(set_local $6
(if i32
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $10)
)
(i32.const 46)
@@ -3044,21 +3029,18 @@
(block i32
(if
(i32.ne
- (i32.shr_s
- (i32.shl
- (tee_local $8
- (i32.load8_s
- (tee_local $6
- (i32.add
- (get_local $10)
- (i32.const 1)
- )
+ (i32.and
+ (tee_local $8
+ (i32.load8_s
+ (tee_local $6
+ (i32.add
+ (get_local $10)
+ (i32.const 1)
)
)
)
- (i32.const 24)
)
- (i32.const 24)
+ (i32.const 255)
)
(i32.const 42)
)
@@ -3157,7 +3139,7 @@
)
(if
(i32.eq
- (i32.load8_s offset=3
+ (i32.load8_u offset=3
(get_local $10)
)
(i32.const 36)
@@ -3327,12 +3309,9 @@
)
(if
(i32.eqz
- (i32.shr_s
- (i32.shl
- (get_local $13)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $13)
+ (i32.const 255)
)
)
(block
@@ -3352,12 +3331,9 @@
(block $__rjti$2
(if
(i32.eq
- (i32.shr_s
- (i32.shl
- (get_local $13)
- (i32.const 24)
- )
- (i32.const 24)
+ (i32.and
+ (get_local $13)
+ (i32.const 255)
)
(i32.const 19)
)
@@ -4210,7 +4186,7 @@
(get_local $15)
)
(i32.eq
- (i32.load8_s
+ (i32.load8_u
(get_local $9)
)
(i32.const 45)
@@ -5249,7 +5225,7 @@
(block
(br_if $do-once83
(i32.ne
- (i32.load8_s
+ (i32.load8_u
(get_local $31)
)
(i32.const 45)
diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt
index f959c3cd6..14f29a783 100644
--- a/test/passes/optimize-instructions.txt
+++ b/test/passes/optimize-instructions.txt
@@ -3,6 +3,7 @@
(type $1 (func))
(type $2 (func (result i32)))
(type $3 (func (param i32) (result i32)))
+ (type $4 (func (param i32 i32)))
(memory $0 0)
(export "load-off-2" (func $load-off-2))
(func $f (type $0) (param $i1 i32) (param $i2 i64)
@@ -176,7 +177,8 @@
)
)
(drop
- (i32.eqz
+ (i32.eq
+ (i32.const 0)
(i32.const 100)
)
)
@@ -515,4 +517,431 @@
)
)
)
+ (func $sign-ext (type $4) (param $0 i32) (param $1 i32)
+ (drop
+ (i32.eqz
+ (i32.and
+ (get_local $0)
+ (i32.const 255)
+ )
+ )
+ )
+ (drop
+ (i32.eqz
+ (i32.and
+ (get_local $0)
+ (i32.const 65535)
+ )
+ )
+ )
+ (drop
+ (i32.eqz
+ (i32.and
+ (get_local $0)
+ (i32.const 134217727)
+ )
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.and
+ (get_local $0)
+ (i32.const 255)
+ )
+ (i32.const 100)
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.and
+ (get_local $0)
+ (i32.const 255)
+ )
+ (i32.const 255)
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.and
+ (get_local $0)
+ (i32.const 255)
+ )
+ (i32.and
+ (get_local $1)
+ (i32.const 255)
+ )
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.and
+ (get_local $0)
+ (i32.const 65535)
+ )
+ (i32.and
+ (get_local $1)
+ (i32.const 65535)
+ )
+ )
+ )
+ (drop
+ (i32.eqz
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 23)
+ )
+ )
+ )
+ (drop
+ (i32.eqz
+ (i32.shr_u
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ )
+ (drop
+ (i32.lt_s
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 0)
+ )
+ )
+ )
+ (func $sign-ext-input (type $4) (param $0 i32) (param $1 i32)
+ (drop
+ (i32.const 100)
+ )
+ (drop
+ (i32.const 127)
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 128)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (unreachable)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.div_s
+ (i32.const 1)
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.and
+ (i32.const 127)
+ (i32.const 128)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.and
+ (i32.const 128)
+ (i32.const 129)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.xor
+ (i32.const 127)
+ (i32.const 128)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.xor
+ (i32.const 128)
+ (i32.const 129)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.or
+ (i32.const 127)
+ (i32.const 126)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.or
+ (i32.const 127)
+ (i32.const 128)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.const 32)
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shl
+ (i32.const 32)
+ (i32.const 1)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.const 32)
+ (i32.const 35)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.const 256)
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.const 256)
+ (i32.const 2)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.const 128)
+ (i32.const 35)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s
+ (i32.const 256)
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.const 256)
+ (i32.const 2)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.const 128)
+ (i32.const 35)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s
+ (i32.const -1)
+ (i32.const 32)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.and
+ (i32.const -1)
+ (i32.const 2147483647)
+ )
+ (i32.const 32)
+ )
+ )
+ (drop
+ (i32.ne
+ (i32.const -1)
+ (i32.const -1)
+ )
+ )
+ (drop
+ (f32.le
+ (f32.const -1)
+ (f32.const -1)
+ )
+ )
+ (drop
+ (i32.clz
+ (i32.const 0)
+ )
+ )
+ (drop
+ (i32.shl
+ (i32.clz
+ (i32.const 0)
+ )
+ (i32.const 2)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.clz
+ (i32.const 0)
+ )
+ (i32.const 3)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.wrap/i64
+ (i64.clz
+ (i64.const 0)
+ )
+ )
+ )
+ (drop
+ (i32.shl
+ (i32.wrap/i64
+ (i64.clz
+ (i64.const 0)
+ )
+ )
+ (i32.const 1)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.wrap/i64
+ (i64.clz
+ (i64.const 0)
+ )
+ )
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.eqz
+ (i32.const -1)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.wrap/i64
+ (i64.const -1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.wrap/i64
+ (i64.const -1)
+ )
+ (i32.const 25)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.wrap/i64
+ (i64.extend_s/i32
+ (i32.const -1)
+ )
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_u
+ (i32.wrap/i64
+ (i64.extend_s/i32
+ (i32.const -1)
+ )
+ )
+ (i32.const 25)
+ )
+ )
+ )
)
diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast
index 41ca48951..8eec3f3cd 100644
--- a/test/passes/optimize-instructions.wast
+++ b/test/passes/optimize-instructions.wast
@@ -192,6 +192,8 @@
)
)
)
+ ;; we handle only 0 in the right position, as we assume a const is there, and don't care about if
+ ;; both are consts here (precompute does that, so no need)
(drop
(i32.eq
(i32.const 100)
@@ -430,4 +432,586 @@
)
)
)
+ (func $sign-ext (param $0 i32) (param $1 i32)
+ ;; eq of sign-ext to const, can be a zext
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 0)
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ (i32.const 0)
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 5) ;; weird size, but still valid
+ )
+ (i32.const 5)
+ )
+ (i32.const 0)
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 100) ;; non-zero
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 32767) ;; non-zero and bigger than the mask
+ )
+ )
+ ;; eq of two sign-ext, can both be a zext
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.shr_s
+ (i32.shl
+ (get_local $1)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ (i32.shr_s
+ (i32.shl
+ (get_local $1)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ )
+ )
+ ;; corner cases we should not opt
+ (drop
+ (i32.eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 23) ;; different shift
+ )
+ (i32.const 0)
+ )
+ )
+ (drop
+ (i32.eq
+ (i32.shr_u ;; unsigned
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 0)
+ )
+ )
+ (drop
+ (i32.lt_s ;; non-eq
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ (i32.const 0)
+ )
+ )
+ )
+ (func $sign-ext-input (param $0 i32) (param $1 i32)
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 100) ;; small!
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 127) ;; just small enough
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.const 128) ;; just too big
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get-local $0) ;; who knows...
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (unreachable) ;; ignore
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.div_s ;; this could be optimizable in theory, but currently we don't look into adds etc.
+ (i32.const 1)
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.and ;; takes the min, here it is ok
+ (i32.const 127)
+ (i32.const 128)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.and ;; takes the min, here it is not
+ (i32.const 128)
+ (i32.const 129)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.xor ;; takes the min, here it is ok
+ (i32.const 127)
+ (i32.const 128)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.xor ;; takes the min, here it is not
+ (i32.const 128)
+ (i32.const 129)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.or ;; takes the max, here it is ok
+ (i32.const 127)
+ (i32.const 126)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.or ;; takes the max, here it is not
+ (i32.const 127)
+ (i32.const 128)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl ;; adds, here it is too much
+ (i32.const 32)
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl ;; adds, here it is ok
+ (i32.const 32)
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl ;; adds, here it is too much and "overflows"
+ (i32.const 32)
+ (i32.const 35)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u ;; subtracts, here it is still too much
+ (i32.const 256)
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u ;; subtracts, here it is ok
+ (i32.const 256)
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u ;; subtracts, here it "overflows"
+ (i32.const 128)
+ (i32.const 35)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s ;; subtracts, here it is still too much
+ (i32.const 256)
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s ;; subtracts, here it is ok
+ (i32.const 256)
+ (i32.const 2)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s ;; subtracts, here it "overflows"
+ (i32.const 128)
+ (i32.const 35)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s ;; subtracts, here there is a sign bit, so it stays 32 bits no matter how much we shift
+ (i32.const -1)
+ (i32.const 32)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_s ;; subtracts, here we mask out that sign bit
+ (i32.and
+ (i32.const -1)
+ (i32.const 2147483647)
+ )
+ (i32.const 32)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.ne ;; 1 bit
+ (i32.const -1)
+ (i32.const -1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (f32.le
+ (f32.const -1)
+ (f32.const -1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.clz ;; assumed 5 bits
+ (i32.const 0)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.clz ;; assumed 5 bits
+ (i32.const 0)
+ )
+ (i32.const 2) ;; + 2, so 7
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.clz ;; assumed 5 bits
+ (i32.const 0)
+ )
+ (i32.const 3) ;; + 3, so 8, too much
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.wrap/i64 ;; preserves 6
+ (i64.clz ;; assumed 6 bits
+ (i64.const 0)
+ )
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.wrap/i64 ;; preserves 6
+ (i64.clz ;; assumed 6 bits
+ (i64.const 0)
+ )
+ )
+ (i32.const 1) ;; + 1, so 7
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shl
+ (i32.wrap/i64 ;; preserves 6
+ (i64.clz ;; assumed 6 bits
+ (i64.const 0)
+ )
+ )
+ (i32.const 2) ;; + 2, so 8, too much
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.eqz ;; 1 bit
+ (i32.const -1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.wrap/i64 ;; down to 32
+ (i64.const -1) ;; 64
+ )
+ (i32.const 24) ;; 32 - 24 = 8
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.wrap/i64 ;; down to 32
+ (i64.const -1) ;; 64
+ )
+ (i32.const 25) ;; 32 - 25 = 7, ok
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.wrap/i64 ;; stay 32
+ (i64.extend_s/i32
+ (i32.const -1)
+ )
+ )
+ (i32.const 24) ;; 32 - 24 = 8
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.wrap/i64 ;; stay 32
+ (i64.extend_s/i32
+ (i32.const -1)
+ )
+ )
+ (i32.const 25) ;; 32 - 25 = 7, ok
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ )
)
diff --git a/test/passes/precompute.txt b/test/passes/precompute.txt
index e7fc6526f..af44d18d8 100644
--- a/test/passes/precompute.txt
+++ b/test/passes/precompute.txt
@@ -4,6 +4,9 @@
(type $2 (func))
(memory $0 0)
(func $x (type $0) (param $x i32)
+ (call $x
+ (i32.const 2300)
+ )
(nop)
(drop
(i32.add
diff --git a/test/passes/precompute.wast b/test/passes/precompute.wast
index b498d9df0..d5e91fb9d 100644
--- a/test/passes/precompute.wast
+++ b/test/passes/precompute.wast
@@ -2,6 +2,12 @@
(memory 0)
(type $0 (func (param i32)))
(func $x (type $0) (param $x i32)
+ (call $x
+ (i32.add
+ (i32.const 100)
+ (i32.const 2200)
+ )
+ )
(drop
(i32.add
(i32.const 1)