diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 157 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 118 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 118 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 431 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 584 | ||||
-rw-r--r-- | test/passes/precompute.txt | 3 | ||||
-rw-r--r-- | test/passes/precompute.wast | 6 |
7 files changed, 1256 insertions, 161 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 8f7bf63e7..c374a15ac 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -160,9 +160,115 @@ struct Match { }; return ExpressionManipulator::flexibleCopy(pattern.output, wasm, copy); } +}; +// Utilities -}; +// returns the maximum amount of bits used in an integer expression +// not extremely precise (doesn't look into add operands, etc.) +static Index getMaxBits(Expression* curr) { + if (auto* const_ = curr->dynCast<Const>()) { + switch (curr->type) { + case i32: return 32 - const_->value.countLeadingZeroes().geti32(); + case i64: return 64 - const_->value.countLeadingZeroes().geti64(); + default: WASM_UNREACHABLE(); + } + } else if (auto* binary = curr->dynCast<Binary>()) { + switch (binary->op) { + // 32-bit + case AddInt32: case SubInt32: case MulInt32: + case DivSInt32: case DivUInt32: case RemSInt32: + case RemUInt32: case RotLInt32: case RotRInt32: return 32; + case AndInt32: case XorInt32: return std::min(getMaxBits(binary->left), getMaxBits(binary->right)); + case OrInt32: return std::max(getMaxBits(binary->left), getMaxBits(binary->right)); + case ShlInt32: { + if (auto* shifts = binary->right->dynCast<Const>()) { + return std::min(Index(32), getMaxBits(binary->left) + shifts->value.geti32()); + } + return 32; + } + case ShrUInt32: { + if (auto* shift = binary->right->dynCast<Const>()) { + auto maxBits = getMaxBits(binary->left); + auto shifts = std::min(Index(shift->value.geti32()), maxBits); // can ignore more shifts than zero us out + return std::max(Index(0), maxBits - shifts); + } + return 32; + } + case ShrSInt32: { + if (auto* shift = binary->right->dynCast<Const>()) { + auto maxBits = getMaxBits(binary->left); + if (maxBits == 32) return 32; + auto shifts = std::min(Index(shift->value.geti32()), maxBits); // can ignore more shifts than zero us out + return std::max(Index(0), maxBits - shifts); + } + return 32; + } + // 64-bit TODO + // comparisons + case EqInt32: case NeInt32: case LtSInt32: + case LtUInt32: case LeSInt32: case LeUInt32: + case GtSInt32: case GtUInt32: case GeSInt32: + case GeUInt32: + case EqInt64: case NeInt64: case LtSInt64: + case LtUInt64: case LeSInt64: case LeUInt64: + case GtSInt64: case GtUInt64: case GeSInt64: + case GeUInt64: + case EqFloat32: case NeFloat32: + case LtFloat32: case LeFloat32: case GtFloat32: case GeFloat32: + case EqFloat64: case NeFloat64: + case LtFloat64: case LeFloat64: case GtFloat64: case GeFloat64: return 1; + default: {} + } + } else if (auto* unary = curr->dynCast<Unary>()) { + switch (unary->op) { + case ClzInt32: case CtzInt32: case PopcntInt32: return 5; + case ClzInt64: case CtzInt64: case PopcntInt64: return 6; + case EqZInt32: case EqZInt64: return 1; + case WrapInt64: return std::min(Index(32), getMaxBits(unary->value)); + default: {} + } + } + switch (curr->type) { + case i32: return 32; + case i64: return 64; + case unreachable: return 64; // not interesting, but don't crash + default: WASM_UNREACHABLE(); + } +} + +// Check if an expression is a sign-extend, and if so, returns the value +// that is extended, otherwise nullptr +static Expression* getSignExt(Expression* curr) { + if (auto* outer = curr->dynCast<Binary>()) { + if (outer->op == ShrSInt32) { + if (auto* outerConst = outer->right->dynCast<Const>()) { + if (auto* inner = outer->left->dynCast<Binary>()) { + if (inner->op == ShlInt32) { + if (auto* innerConst = inner->right->dynCast<Const>()) { + if (outerConst->value == innerConst->value) { + return inner->left; + } + } + } + } + } + } + } + return nullptr; +} + +// gets the size of the sign-extended value +static Index getSignExtBits(Expression* curr) { + return 32 - curr->cast<Binary>()->right->cast<Const>()->value.geti32(); +} + +// get a mask to keep only the low # of bits +static int32_t lowBitMask(int32_t bits) { + uint32_t ret = -1; + if (bits >= 32) return ret; + return ret >> (32 - bits); +} // Main pass class struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, UnifiedExpressionVisitor<OptimizeInstructions>>> { @@ -215,32 +321,42 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, std::swap(binary->left, binary->right); } } - // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc. - if (binary->op == BinaryOp::ShrSInt32 && binary->right->is<Const>()) { - auto shifts = binary->right->cast<Const>()->value.geti32(); - if (shifts == 24 || shifts == 16) { - auto* left = binary->left->dynCast<Binary>(); - if (left && left->op == ShlInt32 && left->right->is<Const>() && left->right->cast<Const>()->value.geti32() == shifts) { - auto* load = left->left->dynCast<Load>(); - if (load && ((load->bytes == 1 && shifts == 24) || (load->bytes == 2 && shifts == 16))) { - load->signed_ = true; - return load; - } - } + if (auto* ext = getSignExt(binary)) { + auto bits = getSignExtBits(binary); + auto* load = ext->dynCast<Load>(); + // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc. + if (load && ((load->bytes == 1 && bits == 8) || (load->bytes == 2 && bits == 16))) { + load->signed_ = true; + return load; + } + // if the sign-extend input cannot have a sign bit, we don't need it + if (getMaxBits(ext) < bits) { + return ext; } } else if (binary->op == EqInt32) { if (auto* c = binary->right->dynCast<Const>()) { + if (auto* ext = getSignExt(binary->left)) { + // we are comparing a sign extend to a constant, which means we can use a cheaper zext + auto bits = getSignExtBits(binary->left); + binary->left = makeZeroExt(ext, bits); + // the const we compare to only needs the relevant bits + c->value = c->value.and_(Literal(lowBitMask(bits))); + return binary; + } if (c->value.geti32() == 0) { // equal 0 => eqz return Builder(*getModule()).makeUnary(EqZInt32, binary->left); } - } - if (auto* c = binary->left->dynCast<Const>()) { - if (c->value.geti32() == 0) { - // equal 0 => eqz - return Builder(*getModule()).makeUnary(EqZInt32, binary->right); + } else if (auto* left = getSignExt(binary->left)) { + if (auto* right = getSignExt(binary->right)) { + // we are comparing two sign-exts, so we may as well replace both with cheaper zexts + auto bits = getSignExtBits(binary->left); + binary->left = makeZeroExt(left, bits); + binary->right = makeZeroExt(right, bits); + return binary; } } + // note that both left and right may be consts, but then we let precompute compute the constant result } else if (binary->op == AndInt32) { if (auto* right = binary->right->dynCast<Const>()) { if (right->type == i32) { @@ -454,6 +570,11 @@ private: offset = 0; } } + + Expression* makeZeroExt(Expression* curr, int32_t bits) { + Builder builder(*getModule()); + return builder.makeBinary(AndInt32, curr, builder.makeConst(Literal(lowBitMask(bits)))); + } }; Pass *createOptimizeInstructionsPass() { diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index 87cf3f385..b3d40099f 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -328,7 +328,7 @@ ) ) (if - (i32.load8_s + (i32.load8_u (get_local $0) ) (block @@ -1471,7 +1471,7 @@ ) (if (i32.ne - (i32.load8_s + (i32.load8_u (i32.add (get_local $0) (tee_local $6 @@ -1867,15 +1867,12 @@ (loop $while-in (if (i32.eq - (i32.load8_s + (i32.load8_u (get_local $2) ) - (i32.shr_s - (i32.shl - (get_local $4) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $4) + (i32.const 255) ) ) (block @@ -1944,20 +1941,17 @@ ) (if (i32.ne - (i32.load8_s + (i32.load8_u (get_local $2) ) - (i32.shr_s - (i32.shl - (tee_local $1 - (i32.and - (get_local $1) - (i32.const 255) - ) + (i32.and + (tee_local $1 + (i32.and + (get_local $1) + (i32.const 255) ) - (i32.const 24) ) - (i32.const 24) + (i32.const 255) ) ) (block @@ -2038,15 +2032,12 @@ (loop $while-in5 (br_if $label$break$L8 (i32.eq - (i32.load8_s + (i32.load8_u (get_local $2) ) - (i32.shr_s - (i32.shl - (get_local $1) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $1) + (i32.const 255) ) ) ) @@ -2513,16 +2504,13 @@ ) (br_if $__rjti$9 (i32.eqz - (i32.shr_s - (i32.shl - (tee_local $7 - (i32.load8_s - (get_local $5) - ) + (i32.and + (tee_local $7 + (i32.load8_s + (get_local $5) ) - (i32.const 24) ) - (i32.const 24) + (i32.const 255) ) ) ) @@ -2577,7 +2565,7 @@ (loop $while-in (br_if $label$break$L12 (i32.ne - (i32.load8_s offset=1 + (i32.load8_u offset=1 (get_local $6) ) (i32.const 37) @@ -2591,7 +2579,7 @@ ) (br_if $while-in (i32.eq - (i32.load8_s + (i32.load8_u (tee_local $6 (i32.add (get_local $6) @@ -2683,7 +2671,7 @@ (get_local $10) (tee_local $11 (i32.eq - (i32.load8_s offset=2 + (i32.load8_u offset=2 (get_local $6) ) (i32.const 36) @@ -2828,12 +2816,9 @@ (block $do-once5 (if (i32.eq - (i32.shr_s - (i32.shl - (get_local $6) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $6) + (i32.const 255) ) (i32.const 42) ) @@ -2861,7 +2846,7 @@ ) (br_if $__rjti$0 (i32.ne - (i32.load8_s offset=2 + (i32.load8_u offset=2 (get_local $10) ) (i32.const 36) @@ -3093,7 +3078,7 @@ (set_local $6 (if i32 (i32.eq - (i32.load8_s + (i32.load8_u (get_local $10) ) (i32.const 46) @@ -3101,21 +3086,18 @@ (block i32 (if (i32.ne - (i32.shr_s - (i32.shl - (tee_local $8 - (i32.load8_s - (tee_local $6 - (i32.add - (get_local $10) - (i32.const 1) - ) + (i32.and + (tee_local $8 + (i32.load8_s + (tee_local $6 + (i32.add + (get_local $10) + (i32.const 1) ) ) ) - (i32.const 24) ) - (i32.const 24) + (i32.const 255) ) (i32.const 42) ) @@ -3214,7 +3196,7 @@ ) (if (i32.eq - (i32.load8_s offset=3 + (i32.load8_u offset=3 (get_local $10) ) (i32.const 36) @@ -3384,12 +3366,9 @@ ) (if (i32.eqz - (i32.shr_s - (i32.shl - (get_local $13) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $13) + (i32.const 255) ) ) (block @@ -3409,12 +3388,9 @@ (block $__rjti$2 (if (i32.eq - (i32.shr_s - (i32.shl - (get_local $13) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $13) + (i32.const 255) ) (i32.const 19) ) @@ -4249,7 +4225,7 @@ ) (if f64 (i32.eq - (i32.load8_s + (i32.load8_u (get_local $9) ) (i32.const 45) @@ -5312,7 +5288,7 @@ (block (br_if $do-once83 (i32.ne - (i32.load8_s + (i32.load8_u (get_local $31) ) (i32.const 45) diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index 2cd23ba1d..2e35d2a56 100644 --- a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -325,7 +325,7 @@ ) ) (if - (i32.load8_s + (i32.load8_u (get_local $0) ) (block @@ -1468,7 +1468,7 @@ ) (if (i32.ne - (i32.load8_s + (i32.load8_u (i32.add (get_local $0) (tee_local $6 @@ -1864,15 +1864,12 @@ (loop $while-in (if (i32.eq - (i32.load8_s + (i32.load8_u (get_local $2) ) - (i32.shr_s - (i32.shl - (get_local $4) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $4) + (i32.const 255) ) ) (block @@ -1941,20 +1938,17 @@ ) (if (i32.ne - (i32.load8_s + (i32.load8_u (get_local $2) ) - (i32.shr_s - (i32.shl - (tee_local $1 - (i32.and - (get_local $1) - (i32.const 255) - ) + (i32.and + (tee_local $1 + (i32.and + (get_local $1) + (i32.const 255) ) - (i32.const 24) ) - (i32.const 24) + (i32.const 255) ) ) (block @@ -2035,15 +2029,12 @@ (loop $while-in5 (br_if $label$break$L8 (i32.eq - (i32.load8_s + (i32.load8_u (get_local $2) ) - (i32.shr_s - (i32.shl - (get_local $1) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $1) + (i32.const 255) ) ) ) @@ -2456,16 +2447,13 @@ ) (br_if $__rjti$9 (i32.eqz - (i32.shr_s - (i32.shl - (tee_local $7 - (i32.load8_s - (get_local $5) - ) + (i32.and + (tee_local $7 + (i32.load8_s + (get_local $5) ) - (i32.const 24) ) - (i32.const 24) + (i32.const 255) ) ) ) @@ -2520,7 +2508,7 @@ (loop $while-in (br_if $label$break$L12 (i32.ne - (i32.load8_s offset=1 + (i32.load8_u offset=1 (get_local $6) ) (i32.const 37) @@ -2534,7 +2522,7 @@ ) (br_if $while-in (i32.eq - (i32.load8_s + (i32.load8_u (tee_local $6 (i32.add (get_local $6) @@ -2626,7 +2614,7 @@ (get_local $10) (tee_local $11 (i32.eq - (i32.load8_s offset=2 + (i32.load8_u offset=2 (get_local $6) ) (i32.const 36) @@ -2771,12 +2759,9 @@ (block $do-once5 (if (i32.eq - (i32.shr_s - (i32.shl - (get_local $6) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $6) + (i32.const 255) ) (i32.const 42) ) @@ -2804,7 +2789,7 @@ ) (br_if $__rjti$0 (i32.ne - (i32.load8_s offset=2 + (i32.load8_u offset=2 (get_local $10) ) (i32.const 36) @@ -3036,7 +3021,7 @@ (set_local $6 (if i32 (i32.eq - (i32.load8_s + (i32.load8_u (get_local $10) ) (i32.const 46) @@ -3044,21 +3029,18 @@ (block i32 (if (i32.ne - (i32.shr_s - (i32.shl - (tee_local $8 - (i32.load8_s - (tee_local $6 - (i32.add - (get_local $10) - (i32.const 1) - ) + (i32.and + (tee_local $8 + (i32.load8_s + (tee_local $6 + (i32.add + (get_local $10) + (i32.const 1) ) ) ) - (i32.const 24) ) - (i32.const 24) + (i32.const 255) ) (i32.const 42) ) @@ -3157,7 +3139,7 @@ ) (if (i32.eq - (i32.load8_s offset=3 + (i32.load8_u offset=3 (get_local $10) ) (i32.const 36) @@ -3327,12 +3309,9 @@ ) (if (i32.eqz - (i32.shr_s - (i32.shl - (get_local $13) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $13) + (i32.const 255) ) ) (block @@ -3352,12 +3331,9 @@ (block $__rjti$2 (if (i32.eq - (i32.shr_s - (i32.shl - (get_local $13) - (i32.const 24) - ) - (i32.const 24) + (i32.and + (get_local $13) + (i32.const 255) ) (i32.const 19) ) @@ -4210,7 +4186,7 @@ (get_local $15) ) (i32.eq - (i32.load8_s + (i32.load8_u (get_local $9) ) (i32.const 45) @@ -5249,7 +5225,7 @@ (block (br_if $do-once83 (i32.ne - (i32.load8_s + (i32.load8_u (get_local $31) ) (i32.const 45) diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index f959c3cd6..14f29a783 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -3,6 +3,7 @@ (type $1 (func)) (type $2 (func (result i32))) (type $3 (func (param i32) (result i32))) + (type $4 (func (param i32 i32))) (memory $0 0) (export "load-off-2" (func $load-off-2)) (func $f (type $0) (param $i1 i32) (param $i2 i64) @@ -176,7 +177,8 @@ ) ) (drop - (i32.eqz + (i32.eq + (i32.const 0) (i32.const 100) ) ) @@ -515,4 +517,431 @@ ) ) ) + (func $sign-ext (type $4) (param $0 i32) (param $1 i32) + (drop + (i32.eqz + (i32.and + (get_local $0) + (i32.const 255) + ) + ) + ) + (drop + (i32.eqz + (i32.and + (get_local $0) + (i32.const 65535) + ) + ) + ) + (drop + (i32.eqz + (i32.and + (get_local $0) + (i32.const 134217727) + ) + ) + ) + (drop + (i32.eq + (i32.and + (get_local $0) + (i32.const 255) + ) + (i32.const 100) + ) + ) + (drop + (i32.eq + (i32.and + (get_local $0) + (i32.const 255) + ) + (i32.const 255) + ) + ) + (drop + (i32.eq + (i32.and + (get_local $0) + (i32.const 255) + ) + (i32.and + (get_local $1) + (i32.const 255) + ) + ) + ) + (drop + (i32.eq + (i32.and + (get_local $0) + (i32.const 65535) + ) + (i32.and + (get_local $1) + (i32.const 65535) + ) + ) + ) + (drop + (i32.eqz + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 23) + ) + ) + ) + (drop + (i32.eqz + (i32.shr_u + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (drop + (i32.lt_s + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (func $sign-ext-input (type $4) (param $0 i32) (param $1 i32) + (drop + (i32.const 100) + ) + (drop + (i32.const 127) + ) + (drop + (i32.shr_s + (i32.shl + (i32.const 128) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (unreachable) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.div_s + (i32.const 1) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.and + (i32.const 127) + (i32.const 128) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.and + (i32.const 128) + (i32.const 129) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.xor + (i32.const 127) + (i32.const 128) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.xor + (i32.const 128) + (i32.const 129) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.or + (i32.const 127) + (i32.const 126) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.or + (i32.const 127) + (i32.const 128) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.const 32) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shl + (i32.const 32) + (i32.const 1) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.const 32) + (i32.const 35) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.const 256) + (i32.const 1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_u + (i32.const 256) + (i32.const 2) + ) + ) + (drop + (i32.shr_u + (i32.const 128) + (i32.const 35) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s + (i32.const 256) + (i32.const 1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.const 256) + (i32.const 2) + ) + ) + (drop + (i32.shr_s + (i32.const 128) + (i32.const 35) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s + (i32.const -1) + (i32.const 32) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.and + (i32.const -1) + (i32.const 2147483647) + ) + (i32.const 32) + ) + ) + (drop + (i32.ne + (i32.const -1) + (i32.const -1) + ) + ) + (drop + (f32.le + (f32.const -1) + (f32.const -1) + ) + ) + (drop + (i32.clz + (i32.const 0) + ) + ) + (drop + (i32.shl + (i32.clz + (i32.const 0) + ) + (i32.const 2) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.clz + (i32.const 0) + ) + (i32.const 3) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.wrap/i64 + (i64.clz + (i64.const 0) + ) + ) + ) + (drop + (i32.shl + (i32.wrap/i64 + (i64.clz + (i64.const 0) + ) + ) + (i32.const 1) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.wrap/i64 + (i64.clz + (i64.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.eqz + (i32.const -1) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.wrap/i64 + (i64.const -1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_u + (i32.wrap/i64 + (i64.const -1) + ) + (i32.const 25) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.wrap/i64 + (i64.extend_s/i32 + (i32.const -1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_u + (i32.wrap/i64 + (i64.extend_s/i32 + (i32.const -1) + ) + ) + (i32.const 25) + ) + ) + ) ) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index 41ca48951..8eec3f3cd 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -192,6 +192,8 @@ ) ) ) + ;; we handle only 0 in the right position, as we assume a const is there, and don't care about if + ;; both are consts here (precompute does that, so no need) (drop (i32.eq (i32.const 100) @@ -430,4 +432,586 @@ ) ) ) + (func $sign-ext (param $0 i32) (param $1 i32) + ;; eq of sign-ext to const, can be a zext + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.const 0) + ) + ) + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 5) ;; weird size, but still valid + ) + (i32.const 5) + ) + (i32.const 0) + ) + ) + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 100) ;; non-zero + ) + ) + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 32767) ;; non-zero and bigger than the mask + ) + ) + ;; eq of two sign-ext, can both be a zext + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.shr_s + (i32.shl + (get_local $1) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.shr_s + (i32.shl + (get_local $1) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + ;; corner cases we should not opt + (drop + (i32.eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 23) ;; different shift + ) + (i32.const 0) + ) + ) + (drop + (i32.eq + (i32.shr_u ;; unsigned + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + (drop + (i32.lt_s ;; non-eq + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (func $sign-ext-input (param $0 i32) (param $1 i32) + (drop + (i32.shr_s + (i32.shl + (i32.const 100) ;; small! + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.const 127) ;; just small enough + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.const 128) ;; just too big + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (get-local $0) ;; who knows... + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (unreachable) ;; ignore + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.div_s ;; this could be optimizable in theory, but currently we don't look into adds etc. + (i32.const 1) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.and ;; takes the min, here it is ok + (i32.const 127) + (i32.const 128) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.and ;; takes the min, here it is not + (i32.const 128) + (i32.const 129) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.xor ;; takes the min, here it is ok + (i32.const 127) + (i32.const 128) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.xor ;; takes the min, here it is not + (i32.const 128) + (i32.const 129) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.or ;; takes the max, here it is ok + (i32.const 127) + (i32.const 126) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.or ;; takes the max, here it is not + (i32.const 127) + (i32.const 128) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl ;; adds, here it is too much + (i32.const 32) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl ;; adds, here it is ok + (i32.const 32) + (i32.const 1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl ;; adds, here it is too much and "overflows" + (i32.const 32) + (i32.const 35) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u ;; subtracts, here it is still too much + (i32.const 256) + (i32.const 1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u ;; subtracts, here it is ok + (i32.const 256) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u ;; subtracts, here it "overflows" + (i32.const 128) + (i32.const 35) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s ;; subtracts, here it is still too much + (i32.const 256) + (i32.const 1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s ;; subtracts, here it is ok + (i32.const 256) + (i32.const 2) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s ;; subtracts, here it "overflows" + (i32.const 128) + (i32.const 35) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s ;; subtracts, here there is a sign bit, so it stays 32 bits no matter how much we shift + (i32.const -1) + (i32.const 32) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_s ;; subtracts, here we mask out that sign bit + (i32.and + (i32.const -1) + (i32.const 2147483647) + ) + (i32.const 32) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.ne ;; 1 bit + (i32.const -1) + (i32.const -1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (f32.le + (f32.const -1) + (f32.const -1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.clz ;; assumed 5 bits + (i32.const 0) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.clz ;; assumed 5 bits + (i32.const 0) + ) + (i32.const 2) ;; + 2, so 7 + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.clz ;; assumed 5 bits + (i32.const 0) + ) + (i32.const 3) ;; + 3, so 8, too much + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.wrap/i64 ;; preserves 6 + (i64.clz ;; assumed 6 bits + (i64.const 0) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.wrap/i64 ;; preserves 6 + (i64.clz ;; assumed 6 bits + (i64.const 0) + ) + ) + (i32.const 1) ;; + 1, so 7 + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shl + (i32.wrap/i64 ;; preserves 6 + (i64.clz ;; assumed 6 bits + (i64.const 0) + ) + ) + (i32.const 2) ;; + 2, so 8, too much + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.eqz ;; 1 bit + (i32.const -1) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.wrap/i64 ;; down to 32 + (i64.const -1) ;; 64 + ) + (i32.const 24) ;; 32 - 24 = 8 + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.wrap/i64 ;; down to 32 + (i64.const -1) ;; 64 + ) + (i32.const 25) ;; 32 - 25 = 7, ok + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.wrap/i64 ;; stay 32 + (i64.extend_s/i32 + (i32.const -1) + ) + ) + (i32.const 24) ;; 32 - 24 = 8 + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.shr_u + (i32.wrap/i64 ;; stay 32 + (i64.extend_s/i32 + (i32.const -1) + ) + ) + (i32.const 25) ;; 32 - 25 = 7, ok + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) ) diff --git a/test/passes/precompute.txt b/test/passes/precompute.txt index e7fc6526f..af44d18d8 100644 --- a/test/passes/precompute.txt +++ b/test/passes/precompute.txt @@ -4,6 +4,9 @@ (type $2 (func)) (memory $0 0) (func $x (type $0) (param $x i32) + (call $x + (i32.const 2300) + ) (nop) (drop (i32.add diff --git a/test/passes/precompute.wast b/test/passes/precompute.wast index b498d9df0..d5e91fb9d 100644 --- a/test/passes/precompute.wast +++ b/test/passes/precompute.wast @@ -2,6 +2,12 @@ (memory 0) (type $0 (func (param i32))) (func $x (type $0) (param $x i32) + (call $x + (i32.add + (i32.const 100) + (i32.const 2200) + ) + ) (drop (i32.add (i32.const 1) |