diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 109 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm | 176 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm.imprecise | 176 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 160 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 160 | ||||
-rw-r--r-- | test/memorygrowth.fromasm | 176 | ||||
-rw-r--r-- | test/memorygrowth.fromasm.imprecise | 176 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 107 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 96 | ||||
-rw-r--r-- | test/unit.fromasm | 11 | ||||
-rw-r--r-- | test/unit.fromasm.imprecise | 11 |
11 files changed, 677 insertions, 681 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index de46f155a..bb4748a97 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -263,6 +263,36 @@ static Index getSignExtBits(Expression* curr) { return 32 - curr->cast<Binary>()->right->cast<Const>()->value.geti32(); } +// Check if an expression is almost a sign-extend: perhaps the inner shift +// is too large. We can split the shifts in that case, which is sometimes +// useful (e.g. if we can remove the signext) +static Expression* getAlmostSignExt(Expression* curr) { + if (auto* outer = curr->dynCast<Binary>()) { + if (outer->op == ShrSInt32) { + if (auto* outerConst = outer->right->dynCast<Const>()) { + if (auto* inner = outer->left->dynCast<Binary>()) { + if (inner->op == ShlInt32) { + if (auto* innerConst = inner->right->dynCast<Const>()) { + if (outerConst->value.leU(innerConst->value).geti32()) { + return inner->left; + } + } + } + } + } + } + } + return nullptr; +} + +// gets the size of the almost sign-extended value, as well as the +// extra shifts, if any +static Index getAlmostSignExtBits(Expression* curr, Index& extraShifts) { + extraShifts = curr->cast<Binary>()->left->cast<Binary>()->right->cast<Const>()->value.geti32() - + curr->cast<Binary>()->right->cast<Const>()->value.geti32(); + return getSignExtBits(curr); +} + // get a mask to keep only the low # of bits static int32_t lowBitMask(int32_t bits) { uint32_t ret = -1; @@ -321,17 +351,18 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, std::swap(binary->left, binary->right); } } - if (auto* ext = getSignExt(binary)) { - auto bits = getSignExtBits(binary); + if (auto* ext = getAlmostSignExt(binary)) { + Index extraShifts; + auto bits = getAlmostSignExtBits(binary, extraShifts); auto* load = ext->dynCast<Load>(); // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc. if (load && ((load->bytes == 1 && bits == 8) || (load->bytes == 2 && bits == 16))) { load->signed_ = true; - return load; + return removeAlmostSignExt(binary); } // if the sign-extend input cannot have a sign bit, we don't need it - if (getMaxBits(ext) < bits) { - return ext; + if (getMaxBits(ext) + extraShifts < bits) { + return removeAlmostSignExt(binary); } } else if (binary->op == EqInt32) { if (auto* c = binary->right->dynCast<Const>()) { @@ -359,29 +390,46 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, // note that both left and right may be consts, but then we let precompute compute the constant result } else if (binary->op == AddInt32 || binary->op == SubInt32) { return optimizeAddedConstants(binary); - } else if (binary->op == AndInt32) { - if (auto* right = binary->right->dynCast<Const>()) { - if (right->type == i32) { - auto mask = right->value.geti32(); - // and with -1 does nothing (common in asm.js output) - if (mask == -1) { - return binary->left; + } + // a bunch of operations on a constant right side can be simplified + if (auto* right = binary->right->dynCast<Const>()) { + if (binary->op == AndInt32) { + auto mask = right->value.geti32(); + // and with -1 does nothing (common in asm.js output) + if (mask == -1) { + return binary->left; + } + // small loads do not need to be masted, the load itself masks + if (auto* load = binary->left->dynCast<Load>()) { + if ((load->bytes == 1 && mask == 0xff) || + (load->bytes == 2 && mask == 0xffff)) { + load->signed_ = false; + return load; } - // small loads do not need to be masted, the load itself masks - if (auto* load = binary->left->dynCast<Load>()) { - if ((load->bytes == 1 && mask == 0xff) || - (load->bytes == 2 && mask == 0xffff)) { - load->signed_ = false; - return load; + } else if (mask == 1 && Properties::emitsBoolean(binary->left)) { + // (bool) & 1 does not need the outer mask + return binary->left; + } + } + // the square of some operations can be merged + if (auto* left = binary->left->dynCast<Binary>()) { + if (left->op == binary->op) { + if (auto* leftRight = left->right->dynCast<Const>()) { + if (left->op == AndInt32) { + leftRight->value = leftRight->value.and_(right->value); + return left; + } else if (left->op == OrInt32) { + leftRight->value = leftRight->value.or_(right->value); + return left; + } else if (left->op == ShlInt32 || left->op == ShrUInt32 || left->op == ShrSInt32) { + leftRight->value = leftRight->value.add(right->value); + return left; } - } else if (mask == 1 && Properties::emitsBoolean(binary->left)) { - // (bool) & 1 does not need the outer mask - return binary->left; } } } - return conditionalizeExpensiveOnBitwise(binary); - } else if (binary->op == OrInt32) { + } + if (binary->op == AndInt32 || binary->op == OrInt32) { return conditionalizeExpensiveOnBitwise(binary); } } else if (auto* unary = curr->dynCast<Unary>()) { @@ -685,6 +733,21 @@ private: Builder builder(*getModule()); return builder.makeBinary(AndInt32, curr, builder.makeConst(Literal(lowBitMask(bits)))); } + + // given an "almost" sign extend - either a proper one, or it + // has too many shifts left - we remove the sig extend. If there are + // too many shifts, we split the shifts first, so this removes the + // two sign extend shifts and adds one (smaller one) + Expression* removeAlmostSignExt(Binary* outer) { + auto* inner = outer->left->cast<Binary>(); + auto* outerConst = outer->right->cast<Const>(); + auto* innerConst = inner->right->cast<Const>(); + auto* value = inner->left; + if (outerConst->value == innerConst->value) return value; + // add a shift, by reusing the existing node + innerConst->value = innerConst->value.sub(outerConst->value); + return inner; + } }; Pass *createOptimizeInstructionsPass() { diff --git a/test/emcc_O2_hello_world.fromasm b/test/emcc_O2_hello_world.fromasm index 40f7d3585..5aa32979b 100644 --- a/test/emcc_O2_hello_world.fromasm +++ b/test/emcc_O2_hello_world.fromasm @@ -158,22 +158,19 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add - (i32.xor - (i32.and - (get_local $2) - (i32.const 1) - ) + (tee_local $10 + (i32.add + (i32.xor + (i32.and + (get_local $2) (i32.const 1) ) - (get_local $6) + (i32.const 1) ) + (get_local $6) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -339,83 +336,80 @@ (tee_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add + (tee_local $10 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $2 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $2) - (get_local $1) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $1) - ) - (tee_local $7 + (tee_local $2 (i32.and (i32.shr_u - (tee_local $0 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $2) + (get_local $1) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $1) ) - (tee_local $0 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $11 + (tee_local $0 (i32.shr_u - (get_local $0) (get_local $7) + (get_local $2) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $11 + (tee_local $0 (i32.and (i32.shr_u - (tee_local $19 + (tee_local $11 (i32.shr_u - (get_local $11) (get_local $0) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $19) - (get_local $11) + (tee_local $11 + (i32.and + (i32.shr_u + (tee_local $19 + (i32.shr_u + (get_local $11) + (get_local $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $19) + (get_local $11) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -539,16 +533,13 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $19 - (i32.shr_u - (get_local $17) - (i32.const 3) - ) + (tee_local $19 + (i32.shr_u + (get_local $17) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -1259,16 +1250,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -2346,11 +2334,8 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4370,11 +4355,8 @@ (tee_local $23 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4535,11 +4517,8 @@ (set_local $0 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -5281,11 +5260,8 @@ (set_local $18 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -5760,11 +5736,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -6122,11 +6095,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $7) - (i32.const 1) - ) - (i32.const 2) + (get_local $7) + (i32.const 3) ) (i32.const 216) ) @@ -7143,11 +7113,8 @@ (tee_local $7 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 216) ) @@ -7298,11 +7265,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/emcc_O2_hello_world.fromasm.imprecise b/test/emcc_O2_hello_world.fromasm.imprecise index 901414958..39ed6731f 100644 --- a/test/emcc_O2_hello_world.fromasm.imprecise +++ b/test/emcc_O2_hello_world.fromasm.imprecise @@ -157,22 +157,19 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add - (i32.xor - (i32.and - (get_local $2) - (i32.const 1) - ) + (tee_local $10 + (i32.add + (i32.xor + (i32.and + (get_local $2) (i32.const 1) ) - (get_local $6) + (i32.const 1) ) + (get_local $6) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -338,83 +335,80 @@ (tee_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add + (tee_local $10 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $2 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $2) - (get_local $1) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $1) - ) - (tee_local $7 + (tee_local $2 (i32.and (i32.shr_u - (tee_local $0 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $2) + (get_local $1) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $1) ) - (tee_local $0 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $11 + (tee_local $0 (i32.shr_u - (get_local $0) (get_local $7) + (get_local $2) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $11 + (tee_local $0 (i32.and (i32.shr_u - (tee_local $19 + (tee_local $11 (i32.shr_u - (get_local $11) (get_local $0) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $19) - (get_local $11) + (tee_local $11 + (i32.and + (i32.shr_u + (tee_local $19 + (i32.shr_u + (get_local $11) + (get_local $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $19) + (get_local $11) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -538,16 +532,13 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $19 - (i32.shr_u - (get_local $17) - (i32.const 3) - ) + (tee_local $19 + (i32.shr_u + (get_local $17) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -1258,16 +1249,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -2345,11 +2333,8 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4369,11 +4354,8 @@ (tee_local $23 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4534,11 +4516,8 @@ (set_local $0 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -5280,11 +5259,8 @@ (set_local $18 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -5759,11 +5735,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -6121,11 +6094,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $7) - (i32.const 1) - ) - (i32.const 2) + (get_local $7) + (i32.const 3) ) (i32.const 216) ) @@ -7142,11 +7112,8 @@ (tee_local $7 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 216) ) @@ -7297,11 +7264,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index 5ac7af9f6..71ce805ba 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -7967,22 +7967,19 @@ (tee_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $4 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $4 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $13) + (i32.const 1) ) + (get_local $13) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8148,43 +8145,27 @@ (tee_local $10 (i32.add (i32.shl - (i32.shl - (tee_local $5 - (i32.add + (tee_local $5 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $3 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $3) - (get_local $10) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $10) - ) (tee_local $3 (i32.and (i32.shr_u (tee_local $7 (i32.shr_u - (get_local $7) (get_local $3) + (get_local $10) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $10) ) (tee_local $3 (i32.and @@ -8195,9 +8176,9 @@ (get_local $3) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) @@ -8212,19 +8193,32 @@ ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $7) - (get_local $3) + (tee_local $3 + (i32.and + (i32.shr_u + (tee_local $7 + (i32.shr_u + (get_local $7) + (get_local $3) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $7) + (get_local $3) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8348,16 +8342,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $8) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $8) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -9059,16 +9050,13 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $0) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $0) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -10122,11 +10110,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -11665,11 +11650,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -12157,11 +12139,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -12852,11 +12831,8 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -13308,11 +13284,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -13665,11 +13638,8 @@ (tee_local $3 (i32.add (i32.shl - (i32.shl - (get_local $5) - (i32.const 1) - ) - (i32.const 2) + (get_local $5) + (i32.const 3) ) (i32.const 216) ) @@ -14329,11 +14299,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -14819,11 +14786,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index 096d105b8..58dc7b313 100644 --- a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -7904,22 +7904,19 @@ (tee_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $4 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $4 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $13) + (i32.const 1) ) + (get_local $13) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8085,43 +8082,27 @@ (tee_local $10 (i32.add (i32.shl - (i32.shl - (tee_local $5 - (i32.add + (tee_local $5 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $3 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $3) - (get_local $10) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $10) - ) (tee_local $3 (i32.and (i32.shr_u (tee_local $7 (i32.shr_u - (get_local $7) (get_local $3) + (get_local $10) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $10) ) (tee_local $3 (i32.and @@ -8132,9 +8113,9 @@ (get_local $3) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) @@ -8149,19 +8130,32 @@ ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $7) - (get_local $3) + (tee_local $3 + (i32.and + (i32.shr_u + (tee_local $7 + (i32.shr_u + (get_local $7) + (get_local $3) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $7) + (get_local $3) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8285,16 +8279,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $8) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $8) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8996,16 +8987,13 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $0) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $0) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -10059,11 +10047,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -11602,11 +11587,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -12094,11 +12076,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -12789,11 +12768,8 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -13245,11 +13221,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -13601,11 +13574,8 @@ (tee_local $3 (i32.add (i32.shl - (i32.shl - (get_local $5) - (i32.const 1) - ) - (i32.const 2) + (get_local $5) + (i32.const 3) ) (i32.const 216) ) @@ -14265,11 +14235,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -14755,11 +14722,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm index b06a13f64..ef7dc1a78 100644 --- a/test/memorygrowth.fromasm +++ b/test/memorygrowth.fromasm @@ -170,22 +170,19 @@ (tee_local $8 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $0 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $0) + (i32.const 1) ) + (get_local $0) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -354,83 +351,80 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $16 - (i32.add + (tee_local $16 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $6 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $6) - (get_local $8) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $8) - ) - (tee_local $7 + (tee_local $6 (i32.and (i32.shr_u - (tee_local $9 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $6) + (get_local $8) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $8) ) - (tee_local $9 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $9 (i32.shr_u - (get_local $9) (get_local $7) + (get_local $6) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $1 + (tee_local $9 (i32.and (i32.shr_u - (tee_local $12 + (tee_local $1 (i32.shr_u - (get_local $1) (get_local $9) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $12) - (get_local $1) + (tee_local $1 + (i32.and + (i32.shr_u + (tee_local $12 + (i32.shr_u + (get_local $1) + (get_local $9) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $12) + (get_local $1) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -554,16 +548,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $14 - (i32.shr_u - (get_local $34) - (i32.const 3) - ) + (tee_local $14 + (i32.shr_u + (get_local $34) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -1279,16 +1270,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -2411,11 +2399,8 @@ (set_local $5 (i32.add (i32.shl - (i32.shl - (get_local $9) - (i32.const 1) - ) - (i32.const 2) + (get_local $9) + (i32.const 3) ) (i32.const 1248) ) @@ -4094,11 +4079,8 @@ (tee_local $19 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -4601,11 +4583,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -5328,11 +5307,8 @@ (set_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -5807,11 +5783,8 @@ (tee_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -6175,11 +6148,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 1248) ) @@ -6855,11 +6825,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 1248) ) @@ -7351,11 +7318,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 1248) ) diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise index d0f3405d5..2a32f7528 100644 --- a/test/memorygrowth.fromasm.imprecise +++ b/test/memorygrowth.fromasm.imprecise @@ -169,22 +169,19 @@ (tee_local $8 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $0 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $0) + (i32.const 1) ) + (get_local $0) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -353,83 +350,80 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $16 - (i32.add + (tee_local $16 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $6 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $6) - (get_local $8) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $8) - ) - (tee_local $7 + (tee_local $6 (i32.and (i32.shr_u - (tee_local $9 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $6) + (get_local $8) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $8) ) - (tee_local $9 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $9 (i32.shr_u - (get_local $9) (get_local $7) + (get_local $6) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $1 + (tee_local $9 (i32.and (i32.shr_u - (tee_local $12 + (tee_local $1 (i32.shr_u - (get_local $1) (get_local $9) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $12) - (get_local $1) + (tee_local $1 + (i32.and + (i32.shr_u + (tee_local $12 + (i32.shr_u + (get_local $1) + (get_local $9) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $12) + (get_local $1) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -553,16 +547,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $14 - (i32.shr_u - (get_local $34) - (i32.const 3) - ) + (tee_local $14 + (i32.shr_u + (get_local $34) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -1278,16 +1269,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -2410,11 +2398,8 @@ (set_local $5 (i32.add (i32.shl - (i32.shl - (get_local $9) - (i32.const 1) - ) - (i32.const 2) + (get_local $9) + (i32.const 3) ) (i32.const 1248) ) @@ -4093,11 +4078,8 @@ (tee_local $19 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -4600,11 +4582,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -5327,11 +5306,8 @@ (set_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -5806,11 +5782,8 @@ (tee_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -6174,11 +6147,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 1248) ) @@ -6854,11 +6824,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 1248) ) @@ -7350,11 +7317,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 1248) ) diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index a11bd0d66..03f0f7074 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -704,11 +704,8 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.const 32) - (i32.const 2) - ) - (i32.const 24) + (i32.const 32) + (i32.const 26) ) (i32.const 24) ) @@ -722,11 +719,8 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.const 32) - (i32.const 35) - ) - (i32.const 24) + (i32.const 32) + (i32.const 59) ) (i32.const 24) ) @@ -828,13 +822,10 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.clz - (i32.const 0) - ) - (i32.const 3) + (i32.clz + (i32.const 0) ) - (i32.const 24) + (i32.const 27) ) (i32.const 24) ) @@ -859,15 +850,12 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.wrap/i64 - (i64.clz - (i64.const 0) - ) + (i32.wrap/i64 + (i64.clz + (i64.const 0) ) - (i32.const 2) ) - (i32.const 24) + (i32.const 26) ) (i32.const 24) ) @@ -1018,4 +1006,77 @@ (get_local $0) ) ) + (func $almost-sign-ext (type $4) (param $0 i32) (param $0 i32) + (drop + (i32.shr_s + (i32.shl + (i32.const 100) + (i32.const 25) + ) + (i32.const 24) + ) + ) + (drop + (i32.shl + (i32.const 50) + (i32.const 1) + ) + ) + ) + (func $squaring (type $4) (param $0 i32) (param $1 i32) + (drop + (i32.and + (get_local $0) + (i32.const 8) + ) + ) + (drop + (i32.and + (i32.and + (get_local $0) + (i32.const 11) + ) + (get_local $0) + ) + ) + (drop + (i32.and + (get_local $0) + (i32.const 8) + ) + ) + (drop + (i32.or + (get_local $0) + (i32.const 203) + ) + ) + (drop + (i32.shl + (get_local $0) + (i32.const 211) + ) + ) + (drop + (i32.shr_s + (get_local $0) + (i32.const 211) + ) + ) + (drop + (i32.shr_u + (get_local $0) + (i32.const 211) + ) + ) + (drop + (i32.shr_u + (i32.shr_s + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + ) ) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index b9bd420da..c59bb3ade 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -539,7 +539,7 @@ (get_local $0) (i32.const 24) ) - (i32.const 23) ;; different shift + (i32.const 23) ;; different shift, smaller ) (i32.const 0) ) @@ -1238,4 +1238,98 @@ ) ) ) + (func $almost-sign-ext (param $0 i32) (param $0 i32) + (drop + (i32.shr_s + (i32.shl + (i32.const 100) ;; too big, there is a sign bit, due to the extra shift + (i32.const 25) + ) + (i32.const 24) ;; different shift, but larger, so ok to opt if we leave a shift, in theory + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.const 50) ;; small enough, no sign bit + (i32.const 25) + ) + (i32.const 24) ;; different shift, but larger, so ok to opt if we leave a shift + ) + ) + ) + (func $squaring (param $0 i32) (param $1 i32) + (drop + (i32.and + (i32.and + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.and + (i32.and + (get_local $0) + (i32.const 11) + ) + (get_local $0) ;; non-const, cannot optimize this! + ) + ) + (drop + (i32.and + (i32.and + (i32.const 11) ;; flipped order + (get_local $0) + ) + (i32.const 200) + ) + ) + (drop + (i32.or + (i32.or + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shl + (i32.shl + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shr_s + (i32.shr_s + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shr_u + (i32.shr_u + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shr_u + (i32.shr_s ;; but do not optimize a mixture or different shifts! + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + ) ) diff --git a/test/unit.fromasm b/test/unit.fromasm index 2a6c60735..585240a95 100644 --- a/test/unit.fromasm +++ b/test/unit.fromasm @@ -696,14 +696,11 @@ ) ) (call $loadSigned - (i32.shr_s - (i32.shl - (i32.load16_u - (get_local $0) - ) - (i32.const 24) + (i32.shl + (i32.load16_s + (get_local $0) ) - (i32.const 16) + (i32.const 8) ) ) ) diff --git a/test/unit.fromasm.imprecise b/test/unit.fromasm.imprecise index 10c1c025c..64fde3832 100644 --- a/test/unit.fromasm.imprecise +++ b/test/unit.fromasm.imprecise @@ -664,14 +664,11 @@ ) ) (call $loadSigned - (i32.shr_s - (i32.shl - (i32.load16_u - (get_local $0) - ) - (i32.const 24) + (i32.shl + (i32.load16_s + (get_local $0) ) - (i32.const 16) + (i32.const 8) ) ) ) |