diff options
author | Alon Zakai <alonzakai@gmail.com> | 2017-02-16 22:42:31 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-16 22:42:31 -0800 |
commit | c6ea79d1532face076c2dfeb8eadb58319e4e5fd (patch) | |
tree | 12a840d94dda462827a8874371bb9858948ea42b | |
parent | 0728a53fb6bf0540b9789c7bcd26e195800c5ecc (diff) | |
download | binaryen-c6ea79d1532face076c2dfeb8eadb58319e4e5fd.tar.gz binaryen-c6ea79d1532face076c2dfeb8eadb58319e4e5fd.tar.bz2 binaryen-c6ea79d1532face076c2dfeb8eadb58319e4e5fd.zip |
Optimize "squared" operations (#905)
* Optimize 'almost' sign extends: when we can remove one entirely, any extra shifts can be left behind. With that in place, we can then optimize 'squared' operations like shl on shl, as doing so does not break our sign-extend opts.
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 109 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm | 176 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm.imprecise | 176 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 160 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 160 | ||||
-rw-r--r-- | test/memorygrowth.fromasm | 176 | ||||
-rw-r--r-- | test/memorygrowth.fromasm.imprecise | 176 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 107 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 96 | ||||
-rw-r--r-- | test/unit.fromasm | 11 | ||||
-rw-r--r-- | test/unit.fromasm.imprecise | 11 |
11 files changed, 677 insertions, 681 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index de46f155a..bb4748a97 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -263,6 +263,36 @@ static Index getSignExtBits(Expression* curr) { return 32 - curr->cast<Binary>()->right->cast<Const>()->value.geti32(); } +// Check if an expression is almost a sign-extend: perhaps the inner shift +// is too large. We can split the shifts in that case, which is sometimes +// useful (e.g. if we can remove the signext) +static Expression* getAlmostSignExt(Expression* curr) { + if (auto* outer = curr->dynCast<Binary>()) { + if (outer->op == ShrSInt32) { + if (auto* outerConst = outer->right->dynCast<Const>()) { + if (auto* inner = outer->left->dynCast<Binary>()) { + if (inner->op == ShlInt32) { + if (auto* innerConst = inner->right->dynCast<Const>()) { + if (outerConst->value.leU(innerConst->value).geti32()) { + return inner->left; + } + } + } + } + } + } + } + return nullptr; +} + +// gets the size of the almost sign-extended value, as well as the +// extra shifts, if any +static Index getAlmostSignExtBits(Expression* curr, Index& extraShifts) { + extraShifts = curr->cast<Binary>()->left->cast<Binary>()->right->cast<Const>()->value.geti32() - + curr->cast<Binary>()->right->cast<Const>()->value.geti32(); + return getSignExtBits(curr); +} + // get a mask to keep only the low # of bits static int32_t lowBitMask(int32_t bits) { uint32_t ret = -1; @@ -321,17 +351,18 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, std::swap(binary->left, binary->right); } } - if (auto* ext = getSignExt(binary)) { - auto bits = getSignExtBits(binary); + if (auto* ext = getAlmostSignExt(binary)) { + Index extraShifts; + auto bits = getAlmostSignExtBits(binary, extraShifts); auto* load = ext->dynCast<Load>(); // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc. 
if (load && ((load->bytes == 1 && bits == 8) || (load->bytes == 2 && bits == 16))) { load->signed_ = true; - return load; + return removeAlmostSignExt(binary); } // if the sign-extend input cannot have a sign bit, we don't need it - if (getMaxBits(ext) < bits) { - return ext; + if (getMaxBits(ext) + extraShifts < bits) { + return removeAlmostSignExt(binary); } } else if (binary->op == EqInt32) { if (auto* c = binary->right->dynCast<Const>()) { @@ -359,29 +390,46 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, // note that both left and right may be consts, but then we let precompute compute the constant result } else if (binary->op == AddInt32 || binary->op == SubInt32) { return optimizeAddedConstants(binary); - } else if (binary->op == AndInt32) { - if (auto* right = binary->right->dynCast<Const>()) { - if (right->type == i32) { - auto mask = right->value.geti32(); - // and with -1 does nothing (common in asm.js output) - if (mask == -1) { - return binary->left; + } + // a bunch of operations on a constant right side can be simplified + if (auto* right = binary->right->dynCast<Const>()) { + if (binary->op == AndInt32) { + auto mask = right->value.geti32(); + // and with -1 does nothing (common in asm.js output) + if (mask == -1) { + return binary->left; + } + // small loads do not need to be masted, the load itself masks + if (auto* load = binary->left->dynCast<Load>()) { + if ((load->bytes == 1 && mask == 0xff) || + (load->bytes == 2 && mask == 0xffff)) { + load->signed_ = false; + return load; } - // small loads do not need to be masted, the load itself masks - if (auto* load = binary->left->dynCast<Load>()) { - if ((load->bytes == 1 && mask == 0xff) || - (load->bytes == 2 && mask == 0xffff)) { - load->signed_ = false; - return load; + } else if (mask == 1 && Properties::emitsBoolean(binary->left)) { + // (bool) & 1 does not need the outer mask + return binary->left; + } + } + // the square of some operations can be merged + 
if (auto* left = binary->left->dynCast<Binary>()) { + if (left->op == binary->op) { + if (auto* leftRight = left->right->dynCast<Const>()) { + if (left->op == AndInt32) { + leftRight->value = leftRight->value.and_(right->value); + return left; + } else if (left->op == OrInt32) { + leftRight->value = leftRight->value.or_(right->value); + return left; + } else if (left->op == ShlInt32 || left->op == ShrUInt32 || left->op == ShrSInt32) { + leftRight->value = leftRight->value.add(right->value); + return left; } - } else if (mask == 1 && Properties::emitsBoolean(binary->left)) { - // (bool) & 1 does not need the outer mask - return binary->left; } } } - return conditionalizeExpensiveOnBitwise(binary); - } else if (binary->op == OrInt32) { + } + if (binary->op == AndInt32 || binary->op == OrInt32) { return conditionalizeExpensiveOnBitwise(binary); } } else if (auto* unary = curr->dynCast<Unary>()) { @@ -685,6 +733,21 @@ private: Builder builder(*getModule()); return builder.makeBinary(AndInt32, curr, builder.makeConst(Literal(lowBitMask(bits)))); } + + // given an "almost" sign extend - either a proper one, or it + // has too many shifts left - we remove the sig extend. 
If there are + // too many shifts, we split the shifts first, so this removes the + // two sign extend shifts and adds one (smaller one) + Expression* removeAlmostSignExt(Binary* outer) { + auto* inner = outer->left->cast<Binary>(); + auto* outerConst = outer->right->cast<Const>(); + auto* innerConst = inner->right->cast<Const>(); + auto* value = inner->left; + if (outerConst->value == innerConst->value) return value; + // add a shift, by reusing the existing node + innerConst->value = innerConst->value.sub(outerConst->value); + return inner; + } }; Pass *createOptimizeInstructionsPass() { diff --git a/test/emcc_O2_hello_world.fromasm b/test/emcc_O2_hello_world.fromasm index 40f7d3585..5aa32979b 100644 --- a/test/emcc_O2_hello_world.fromasm +++ b/test/emcc_O2_hello_world.fromasm @@ -158,22 +158,19 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add - (i32.xor - (i32.and - (get_local $2) - (i32.const 1) - ) + (tee_local $10 + (i32.add + (i32.xor + (i32.and + (get_local $2) (i32.const 1) ) - (get_local $6) + (i32.const 1) ) + (get_local $6) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -339,83 +336,80 @@ (tee_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add + (tee_local $10 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $2 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $2) - (get_local $1) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $1) - ) - (tee_local $7 + (tee_local $2 (i32.and (i32.shr_u - (tee_local $0 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $2) + (get_local $1) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $1) ) - (tee_local $0 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $11 + (tee_local $0 (i32.shr_u - (get_local $0) (get_local $7) + (get_local $2) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $11 + (tee_local $0 (i32.and 
(i32.shr_u - (tee_local $19 + (tee_local $11 (i32.shr_u - (get_local $11) (get_local $0) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $19) - (get_local $11) + (tee_local $11 + (i32.and + (i32.shr_u + (tee_local $19 + (i32.shr_u + (get_local $11) + (get_local $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $19) + (get_local $11) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -539,16 +533,13 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $19 - (i32.shr_u - (get_local $17) - (i32.const 3) - ) + (tee_local $19 + (i32.shr_u + (get_local $17) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -1259,16 +1250,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -2346,11 +2334,8 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4370,11 +4355,8 @@ (tee_local $23 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4535,11 +4517,8 @@ (set_local $0 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -5281,11 +5260,8 @@ (set_local $18 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -5760,11 +5736,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -6122,11 +6095,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $7) - (i32.const 1) - ) 
- (i32.const 2) + (get_local $7) + (i32.const 3) ) (i32.const 216) ) @@ -7143,11 +7113,8 @@ (tee_local $7 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 216) ) @@ -7298,11 +7265,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/emcc_O2_hello_world.fromasm.imprecise b/test/emcc_O2_hello_world.fromasm.imprecise index 901414958..39ed6731f 100644 --- a/test/emcc_O2_hello_world.fromasm.imprecise +++ b/test/emcc_O2_hello_world.fromasm.imprecise @@ -157,22 +157,19 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add - (i32.xor - (i32.and - (get_local $2) - (i32.const 1) - ) + (tee_local $10 + (i32.add + (i32.xor + (i32.and + (get_local $2) (i32.const 1) ) - (get_local $6) + (i32.const 1) ) + (get_local $6) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -338,83 +335,80 @@ (tee_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $10 - (i32.add + (tee_local $10 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $2 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $2) - (get_local $1) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $1) - ) - (tee_local $7 + (tee_local $2 (i32.and (i32.shr_u - (tee_local $0 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $2) + (get_local $1) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $1) ) - (tee_local $0 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $11 + (tee_local $0 (i32.shr_u - (get_local $0) (get_local $7) + (get_local $2) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $11 + (tee_local $0 (i32.and (i32.shr_u - (tee_local $19 + (tee_local $11 (i32.shr_u - (get_local $11) (get_local $0) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + 
(i32.const 2) ) ) ) - (i32.shr_u - (get_local $19) - (get_local $11) + (tee_local $11 + (i32.and + (i32.shr_u + (tee_local $19 + (i32.shr_u + (get_local $11) + (get_local $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $19) + (get_local $11) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -538,16 +532,13 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (tee_local $19 - (i32.shr_u - (get_local $17) - (i32.const 3) - ) + (tee_local $19 + (i32.shr_u + (get_local $17) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -1258,16 +1249,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -2345,11 +2333,8 @@ (set_local $11 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4369,11 +4354,8 @@ (tee_local $23 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -4534,11 +4516,8 @@ (set_local $0 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 216) ) @@ -5280,11 +5259,8 @@ (set_local $18 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -5759,11 +5735,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -6121,11 +6094,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $7) - (i32.const 1) - ) - (i32.const 2) + (get_local $7) + (i32.const 3) ) (i32.const 216) ) @@ -7142,11 +7112,8 @@ (tee_local $7 (i32.add (i32.shl - (i32.shl - 
(get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 216) ) @@ -7297,11 +7264,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index 5ac7af9f6..71ce805ba 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -7967,22 +7967,19 @@ (tee_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $4 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $4 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $13) + (i32.const 1) ) + (get_local $13) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8148,43 +8145,27 @@ (tee_local $10 (i32.add (i32.shl - (i32.shl - (tee_local $5 - (i32.add + (tee_local $5 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $3 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $3) - (get_local $10) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $10) - ) (tee_local $3 (i32.and (i32.shr_u (tee_local $7 (i32.shr_u - (get_local $7) (get_local $3) + (get_local $10) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $10) ) (tee_local $3 (i32.and @@ -8195,9 +8176,9 @@ (get_local $3) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) @@ -8212,19 +8193,32 @@ ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $7) - (get_local $3) + (tee_local $3 + (i32.and + (i32.shr_u + (tee_local $7 + (i32.shr_u + (get_local $7) + (get_local $3) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $7) + (get_local $3) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8348,16 +8342,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $0 - 
(i32.shr_u - (get_local $8) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $8) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -9059,16 +9050,13 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $0) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $0) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -10122,11 +10110,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -11665,11 +11650,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -12157,11 +12139,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -12852,11 +12831,8 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -13308,11 +13284,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -13665,11 +13638,8 @@ (tee_local $3 (i32.add (i32.shl - (i32.shl - (get_local $5) - (i32.const 1) - ) - (i32.const 2) + (get_local $5) + (i32.const 3) ) (i32.const 216) ) @@ -14329,11 +14299,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -14819,11 +14786,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index 096d105b8..58dc7b313 100644 --- 
a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -7904,22 +7904,19 @@ (tee_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $4 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $4 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $13) + (i32.const 1) ) + (get_local $13) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8085,43 +8082,27 @@ (tee_local $10 (i32.add (i32.shl - (i32.shl - (tee_local $5 - (i32.add + (tee_local $5 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $3 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $3) - (get_local $10) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $10) - ) (tee_local $3 (i32.and (i32.shr_u (tee_local $7 (i32.shr_u - (get_local $7) (get_local $3) + (get_local $10) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $10) ) (tee_local $3 (i32.and @@ -8132,9 +8113,9 @@ (get_local $3) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) @@ -8149,19 +8130,32 @@ ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $7) - (get_local $3) + (tee_local $3 + (i32.and + (i32.shr_u + (tee_local $7 + (i32.shr_u + (get_local $7) + (get_local $3) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $7) + (get_local $3) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8285,16 +8279,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $8) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $8) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -8996,16 +8987,13 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.shr_u - (get_local $0) - (i32.const 3) - ) + (tee_local $0 + (i32.shr_u + (get_local $0) + (i32.const 3) 
) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 216) ) @@ -10059,11 +10047,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -11602,11 +11587,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -12094,11 +12076,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) @@ -12789,11 +12768,8 @@ (set_local $2 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 216) ) @@ -13245,11 +13221,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $2) - (i32.const 1) - ) - (i32.const 2) + (get_local $2) + (i32.const 3) ) (i32.const 216) ) @@ -13601,11 +13574,8 @@ (tee_local $3 (i32.add (i32.shl - (i32.shl - (get_local $5) - (i32.const 1) - ) - (i32.const 2) + (get_local $5) + (i32.const 3) ) (i32.const 216) ) @@ -14265,11 +14235,8 @@ (tee_local $0 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 216) ) @@ -14755,11 +14722,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 216) ) diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm index b06a13f64..ef7dc1a78 100644 --- a/test/memorygrowth.fromasm +++ b/test/memorygrowth.fromasm @@ -170,22 +170,19 @@ (tee_local $8 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $0 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $0) + (i32.const 1) ) + (get_local $0) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -354,83 
+351,80 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $16 - (i32.add + (tee_local $16 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $6 - (i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $6) - (get_local $8) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $8) - ) - (tee_local $7 + (tee_local $6 (i32.and (i32.shr_u - (tee_local $9 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $6) + (get_local $8) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $8) ) - (tee_local $9 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $9 (i32.shr_u - (get_local $9) (get_local $7) + (get_local $6) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $1 + (tee_local $9 (i32.and (i32.shr_u - (tee_local $12 + (tee_local $1 (i32.shr_u - (get_local $1) (get_local $9) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $12) - (get_local $1) + (tee_local $1 + (i32.and + (i32.shr_u + (tee_local $12 + (i32.shr_u + (get_local $1) + (get_local $9) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $12) + (get_local $1) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -554,16 +548,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $14 - (i32.shr_u - (get_local $34) - (i32.const 3) - ) + (tee_local $14 + (i32.shr_u + (get_local $34) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -1279,16 +1270,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -2411,11 +2399,8 @@ (set_local $5 (i32.add (i32.shl - (i32.shl - (get_local $9) - (i32.const 1) - ) - (i32.const 2) + (get_local $9) + 
(i32.const 3) ) (i32.const 1248) ) @@ -4094,11 +4079,8 @@ (tee_local $19 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -4601,11 +4583,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -5328,11 +5307,8 @@ (set_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -5807,11 +5783,8 @@ (tee_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -6175,11 +6148,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 1248) ) @@ -6855,11 +6825,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 1248) ) @@ -7351,11 +7318,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 1248) ) diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise index d0f3405d5..2a32f7528 100644 --- a/test/memorygrowth.fromasm.imprecise +++ b/test/memorygrowth.fromasm.imprecise @@ -169,22 +169,19 @@ (tee_local $8 (i32.add (i32.shl - (i32.shl - (tee_local $0 - (i32.add - (i32.xor - (i32.and - (get_local $5) - (i32.const 1) - ) + (tee_local $0 + (i32.add + (i32.xor + (i32.and + (get_local $5) (i32.const 1) ) - (get_local $0) + (i32.const 1) ) + (get_local $0) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -353,83 +350,80 @@ (tee_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $16 - (i32.add + (tee_local $16 + (i32.add + (i32.or (i32.or (i32.or (i32.or - (i32.or - (tee_local $6 - 
(i32.and - (i32.shr_u - (tee_local $7 - (i32.shr_u - (get_local $6) - (get_local $8) - ) - ) - (i32.const 5) - ) - (i32.const 8) - ) - ) - (get_local $8) - ) - (tee_local $7 + (tee_local $6 (i32.and (i32.shr_u - (tee_local $9 + (tee_local $7 (i32.shr_u - (get_local $7) (get_local $6) + (get_local $8) ) ) - (i32.const 2) + (i32.const 5) ) - (i32.const 4) + (i32.const 8) ) ) + (get_local $8) ) - (tee_local $9 + (tee_local $7 (i32.and (i32.shr_u - (tee_local $1 + (tee_local $9 (i32.shr_u - (get_local $9) (get_local $7) + (get_local $6) ) ) - (i32.const 1) + (i32.const 2) ) - (i32.const 2) + (i32.const 4) ) ) ) - (tee_local $1 + (tee_local $9 (i32.and (i32.shr_u - (tee_local $12 + (tee_local $1 (i32.shr_u - (get_local $1) (get_local $9) + (get_local $7) ) ) (i32.const 1) ) - (i32.const 1) + (i32.const 2) ) ) ) - (i32.shr_u - (get_local $12) - (get_local $1) + (tee_local $1 + (i32.and + (i32.shr_u + (tee_local $12 + (i32.shr_u + (get_local $1) + (get_local $9) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) ) ) + (i32.shr_u + (get_local $12) + (get_local $1) + ) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -553,16 +547,13 @@ (set_local $4 (i32.add (i32.shl - (i32.shl - (tee_local $14 - (i32.shr_u - (get_local $34) - (i32.const 3) - ) + (tee_local $14 + (i32.shr_u + (get_local $34) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -1278,16 +1269,13 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (tee_local $7 - (i32.shr_u - (get_local $1) - (i32.const 3) - ) + (tee_local $7 + (i32.shr_u + (get_local $1) + (i32.const 3) ) - (i32.const 1) ) - (i32.const 2) + (i32.const 3) ) (i32.const 1248) ) @@ -2410,11 +2398,8 @@ (set_local $5 (i32.add (i32.shl - (i32.shl - (get_local $9) - (i32.const 1) - ) - (i32.const 2) + (get_local $9) + (i32.const 3) ) (i32.const 1248) ) @@ -4093,11 +4078,8 @@ (tee_local $19 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + 
(i32.const 3) ) (i32.const 1248) ) @@ -4600,11 +4582,8 @@ (set_local $3 (i32.add (i32.shl - (i32.shl - (get_local $0) - (i32.const 1) - ) - (i32.const 2) + (get_local $0) + (i32.const 3) ) (i32.const 1248) ) @@ -5327,11 +5306,8 @@ (set_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -5806,11 +5782,8 @@ (tee_local $13 (i32.add (i32.shl - (i32.shl - (get_local $1) - (i32.const 1) - ) - (i32.const 2) + (get_local $1) + (i32.const 3) ) (i32.const 1248) ) @@ -6174,11 +6147,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $3) - (i32.const 1) - ) - (i32.const 2) + (get_local $3) + (i32.const 3) ) (i32.const 1248) ) @@ -6854,11 +6824,8 @@ (tee_local $4 (i32.add (i32.shl - (i32.shl - (get_local $14) - (i32.const 1) - ) - (i32.const 2) + (get_local $14) + (i32.const 3) ) (i32.const 1248) ) @@ -7350,11 +7317,8 @@ (set_local $1 (i32.add (i32.shl - (i32.shl - (get_local $6) - (i32.const 1) - ) - (i32.const 2) + (get_local $6) + (i32.const 3) ) (i32.const 1248) ) diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index a11bd0d66..03f0f7074 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -704,11 +704,8 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.const 32) - (i32.const 2) - ) - (i32.const 24) + (i32.const 32) + (i32.const 26) ) (i32.const 24) ) @@ -722,11 +719,8 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.const 32) - (i32.const 35) - ) - (i32.const 24) + (i32.const 32) + (i32.const 59) ) (i32.const 24) ) @@ -828,13 +822,10 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.clz - (i32.const 0) - ) - (i32.const 3) + (i32.clz + (i32.const 0) ) - (i32.const 24) + (i32.const 27) ) (i32.const 24) ) @@ -859,15 +850,12 @@ (drop (i32.shr_s (i32.shl - (i32.shl - (i32.wrap/i64 - (i64.clz - (i64.const 0) - ) + (i32.wrap/i64 + (i64.clz + (i64.const 0) ) - (i32.const 2) ) - (i32.const 24) + 
(i32.const 26) ) (i32.const 24) ) @@ -1018,4 +1006,77 @@ (get_local $0) ) ) + (func $almost-sign-ext (type $4) (param $0 i32) (param $0 i32) + (drop + (i32.shr_s + (i32.shl + (i32.const 100) + (i32.const 25) + ) + (i32.const 24) + ) + ) + (drop + (i32.shl + (i32.const 50) + (i32.const 1) + ) + ) + ) + (func $squaring (type $4) (param $0 i32) (param $1 i32) + (drop + (i32.and + (get_local $0) + (i32.const 8) + ) + ) + (drop + (i32.and + (i32.and + (get_local $0) + (i32.const 11) + ) + (get_local $0) + ) + ) + (drop + (i32.and + (get_local $0) + (i32.const 8) + ) + ) + (drop + (i32.or + (get_local $0) + (i32.const 203) + ) + ) + (drop + (i32.shl + (get_local $0) + (i32.const 211) + ) + ) + (drop + (i32.shr_s + (get_local $0) + (i32.const 211) + ) + ) + (drop + (i32.shr_u + (get_local $0) + (i32.const 211) + ) + ) + (drop + (i32.shr_u + (i32.shr_s + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + ) ) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index b9bd420da..c59bb3ade 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -539,7 +539,7 @@ (get_local $0) (i32.const 24) ) - (i32.const 23) ;; different shift + (i32.const 23) ;; different shift, smaller ) (i32.const 0) ) @@ -1238,4 +1238,98 @@ ) ) ) + (func $almost-sign-ext (param $0 i32) (param $0 i32) + (drop + (i32.shr_s + (i32.shl + (i32.const 100) ;; too big, there is a sign bit, due to the extra shift + (i32.const 25) + ) + (i32.const 24) ;; different shift, but larger, so ok to opt if we leave a shift, in theory + ) + ) + (drop + (i32.shr_s + (i32.shl + (i32.const 50) ;; small enough, no sign bit + (i32.const 25) + ) + (i32.const 24) ;; different shift, but larger, so ok to opt if we leave a shift + ) + ) + ) + (func $squaring (param $0 i32) (param $1 i32) + (drop + (i32.and + (i32.and + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.and + (i32.and + (get_local $0) + 
(i32.const 11) + ) + (get_local $0) ;; non-const, cannot optimize this! + ) + ) + (drop + (i32.and + (i32.and + (i32.const 11) ;; flipped order + (get_local $0) + ) + (i32.const 200) + ) + ) + (drop + (i32.or + (i32.or + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shl + (i32.shl + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shr_s + (i32.shr_s + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shr_u + (i32.shr_u + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + (drop + (i32.shr_u + (i32.shr_s ;; but do not optimize a mixture or different shifts! + (get_local $0) + (i32.const 11) + ) + (i32.const 200) + ) + ) + ) ) diff --git a/test/unit.fromasm b/test/unit.fromasm index 2a6c60735..585240a95 100644 --- a/test/unit.fromasm +++ b/test/unit.fromasm @@ -696,14 +696,11 @@ ) ) (call $loadSigned - (i32.shr_s - (i32.shl - (i32.load16_u - (get_local $0) - ) - (i32.const 24) + (i32.shl + (i32.load16_s + (get_local $0) ) - (i32.const 16) + (i32.const 8) ) ) ) diff --git a/test/unit.fromasm.imprecise b/test/unit.fromasm.imprecise index 10c1c025c..64fde3832 100644 --- a/test/unit.fromasm.imprecise +++ b/test/unit.fromasm.imprecise @@ -664,14 +664,11 @@ ) ) (call $loadSigned - (i32.shr_s - (i32.shl - (i32.load16_u - (get_local $0) - ) - (i32.const 24) + (i32.shl + (i32.load16_s + (get_local $0) ) - (i32.const 16) + (i32.const 8) ) ) ) |