diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 154 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 12 | ||||
-rw-r--r-- | test/passes/optimize-instructions_all-features.txt | 253 | ||||
-rw-r--r-- | test/passes/optimize-instructions_all-features.wast | 241 |
4 files changed, 635 insertions, 25 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 04661af2d..a3afa58c4 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -368,13 +368,11 @@ struct OptimizeInstructions } } } - auto* ret = optimizeAddedConstants(binary); - if (ret) { + if (auto* ret = optimizeAddedConstants(binary)) { return ret; } } else if (binary->op == SubInt32) { - auto* ret = optimizeAddedConstants(binary); - if (ret) { + if (auto* ret = optimizeAddedConstants(binary)) { return ret; } } @@ -401,8 +399,7 @@ struct OptimizeInstructions } } // some math operations have trivial results - Expression* ret = optimizeWithConstantOnRight(binary); - if (ret) { + if (auto* ret = optimizeWithConstantOnRight(binary)) { return ret; } // the square of some operations can be merged @@ -465,8 +462,7 @@ struct OptimizeInstructions } // a bunch of operations on a constant left side can be simplified if (binary->left->is<Const>()) { - Expression* ret = optimizeWithConstantOnLeft(binary); - if (ret) { + if (auto* ret = optimizeWithConstantOnLeft(binary)) { return ret; } } @@ -518,9 +514,15 @@ struct OptimizeInstructions if (!EffectAnalyzer(getPassOptions(), features, binary->left) .hasSideEffects()) { if (ExpressionAnalyzer::equal(binary->left, binary->right)) { - return optimizeBinaryWithEqualEffectlessChildren(binary); + if (auto* ret = optimizeBinaryWithEqualEffectlessChildren(binary)) { + return ret; + } } } + + if (auto* ret = deduplicateBinary(binary)) { + return ret; + } } else if (auto* unary = curr->dynCast<Unary>()) { if (unary->op == EqZInt32) { if (auto* inner = unary->value->dynCast<Binary>()) { @@ -540,6 +542,10 @@ struct OptimizeInstructions return unary; } } + + if (auto* ret = deduplicateUnary(unary)) { + return ret; + } } else if (auto* set = curr->dynCast<GlobalSet>()) { // optimize out a set of a get auto* get = set->value->dynCast<GlobalGet>(); @@ -1397,6 +1403,125 @@ private: return nullptr; } + Expression* deduplicateUnary(Unary* unaryOuter) { + if (auto* unaryInner = unaryOuter->value->dynCast<Unary>()) { + if (unaryInner->op == unaryOuter->op) { + switch (unaryInner->op) { + case NegFloat32: + case NegFloat64: { + // neg(neg(x)) ==> x + return unaryInner->value; + } + case AbsFloat32: + case CeilFloat32: + case FloorFloat32: + case TruncFloat32: + case NearestFloat32: + case AbsFloat64: + case CeilFloat64: + case FloorFloat64: + case TruncFloat64: + case NearestFloat64: { + // unaryOp(unaryOp(x)) ==> unaryOp(x) + return unaryInner; + } + case ExtendS8Int32: + case ExtendS16Int32: { + assert(getModule()->features.hasSignExt()); + return unaryInner; + } + case EqZInt32: { + // eqz(eqz(bool(x))) ==> bool(x) + if (Bits::getMaxBits(unaryInner->value, this) == 1) { + return unaryInner->value; + } + break; + } + default: { + } + } + } + } + return nullptr; + } + + Expression* deduplicateBinary(Binary* outer) { + Type type = outer->type; + if (type.isInteger()) { + if (auto* inner = outer->right->dynCast<Binary>()) { + if (outer->op == inner->op) { + if (!EffectAnalyzer( + getPassOptions(), getModule()->features, outer->left) + .hasSideEffects()) { + if (ExpressionAnalyzer::equal(inner->left, outer->left)) { + // x - (x - y) ==> y + // x ^ (x ^ y) ==> y + if (outer->op == Abstract::getBinary(type, Abstract::Sub) || + outer->op == Abstract::getBinary(type, Abstract::Xor)) { + return inner->right; + } + // x & (x & y) ==> x & y + // x | (x | y) ==> x | y + if (outer->op == Abstract::getBinary(type, Abstract::And) || + outer->op == Abstract::getBinary(type, Abstract::Or)) { + return inner; + } + } + if (ExpressionAnalyzer::equal(inner->right, outer->left)) { + // x ^ (y ^ x) ==> y + if (outer->op == Abstract::getBinary(type, Abstract::Xor)) { + return inner->left; + } + + // x & (y & x) ==> y & x + // x | (y | x) ==> y | x + if (outer->op == Abstract::getBinary(type, Abstract::And) || + outer->op == Abstract::getBinary(type, Abstract::Or)) { + return inner; + } + } + } + } + } + if (auto* inner = outer->left->dynCast<Binary>()) { + if (outer->op == inner->op) { + if (!EffectAnalyzer( + getPassOptions(), getModule()->features, outer->right) + .hasSideEffects()) { + if (ExpressionAnalyzer::equal(inner->right, outer->right)) { + // (x ^ y) ^ y ==> x + if (outer->op == Abstract::getBinary(type, Abstract::Xor)) { + return inner->left; + } + // (x % y) % y ==> x % y + // (x & y) & y ==> x & y + // (x | y) | y ==> x | y + if (outer->op == Abstract::getBinary(type, Abstract::RemS) || + outer->op == Abstract::getBinary(type, Abstract::RemU) || + outer->op == Abstract::getBinary(type, Abstract::And) || + outer->op == Abstract::getBinary(type, Abstract::Or)) { + return inner; + } + } + if (ExpressionAnalyzer::equal(inner->left, outer->right)) { + // (x ^ y) ^ x ==> y + if (outer->op == Abstract::getBinary(type, Abstract::Xor)) { + return inner->right; + } + // (x & y) & x ==> x & y + // (x | y) | x ==> x | y + if (outer->op == Abstract::getBinary(type, Abstract::And) || + outer->op == Abstract::getBinary(type, Abstract::Or)) { + return inner; + } + } + } + } + } + } + return nullptr; + } + // given a relational binary with a const on both sides, combine the constants // left is also a binary, and has a constant; right may be just a constant, in // which case right is nullptr @@ -1491,7 +1616,6 @@ private: // given a binary expression with equal children and no side effects in // either, we can fold various things - // TODO: trinaries, things like (x & (y & x)) ? Expression* optimizeBinaryWithEqualEffectlessChildren(Binary* binary) { // TODO add: perhaps worth doing 2*x if x is quite large? switch (binary->op) { @@ -1500,16 +1624,16 @@ private: case SubInt64: case XorInt64: return LiteralUtils::makeZero(binary->left->type, *getModule()); - case NeInt64: - case LtSInt64: - case LtUInt64: - case GtSInt64: - case GtUInt64: case NeInt32: case LtSInt32: case LtUInt32: case GtSInt32: case GtUInt32: + case NeInt64: + case LtSInt64: + case LtUInt64: + case GtSInt64: + case GtUInt64: return LiteralUtils::makeZero(Type::i32, *getModule()); case AndInt32: case OrInt32: diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index d3c0b46e8..87488cfaf 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -812,12 +812,6 @@ void Unary::finalize() { bool Binary::isRelational() { switch (op) { - case EqFloat64: - case NeFloat64: - case LtFloat64: - case LeFloat64: - case GtFloat64: - case GeFloat64: case EqInt32: case NeInt32: case LtSInt32: @@ -844,6 +838,12 @@ bool Binary::isRelational() { case LeFloat32: case GtFloat32: case GeFloat32: + case EqFloat64: + case NeFloat64: + case LtFloat64: + case LeFloat64: + case GtFloat64: + case GeFloat64: return true; default: return false; diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt index 33e137b9d..dda7a47c0 100644 --- a/test/passes/optimize-instructions_all-features.txt +++ b/test/passes/optimize-instructions_all-features.txt @@ -12,6 +12,7 @@ (type $i32_i64_f32_f64_=>_none (func (param i32 i64 f32 f64))) (type $none_=>_anyref (func (result anyref))) (type $i32_i32_i32_=>_none (func (param i32 i32 i32))) + (type $i32_i32_i32_f64_=>_none (func (param i32 i32 i32 f64))) (type $i32_i32_f64_f64_=>_none (func (param i32 i32 f64 f64))) (type $i32_i64_f64_i32_=>_none (func (param i32 i64 f64 i32))) (type $f32_f64_=>_none (func (param f32 f64))) @@ -1301,10 +1302,7 @@ (drop (i32.and (local.get $0) - (i32.and - (local.get $0) - (i32.const 11) - ) + (i32.const 11) ) ) (drop @@ -3786,6 +3784,253 @@ ) ) ) + (func $duplicate-elimination (param $x i32) (param $y i32) (param $z i32) (param $w f64) + (drop + (f64.abs + (local.get $w) + ) + ) + (drop + (f64.ceil + (local.get $w) + ) + ) + (drop + (f64.floor + (local.get $w) + ) + ) + (drop + (f64.trunc + (local.get $w) + ) + ) + (drop + (f64.nearest + (local.get $w) + ) + ) + (drop + (f64.nearest + (f64.trunc + (local.get $w) + ) + ) + ) + (drop + (f64.trunc + (f64.nearest + (local.get $w) + ) + ) + ) + (drop + (local.get $w) + ) + (drop + (f64.neg + (local.get $w) + ) + ) + (drop + (local.get $w) + ) + (drop + (i32.eqz + (i32.eqz + (local.get $x) + ) + ) + ) + (drop + (i32.eqz + (local.get $x) + ) + ) + (drop + (i64.eqz + (i64.const 1) + ) + ) + (drop + (i32.ne + (local.get $x) + (i32.const 2) + ) + ) + (drop + (i32.extend8_s + (local.get $x) + ) + ) + (drop + (i32.extend16_s + (local.get $x) + ) + ) + (drop + (i32.and + (local.get $x) + (i32.const 1) + ) + ) + (drop + (i32.rem_s + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.rem_u + (local.get $x) + (local.get $y) + ) + ) + (drop + (local.get $y) + ) + (drop + (local.get $y) + ) + (drop + (i32.sub + (local.get $y) + (i32.sub + (local.get $x) + (local.get $y) + ) + ) + ) + (drop + (local.get $y) + ) + (drop + (local.get $y) + ) + (drop + (local.get $y) + ) + (drop + (local.get $y) + ) + (drop + (local.get $x) + ) + (drop + (i32.and + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.and + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.and + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.and + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.or + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.or + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.or + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.or + (local.get $x) + (local.get $y) + ) + ) + (drop + (i32.or + (local.get $z) + (i32.or + (local.get $x) + (local.get $y) + ) + ) + ) + (drop + (i32.or + (local.get $y) + (i32.or + (local.get $x) + (local.get $z) + ) + ) + ) + (drop + (i32.or + (call $ne0) + (local.get $x) + ) + ) + (drop + (i32.or + (i32.or + (call $ne0) + (local.get $x) + ) + (call $ne0) + ) + ) + (drop + (i32.or + (call $ne0) + (local.get $x) + ) + ) + (drop + (i32.or + (call $ne0) + (i32.or + (call $ne0) + (local.get $x) + ) + ) + ) + (drop + (i32.rem_s + (i32.rem_s + (local.get $y) + (local.get $x) + ) + (local.get $y) + ) + ) + (drop + (i32.rem_u + (local.get $y) + (i32.rem_u + (local.get $x) + (local.get $y) + ) + ) + ) + ) (func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32) (memory.copy (local.get $dst) diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast index c67270a4f..7dc8cffbf 100644 --- a/test/passes/optimize-instructions_all-features.wast +++ b/test/passes/optimize-instructions_all-features.wast @@ -4289,6 +4289,247 @@ ) )) ) + (func $duplicate-elimination (param $x i32) (param $y i32) (param $z i32) (param $w f64) + ;; unary + (drop (f64.abs (f64.abs (local.get $w)))) + (drop (f64.ceil (f64.ceil (local.get $w)))) + (drop (f64.floor (f64.floor (local.get $w)))) + (drop (f64.trunc (f64.trunc (local.get $w)))) + (drop (f64.nearest (f64.nearest (local.get $w)))) + + (drop (f64.nearest (f64.trunc (local.get $w)))) ;; skip + (drop (f64.trunc (f64.nearest (local.get $w)))) ;; skip + + (drop (f64.neg (f64.neg (local.get $w)))) + (drop (f64.neg (f64.neg (f64.neg (local.get $w))))) + (drop (f64.neg (f64.neg (f64.neg (f64.neg (local.get $w)))))) + + (drop (i32.eqz (i32.eqz (local.get $x)))) ;; skip + (drop (i32.eqz (i32.eqz (i32.eqz (local.get $x))))) + (drop (i32.eqz (i32.eqz (i64.eqz (i64.const 1))))) + (drop (i32.eqz (i32.eqz (i32.ne (local.get $x) (i32.const 2))))) + + (drop (i32.extend8_s (i32.extend8_s (local.get $x)))) + (drop (i32.extend16_s (i32.extend16_s (local.get $x)))) + (drop (i32.eqz + (i32.eqz + (i32.and + (local.get $x) + (i32.const 1) + ) + ) + )) + + ;; binary + ;; ((signed)x % y) % y + (drop (i32.rem_s + (i32.rem_s + (local.get $x) + (local.get $y) + ) + (local.get $y) + )) + ;; ((unsigned)x % y) % y + (drop (i32.rem_u + (i32.rem_u + (local.get $x) + (local.get $y) + ) + (local.get $y) + )) + ;; 0 - (0 - y) + (drop (i32.sub + (i32.const 0) + (i32.sub + (i32.const 0) + (local.get $y) + ) + )) + ;; x - (x - y) + (drop (i32.sub + (local.get $x) + (i32.sub + (local.get $x) + (local.get $y) + ) + )) + ;; y - (x - y) - skip + (drop (i32.sub + (local.get $y) + (i32.sub + (local.get $x) + (local.get $y) + ) + )) + ;; x ^ (x ^ y) + (drop (i32.xor + (local.get $x) + (i32.xor + (local.get $x) + (local.get $y) + ) + )) + ;; x ^ (y ^ x) + (drop (i32.xor + (local.get $x) + (i32.xor + (local.get $y) + (local.get $x) + ) + )) + ;; (x ^ y) ^ x + (drop (i32.xor + (i32.xor + (local.get $x) + (local.get $y) + ) + (local.get $x) + )) + ;; (y ^ x) ^ x + (drop (i32.xor + (i32.xor + (local.get $y) + (local.get $x) + ) + (local.get $x) + )) + ;; x ^ (x ^ x) + (drop (i32.xor + (local.get $x) + (i32.xor + (local.get $x) + (local.get $x) + ) + )) + ;; x & (x & y) + (drop (i32.and + (local.get $x) + (i32.and + (local.get $x) + (local.get $y) + ) + )) + ;; x & (y & x) + (drop (i32.and + (local.get $x) + (i32.and + (local.get $y) + (local.get $x) + ) + )) + ;; (x & y) & x + (drop (i32.and + (i32.and + (local.get $x) + (local.get $y) + ) + (local.get $x) + )) + ;; (y & x) & x + (drop (i32.and + (i32.and + (local.get $y) + (local.get $x) + ) + (local.get $x) + )) + ;; x | (x | y) + (drop (i32.or + (local.get $x) + (i32.or + (local.get $x) + (local.get $y) + ) + )) + ;; x | (y | x) + (drop (i32.or + (local.get $x) + (i32.or + (local.get $y) + (local.get $x) + ) + )) + ;; (x | y) | x + (drop (i32.or + (i32.or + (local.get $x) + (local.get $y) + ) + (local.get $x) + )) + ;; (y | x) | x + (drop (i32.or + (i32.or + (local.get $y) + (local.get $x) + ) + (local.get $x) + )) + ;; (y | x) | z - skip + (drop (i32.or + (i32.or + (local.get $y) + (local.get $x) + ) + (local.get $z) + )) + ;; (z | x) | y - skip + (drop (i32.or + (i32.or + (local.get $z) + (local.get $x) + ) + (local.get $y) + )) + ;; (SE() | x) | x + (drop (i32.or + (i32.or + (call $ne0) ;; side effect + (local.get $x) + ) + (local.get $x) + )) + ;; (x | SE()) | SE() - skip + (drop (i32.or + (i32.or + (local.get $x) + (call $ne0) ;; side effect + ) + (call $ne0) ;; side effect + )) + ;; x | (SE() | x) + (drop (i32.or + (local.get $x) + (i32.or + (local.get $x) + (call $ne0) ;; side effect + ) + )) + ;; SE() | (x | SE()) - skip + (drop (i32.or + (call $ne0) ;; side effect + (i32.or + (call $ne0) ;; side effect + (local.get $x) + ) + )) + ;; (y % x) % y - skip + (drop (i32.rem_s + (i32.rem_s + (local.get $y) + (local.get $x) + ) + (local.get $y) + )) + ;; y % (x % y) - skip + (drop (i32.rem_u + (local.get $y) + (i32.rem_u + (local.get $x) + (local.get $y) + ) + )) + ) (func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32) (memory.copy ;; skip (local.get $dst) |