diff options
-rwxr-xr-x | check.py | 4 | ||||
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 74 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 136 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 227 |
4 files changed, 435 insertions, 6 deletions
@@ -369,6 +369,10 @@ def run_binaryen_js_tests(): if not s.endswith('.js'): continue print s f = open('a.js', 'w') + # avoid stdout/stderr ordering issues in some js shells - use just stdout + f.write(''' + console.warn = function(x) { console.log(x) }; + ''') binaryen_js = open(os.path.join(options.binaryen_root, 'bin', 'binaryen.js')).read() f.write(binaryen_js) if NODEJS: diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 70e339241..3e7c15720 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -609,6 +609,11 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, } } } + // a bunch of operations on a constant left side can be simplified + if (binary->left->is<Const>()) { + Expression* ret = optimizeWithConstantOnLeft(binary); + if (ret) return ret; + } // bitwise operations if (binary->op == AndInt32) { // try de-morgan's AND law, @@ -636,11 +641,18 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, } // for and and or, we can potentially conditionalize if (binary->op == AndInt32 || binary->op == OrInt32) { - return conditionalizeExpensiveOnBitwise(binary); + auto* ret = conditionalizeExpensiveOnBitwise(binary); + if (ret) return ret; } // relation/comparisons allow for math optimizations if (binary->isRelational()) { - return optimizeRelational(binary); + auto* ret = optimizeRelational(binary); + if (ret) return ret; + } + // finally, try more expensive operations on the binary + if (!EffectAnalyzer(getPassOptions(), binary->left).hasSideEffects() && + ExpressionAnalyzer::equal(binary->left, binary->right)) { + return optimizeBinaryWithEqualEffectlessChildren(binary); } } else if (auto* unary = curr->dynCast<Unary>()) { // de-morgan's laws @@ -1154,6 +1166,27 @@ private: return nullptr; } + // optimize trivial math operations, given that the left side of a binary + // is a constant. since we canonicalize constants to the right for symmetrical + // operations, we only need to handle asymmetrical ones here + // TODO: templatize on type? + Expression* optimizeWithConstantOnLeft(Binary* binary) { + auto type = binary->left->type; + auto* left = binary->left->cast<Const>(); + if (isIntegerType(type)) { + // operations on zero + if (left->value == LiteralUtils::makeLiteralFromInt32(0, type)) { + if ((binary->op == Abstract::getBinary(type, Abstract::Shl) || + binary->op == Abstract::getBinary(type, Abstract::ShrU) || + binary->op == Abstract::getBinary(type, Abstract::ShrS)) && + !EffectAnalyzer(getPassOptions(), binary->right).hasSideEffects()) { + return binary->left; + } + } + } + return nullptr; + } + // integer math, even on 2s complement, allows stuff like // x + 5 == 7 // => @@ -1204,6 +1237,43 @@ private: binary->left = left->left; return binary; } + + // given a binary expression with equal children and no side effects in either, + // we can fold various things + Expression* optimizeBinaryWithEqualEffectlessChildren(Binary* binary) { + // TODO add: perhaps worth doing 2*x if x is quite large? + switch (binary->op) { + case SubInt32: + case XorInt32: + case SubInt64: + case XorInt64: return LiteralUtils::makeZero(binary->left->type, *getModule()); + case NeInt64: + case LtSInt64: + case LtUInt64: + case GtSInt64: + case GtUInt64: + case NeInt32: + case LtSInt32: + case LtUInt32: + case GtSInt32: + case GtUInt32: return LiteralUtils::makeZero(i32, *getModule()); + case AndInt32: + case OrInt32: + case AndInt64: + case OrInt64: return binary->left; + case EqInt32: + case LeSInt32: + case LeUInt32: + case GeSInt32: + case GeUInt32: + case EqInt64: + case LeSInt64: + case LeUInt64: + case GeSInt64: + case GeUInt64: return LiteralUtils::makeFromInt32(1, i32, *getModule()); + default: return nullptr; + } + } }; Pass *createOptimizeInstructionsPass() { diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index 5ed72a62e..b1b4ce2d2 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -10,6 +10,7 @@ (type $8 (func (result i64))) (type $9 (func (param i32 i64 f32 f64))) (type $10 (func (param i32 i64 f32))) + (type $11 (func (param i32 i64 f64 i32))) (memory $0 0) (export "load-off-2" (func $load-off-2)) (func $f (; 0 ;) (type $0) (param $i1 i32) (param $i2 i64) @@ -813,10 +814,7 @@ ) ) (drop - (i32.ne - (i32.const -1) - (i32.const -1) - ) + (i32.const 0) ) (drop (f32.le @@ -2722,6 +2720,136 @@ ) ) ) + (func $shift-a-zero (; 66 ;) (type $10) (param $x i32) (param $y i64) (param $z f32) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i64.const 0) + ) + (drop + (i32.shl + (i32.const 0) + (unreachable) + ) + ) + ) + (func $identical-siblings (; 67 ;) (type $11) (param $x i32) (param $y i64) (param $z f64) (param $xx i32) + (drop + (i32.const 0) + ) + (drop + (i64.const 0) + ) + (drop + (f64.sub + (get_local $z) + (get_local $z) + ) + ) + (drop + (i32.sub + (get_local $x) + (get_local $xx) + ) + ) + (drop + (i32.sub + (unreachable) + (unreachable) + ) + ) + (drop + (i32.add + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (get_local $x) + ) + (drop + (get_local $x) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i64.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (i32.const 0) + ) + (drop + (get_local $y) + ) + (drop + (get_local $y) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + (drop + (i32.const 1) + ) + ) ) (module (type $0 (func)) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index e4d7773ee..85709c7ca 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -3158,6 +3158,233 @@ (drop (f32.add (get_local $z) (f32.const 0x40))) ) + (func $shift-a-zero (param $x i32) (param $y i64) (param $z f32) + (drop + (i32.shl + (i32.const 0) + (get_local $x) + ) + ) + (drop + (i32.shr_u + (i32.const 0) + (get_local $x) + ) + ) + (drop + (i32.shr_s + (i32.const 0) + (get_local $x) + ) + ) + (drop + (i64.shl + (i64.const 0) + (get_local $y) + ) + ) + (drop + (i32.shl + (i32.const 0) + (unreachable) + ) + ) + ) + (func $identical-siblings (param $x i32) (param $y i64) (param $z f64) (param $xx i32) + (drop + (i32.sub + (get_local $x) + (get_local $x) + ) + ) + (drop + (i64.sub + (get_local $y) + (get_local $y) + ) + ) + (drop + (f64.sub + (get_local $z) + (get_local $z) + ) + ) + (drop + (i32.sub + (get_local $x) + (get_local $xx) + ) + ) + (drop + (i32.sub + (unreachable) + (unreachable) + ) + ) + (drop + (i32.add + (get_local $x) + (get_local $x) + ) + ) + ;; more ops + (drop + (i32.xor + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.ne + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.lt_s + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.lt_u + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.gt_s + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.gt_u + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.and + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.or + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.eq + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.le_s + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.le_u + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.ge_s + (get_local $x) + (get_local $x) + ) + ) + (drop + (i32.ge_u + (get_local $x) + (get_local $x) + ) + ) + (drop + (i64.xor + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.ne + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.lt_s + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.lt_u + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.gt_s + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.gt_u + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.and + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.or + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.eq + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.le_s + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.le_u + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.ge_s + (get_local $y) + (get_local $y) + ) + ) + (drop + (i64.ge_u + (get_local $y) + (get_local $y) + ) + ) + ) ) (module (import "env" "memory" (memory $0 (shared 256 256))) |