diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 36 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 36 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 36 | ||||
-rw-r--r-- | test/passes/optimize-instructions.txt | 37 | ||||
-rw-r--r-- | test/passes/optimize-instructions.wast | 47 |
5 files changed, 138 insertions, 54 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 6dbb1b7e8..c81119f75 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -305,6 +305,24 @@ static int32_t lowBitMask(int32_t bits) { return ret >> (32 - bits); } +// performs a dynCast<T> on the value that falls through curr, looking through +// tee_locals and the last item of unnamed blocks; nullptr if nothing matches. +template<typename T> +T* getFallthroughDynCast(Expression* curr) { + if (T* ret = curr->dynCast<T>()) { + return ret; + } + if (auto* set = curr->dynCast<SetLocal>()) { + if (set->isTee()) return getFallthroughDynCast<T>(set->value); + } else if (auto* block = curr->dynCast<Block>()) { + // if no name, we can't be broken to, and then can look at the fallthrough + if (!block->name.is() && block->list.size() > 0) { + return getFallthroughDynCast<T>(block->list.back()); + } + } + return nullptr; +} + // Main pass class struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, UnifiedExpressionVisitor<OptimizeInstructions>>> { bool isFunctionParallel() override { return true; } @@ -359,11 +377,14 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, if (auto* ext = getAlmostSignExt(binary)) { Index extraShifts; auto bits = getAlmostSignExtBits(binary, extraShifts); - auto* load = ext->dynCast<Load>(); + auto* load = getFallthroughDynCast<Load>(ext); // pattern match a load of 8 bits and a sign extend using a shl of 24 then shr_s of 24 as well, etc. 
if (load && ((load->bytes == 1 && bits == 8) || (load->bytes == 2 && bits == 16))) { - load->signed_ = true; - return removeAlmostSignExt(binary); + // if the value falls through, we can't alter the load, as it might be captured in a tee + if (load->signed_ == true || load == ext) { + load->signed_ = true; + return removeAlmostSignExt(binary); + } } // if the sign-extend input cannot have a sign bit, we don't need it if (getMaxBits(ext) + extraShifts < bits) { @@ -405,11 +426,14 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, return binary->left; } // small loads do not need to be masted, the load itself masks - if (auto* load = binary->left->dynCast<Load>()) { + if (auto* load = getFallthroughDynCast<Load>(binary->left)) { if ((load->bytes == 1 && mask == 0xff) || (load->bytes == 2 && mask == 0xffff)) { - load->signed_ = false; - return load; + // if the value falls through, we can't alter the load, as it might be captured in a tee + if (load->signed_ == false || load == binary->left) { + load->signed_ = false; + return binary->left; + } } } else if (mask == 1 && Properties::emitsBoolean(binary->left)) { // (bool) & 1 does not need the outer mask diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index e90c9c008..78f0460fa 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -2632,21 +2632,15 @@ (i32.lt_u (tee_local $8 (i32.add - (i32.shr_s - (i32.shl - (tee_local $11 - (i32.load8_s - (tee_local $10 - (i32.add - (get_local $6) - (i32.const 1) - ) - ) + (tee_local $11 + (i32.load8_s + (tee_local $10 + (i32.add + (get_local $6) + (i32.const 1) ) ) - (i32.const 24) ) - (i32.const 24) ) (i32.const -48) ) @@ -2772,21 +2766,15 @@ (i32.eq (i32.and (tee_local $6 - (i32.shr_s - (i32.shl - (tee_local $1 - (i32.load8_s - (tee_local $10 - (i32.add - (get_local $10) - (i32.const 1) - ) - ) + (tee_local $1 + (i32.load8_s + (tee_local $10 + (i32.add + (get_local $10) + (i32.const 1) ) ) - 
(i32.const 24) ) - (i32.const 24) ) ) (i32.const -32) diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index 8a5ce0b89..ba1a50ca7 100644 --- a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -2575,21 +2575,15 @@ (i32.lt_u (tee_local $8 (i32.add - (i32.shr_s - (i32.shl - (tee_local $11 - (i32.load8_s - (tee_local $10 - (i32.add - (get_local $6) - (i32.const 1) - ) - ) + (tee_local $11 + (i32.load8_s + (tee_local $10 + (i32.add + (get_local $6) + (i32.const 1) ) ) - (i32.const 24) ) - (i32.const 24) ) (i32.const -48) ) @@ -2715,21 +2709,15 @@ (i32.eq (i32.and (tee_local $6 - (i32.shr_s - (i32.shl - (tee_local $1 - (i32.load8_s - (tee_local $10 - (i32.add - (get_local $10) - (i32.const 1) - ) - ) + (tee_local $1 + (i32.load8_s + (tee_local $10 + (i32.add + (get_local $10) + (i32.const 1) ) ) - (i32.const 24) ) - (i32.const 24) ) ) (i32.const -32) diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt index 01f1070f7..e4ab82365 100644 --- a/test/passes/optimize-instructions.txt +++ b/test/passes/optimize-instructions.txt @@ -1237,5 +1237,42 @@ (i32.const 256) ) ) + (drop + (tee_local $1 + (i32.load8_s + (i32.const 1) + ) + ) + ) + (drop + (i32.shr_s + (i32.shl + (tee_local $1 + (i32.load8_u + (i32.const 1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.and + (tee_local $1 + (i32.load8_s + (i32.const 1) + ) + ) + (i32.const 255) + ) + ) + (drop + (tee_local $1 + (i32.load8_u + (i32.const 1) + ) + ) + ) ) ) diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast index 9cdec0a6c..7cfcd7a0d 100644 --- a/test/passes/optimize-instructions.wast +++ b/test/passes/optimize-instructions.wast @@ -1562,5 +1562,52 @@ (i32.const 16) ) ) + ;; through tees, we cannot alter the load sign + (drop + (i32.shr_s + (i32.shl + (tee_local $1 + (i32.load8_s + (i32.const 1) + ) + ) + (i32.const 24) + ) + (i32.const 
24) + ) + ) + (drop + (i32.shr_s + (i32.shl + (tee_local $1 + (i32.load8_u + (i32.const 1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (drop + (i32.and + (tee_local $1 + (i32.load8_s + (i32.const 1) + ) + ) + (i32.const 255) + ) + ) + (drop + (i32.and + (tee_local $1 + (i32.load8_u + (i32.const 1) + ) + ) + (i32.const 255) + ) + ) ) ) |