diff options
-rw-r--r-- | src/passes/CodeFolding.cpp | 25 | ||||
-rw-r--r-- | src/passes/pass.cpp | 2 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm | 26 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm.clamp | 26 | ||||
-rw-r--r-- | test/emcc_O2_hello_world.fromasm.imprecise | 26 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm | 15 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.clamp | 15 | ||||
-rw-r--r-- | test/emcc_hello_world.fromasm.imprecise | 15 | ||||
-rw-r--r-- | test/memorygrowth.fromasm | 26 | ||||
-rw-r--r-- | test/memorygrowth.fromasm.clamp | 26 | ||||
-rw-r--r-- | test/memorygrowth.fromasm.imprecise | 26 | ||||
-rw-r--r-- | test/passes/remove-unused-names_code-folding.txt | 31 | ||||
-rw-r--r-- | test/passes/remove-unused-names_code-folding.wast | 18 |
13 files changed, 174 insertions, 103 deletions
diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index c2ad07595..b639c7681 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -15,7 +15,8 @@ */ // -// Folds duplicate code together, saving space. +// Folds duplicate code together, saving space (and possibly phis in +// the wasm VM, which can save time). // // We fold tails of code where they merge and moving the code // to the merge point is helpful. There are two cases here: (1) expressions, @@ -198,8 +199,6 @@ struct CodeFolding : public WalkerPass<ControlFlowWalker<CodeFolding>> { void visitIf(If* curr) { if (!curr->ifFalse) return; // if both sides are identical, this is easy to fold - // (except if the condition is unreachable and we return a value, then we can't just replace - // outselves with a drop if (ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { Builder builder(*getModule()); // remove if (4 bytes), remove one arm, add drop (1), add block (3), @@ -216,6 +215,26 @@ struct CodeFolding : public WalkerPass<ControlFlowWalker<CodeFolding>> { // if both are blocks, look for a tail we can merge auto* left = curr->ifTrue->dynCast<Block>(); auto* right = curr->ifFalse->dynCast<Block>(); + // If one is a block and the other isn't, and the non-block is a tail + // of the other, we can fold that - for our convenience, we just add + // a block and run the rest of the optimization normally. 
+ auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { + // if other is a suffix of the block, wrap it in a block + if (block->list.empty() || + !ExpressionAnalyzer::equal(other, block->list.back())) { + return nullptr; + } + // do it, assign to the out param `other`, and return the block + Builder builder(*getModule()); + auto* ret = builder.makeBlock(other); + other = ret; + return ret; + }; + if (left && !right) { + right = maybeAddBlock(left, curr->ifFalse); + } else if (!left && right) { + left = maybeAddBlock(right, curr->ifTrue); + } // we need nameless blocks, as if there is a name, someone might branch // to the end, skipping the code we want to merge if (left && right && diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 38a21fae8..0ec5864e0 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -154,7 +154,7 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { add("simplify-locals"); add("vacuum"); // previous pass creates garbage add("reorder-locals"); - if (options.shrinkLevel >= 1) { + if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) { add("code-folding"); } add("merge-blocks"); // makes remove-unused-brs more effective diff --git a/test/emcc_O2_hello_world.fromasm b/test/emcc_O2_hello_world.fromasm index b046c9fe9..25b5a1be8 100644 --- a/test/emcc_O2_hello_world.fromasm +++ b/test/emcc_O2_hello_world.fromasm @@ -3097,15 +3097,16 @@ (i32.eq (tee_local $7 (block $label$break$L257 (result i32) - (if (result i32) - (i32.and - (i32.load - (i32.const 620) + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 620) + ) + (i32.const 4) ) - (i32.const 4) ) - (i32.const 190) - (block (result i32) + (block (block $label$break$L259 (if (tee_local $10 @@ -3478,9 +3479,9 @@ (i32.const 4) ) ) - (i32.const 190) ) ) + (i32.const 190) ) ) (i32.const 190) @@ -7894,13 +7895,13 @@ ) ) ) - (set_local $5 - (if (result i32) + (block + (if (i32.eq (get_local $3) (i32.const 2) ) - (block (result i32) + (block (i32.store 
(get_local $8) (i32.add @@ -7913,8 +7914,9 @@ (set_local $3 (i32.const 2) ) - (get_local $14) ) + ) + (set_local $5 (get_local $14) ) ) diff --git a/test/emcc_O2_hello_world.fromasm.clamp b/test/emcc_O2_hello_world.fromasm.clamp index b046c9fe9..25b5a1be8 100644 --- a/test/emcc_O2_hello_world.fromasm.clamp +++ b/test/emcc_O2_hello_world.fromasm.clamp @@ -3097,15 +3097,16 @@ (i32.eq (tee_local $7 (block $label$break$L257 (result i32) - (if (result i32) - (i32.and - (i32.load - (i32.const 620) + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 620) + ) + (i32.const 4) ) - (i32.const 4) ) - (i32.const 190) - (block (result i32) + (block (block $label$break$L259 (if (tee_local $10 @@ -3478,9 +3479,9 @@ (i32.const 4) ) ) - (i32.const 190) ) ) + (i32.const 190) ) ) (i32.const 190) @@ -7894,13 +7895,13 @@ ) ) ) - (set_local $5 - (if (result i32) + (block + (if (i32.eq (get_local $3) (i32.const 2) ) - (block (result i32) + (block (i32.store (get_local $8) (i32.add @@ -7913,8 +7914,9 @@ (set_local $3 (i32.const 2) ) - (get_local $14) ) + ) + (set_local $5 (get_local $14) ) ) diff --git a/test/emcc_O2_hello_world.fromasm.imprecise b/test/emcc_O2_hello_world.fromasm.imprecise index ff0fa1fe3..3db79a820 100644 --- a/test/emcc_O2_hello_world.fromasm.imprecise +++ b/test/emcc_O2_hello_world.fromasm.imprecise @@ -3096,15 +3096,16 @@ (i32.eq (tee_local $7 (block $label$break$L257 (result i32) - (if (result i32) - (i32.and - (i32.load - (i32.const 620) + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 620) + ) + (i32.const 4) ) - (i32.const 4) ) - (i32.const 190) - (block (result i32) + (block (block $label$break$L259 (if (tee_local $10 @@ -3477,9 +3478,9 @@ (i32.const 4) ) ) - (i32.const 190) ) ) + (i32.const 190) ) ) (i32.const 190) @@ -7893,13 +7894,13 @@ ) ) ) - (set_local $5 - (if (result i32) + (block + (if (i32.eq (get_local $3) (i32.const 2) ) - (block (result i32) + (block (i32.store (get_local $8) (i32.add @@ -7912,8 +7913,9 @@ (set_local $3 (i32.const 2) ) - 
(get_local $14) ) + ) + (set_local $5 (get_local $14) ) ) diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm index 16af1a160..0af69c9fe 100644 --- a/test/emcc_hello_world.fromasm +++ b/test/emcc_hello_world.fromasm @@ -579,17 +579,14 @@ (get_local $0) ) ) - (set_local $0 - (if (result i32) - (tee_local $2 - (i32.eqz - (i32.const 0) - ) - ) - (get_local $1) - (get_local $1) + (set_local $2 + (i32.eqz + (i32.const 0) ) ) + (set_local $0 + (get_local $1) + ) ) (block (set_local $0 diff --git a/test/emcc_hello_world.fromasm.clamp b/test/emcc_hello_world.fromasm.clamp index 3a6147692..a72cae0f6 100644 --- a/test/emcc_hello_world.fromasm.clamp +++ b/test/emcc_hello_world.fromasm.clamp @@ -577,17 +577,14 @@ (get_local $0) ) ) - (set_local $0 - (if (result i32) - (tee_local $2 - (i32.eqz - (i32.const 0) - ) - ) - (get_local $1) - (get_local $1) + (set_local $2 + (i32.eqz + (i32.const 0) ) ) + (set_local $0 + (get_local $1) + ) ) (block (set_local $0 diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise index dae544f1b..70f3b96a3 100644 --- a/test/emcc_hello_world.fromasm.imprecise +++ b/test/emcc_hello_world.fromasm.imprecise @@ -576,17 +576,14 @@ (get_local $0) ) ) - (set_local $0 - (if (result i32) - (tee_local $2 - (i32.eqz - (i32.const 0) - ) - ) - (get_local $1) - (get_local $1) + (set_local $2 + (i32.eqz + (i32.const 0) ) ) + (set_local $0 + (get_local $1) + ) ) (block (set_local $0 diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm index 031e1b8ce..7d5380cd0 100644 --- a/test/memorygrowth.fromasm +++ b/test/memorygrowth.fromasm @@ -3120,15 +3120,16 @@ (i32.eq (tee_local $7 (block $label$break$b (result i32) - (if (result i32) - (i32.and - (i32.load - (i32.const 1652) + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 1652) + ) + (i32.const 4) ) - (i32.const 4) ) - (i32.const 188) - (block (result i32) + (block (block $label$break$c (if (tee_local $18 @@ -3500,9 +3501,9 @@ (i32.const 
4) ) ) - (i32.const 188) ) ) + (i32.const 188) ) ) (i32.const 188) @@ -7982,13 +7983,13 @@ ) ) ) - (set_local $5 - (if (result i32) + (block + (if (i32.eq (get_local $3) (i32.const 2) ) - (block (result i32) + (block (i32.store (get_local $8) (i32.add @@ -8001,8 +8002,9 @@ (set_local $3 (i32.const 2) ) - (get_local $14) ) + ) + (set_local $5 (get_local $14) ) ) diff --git a/test/memorygrowth.fromasm.clamp b/test/memorygrowth.fromasm.clamp index 031e1b8ce..7d5380cd0 100644 --- a/test/memorygrowth.fromasm.clamp +++ b/test/memorygrowth.fromasm.clamp @@ -3120,15 +3120,16 @@ (i32.eq (tee_local $7 (block $label$break$b (result i32) - (if (result i32) - (i32.and - (i32.load - (i32.const 1652) + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 1652) + ) + (i32.const 4) ) - (i32.const 4) ) - (i32.const 188) - (block (result i32) + (block (block $label$break$c (if (tee_local $18 @@ -3500,9 +3501,9 @@ (i32.const 4) ) ) - (i32.const 188) ) ) + (i32.const 188) ) ) (i32.const 188) @@ -7982,13 +7983,13 @@ ) ) ) - (set_local $5 - (if (result i32) + (block + (if (i32.eq (get_local $3) (i32.const 2) ) - (block (result i32) + (block (i32.store (get_local $8) (i32.add @@ -8001,8 +8002,9 @@ (set_local $3 (i32.const 2) ) - (get_local $14) ) + ) + (set_local $5 (get_local $14) ) ) diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise index 062d52a2b..97c650686 100644 --- a/test/memorygrowth.fromasm.imprecise +++ b/test/memorygrowth.fromasm.imprecise @@ -3118,15 +3118,16 @@ (i32.eq (tee_local $7 (block $label$break$b (result i32) - (if (result i32) - (i32.and - (i32.load - (i32.const 1652) + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 1652) + ) + (i32.const 4) ) - (i32.const 4) ) - (i32.const 188) - (block (result i32) + (block (block $label$break$c (if (tee_local $18 @@ -3498,9 +3499,9 @@ (i32.const 4) ) ) - (i32.const 188) ) ) + (i32.const 188) ) ) (i32.const 188) @@ -7980,13 +7981,13 @@ ) ) ) - (set_local $5 - (if (result i32) + (block + (if 
(i32.eq (get_local $3) (i32.const 2) ) - (block (result i32) + (block (i32.store (get_local $8) (i32.add @@ -7999,8 +8000,9 @@ (set_local $3 (i32.const 2) ) - (get_local $14) ) + ) + (set_local $5 (get_local $14) ) ) diff --git a/test/passes/remove-unused-names_code-folding.txt b/test/passes/remove-unused-names_code-folding.txt index 5f5b30e7e..a37cbefbd 100644 --- a/test/passes/remove-unused-names_code-folding.txt +++ b/test/passes/remove-unused-names_code-folding.txt @@ -2,6 +2,7 @@ (type $0 (func)) (type $1 (func (param i32 i32) (result i32))) (type $2 (func (result i32))) + (type $3 (func (param i32) (result i32))) (func $ifs (; 0 ;) (type $0) (if (i32.const 0) @@ -1677,4 +1678,34 @@ (i32.const 3) ) ) + (func $if-suffix (; 35 ;) (type $3) (param $x i32) (result i32) + (block + (if + (get_local $x) + (block + ) + (drop + (call $if-suffix + (i32.const -1) + ) + ) + ) + (set_local $x + (i32.const 1) + ) + ) + (block (result i32) + (if + (get_local $x) + (block + ) + (drop + (call $if-suffix + (i32.const -2) + ) + ) + ) + (i32.const 2) + ) + ) ) diff --git a/test/passes/remove-unused-names_code-folding.wast b/test/passes/remove-unused-names_code-folding.wast index 35d95ba90..1472c7871 100644 --- a/test/passes/remove-unused-names_code-folding.wast +++ b/test/passes/remove-unused-names_code-folding.wast @@ -1173,4 +1173,22 @@ ) (drop (i32.const 3)) ) + (func $if-suffix (param $x i32) (result i32) + (if + (get_local $x) + (set_local $x (i32.const 1)) + (block + (drop (call $if-suffix (i32.const -1))) + (set_local $x (i32.const 1)) + ) + ) + (if (result i32) + (get_local $x) + (i32.const 2) + (block (result i32) + (drop (call $if-suffix (i32.const -2))) + (i32.const 2) + ) + ) + ) ) |