diff options
-rw-r--r-- | src/passes/CodeFolding.cpp | 234 | ||||
-rw-r--r-- | test/lit/passes/code-folding-eh-legacy.wast | 3 | ||||
-rw-r--r-- | test/lit/passes/code-folding.wast | 506 | ||||
-rw-r--r-- | test/passes/O3_low-memory-unused_metrics.txt | 438 | ||||
-rw-r--r-- | test/passes/remove-unused-names_code-folding.txt | 114 |
5 files changed, 831 insertions, 464 deletions
diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 0cddec4ca..42331b747 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -105,19 +105,11 @@ struct CodeFolding Tail(Block* block) : expr(nullptr), block(block), pointer(nullptr) {} // For a break Tail(Expression* expr, Block* block) - : expr(expr), block(block), pointer(nullptr) { - validate(); - } + : expr(expr), block(block), pointer(nullptr) {} Tail(Expression* expr, Expression** pointer) : expr(expr), block(nullptr), pointer(pointer) {} bool isFallthrough() const { return expr == nullptr; } - - void validate() const { - if (expr && block) { - assert(block->list.back() == expr); - } - } }; // state @@ -152,15 +144,13 @@ struct CodeFolding } void visitBreak(Break* curr) { - if (curr->condition || curr->value) { + if (curr->condition) { unoptimizables.insert(curr->name); } else { - // we can only optimize if we are at the end of the parent block, - // and if the parent block does not return a value (we can't move - // elements out of it if there is a value being returned) + // we can only optimize if we are at the end of the parent block. + // TODO: Relax this. Block* parent = controlFlowStack.back()->dynCast<Block>(); - if (parent && curr == parent->list.back() && - !parent->list.back()->type.isConcrete()) { + if (parent && curr == parent->list.back()) { breakTails[curr->name].push_back(Tail(curr, parent)); } else { unoptimizables.insert(curr->name); @@ -222,24 +212,19 @@ struct CodeFolding if (unoptimizables.count(curr->name) > 0) { return; } - // we can't optimize a fallthrough value - if (curr->list.back()->type.isConcrete()) { - return; - } auto iter = breakTails.find(curr->name); if (iter == breakTails.end()) { return; } - // looks promising + // Looks promising. auto& tails = iter->second; - // see if there is a fallthrough - bool hasFallthrough = true; - for (auto* child : curr->list) { - if (child->type == Type::unreachable) { - hasFallthrough = false; - } - } - if (hasFallthrough) { + // If the end of the block cannot be reached, then we don't need to include + // it in the set of folded tails. + bool includeFallthrough = + !std::any_of(curr->list.begin(), curr->list.end(), [&](auto* child) { + return child->type == Type::unreachable; + }); + if (includeFallthrough) { tails.push_back({Tail(curr)}); } optimizeExpressionTails(tails, curr); @@ -249,48 +234,34 @@ struct CodeFolding if (!curr->ifFalse) { return; } - // if both sides are identical, this is easy to fold - if (ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { + // If both are blocks, look for a tail we can merge. + auto* left = curr->ifTrue->dynCast<Block>(); + auto* right = curr->ifFalse->dynCast<Block>(); + // If one is a block and the other isn't, and the non-block is a tail of the + // other, we can fold that - for our convenience, we just add a block and + // run the rest of the optimization mormally. + auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { + // If other is a suffix of the block, wrap it in a block. + if (block->list.empty() || + !ExpressionAnalyzer::equal(other, block->list.back())) { + return nullptr; + } + // Do it, assign to the out param `other`, and return the block. Builder builder(*getModule()); - // remove if (4 bytes), remove one arm, add drop (1), add block (3), - // so this must be a net savings - markAsModified(curr); - auto* ret = - builder.makeSequence(builder.makeDrop(curr->condition), curr->ifTrue); - // we must ensure we present the same type as the if had - ret->finalize(curr->type); - replaceCurrent(ret); - needEHFixups = true; - } else { - // if both are blocks, look for a tail we can merge - auto* left = curr->ifTrue->dynCast<Block>(); - auto* right = curr->ifFalse->dynCast<Block>(); - // If one is a block and the other isn't, and the non-block is a tail - // of the other, we can fold that - for our convenience, we just add - // a block and run the rest of the optimization mormally. - auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { - // if other is a suffix of the block, wrap it in a block - if (block->list.empty() || - !ExpressionAnalyzer::equal(other, block->list.back())) { - return nullptr; - } - // do it, assign to the out param `other`, and return the block - Builder builder(*getModule()); - auto* ret = builder.makeBlock(other); - other = ret; - return ret; - }; - if (left && !right) { - right = maybeAddBlock(left, curr->ifFalse); - } else if (!left && right) { - left = maybeAddBlock(right, curr->ifTrue); - } - // we need nameless blocks, as if there is a name, someone might branch - // to the end, skipping the code we want to merge - if (left && right && !left->name.is() && !right->name.is()) { - std::vector<Tail> tails = {Tail(left), Tail(right)}; - optimizeExpressionTails(tails, curr); - } + auto* ret = builder.makeBlock(other); + other = ret; + return ret; + }; + if (left && !right) { + right = maybeAddBlock(left, curr->ifFalse); + } else if (!left && right) { + left = maybeAddBlock(right, curr->ifTrue); + } + // We need nameless blocks, as if there is a name, someone might branch to + // the end, skipping the code we want to merge. + if (left && right && !left->name.is() && !right->name.is()) { + std::vector<Tail> tails = {Tail(left), Tail(right)}; + optimizeExpressionTails(tails, curr); } } @@ -315,10 +286,6 @@ struct CodeFolding if (needEHFixups) { EHUtils::handleBlockNestedPops(func, *getModule()); } - // if we did any work, types may need to be propagated - if (anotherPass) { - ReFinalize().walkFunctionInModule(func, getModule()); - } } } @@ -372,6 +339,7 @@ private: // identical in all paths leading to the block exit can be merged. template<typename T> void optimizeExpressionTails(std::vector<Tail>& tails, T* curr) { + auto oldType = curr->type; if (tails.size() < 2) { return; } @@ -384,50 +352,49 @@ private: return; } // if we were not modified, then we should be valid for processing - tail.validate(); + assert(!tail.expr || !tail.block || + (tail.expr == tail.block->list.back())); } - // we can ignore the final br in a tail - auto effectiveSize = [&](const Tail& tail) { - auto ret = tail.block->list.size(); + auto getMergeable = [&](const Tail& tail, Index num) -> Expression* { if (!tail.isFallthrough()) { - ret--; + // If there is a branch value, it is the first mergeable item. + auto* val = tail.expr->cast<Break>()->value; + if (val && num == 0) { + return val; + } + if (!val) { + // Skip the branch instruction at the end; it is not part of the + // merged tail. + ++num; + } } - return ret; - }; - // the mergeable items do not include the final br in a tail - auto getMergeable = [&](const Tail& tail, Index num) { - return tail.block->list[effectiveSize(tail) - num - 1]; + if (num >= tail.block->list.size()) { + return nullptr; + } + return tail.block->list[tail.block->list.size() - num - 1]; }; // we are going to remove duplicate elements and add a block. // so for this to make sense, we need the size of the duplicate // elements to be worth that extra block (although, there is // some chance the block would get merged higher up, see later) std::vector<Expression*> mergeable; // the elements we can merge - Index num = 0; // how many elements back from the tail to look at Index saved = 0; // how much we can save - while (1) { - // check if this num is still relevant - bool stop = false; - for (auto& tail : tails) { - assert(tail.block); - if (num >= effectiveSize(tail)) { - // one of the lists is too short - stop = true; - break; - } - } - if (stop) { + for (Index num = 0; true; ++num) { + auto* item = getMergeable(tails[0], num); + if (!item) { + // The list is too short. break; } - auto* item = getMergeable(tails[0], num); - for (auto& tail : tails) { - if (!ExpressionAnalyzer::equal(item, getMergeable(tail, num))) { - // one of the lists has a different item - stop = true; + Index tail = 1; + for (; tail < tails.size(); ++tail) { + auto* other = getMergeable(tails[tail], num); + if (!other || !ExpressionAnalyzer::equal(item, other)) { + // Other tail too short or has a difference. break; } } - if (stop) { + if (tail != tails.size()) { + // We saw a tail without a matching item. break; } // we may have found another one we can merge - can we move it? @@ -436,7 +403,6 @@ private: } // we found another one we can merge mergeable.push_back(item); - num++; saved += Measurer::measure(item); } if (saved == 0) { @@ -450,7 +416,7 @@ private: for (auto& tail : tails) { // it is enough to zero out the block, or leave just one // element, as then the block can be replaced with that - if (num >= tail.block->list.size() - 1) { + if (mergeable.size() >= tail.block->list.size() - 1) { willEmptyBlock = true; break; } @@ -483,6 +449,7 @@ private: } } } + // this is worth doing, do it! for (auto& tail : tails) { // remove the items we are merging / moving @@ -490,37 +457,61 @@ private: // again in this pass, which might be buggy markAsModified(tail.block); // we must preserve the br if there is one - Expression* last = nullptr; + Break* branch = nullptr; if (!tail.isFallthrough()) { - last = tail.block->list.back(); - tail.block->list.pop_back(); + branch = tail.block->list.back()->cast<Break>(); + if (branch->value) { + branch->value = nullptr; + } else { + tail.block->list.pop_back(); + } } - for (Index i = 0; i < mergeable.size(); i++) { + for (Index i = 0; i < mergeable.size(); ++i) { tail.block->list.pop_back(); } - if (!tail.isFallthrough()) { - tail.block->list.push_back(last); + if (tail.isFallthrough()) { + // The block now ends in an expression that was previously in the middle + // of the block, meaning it must have type none. + tail.block->finalize(Type::none); + } else { + tail.block->list.push_back(branch); + // The block still ends with the same branch it previously ended with, + // so its type cannot have changed. + tail.block->finalize(tail.block->type); } - // the block type may change if we removed unreachable stuff, - // but in general it should remain the same, as if it had a - // forced type it should remain, *and*, we don't have a - // fallthrough value (we would never get here), so a concrete - // type was not from that. I.e., any type on the block is - // either forced and/or from breaks with a value, so the - // type cannot be changed by moving code out. - tail.block->finalize(tail.block->type); } // since we managed a merge, then it might open up more opportunities later anotherPass = true; // make a block with curr + the merged code Builder builder(*getModule()); auto* block = builder.makeBlock(); - block->list.push_back(curr); + if constexpr (T::SpecificId == Expression::IfId) { + // If we've moved all the contents out of both arms of the If, then we can + // simplify the output by replacing it entirely with just a drop of the + // condition. + auto* iff = curr->template cast<If>(); + if (iff->ifTrue->template cast<Block>()->list.empty() && + iff->ifFalse->template cast<Block>()->list.empty()) { + block->list.push_back(builder.makeDrop(iff->condition)); + } else { + block->list.push_back(curr); + } + } else { + block->list.push_back(curr); + } while (!mergeable.empty()) { block->list.push_back(mergeable.back()); mergeable.pop_back(); } - auto oldType = curr->type; + if constexpr (T::SpecificId == Expression::BlockId) { + // If we didn't have a fallthrough tail because the end of the block was + // not reachable, then we might have a concrete expression at the end of + // the block even though the value produced by the block has been moved + // out of it. If so, drop that expression. + auto* currBlock = curr->template cast<Block>(); + currBlock->list.back() = + builder.dropIfConcretelyTyped(currBlock->list.back()); + } // NB: we template-specialize so that this calls the proper finalizer for // the type curr->finalize(); @@ -553,9 +544,6 @@ private: if (tail.block && modifieds.count(tail.block) > 0) { return true; } - // if we were not modified, then we should be valid for - // processing - tail.validate(); return false; }), tails.end()); diff --git a/test/lit/passes/code-folding-eh-legacy.wast b/test/lit/passes/code-folding-eh-legacy.wast index 81180d044..cde0fba28 100644 --- a/test/lit/passes/code-folding-eh-legacy.wast +++ b/test/lit/passes/code-folding-eh-legacy.wast @@ -355,6 +355,7 @@ ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.eqz ;; CHECK-NEXT: (i32.const 1) @@ -377,6 +378,7 @@ (if (pop i32) (then + (nop) (drop (i32.eqz (i32.const 1) @@ -384,6 +386,7 @@ ) ) (else + (nop) (drop (i32.eqz (i32.const 1) diff --git a/test/lit/passes/code-folding.wast b/test/lit/passes/code-folding.wast index 358167481..007aa5909 100644 --- a/test/lit/passes/code-folding.wast +++ b/test/lit/passes/code-folding.wast @@ -1,18 +1,32 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. -;; RUN: foreach %s %t wasm-opt -all --code-folding -S -o - | filecheck %s +;; RUN: wasm-opt %s -all --code-folding -S -o - | filecheck %s (module ;; CHECK: (type $0 (func)) ;; CHECK: (type $1 (func (result f32))) + ;; CHECK: (type $2 (func (result i32))) + + ;; CHECK: (type $3 (func (result anyref))) + ;; CHECK: (type $13 (func (param f32))) (type $13 (func (param f32))) (table 282 282 funcref) + ;; CHECK: (type $5 (func (param i32))) + + ;; CHECK: (global $global$0 (mut i32) (i32.const 10)) + ;; CHECK: (memory $0 1 1) (memory $0 1 1) + + ;; CHECK: (memory $shared 1 1 shared) + (memory $shared 1 1 shared) + + (global $global$0 (mut i32) (i32.const 10)) + ;; CHECK: (table $0 282 282 funcref) ;; CHECK: (func $0 (type $0) @@ -22,7 +36,7 @@ ;; CHECK-NEXT: (then ;; CHECK-NEXT: (block $label$3 ;; CHECK-NEXT: (call_indirect $0 (type $13) - ;; CHECK-NEXT: (block $label$4 + ;; CHECK-NEXT: (block $label$4 (result f32) ;; CHECK-NEXT: (br $label$3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 105) @@ -52,18 +66,15 @@ ) ) ) + ;; CHECK: (func $negative-zero (type $1) (result f32) ;; CHECK-NEXT: (if (result f32) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (block $label$0 (result f32) - ;; CHECK-NEXT: (f32.const 0) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (else - ;; CHECK-NEXT: (block $label$1 (result f32) - ;; CHECK-NEXT: (f32.const -0) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const -0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -71,63 +82,173 @@ (if (result f32) (i32.const 0) (then - (block $label$0 (result f32) + (block (result f32) (f32.const 0) ) ) (else - (block $label$1 (result f32) + (block (result f32) (f32.const -0) ) ) ) ) + ;; CHECK: (func $negative-zero-b (type $1) (result f32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block $label$0 (result f32) - ;; CHECK-NEXT: (f32.const -0) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const -0) ;; CHECK-NEXT: ) (func $negative-zero-b (result f32) (if (result f32) (i32.const 0) (then - (block $label$0 (result f32) + (block (result f32) (f32.const -0) ) ) (else - (block $label$1 (result f32) + (block (result f32) (f32.const -0) ) ) ) ) - ;; CHECK: (func $negative-zero-c (type $1) (result f32) - ;; CHECK-NEXT: (drop + + ;; CHECK: (func $positive-zero (type $1) (result f32) + ;; CHECK-NEXT: (if (result f32) ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block $label$0 (result f32) - ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + (func $positive-zero (result f32) + ;; This doesn't get optimized because we only look at Ifs with block arms. + ;; This simpler case will be optimized by OptimizeInstructions. + (if (result f32) + (i32.const 0) + (then + (f32.const 0) + ) + (else + (f32.const 0) + ) + ) + ) + + ;; CHECK: (func $positive-zero-names (type $1) (result f32) + ;; CHECK-NEXT: (if (result f32) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (block $l1 (result f32) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (block $l2 (result f32) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $negative-zero-c (result f32) + (func $positive-zero-names (result f32) + ;; This one has block arms, but doesn't get optimized because the blocks have + ;; names. (if (result f32) (i32.const 0) (then - (block $label$0 (result f32) + (block $l1 (result f32) (f32.const 0) ) ) (else - (block $label$1 (result f32) + (block $l2 (result f32) (f32.const 0) ) ) ) ) + + ;; CHECK: (func $positive-zero-extra-a (type $1) (result f32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + (func $positive-zero-extra-a (result f32) + (if (result f32) + (i32.const 0) + (then + (nop) + (f32.const 0) + ) + (else + (f32.const 0) + ) + ) + ) + + ;; CHECK: (func $positive-zero-extra-b (type $1) (result f32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + (func $positive-zero-extra-b (result f32) + (if (result f32) + (i32.const 0) + (then + (f32.const 0) + ) + (else + (nop) + (f32.const 0) + ) + ) + ) + + ;; CHECK: (func $positive-zero-extra-c (type $1) (result f32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + (func $positive-zero-extra-c (result f32) + (if (result f32) + (i32.const 0) + (then + (nop) + (nop) + (f32.const 0) + ) + (else + (nop) + (f32.const 0) + ) + ) + ) + ;; CHECK: (func $break-target-outside-of-return-merged-code (type $0) ;; CHECK-NEXT: (block $label$A ;; CHECK-NEXT: (if @@ -202,6 +323,7 @@ ) ) ) + ;; CHECK: (func $break-target-inside-all-good (type $0) ;; CHECK-NEXT: (block $folding-inner0 ;; CHECK-NEXT: (block $label$A @@ -269,11 +391,12 @@ ) ) ) + ;; CHECK: (func $leave-inner-block-type (type $0) ;; CHECK-NEXT: (block $label$1 ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block $label$2 - ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block $label$2 (result i32) + ;; CHECK-NEXT: (br_if $label$2 ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) @@ -304,54 +427,40 @@ ) ) ) -) -(module - ;; CHECK: (type $0 (func (result i32))) - ;; CHECK: (memory $0 1 1 shared) - (memory $0 1 1 shared) - ;; CHECK: (export "func_2224" (func $0)) - (export "func_2224" (func $0)) - ;; CHECK: (func $0 (type $0) (result i32) + ;; CHECK: (func $atomic-load-different (type $2) (result i32) ;; CHECK-NEXT: (local $var$0 i32) ;; CHECK-NEXT: (if (result i32) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (i32.load offset=22 + ;; CHECK-NEXT: (i32.load $shared offset=22 ;; CHECK-NEXT: (local.get $var$0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (else - ;; CHECK-NEXT: (i32.atomic.load offset=22 + ;; CHECK-NEXT: (i32.atomic.load $shared offset=22 ;; CHECK-NEXT: (local.get $var$0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $0 (result i32) + (func $atomic-load-different (result i32) (local $var$0 i32) (if (result i32) (i32.const 0) (then - (i32.load offset=22 + (i32.load $shared offset=22 (local.get $var$0) ) ) (else - (i32.atomic.load offset=22 + (i32.atomic.load $shared offset=22 (local.get $var$0) ) ) ) ) -) -(module - ;; CHECK: (type $0 (func)) - (type $0 (func)) - ;; CHECK: (type $1 (func (param i32))) - ;; CHECK: (global $global$0 (mut i32) (i32.const 10)) - (global $global$0 (mut i32) (i32.const 10)) ;; CHECK: (func $determinism (type $0) ;; CHECK-NEXT: (block $folding-inner0 ;; CHECK-NEXT: (block @@ -390,7 +499,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) - (func $determinism (; 0 ;) (type $0) + (func $determinism (block $label$1 (br_if $label$1 (i32.const 1) @@ -439,7 +548,8 @@ ) (unreachable) ) - ;; CHECK: (func $careful-of-the-switch (type $1) (param $0 i32) + + ;; CHECK: (func $careful-of-the-switch (type $5) (param $0 i32) ;; CHECK-NEXT: (block $label$1 ;; CHECK-NEXT: (block $label$3 ;; CHECK-NEXT: (block $label$5 @@ -481,10 +591,243 @@ (unreachable) ) ) -) -(module - ;; CHECK: (type $0 (func)) + ;; CHECK: (func $br-with-value (type $2) (result i32) + ;; CHECK-NEXT: (block $l + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $br-with-value (result i32) + (block $l (result i32) + (block + (br $l + (i32.const 1) + ) + ) + (block + (br $l + (i32.const 1) + ) + ) + (i32.const 1) + ) + ) + + ;; CHECK: (func $br-and-fallthrough-with-value (type $2) (result i32) + ;; CHECK-NEXT: (block $l + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $br-and-fallthrough-with-value (result i32) + (block $l (result i32) + (drop + (block (result i32) + (br $l + (i32.const 1) + ) + ) + ) + (drop + (block (result i32) + (br $l + (i32.const 1) + ) + ) + ) + (i32.const 1) + ) + ) + + ;; CHECK: (func $br-with-value-and-more (type $2) (result i32) + ;; CHECK-NEXT: (block $l + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $br-with-value-and-more (result i32) + (block $l (result i32) + (block + (nop) + (nop) + (br $l + (i32.const 1) + ) + ) + (block + (nop) + (nop) + (nop) + (br $l + (i32.const 1) + ) + ) + (nop) + (i32.const 1) + ) + ) + + ;; CHECK: (func $br-and-fallthrough-with-value-and-more (type $2) (result i32) + ;; CHECK-NEXT: (block $l + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br $l) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $br-and-fallthrough-with-value-and-more (result i32) + (block $l (result i32) + (drop + (block (result i32) + (nop) + (nop) + (br $l + (i32.const 1) + ) + ) + ) + (drop + (block (result i32) + (nop) + (nop) + (nop) + (br $l + (i32.const 1) + ) + ) + ) + (nop) + (i32.const 1) + ) + ) + + ;; CHECK: (func $unreachable-if (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $unreachable-if + (if + (unreachable) + (then + (nop) + (drop + (i32.const 1) + ) + ) + (else + (nop) + (drop + (i32.const 1) + ) + ) + ) + ) + + ;; CHECK: (func $unreachable-if-suffix (type $0) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $unreachable-if-suffix + (if + (unreachable) + (then + (drop + (i32.const 1) + ) + ) + (else + (nop) + (drop + (i32.const 1) + ) + ) + ) + ) + + ;; CHECK: (func $unreachable-if-concrete-arms (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $unreachable-if-concrete-arms + (if (result i32) + (unreachable) + (then + (i32.const 1) + ) + (else + (nop) + (i32.const 1) + ) + ) + (unreachable) + ) ;; CHECK: (func $br-on-null (type $0) ;; CHECK-NEXT: (block $block @@ -520,4 +863,69 @@ (call $br-on-null) ) ) + + ;; CHECK: (func $refined-type (type $3) (result anyref) + ;; CHECK-NEXT: (select (result anyref) + ;; CHECK-NEXT: (if (result anyref) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $refined-type (result anyref) + (select (result anyref) + ;; If we fold the identical arms, the select will have a stale type. + (if (result anyref) + (i32.const 0) + (then + (ref.null none) + ) + (else + (ref.null none) + ) + ) + (ref.null none) + (i32.const 0) + ) + ) + + ;; CHECK: (func $refined-type-blocks (type $3) (result anyref) + ;; CHECK-NEXT: (select (result anyref) + ;; CHECK-NEXT: (block (result anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $refined-type-blocks (result anyref) + (select (result anyref) + ;; Same, but now the arms are blocks, so they have the stale types (which is + ;; allowed) and the select is ok. + (if (result anyref) + (i32.const 0) + (then + (nop) + (ref.null none) + ) + (else + (nop) + (ref.null none) + ) + ) + (ref.null none) + (i32.const 0) + ) + ) ) diff --git a/test/passes/O3_low-memory-unused_metrics.txt b/test/passes/O3_low-memory-unused_metrics.txt index 8806e9a04..3117d7778 100644 --- a/test/passes/O3_low-memory-unused_metrics.txt +++ b/test/passes/O3_low-memory-unused_metrics.txt @@ -9,23 +9,23 @@ total [table-data] : 0 [tables] : 0 [tags] : 0 - [total] : 1964 + [total] : 1953 [vars] : 9 - Binary : 240 + Binary : 238 Block : 68 Break : 90 Call : 22 CallIndirect : 1 - Const : 175 + Const : 174 Drop : 8 If : 27 - Load : 313 - LocalGet : 633 - LocalSet : 181 + Load : 311 + LocalGet : 629 + LocalSet : 180 Loop : 3 Return : 3 Select : 11 - Store : 160 + Store : 159 Unary : 29 (module (type $0 (func (param i32 i32 i32) (result i32))) @@ -2718,307 +2718,289 @@ total (local.get $0) ) ) - (i32.store8 - (block $label$68 (result i32) - (if - (i32.eq - (local.get $4) - (i32.const 2) - ) - (then - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) - ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + (block $label$68 + (if + (i32.eq + (local.get $4) + (i32.const 2) + ) + (then + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (local.get $1) + (i32.const 1) ) - (local.set $1 - (i32.load offset=48 - (local.get $0) + ) + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + (local.get $1) + ) + (local.set $1 + (i32.load offset=48 + (local.get $0) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (i32.shr_u - (local.get $1) - (i32.const 8) - ) - ) - (local.set $1 - (i32.load16_u offset=50 - (local.get $0) - ) + (i32.const 1) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) + ) + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 + (local.get $2) ) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 - (local.get $2) - ) - ) + (i32.shr_u (local.get $1) + (i32.const 8) ) - (local.set $1 - (i32.load8_u offset=51 - (local.get $0) - ) - ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + ) + (local.set $1 + (i32.load16_u offset=50 + (local.get $0) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (local.get $1) + (i32.const 1) ) - (local.set $1 + ) + (i32.store8 + (i32.add + (local.get $3) (i32.load offset=8 - (local.get $0) + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + (local.get $1) + ) + (local.set $1 + (i32.load8_u offset=51 + (local.get $0) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (local.get $1) + (i32.const 1) ) - (local.set $1 + ) + (i32.store8 + (i32.add + (local.get $3) (i32.load offset=8 - (local.get $0) + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + (local.get $1) + ) + (local.set $1 + (i32.load offset=8 + (local.get $0) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (i32.shr_u - (local.get $1) - (i32.const 8) - ) + (i32.const 1) ) - (local.set $1 - (i32.load16_u offset=10 - (local.get $0) + ) + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + (local.get $1) + ) + (local.set $1 + (i32.load offset=8 + (local.get $0) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (local.get $1) + (i32.const 1) ) - (local.set $3 - (i32.load8_u offset=11 - (local.get $0) + ) + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $1 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + (i32.shr_u + (local.get $1) + (i32.const 8) ) - (br $label$68 - (i32.add - (local.get $1) - (i32.load offset=8 + ) + (local.set $1 + (i32.load16_u offset=10 + (local.get $0) + ) + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) + (i32.const 1) ) ) - ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 (local.get $2) ) ) - (i32.const 1) - ) - ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 - (local.get $2) - ) - ) - (i32.shr_u (local.get $1) - (i32.const 24) ) - ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $3 - (i32.load offset=20 - (local.get $2) - ) + (local.set $3 + (i32.load8_u offset=11 + (local.get $0) ) - (i32.const 1) ) + (br $label$68) ) - (i32.store8 - (i32.add - (local.get $3) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (i32.shr_u - (local.get $1) - (i32.const 16) - ) + (i32.const 1) ) - (local.set $3 - (i32.load offset=48 - (local.get $0) + ) + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $1 - (i32.load offset=20 - (local.get $2) - ) - ) - (i32.const 1) - ) + (i32.shr_u + (local.get $1) + (i32.const 24) ) - (i32.store8 - (i32.add - (local.get $1) - (i32.load offset=8 + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $3 + (i32.load offset=20 (local.get $2) ) ) - (i32.shr_u - (local.get $3) - (i32.const 8) + (i32.const 1) + ) + ) + (i32.store8 + (i32.add + (local.get $3) + (i32.load offset=8 + (local.get $2) ) ) - (i32.store offset=20 - (local.get $2) - (i32.add - (local.tee $1 - (i32.load offset=20 - (local.get $2) - ) + (i32.shr_u + (local.get $1) + (i32.const 16) + ) + ) + (local.set $3 + (i32.load offset=48 + (local.get $0) + ) + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $1 + (i32.load offset=20 + (local.get $2) ) - (i32.const 1) ) + (i32.const 1) ) + ) + (i32.store8 (i32.add (local.get $1) (i32.load offset=8 (local.get $2) ) ) + (i32.shr_u + (local.get $3) + (i32.const 8) + ) + ) + ) + (i32.store offset=20 + (local.get $2) + (i32.add + (local.tee $1 + (i32.load offset=20 + (local.get $2) + ) + ) + (i32.const 1) + ) + ) + (i32.store8 + (i32.add + (local.get $1) + (i32.load offset=8 + (local.get $2) + ) ) (local.get $3) ) diff --git a/test/passes/remove-unused-names_code-folding.txt b/test/passes/remove-unused-names_code-folding.txt index d0486b3c7..85810131b 100644 --- a/test/passes/remove-unused-names_code-folding.txt +++ b/test/passes/remove-unused-names_code-folding.txt @@ -10,11 +10,14 @@ (nop) ) ) - (block - (drop - (i32.const 0) + (if + (i32.const 0) + (then + (nop) + ) + (else + (nop) ) - (nop) ) (if (i32.const 0) @@ -26,13 +29,19 @@ ) ) (drop - (block (result i32) - (drop - (i32.const 0) + (if (result i32) + (i32.const 0) + (then + (i32.add + (i32.const 1) + (i32.const 2) + ) ) - (i32.add - (i32.const 1) - (i32.const 2) + (else + (i32.add + (i32.const 1) + (i32.const 2) + ) ) ) ) @@ -59,9 +68,7 @@ (drop (i32.const 0) ) - (block - (nop) - ) + (nop) ) (block (if @@ -111,12 +118,10 @@ (drop (i32.const 0) ) - (block - (drop - (i32.add - (i32.const 1) - (i32.const 2) - ) + (drop + (i32.add + (i32.const 1) + (i32.const 2) ) ) ) @@ -502,12 +507,10 @@ (drop (local.get $x) ) - (block - (br_if $out - (local.get $y) - ) - (nop) + (br_if $out + (local.get $y) ) + (nop) ) (block (if @@ -695,18 +698,16 @@ (drop (i32.const 1) ) - (block - (drop - (i32.const 2) - ) - (nop) - (nop) - (nop) - (nop) - (nop) - (nop) - (br $out) + (drop + (i32.const 2) ) + (nop) + (nop) + (nop) + (nop) + (nop) + (nop) + (br $out) ) ) (block $out2 @@ -745,17 +746,13 @@ (drop (i32.const 1) ) - (block - (br $out3) - ) + (br $out3) ) (block (drop (i32.const 1) ) - (block - (br $out3) - ) + (br $out3) ) (br $out3) ) @@ -788,20 +785,15 @@ ) ) (drop - (block $y (result i32) - (if - (i32.const 0) - (then - (drop - (i32.const 1) - ) - (drop - (i32.const 2) - ) - (br $y - (i32.const 3) + (block (result i32) + (block $y + (if + (i32.const 0) + (then + (br $y) ) ) + (br $y) ) (drop (i32.const 1) @@ -809,9 +801,7 @@ (drop (i32.const 2) ) - (br $y - (i32.const 3) - ) + (i32.const 3) ) ) (drop @@ -1508,9 +1498,7 @@ (drop (i32.const 0) ) - (block - (nop) - ) + (nop) ) (if (i32.const 0) @@ -1527,11 +1515,9 @@ (drop (unreachable) ) - (block (result i32) - (i32.add - (i32.const 1) - (i32.const 2) - ) + (i32.add + (i32.const 1) + (i32.const 2) ) ) ) |