diff options
-rwxr-xr-x | build-js.sh | 1 | ||||
-rw-r--r-- | src/ir/effects.h | 9 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 7 | ||||
-rw-r--r-- | src/passes/LoopInvariantCodeMotion.cpp | 246 | ||||
-rw-r--r-- | src/passes/RedundantSetElimination.cpp | 4 | ||||
-rw-r--r-- | src/passes/pass.cpp | 1 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | test/passes/licm.txt | 677 | ||||
-rw-r--r-- | test/passes/licm.wast | 392 |
9 files changed, 1335 insertions, 3 deletions
diff --git a/build-js.sh b/build-js.sh index 99471e29e..f4a2157b5 100755 --- a/build-js.sh +++ b/build-js.sh @@ -106,6 +106,7 @@ echo "building shared bitcode" $BINARYEN_SRC/passes/LegalizeJSInterface.cpp \ $BINARYEN_SRC/passes/LocalCSE.cpp \ $BINARYEN_SRC/passes/LogExecution.cpp \ + $BINARYEN_SRC/passes/LoopInvariantCodeMotion.cpp \ $BINARYEN_SRC/passes/MemoryPacking.cpp \ $BINARYEN_SRC/passes/MergeBlocks.cpp \ $BINARYEN_SRC/passes/MergeLocals.cpp \ diff --git a/src/ir/effects.h b/src/ir/effects.h index b6eafae27..98687c0dc 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -39,6 +39,8 @@ struct EffectAnalyzer : public PostWalker<EffectAnalyzer> { if (breakNames.size() > 0) branches = true; } + // Core effect tracking + bool branches = false; // branches out of this expression, returns, infinite loops, etc bool calls = false; std::set<Index> localsRead; @@ -55,13 +57,18 @@ struct EffectAnalyzer : public PostWalker<EffectAnalyzer> { bool isAtomic = false; // An atomic load/store/RMW/Cmpxchg or an operator that // has a defined ordering wrt atomics (e.g. grow_memory) + // Helper functions to check for various effect types + bool accessesLocal() { return localsRead.size() + localsWritten.size() > 0; } bool accessesGlobal() { return globalsRead.size() + globalsWritten.size() > 0; } bool accessesMemory() { return calls || readsMemory || writesMemory; } + bool hasGlobalSideEffects() { return calls || globalsWritten.size() > 0 || writesMemory || isAtomic; } bool hasSideEffects() { return hasGlobalSideEffects() || localsWritten.size() > 0 || branches || implicitTrap; } bool hasAnything() { return branches || calls || accessesLocal() || readsMemory || writesMemory || accessesGlobal() || implicitTrap || isAtomic; } + bool noticesGlobalSideEffects() { return calls || readsMemory || isAtomic || globalsRead.size(); } + // check if we break to anything external from ourselves bool hasExternalBreakTargets() { return !breakNames.empty(); } @@ -113,6 +120,8 @@ struct EffectAnalyzer : public PostWalker<EffectAnalyzer> { calls = calls || other.calls; readsMemory = readsMemory || other.readsMemory; writesMemory = writesMemory || other.writesMemory; + implicitTrap = implicitTrap || other.implicitTrap; + isAtomic = isAtomic || other.isAtomic; for (auto i : other.localsRead) localsRead.insert(i); for (auto i : other.localsWritten) localsWritten.insert(i); for (auto i : other.globalsRead) globalsRead.insert(i); diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index fc53a194f..87b7661aa 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -15,13 +15,14 @@ SET(passes_SOURCES ExtractFunction.cpp Flatten.cpp FuncCastEmulation.cpp + I64ToI32Lowering.cpp Inlining.cpp + InstrumentLocals.cpp + InstrumentMemory.cpp LegalizeJSInterface.cpp LocalCSE.cpp LogExecution.cpp - I64ToI32Lowering.cpp - InstrumentLocals.cpp - InstrumentMemory.cpp + LoopInvariantCodeMotion.cpp MemoryPacking.cpp MergeBlocks.cpp MergeLocals.cpp diff --git a/src/passes/LoopInvariantCodeMotion.cpp b/src/passes/LoopInvariantCodeMotion.cpp new file mode 100644 index 000000000..aec2f7ce5 --- /dev/null +++ b/src/passes/LoopInvariantCodeMotion.cpp @@ -0,0 +1,246 @@ +/* + * Copyright 2018 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Simple loop invariant code motion (licm): for every none-typed +// expression in a loop, see if we can move it out. +// +// Flattening is not necessary here, but may help (as separating +// out expressions may allow moving at least part of a larger whole). +// + +#include <unordered_map> + +#include "wasm.h" +#include "pass.h" +#include "wasm-builder.h" +#include "ir/local-graph.h" +#include "ir/effects.h" +#include "ir/find_all.h" + +namespace wasm { + +struct LoopInvariantCodeMotion : public WalkerPass<ExpressionStackWalker<LoopInvariantCodeMotion>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new LoopInvariantCodeMotion; } + + typedef std::unordered_set<SetLocal*> LoopSets; + + // main entry point + + LocalGraph* localGraph; + + void doWalkFunction(Function* func) { + // Compute all local dependencies first. + LocalGraph localGraphInstance(func); + localGraph = &localGraphInstance; + // Traverse the function. + super::doWalkFunction(func); + } + + void visitLoop(Loop* loop) { + // We accumulate all the code we can move out, and will place it + // in a block just preceding the loop. + std::vector<Expression*> movedCode; + // Accumulate effects of things we can't move out - things + // we move out later must cross them, so we must verify it + // is ok to do so. + EffectAnalyzer effectsSoFar(getPassOptions()); + // The loop's total effects also matter. For example, a store + // in the loop means we can't move a load outside. + // FIXME: we look at the loop "tail" area too, after the last + // possible branch back, which can cause false positives + // for bad effect interactions. + EffectAnalyzer loopEffects(getPassOptions(), loop); + // Note all the sets in each loop, and how many per index. Currently + // EffectAnalyzer can't do that, and we need it to know if we + // can move a set out of the loop (if there is another set + // still there, we can't). Another possible option here is for + // LocalGraph to track interfering sets. TODO + // FIXME: also the loop tail issue from above. + auto numLocals = getFunction()->getNumLocals(); + std::vector<Index> numSetsForIndex(numLocals); + std::fill(numSetsForIndex.begin(), numSetsForIndex.end(), 0); + LoopSets loopSets; + { + FindAll<SetLocal> finder(loop); + for (auto* set : finder.list) { + numSetsForIndex[set->index]++; + loopSets.insert(set); + } + } + // Walk along the loop entrance, while all the code there + // is executed unconditionally. That is the code we want to + // move out - anything that might or might not be executed + // may be best left alone anyhow. + std::vector<Expression**> work; + work.push_back(&loop->body); + while (!work.empty()) { + auto** currp = work.back(); + work.pop_back(); + auto* curr = *currp; + // Look into blocks. + if (auto* block = curr->dynCast<Block>()) { + auto& list = block->list; + Index i = list.size(); + while (i > 0) { + i--; + work.push_back(&list[i]); + } + continue; + // Note that if the block had a merge at the end, we would have seen + // a branch to it anyhow, so we would stop before that point anyhow. + } + // If this may branch, we are done. + EffectAnalyzer effects(getPassOptions(), curr); + if (effects.branches) { + break; + } + if (interestingToMove(curr)) { + // Let's see if we can move this out. + // Global side effects would prevent this - we might end up + // executing them just once. + // And we must also move across anything not moved out already, + // so check for issues there too. + // The rest of the loop's effects matter too, we must also + // take into account global state like interacting loads and + // stores. + bool unsafeToMove = effects.hasGlobalSideEffects() || + effectsSoFar.invalidates(effects) || + (effects.noticesGlobalSideEffects() && + loopEffects.hasGlobalSideEffects()); + if (!unsafeToMove) { + // So far so good. Check if our local dependencies are all + // outside of the loop, in which case everything is good - + // either they are before the loop and constant for us, or + // they are after and don't matter. + if (effects.localsRead.empty() || !hasGetDependingOnLoopSet(curr, loopSets)) { + // We have checked if our gets are influenced by sets in the loop, and + // must also check if our sets interfere with them. To do so, assume + // temporarily that we are moving curr out; see if any sets remain for + // its indexes. + FindAll<SetLocal> currSets(curr); + for (auto* set : currSets.list) { + assert(numSetsForIndex[set->index] > 0); + numSetsForIndex[set->index]--; + } + bool canMove = true; + for (auto* set : currSets.list) { + if (numSetsForIndex[set->index] > 0) { + canMove = false; + break; + } + } + if (!canMove) { + // We failed to move the code, undo those changes. + for (auto* set : currSets.list) { + numSetsForIndex[set->index]++; + } + } else { + // We can move it! Leave the changes, move the code, and update + // loopSets. + movedCode.push_back(curr); + *currp = Builder(*getModule()).makeNop(); + for (auto* set : currSets.list) { + loopSets.erase(set); + } + continue; + } + } + } + } + // We did not move this item. Accumulate its effects. + effectsSoFar.mergeIn(effects); + } + // If we moved the code out, finish up by emitting it + // outside of the loop. + // Note that this works with nested loops - after moving outside + // of an inner loop, we can encounter it again in an outer loop, + // and move it further outside, without requiring any extra pass. + if (!movedCode.empty()) { + // Finish the moving by emitting the code outside. + Builder builder(*getModule()); + auto* ret = builder.makeBlock(movedCode); + ret->list.push_back(loop); + ret->finalize(loop->type); + replaceCurrent(ret); + // Note that we do not need to modify the localGraph - we keep + // each get in a position to be influenced by exactly the same + // sets as before. + } + } + + bool interestingToMove(Expression* curr) { + // In theory we could consider blocks, but then heavy nesting of + // switch patterns would be heavy, and almost always pointless. + if (curr->type != none || curr->is<Nop>() || curr->is<Block>() + || curr->is<Loop>()) { + return false; + } + // Don't move copies (set of a get, or set of a tee of a get, etc.), + // as they often do not turn into actual work inside the loop + // (after later optimizations by us or the VM), and if we move them + // out it will prevent other opts from potentially eliminating the + // copy, as we don't have another pass that considers moving code + // back *into* loops. + // Likewise, constants also are not worth moving out. + // Note that the issue remains for moving things out which later + // optimizations turn into a copy or a constant, so in general + // it is beneficial to run this pass later on (but that has downsides + // too, as with more nesting moving code is harder - so something + // like -O --flatten --licm -O may be best). + if (auto* set = curr->dynCast<SetLocal>()) { + while (1) { + auto* next = set->value->dynCast<SetLocal>(); + if (!next) break; + set = next; + } + if (set->value->is<GetLocal>() || set->value->is<Const>()) { + return false; + } + } + return true; + } + + bool hasGetDependingOnLoopSet(Expression* curr, LoopSets& loopSets) { + FindAll<GetLocal> gets(curr); + for (auto* get : gets.list) { + auto& sets = localGraph->getSetses[get]; + for (auto* set : sets) { + // nullptr means a parameter or zero-init value; + // no danger to us. + if (!set) continue; + // Check if the set is in the loop. If not, it's either before, + // which is fine, or after, which is also fine - moving curr + // to just outside the loop will preserve those relationships. + // TODO: this still counts curr's sets as inside the loop, which + // might matter in non-flat mode. + if (loopSets.count(set)) { + return true; + } + } + } + return false; + } +}; + +Pass *createLoopInvariantCodeMotionPass() { + return new LoopInvariantCodeMotion(); +} + +} // namespace wasm + diff --git a/src/passes/RedundantSetElimination.cpp b/src/passes/RedundantSetElimination.cpp index a63866111..8c00a0880 100644 --- a/src/passes/RedundantSetElimination.cpp +++ b/src/passes/RedundantSetElimination.cpp @@ -49,6 +49,8 @@ namespace wasm { // its current value typedef std::vector<Index> LocalValues; +namespace { + // information in a basic block struct Info { LocalValues start, end; // the local values at the start and end of the block @@ -366,6 +368,8 @@ struct RedundantSetElimination : public WalkerPass<CFGWalker<RedundantSetElimina } }; +} // namespace + Pass *createRedundantSetEliminationPass() { return new RedundantSetElimination(); } diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index c8d02f445..aa3ed8eb3 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -87,6 +87,7 @@ void PassRegistry::registerPasses() { registerPass("i64-to-i32-lowering", "lower all uses of i64s to use i32s instead", createI64ToI32LoweringPass); registerPass("instrument-locals", "instrument the build with code to intercept all loads and stores", createInstrumentLocalsPass); registerPass("instrument-memory", "instrument the build with code to intercept all loads and stores", createInstrumentMemoryPass); + registerPass("licm", "loop invariant code motion", createLoopInvariantCodeMotionPass); registerPass("memory-packing", "packs memory into separate segments, skipping zeros", createMemoryPackingPass); registerPass("merge-blocks", "merges blocks to their parents", createMergeBlocksPass); registerPass("merge-locals", "merges locals when beneficial", createMergeLocalsPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index e853c040a..be0849075 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -44,6 +44,7 @@ Pass* createLocalCSEPass(); Pass* createLogExecutionPass(); Pass* createInstrumentLocalsPass(); Pass* createInstrumentMemoryPass(); +Pass* createLoopInvariantCodeMotionPass(); Pass* createMemoryPackingPass(); Pass* createMergeBlocksPass(); Pass* createMergeLocalsPass(); diff --git a/test/passes/licm.txt b/test/passes/licm.txt new file mode 100644 index 000000000..e76a0d683 --- /dev/null +++ b/test/passes/licm.txt @@ -0,0 +1,677 @@ +(module + (type $0 (func)) + (type $1 (func (result i32))) + (type $2 (func (result i64))) + (type $3 (func (param i32))) + (type $4 (func (param i32) (result i32))) + (func $loop1 (; 0 ;) (type $0) + (drop + (i32.const 10) + ) + (loop $loop + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop2 (; 1 ;) (type $0) + (drop + (i32.const 10) + ) + (drop + (i32.const 20) + ) + (loop $loop + (nop) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop3 (; 2 ;) (type $0) + (drop + (i32.const 10) + ) + (drop + (i32.const 20) + ) + (loop $loop + (nop) + (call $loop2) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop4 (; 3 ;) (type $0) + (drop + (i32.load + (i32.const 1) + ) + ) + (loop $loop + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop3-4 (; 4 ;) (type $0) + (loop $loop + (drop + (i32.load + (i32.const 10) + ) + ) + (call $loop2) + (drop + (i32.load + (i32.const 20) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop3-4-b (; 5 ;) (type $0) + (drop + (i32.load + (i32.const 10) + ) + ) + (drop + (i32.load + (i32.const 20) + ) + ) + (loop $loop + (nop) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop5 (; 6 ;) (type $0) + (loop $loop + (i32.store + (i32.const 1) + (i32.const 2) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop6 (; 7 ;) (type $0) + (loop $loop + (i32.store + (i32.const 1) + (i32.const 2) + ) + (i32.store + (i32.const 2) + (i32.const 3) + ) + ) + ) + (func $loop7 (; 8 ;) (type $0) + (loop $loop + (i32.store + (i32.const 1) + (i32.const 2) + ) + (i32.store + (i32.const 2) + (i32.const 3) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop8 (; 9 ;) (type $0) + (loop $loop + (i32.store + (i32.const 1) + (i32.const 2) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop9 (; 10 ;) (type $0) + (loop $loop + (drop + (i32.load + (i32.const 1) + ) + ) + (i32.store + (i32.const 1) + (i32.const 2) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop10 (; 11 ;) (type $0) + (drop + (i32.load + (i32.const 1) + ) + ) + (drop + (i32.load + (i32.const 2) + ) + ) + (loop $loop + (nop) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop11 (; 12 ;) (type $0) + (local $x i32) + (local $y i32) + (loop $loop + (drop + (get_local $x) + ) + (br_if $loop + (tee_local $x + (i32.const 2) + ) + ) + ) + ) + (func $loop12 (; 13 ;) (type $0) + (local $x i32) + (local $y i32) + (drop + (get_local $x) + ) + (loop $loop + (nop) + (br_if $loop + (tee_local $y + (i32.const 2) + ) + ) + ) + ) + (func $loop13 (; 14 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (loop $loop + (nop) + (call $loop12) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop14 (; 15 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (loop $loop + (nop) + (call $loop12) + (br_if $loop + (i32.const 1) + ) + (set_local $y + (get_local $x) + ) + ) + ) + (func $loop14-1 (; 16 ;) (type $0) + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (call $loop12) + (set_local $y + (get_local $x) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop15 (; 17 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (loop $loop + (nop) + (call $loop12) + (br_if $loop + (i32.const 1) + ) + (drop + (get_local $y) + ) + ) + ) + (func $loop15-1 (; 18 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (drop + (get_local $y) + ) + (loop $loop + (nop) + (call $loop12) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop16 (; 19 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (loop $loop + (nop) + (call $loop12) + (br_if $loop + (i32.const 1) + ) + (drop + (get_local $x) + ) + ) + ) + (func $loop16-1 (; 20 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (drop + (get_local $x) + ) + (loop $loop + (nop) + (call $loop12) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop16-2 (; 21 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $x + (i32.const 2) + ) + (block + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (drop + (get_local $x) + ) + (loop $loop + (nop) + (call $loop12) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + ) + (func $loop16-3 (; 22 ;) (type $0) + (local $x i32) + (local $y i32) + (set_local $y + (i32.const 2) + ) + (block + (set_local $x + (i32.eqz + (get_local $y) + ) + ) + (drop + (get_local $x) + ) + (loop $loop + (nop) + (call $loop12) + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + ) + (func $nop (; 23 ;) (type $0) + (loop $loop + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $nested-blocks (; 24 ;) (type $0) + (loop $loop + (block $block + (nop) + ) + (block $x + (nop) + ) + (block $a + (block $b + (block $c + (nop) + ) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $nested-unhoistable-blocks (; 25 ;) (type $0) + (loop $loop + (block $block + (call $nested-unhoistable-blocks) + ) + (block $x + (call $nested-unhoistable-blocks) + ) + (block $a + (block $b + (block $c + (call $nested-unhoistable-blocks) + ) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $conditional (; 26 ;) (type $0) + (if + (i32.const 0) + (drop + (i32.const 10) + ) + ) + (loop $loop + (nop) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $conditional1 (; 27 ;) (type $1) (result i32) + (loop $loop + (if + (call $conditional1) + (drop + (i32.const 10) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + (unreachable) + ) + (func $conditional2 (; 28 ;) (type $0) + (block $out + (loop $loop + (br_if $out + (i32.const 1) + ) + (drop + (i32.const 10) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + ) + (func $conditional3 (; 29 ;) (type $0) + (block $out + (block + (drop + (i32.const 10) + ) + (loop $loop + (nop) + (br_if $out + (i32.const 1) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + ) + ) + (func $after (; 30 ;) (type $0) + (loop $loop + (nop) + ) + (drop + (i32.const 10) + ) + ) + (func $loops (; 31 ;) (type $0) + (drop + (i32.const 10) + ) + (loop $loop2 + (nop) + (loop $loop1 + (nop) + (br_if $loop1 + (i32.const 1) + ) + ) + ) + ) + (func $loops2 (; 32 ;) (type $0) + (drop + (i32.const 10) + ) + (loop $loop2 + (nop) + (loop $loop1 + (nop) + (br_if $loop2 + (i32.const 1) + ) + ) + ) + ) + (func $fuzz1 (; 33 ;) (type $2) (result i64) + (local $var$1 i64) + (drop + (block (result i32) + (set_local $var$1 + (block $label$5 (result i64) + (set_local $var$1 + (i64.const -29585) + ) + (i64.const -70) + ) + ) + (loop $label$4 (result i32) + (nop) + (i32.const 1) + ) + ) + ) + (loop $label$1 (result i64) + (block $label$2 + (block $label$3 + (nop) + (br $label$2) + ) + (unreachable) + ) + (get_local $var$1) + ) + ) + (func $self (; 34 ;) (type $1) (result i32) + (local $x i32) + (loop $loop + (set_local $x + (i32.add + (get_local $x) + (i32.const 1) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + (get_local $x) + ) + (func $nested-set (; 35 ;) (type $0) + (local $var$0 i32) + (local $var$1 i64) + (loop $label$1 + (set_local $var$0 + (block $label$3 (result i32) + (set_local $var$1 + (i64.const 0) + ) + (get_local $var$0) + ) + ) + (set_local $var$1 + (i64.const 1) + ) + (br_if $label$1 + (i32.const 0) + ) + ) + ) + (func $load-store (; 36 ;) (type $3) (param $x i32) + (loop $loop + (drop + (i32.load + (i32.const 0) + ) + ) + (i32.store + (get_local $x) + (get_local $x) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $set-set (; 37 ;) (type $4) (param $x i32) (result i32) + (loop $loop + (set_local $x + (i32.const 1) + ) + (br_if $loop + (i32.const 2) + ) + (set_local $x + (i32.const 3) + ) + (br_if $loop + (i32.const 4) + ) + ) + (get_local $x) + ) + (func $copies-no (; 38 ;) (type $0) + (local $x i32) + (local $y i32) + (local $z i32) + (local $a i32) + (local $b i32) + (local $c i32) + (loop $loop + (set_local $x + (get_local $x) + ) + (set_local $y + (get_local $z) + ) + (set_local $a + (tee_local $b + (get_local $c) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $consts-no (; 39 ;) (type $0) + (local $x i32) + (local $a i32) + (local $b i32) + (loop $loop + (set_local $x + (i32.const 0) + ) + (set_local $a + (tee_local $b + (i32.const 1) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) +) diff --git a/test/passes/licm.wast b/test/passes/licm.wast new file mode 100644 index 000000000..d4227525b --- /dev/null +++ b/test/passes/licm.wast @@ -0,0 +1,392 @@ +(module + (func $loop1 + (loop $loop + (drop (i32.const 10)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop2 + (loop $loop + (drop (i32.const 10)) + (drop (i32.const 20)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop3 + (loop $loop + (drop (i32.const 10)) + (call $loop2) + (drop (i32.const 20)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop4 + (loop $loop + (drop (i32.load (i32.const 1))) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop3-4 + (loop $loop + (drop (i32.load (i32.const 10))) + (call $loop2) ;; may have global side effects which alter a load! + (drop (i32.load (i32.const 20))) ;; this load must stay put + (br_if $loop (i32.const 1)) + ) + ) + (func $loop3-4-b (; 4 ;) (type $0) + (loop $loop + (drop + (i32.load + (i32.const 10) + ) + ) + (drop + (i32.load + (i32.const 20) + ) + ) + (br_if $loop + (i32.const 1) + ) + ) + ) + (func $loop5 + (loop $loop + (i32.store (i32.const 1) (i32.const 2)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop6 + (loop $loop + (i32.store (i32.const 1) (i32.const 2)) + (i32.store (i32.const 2) (i32.const 3)) + ) + ) + (func $loop7 + (loop $loop + (i32.store (i32.const 1) (i32.const 2)) + (i32.store (i32.const 2) (i32.const 3)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop8 + (loop $loop + (i32.store (i32.const 1) (i32.const 2)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop9 + (loop $loop + (drop (i32.load (i32.const 1))) + (i32.store (i32.const 1) (i32.const 2)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop10 + (loop $loop + (drop (i32.load (i32.const 1))) + (drop (i32.load (i32.const 2))) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop11 + (local $x i32) + (local $y i32) + (loop $loop + (drop (get_local $x)) + (br_if $loop (tee_local $x (i32.const 2))) + ) + ) + (func $loop12 + (local $x i32) + (local $y i32) + (loop $loop + (drop (get_local $x)) + (br_if $loop (tee_local $y (i32.const 2))) + ) + ) + (func $loop13 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop14 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (br_if $loop (i32.const 1)) + (set_local $y (get_local $x)) ;; not actually in the loop! + ) + ) + (func $loop14-1 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (set_local $y (get_local $x)) ;; in the loop + (br_if $loop (i32.const 1)) + ) + ) + (func $loop15 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (br_if $loop (i32.const 1)) + (drop (get_local $y)) + ) + ) + (func $loop15-1 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (drop (get_local $y)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop16 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (br_if $loop (i32.const 1)) + (drop (get_local $x)) + ) + ) + (func $loop16-1 + (local $x i32) + (local $y i32) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (drop (get_local $x)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop16-2 + (local $x i32) + (local $y i32) + (set_local $x (i32.const 2)) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (drop (get_local $x)) + (br_if $loop (i32.const 1)) + ) + ) + (func $loop16-3 + (local $x i32) + (local $y i32) + (set_local $y (i32.const 2)) + (loop $loop + (set_local $x (i32.eqz (get_local $y))) + (call $loop12) + (drop (get_local $x)) + (br_if $loop (i32.const 1)) + ) + ) + (func $nop + (loop $loop + (nop) + (br_if $loop (i32.const 1)) + ) + ) + (func $nested-blocks + (loop $loop + (block + (nop) + ) + (block $x + (nop) + ) + (block $a + (block $b + (block $c + (nop) + ) + ) + ) + (br_if $loop (i32.const 1)) + ) + ) + (func $nested-unhoistable-blocks + (loop $loop + (block + (call $nested-unhoistable-blocks) + ) + (block $x + (call $nested-unhoistable-blocks) + ) + (block $a + (block $b + (block $c + (call $nested-unhoistable-blocks) + ) + ) + ) + (br_if $loop (i32.const 1)) + ) + ) + (func $conditional + (loop $loop + (if (i32.const 0) + (drop (i32.const 10)) ;; cannot be hoisted - might never be reached + ) + (br_if $loop (i32.const 1)) + ) + ) + (func $conditional1 (result i32) + (loop $loop + (if (call $conditional1) + (drop (i32.const 10)) ;; cannot be hoisted - might never be reached + ;; also anyhow the whole if also cannot, due to the call + ) + (br_if $loop (i32.const 1)) + ) + (unreachable) + ) + (func $conditional2 + (block $out + (loop $loop + (br_if $out (i32.const 1)) + (drop (i32.const 10)) ;; cannot be hoisted - might never be reached + (br_if $loop (i32.const 1)) + ) + ) + ) + (func $conditional3 + (block $out + (loop $loop + (drop (i32.const 10)) ;; *CAN* be hoisted - will definitely be reached + (br_if $out (i32.const 1)) + (br_if $loop (i32.const 1)) + ) + ) + ) + (func $after + (loop $loop) + (drop (i32.const 10)) ;; may be part of the loop's basic block, logically, but is not nested in it + ) + (func $loops + (loop $loop2 + (loop $loop1 + (drop (i32.const 10)) + (br_if $loop1 (i32.const 1)) + ) + ) + ) + (func $loops2 + (loop $loop2 + (loop $loop1 + (drop (i32.const 10)) + (br_if $loop2 (i32.const 1)) + ) + ) + ) + (func $fuzz1 (result i64) + (local $var$1 i64) + (loop $label$1 (result i64) ;; multiple loops here require us to be careful not to Nop out stuff before we finalize things + (block $label$2 + (block $label$3 + (drop + (loop $label$4 (result i32) + (set_local $var$1 + (block $label$5 (result i64) + (set_local $var$1 + (i64.const -29585) + ) + (i64.const -70) + ) + ) + (i32.const 1) + ) + ) + (br $label$2) + ) + (unreachable) + ) + (get_local $var$1) + ) + ) + (func $self (result i32) + (local $x i32) + (loop $loop + (set_local $x (i32.add (get_local $x) (i32.const 1))) + (br_if $loop (i32.const 1)) + ) + (get_local $x) + ) + (func $nested-set + (local $var$0 i32) + (local $var$1 i64) + (loop $label$1 + (set_local $var$0 + (block $label$3 (result i32) + (set_local $var$1 ;; cannot be moved out (in current position - other opts would help), and invalidates moving out the set below + (i64.const 0) + ) + (get_local $var$0) + ) + ) + (set_local $var$1 + (i64.const 1) + ) + (br_if $label$1 + (i32.const 0) + ) + ) + ) + (func $load-store (param $x i32) + (loop $loop + (drop (i32.load (i32.const 0))) ;; can't move this out, the store might affect it for later iterations + (i32.store (get_local $x) (get_local $x)) + (br_if $loop (i32.const 1)) + ) + ) + (func $set-set (param $x i32) (result i32) + (loop $loop + (set_local $x (i32.const 1)) + (br_if $loop (i32.const 2)) + (set_local $x (i32.const 3)) + (br_if $loop (i32.const 4)) + ) + (get_local $x) + ) + (func $copies-no + (local $x i32) + (local $y i32) + (local $z i32) + (local $a i32) + (local $b i32) + (local $c i32) + (loop $loop + (set_local $x (get_local $x)) + (set_local $y (get_local $z)) + (set_local $a (tee_local $b (get_local $c))) + (br_if $loop (i32.const 1)) + ) + ) + (func $consts-no + (local $x i32) + (local $a i32) + (local $b i32) + (loop $loop + (set_local $x (i32.const 0)) + (set_local $a (tee_local $b (i32.const 1))) + (br_if $loop (i32.const 1)) + ) + ) +) + |