diff options
author | Alon Zakai <alonzakai@gmail.com> | 2018-06-08 15:45:02 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-08 15:45:02 -0700 |
commit | e3d201158d9136d6ffb655f70904dae5f9079317 (patch) | |
tree | 93329d0026eab20e5344358a902a6e2cf8b49d62 | |
parent | 7676221b837bbd20daf1889dbdabf3cb76721658 (diff) | |
download | binaryen-e3d201158d9136d6ffb655f70904dae5f9079317.tar.gz binaryen-e3d201158d9136d6ffb655f70904dae5f9079317.tar.bz2 binaryen-e3d201158d9136d6ffb655f70904dae5f9079317.zip |
Improve local-cse (#1594)
This makes it much more effective by rewriting it to depend on flatten. In flattened IR, it is very simple to check whether an expression is equivalent to one already available for use in a local, and to use that one instead; basically, we just track values in locals.
Helps with #1521
-rw-r--r-- | src/passes/LocalCSE.cpp | 111 | ||||
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 2 | ||||
-rw-r--r-- | src/passes/pass.cpp | 4 | ||||
-rw-r--r-- | src/tools/wasm-reduce.cpp | 37 | ||||
-rw-r--r-- | test/passes/Oz.txt | 28 | ||||
-rw-r--r-- | test/passes/coalesce-locals.txt | 91 | ||||
-rw-r--r-- | test/passes/coalesce-locals.wast | 71 | ||||
-rw-r--r-- | test/passes/flatten_local-cse.txt | 764 | ||||
-rw-r--r-- | test/passes/flatten_local-cse.wast (renamed from test/passes/local-cse.wast) | 97 | ||||
-rw-r--r-- | test/passes/flatten_local-cse_Os.txt | 27 | ||||
-rw-r--r-- | test/passes/flatten_local-cse_Os.wast | 39 | ||||
-rw-r--r-- | test/passes/local-cse.txt | 184 | ||||
-rw-r--r-- | test/passes/local-cse_ignore-implicit-traps.txt | 30 | ||||
-rw-r--r-- | test/passes/local-cse_ignore-implicit-traps.wast | 20 | ||||
-rw-r--r-- | test/passes/simplify-locals.wast | 1 |
15 files changed, 1199 insertions, 307 deletions
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 7524ad0fa..a458937db 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -17,6 +17,15 @@ // // Local CSE // +// This requires --flatten to be run before in order to be effective, +// and preserves flatness. The reason flatness is required is that +// this pass assumes everything is stored in a local, and all it does +// is alter set_locals to do get_locals of an existing value when +// possible, replacing a recomputing of that value. That design means that +// if there are block and if return values, nested expressions not stored +// to a local, etc., then it can't operate on them (and will just not +// do anything for them). +// // In each linear area of execution, // * track each relevant (big enough) expression // * if already seen, write to a local if not already, and reuse @@ -25,17 +34,19 @@ // TODO: global, inter-block gvn etc. // +#include <algorithm> +#include <memory> + #include <wasm.h> #include <wasm-builder.h> #include <wasm-traversal.h> #include <pass.h> #include <ir/effects.h> +#include <ir/equivalent_sets.h> #include <ir/hashed.h> namespace wasm { -const Index UNUSED = -1; - struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { bool isFunctionParallel() override { return true; } @@ -43,11 +54,11 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { // information for an expression we can reuse struct UsableInfo { - Expression** item; - Index index; // if not UNUSED, then the local we are assigned to, use that to reuse us + Expression* value; // the value we can reuse + Index index; // the local we are assigned to, get_local that to reuse us EffectAnalyzer effects; - UsableInfo(Expression** item, PassOptions& passOptions) : item(item), index(UNUSED), effects(passOptions, *item) {} + UsableInfo(Expression* value, Index index, PassOptions& passOptions) : value(value), index(index), effects(passOptions, value) {} }; // a list of 
usables in a linear execution trace @@ -56,8 +67,29 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { // locals in current linear execution trace, which we try to sink Usables usables; + // We track locals containing the same value - the value is what matters, not + // the index. + EquivalentSets equivalences; + + bool anotherPass; + + void doWalkFunction(Function* func) { + anotherPass = true; + // we may need multiple rounds + while (anotherPass) { + anotherPass = false; + clear(); + super::doWalkFunction(func); + } + } + static void doNoteNonLinear(LocalCSE* self, Expression** currp) { - self->usables.clear(); + self->clear(); + } + + void clear() { + usables.clear(); + equivalences.clear(); } void checkInvalidations(EffectAnalyzer& effects) { @@ -91,9 +123,7 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { auto* curr = *currp; // main operations - if (self->isRelevant(curr)) { - self->handle(currp, curr); - } + self->handle(curr); // post operations @@ -114,38 +144,51 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { self->pushTask(visitPre, currp); } - bool isRelevant(Expression* curr) { - if (curr->is<GetLocal>()) { + void handle(Expression* curr) { + if (auto* set = curr->dynCast<SetLocal>()) { + // Calculate equivalences + equivalences.reset(set->index); + if (auto* get = set->value->dynCast<GetLocal>()) { + equivalences.add(set->index, get->index); + } + // consider the value + auto* value = set->value; + if (isRelevant(value)) { + HashedExpression hashed(value); + auto iter = usables.find(hashed); + if (iter != usables.end()) { + // already exists in the table, this is good to reuse + auto& info = iter->second; + set->value = Builder(*getModule()).makeGetLocal(info.index, value->type); + anotherPass = true; + } else { + // not in table, add this, maybe we can help others later + usables.emplace(std::make_pair(hashed, UsableInfo(value, set->index, getPassOptions()))); + } + } + } else 
if (auto* get = curr->dynCast<GetLocal>()) { + if (auto* set = equivalences.getEquivalents(get->index)) { + // Canonicalize to the lowest index. This lets hashing and comparisons + // "just work". + get->index = *std::min_element(set->begin(), set->end()); + } + } + } + + // A relevant value is a non-trivial one, something we may want to reuse + // and are able to. + bool isRelevant(Expression* value) { + if (value->is<GetLocal>()) { return false; // trivial, this is what we optimize to! } - if (!isConcreteType(curr->type)) { + if (!isConcreteType(value->type)) { return false; // don't bother with unreachable etc. } - if (EffectAnalyzer(getPassOptions(), curr).hasSideEffects()) { + if (EffectAnalyzer(getPassOptions(), value).hasSideEffects()) { return false; // we can't combine things with side effects } // check what we care about TODO: use optimize/shrink levels? - return Measurer::measure(curr) > 1; - } - - void handle(Expression** currp, Expression* curr) { - HashedExpression hashed(curr); - auto iter = usables.find(hashed); - if (iter != usables.end()) { - // already exists in the table, this is good to reuse - auto& info = iter->second; - if (info.index == UNUSED) { - // we need to assign to a local. 
create a new one - auto index = info.index = Builder::addVar(getFunction(), curr->type); - (*info.item) = Builder(*getModule()).makeTeeLocal(index, *info.item); - } - replaceCurrent( - Builder(*getModule()).makeGetLocal(info.index, curr->type) - ); - } else { - // not in table, add this, maybe we can help others later - usables.emplace(std::make_pair(hashed, UsableInfo(currp, getPassOptions()))); - } + return Measurer::measure(value) > 1; } }; diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index d941ce0d4..aadf766ac 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -705,7 +705,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a void visitGetLocal(GetLocal *curr) { // Canonicalize gets: if some are equivalent, then we can pick more // then one, and other passes may benefit from having more uniformity. - if (auto *set = equivalences.getEquivalents(curr->index)) { + if (auto* set = equivalences.getEquivalents(curr->index)) { // Pick the index with the most uses - maximizing the chance to // lower one's uses to zero. // Helper method that returns the # of gets *ignoring the current get*, diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 6c2aa3731..2f917d779 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -175,10 +175,6 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { } else { add("precompute"); } - if (options.shrinkLevel >= 2) { - add("local-cse"); // TODO: run this early, before first coalesce-locals. 
right now doing so uncovers some deficiencies we need to fix first - add("coalesce-locals"); // just for localCSE - } if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { add("rse"); // after all coalesce-locals, and before a final vacuum } diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp index 0dfb97907..6b19e6216 100644 --- a/src/tools/wasm-reduce.cpp +++ b/src/tools/wasm-reduce.cpp @@ -236,13 +236,15 @@ struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor< "-O1", "-O2", "-O3", + "--flatten -Os", + "--flatten -O3", + "--flatten --local-cse -Os", "--coalesce-locals --vacuum", "--dce", "--duplicate-function-elimination", "--inlining", "--inlining-optimizing", "--optimize-level=3 --inlining-optimizing", - "--local-cse --vacuum", "--memory-packing", "--remove-unused-names --merge-blocks --vacuum", "--optimize-instructions", @@ -263,24 +265,21 @@ struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor< //std::cerr << "| starting passes loop iteration\n"; more = false; // try both combining with a generic shrink (so minor pass overhead is compensated for), and without - for (auto shrinking : { false, true }) { - for (auto pass : passes) { - std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " "; - if (shrinking) currCommand += " --dce --vacuum "; - currCommand += working + " -o " + test + " " + pass; - if (debugInfo) currCommand += " -g "; - if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n"; - if (!ProgramResult(currCommand).failed()) { - auto newSize = file_size(test); - if (newSize < oldSize) { - // the pass didn't fail, and the size looks smaller, so promising - // see if it is still has the property we are preserving - if (ProgramResult(command) == expected) { - std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n"; - copy_file(test, working); - more = true; - oldSize = 
newSize; - } + for (auto pass : passes) { + std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " "; + currCommand += working + " -o " + test + " " + pass; + if (debugInfo) currCommand += " -g "; + if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n"; + if (!ProgramResult(currCommand).failed()) { + auto newSize = file_size(test); + if (newSize < oldSize) { + // the pass didn't fail, and the size looks smaller, so promising + // see if it is still has the property we are preserving + if (ProgramResult(command) == expected) { + std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n"; + copy_file(test, working); + more = true; + oldSize = newSize; } } } diff --git a/test/passes/Oz.txt b/test/passes/Oz.txt index c0f5252e6..883ccc476 100644 --- a/test/passes/Oz.txt +++ b/test/passes/Oz.txt @@ -5,26 +5,23 @@ (export "localcse" (func $basics)) (export "localcse-2" (func $8)) (func $basics (; 0 ;) (type $0) (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) (i32.add - (tee_local $2 - (i32.add - (get_local $0) - (get_local $1) - ) + (i32.add + (get_local $0) + (get_local $1) + ) + (i32.add + (get_local $0) + (get_local $1) ) - (get_local $2) ) ) (func $8 (; 1 ;) (type $1) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) - (local $4 i32) (i32.store (tee_local $2 - (tee_local $4 - (i32.add - (get_local $1) - (i32.const 4) - ) + (i32.add + (get_local $1) + (i32.const 4) ) ) (i32.and @@ -36,7 +33,10 @@ ) (i32.store (tee_local $1 - (get_local $4) + (i32.add + (get_local $1) + (i32.const 4) + ) ) (i32.or (i32.load diff --git a/test/passes/coalesce-locals.txt b/test/passes/coalesce-locals.txt index b7544af44..4f856d0dc 100644 --- a/test/passes/coalesce-locals.txt +++ b/test/passes/coalesce-locals.txt @@ -8,6 +8,8 @@ (type $FUNCSIG$vi (func (param i32))) (type $7 (func (param i32) (result i32))) (type $8 (func (param f64 i32) (result i64))) + 
(type $9 (func (param i32 i32))) + (type $10 (func (result f64))) (import "env" "_emscripten_autodebug_i32" (func $_emscripten_autodebug_i32 (param i32 i32) (result i32))) (import "env" "get" (func $get (result i32))) (import "env" "set" (func $set (param i32))) @@ -1153,4 +1155,93 @@ ) ) ) + (func $pick (; 52 ;) (type $2) + (local $0 i32) + (nop) + (if + (i32.const 1) + (drop + (i32.const 1) + ) + ) + (nop) + (nop) + ) + (func $pick-2 (; 53 ;) (type $2) + (local $0 i32) + (nop) + (if + (i32.const 1) + (drop + (i32.const 1) + ) + ) + (nop) + (nop) + ) + (func $many (; 54 ;) (type $2) + (local $0 i32) + (nop) + (nop) + (nop) + (nop) + (if + (i32.const 1) + (drop + (i32.const 1) + ) + ) + (nop) + (if + (i32.const 1) + (drop + (i32.const 1) + ) + ) + (nop) + (nop) + (nop) + (set_local $0 + (i32.const 2) + ) + (nop) + (if + (i32.const 1) + (drop + (i32.const 1) + ) + ) + (nop) + (nop) + (nop) + (drop + (i32.const 2) + ) + (nop) + ) + (func $loop-copies (; 55 ;) (type $9) (param $0 i32) (param $1 i32) + (loop $loop + (set_local $0 + (get_local $1) + ) + (set_local $1 + (get_local $0) + ) + (br_if $loop + (get_local $0) + ) + ) + ) + (func $proper-type (; 56 ;) (type $10) (result f64) + (local $0 f64) + (local $1 i32) + (drop + (select + (i32.const 0) + (i32.const 1) + (get_local $1) + ) + ) + (get_local $0) + ) ) diff --git a/test/passes/coalesce-locals.wast b/test/passes/coalesce-locals.wast index ee92bb05e..04e8e0763 100644 --- a/test/passes/coalesce-locals.wast +++ b/test/passes/coalesce-locals.wast @@ -1127,4 +1127,75 @@ ) ) ) + (func $pick + (local $x i32) + (local $y i32) + (set_local $x (get_local $y)) + (if (i32.const 1) + (set_local $x (i32.const 1)) + ) + (set_local $x (get_local $y)) + (set_local $x (get_local $y)) + ) + (func $pick-2 + (local $x i32) + (local $y i32) + (set_local $y (get_local $x)) + (if (i32.const 1) + (set_local $y (i32.const 1)) + ) + (set_local $y (get_local $x)) + (set_local $y (get_local $x)) + ) + (func $many + (local $x i32) + (local 
$y i32) + (local $z i32) + (local $w i32) + (set_local $y (get_local $x)) + (set_local $z (get_local $y)) + (set_local $w (get_local $z)) + (set_local $x (get_local $z)) + (if (i32.const 1) + (set_local $y (i32.const 1)) + ) + (set_local $x (get_local $z)) + (if (i32.const 1) + (set_local $y (i32.const 1)) + ) + (set_local $y (get_local $x)) + (set_local $z (get_local $y)) + (set_local $w (get_local $z)) + (set_local $z (i32.const 2)) + (set_local $x (get_local $z)) + (if (i32.const 1) + (set_local $y (i32.const 1)) + ) + (set_local $y (get_local $x)) + (set_local $z (get_local $y)) + (set_local $w (get_local $z)) + (set_local $z (i32.const 2)) + (set_local $x (get_local $w)) + ) + (func $loop-copies (param $x i32) (param $y i32) + (loop $loop + (set_local $x (get_local $y)) + (set_local $y (get_local $x)) + (br_if $loop (get_local $x)) + ) + ) + (func $proper-type (result f64) + (local $var$0 i32) + (local $var$2 f64) + (set_local $var$0 + (select + (i32.const 0) + (i32.const 1) + (get_local $var$0) + ) + ) + (tee_local $var$2 ;; the locals will be reordered, this should be the f64 + (get_local $var$2) + ) + ) ) diff --git a/test/passes/flatten_local-cse.txt b/test/passes/flatten_local-cse.txt new file mode 100644 index 000000000..defa27c92 --- /dev/null +++ b/test/passes/flatten_local-cse.txt @@ -0,0 +1,764 @@ +(module + (type $0 (func)) + (type $1 (func (param i32) (result i32))) + (type $2 (func (param i32 i32) (result i32))) + (type $3 (func (param i32 i32 i32) (result i32))) + (type $4 (func (param f64 f64 i32) (result f32))) + (memory $0 100 100) + (func $basics (; 0 ;) (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (block + (set_local $2 + (i32.add 
+ (i32.const 1) + (i32.const 2) + ) + ) + (drop + (get_local $2) + ) + (nop) + (set_local $3 + (get_local $2) + ) + (drop + (get_local $2) + ) + (nop) + (if + (i32.const 0) + (nop) + ) + (nop) + (set_local $4 + (i32.add + (i32.const 1) + (i32.const 2) + ) + ) + (drop + (get_local $4) + ) + (nop) + (set_local $5 + (get_local $x) + ) + (set_local $6 + (get_local $y) + ) + (set_local $7 + (i32.add + (get_local $x) + (get_local $y) + ) + ) + (drop + (get_local $7) + ) + (nop) + (set_local $8 + (get_local $x) + ) + (set_local $9 + (get_local $y) + ) + (set_local $10 + (get_local $7) + ) + (drop + (get_local $7) + ) + (nop) + (set_local $11 + (get_local $x) + ) + (set_local $12 + (get_local $y) + ) + (set_local $13 + (get_local $7) + ) + (drop + (get_local $7) + ) + (nop) + (call $basics) + (nop) + (set_local $14 + (get_local $x) + ) + (set_local $15 + (get_local $y) + ) + (set_local $16 + (get_local $7) + ) + (drop + (get_local $7) + ) + (nop) + (set_local $x + (i32.const 100) + ) + (nop) + (set_local $17 + (get_local $x) + ) + (set_local $18 + (get_local $y) + ) + (set_local $19 + (i32.add + (get_local $x) + (get_local $y) + ) + ) + (drop + (get_local $19) + ) + (nop) + ) + (nop) + ) + (func $recursive1 (; 1 ;) (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block + (set_local $2 + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + (set_local $3 + (i32.add + (i32.const 1) + (get_local $2) + ) + ) + (drop + (get_local $3) + ) + (nop) + (set_local $4 + (get_local $2) + ) + (set_local $5 + (get_local $3) + ) + (drop + (get_local $3) + ) + (nop) + (set_local $6 + (get_local $2) + ) + (drop + (get_local $2) + ) + (nop) + ) + (nop) + ) + (func $recursive2 (; 2 ;) (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block + (set_local $2 + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + (set_local $3 + (i32.add + 
(i32.const 1) + (get_local $2) + ) + ) + (drop + (get_local $3) + ) + (nop) + (set_local $4 + (get_local $2) + ) + (drop + (get_local $2) + ) + (nop) + (set_local $5 + (get_local $2) + ) + (set_local $6 + (get_local $3) + ) + (drop + (get_local $3) + ) + (nop) + ) + (nop) + ) + (func $self (; 3 ;) (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block + (set_local $2 + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + (set_local $3 + (get_local $2) + ) + (set_local $4 + (i32.add + (get_local $2) + (get_local $2) + ) + ) + (drop + (get_local $4) + ) + (nop) + (set_local $5 + (get_local $2) + ) + (drop + (get_local $2) + ) + (nop) + ) + (nop) + ) + (func $loads (; 4 ;) (type $0) + (local $0 i32) + (local $1 i32) + (block + (set_local $0 + (i32.load + (i32.const 10) + ) + ) + (drop + (get_local $0) + ) + (nop) + (set_local $1 + (i32.load + (i32.const 10) + ) + ) + (drop + (get_local $1) + ) + (nop) + ) + (nop) + ) + (func $8 (; 5 ;) (type $1) (param $var$0 i32) (result i32) + (local $var$1 i32) + (local $var$2 i32) + (local $var$3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (block $label$0 + (set_local $4 + (get_local $var$1) + ) + (set_local $5 + (i32.add + (get_local $var$1) + (i32.const 4) + ) + ) + (set_local $var$2 + (get_local $5) + ) + (set_local $6 + (get_local $var$2) + ) + (set_local $7 + (get_local $var$2) + ) + (set_local $8 + (i32.load + (get_local $var$2) + ) + ) + (set_local $var$2 + (i32.const 74) + ) + (set_local $9 + (get_local $var$2) + ) + (set_local $10 + (i32.xor + (get_local $var$2) + (i32.const -1) + ) + ) + (set_local $11 + (i32.and + (get_local $8) + (get_local $10) + ) + ) + (i32.store + (get_local 
$5) + (get_local $11) + ) + (nop) + (set_local $12 + (get_local $var$1) + ) + (set_local $13 + (get_local $5) + ) + (set_local $var$1 + (get_local $5) + ) + (set_local $14 + (get_local $var$1) + ) + (set_local $15 + (get_local $var$1) + ) + (set_local $16 + (i32.load + (get_local $var$1) + ) + ) + (set_local $17 + (get_local $var$2) + ) + (set_local $18 + (i32.and + (get_local $var$2) + (i32.const 8) + ) + ) + (set_local $19 + (i32.or + (get_local $16) + (get_local $18) + ) + ) + (i32.store + (get_local $var$1) + (get_local $19) + ) + (nop) + (set_local $20 + (i32.const 0) + ) + ) + (set_local $21 + (get_local $20) + ) + (return + (get_local $20) + ) + ) + (func $loop1 (; 6 ;) (type $2) (param $x i32) (param $y i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (block + (set_local $2 + (get_local $y) + ) + (set_local $x + (get_local $y) + ) + (nop) + (set_local $3 + (get_local $x) + ) + (set_local $y + (get_local $x) + ) + (nop) + (set_local $4 + (get_local $x) + ) + (set_local $x + (get_local $x) + ) + (nop) + (set_local $5 + (get_local $x) + ) + (set_local $y + (get_local $x) + ) + (nop) + (set_local $6 + (get_local $x) + ) + (return + (get_local $x) + ) + (unreachable) + ) + (set_local $8 + (get_local $7) + ) + (return + (get_local $7) + ) + ) + (func $loop2 (; 7 ;) (type $3) (param $x i32) (param $y i32) (param $z i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (block + (set_local $3 + (get_local $y) + ) + (set_local $x + (get_local $y) + ) + (nop) + (set_local $4 + (get_local $z) + ) + (set_local $y + (get_local $z) + ) + (nop) + (set_local $5 + (get_local $x) + ) + (set_local $z + (get_local $x) + ) + (nop) + (set_local $6 + (get_local $y) + ) + (set_local $x + (get_local $y) + ) + (nop) + (set_local $7 + (get_local $z) + ) + (set_local $y + 
(get_local $z) + ) + (nop) + (set_local $8 + (get_local $x) + ) + (set_local $z + (get_local $x) + ) + (nop) + (set_local $9 + (get_local $x) + ) + (return + (get_local $x) + ) + (unreachable) + ) + (set_local $11 + (get_local $10) + ) + (return + (get_local $10) + ) + ) + (func $loop3 (; 8 ;) (type $3) (param $x i32) (param $y i32) (param $z i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (block + (set_local $3 + (get_local $y) + ) + (set_local $x + (get_local $y) + ) + (nop) + (set_local $4 + (get_local $z) + ) + (set_local $y + (get_local $z) + ) + (nop) + (set_local $5 + (get_local $y) + ) + (set_local $z + (get_local $y) + ) + (nop) + (set_local $6 + (get_local $y) + ) + (set_local $y + (get_local $y) + ) + (nop) + (set_local $7 + (get_local $y) + ) + (set_local $z + (get_local $y) + ) + (nop) + (set_local $8 + (get_local $y) + ) + (return + (get_local $y) + ) + (unreachable) + ) + (set_local $10 + (get_local $9) + ) + (return + (get_local $9) + ) + ) + (func $handle-removing (; 9 ;) (type $4) (param $var$0 f64) (param $var$1 f64) (param $var$2 i32) (result f32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 f32) + (local $7 f32) + (block + (set_local $var$2 + (i32.const 32767) + ) + (set_local $3 + (get_local $var$2) + ) + (set_local $var$2 + (i32.const 1024) + ) + (set_local $4 + (get_local $var$2) + ) + (set_local $5 + (select + (get_local $3) + (get_local $var$2) + (i32.const -2147483648) + ) + ) + (set_local $var$2 + (get_local $5) + ) + (nop) + (set_local $6 + (f32.const 1) + ) + ) + (set_local $7 + (get_local $6) + ) + (return + (get_local $6) + ) + ) +) +(module + (type $0 (func)) + (type $1 (func (param i32 f64) (result i32))) + (type $2 (func (param i64 f32 i32))) + (global $global$0 (mut i32) (i32.const 10)) + (table 23 23 anyfunc) + (export "func_1_invoker" (func $1)) + (export "func_6" (func $2)) + (func $0 (; 0 ;) (type $2) 
(param $var$0 i64) (param $var$1 f32) (param $var$2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block + (block $label$1 + (set_local $3 + (i32.const 128) + ) + (br_if $label$1 + (i32.const 0) + ) + (set_local $4 + (get_local $3) + ) + (set_local $3 + (i32.const 0) + ) + (br_if $label$1 + (get_local $4) + ) + (set_local $5 + (get_local $3) + ) + (drop + (get_local $3) + ) + (nop) + (set_local $3 + (i32.const -14051) + ) + ) + (set_local $6 + (get_local $3) + ) + (if + (get_local $3) + (block + (set_global $global$0 + (i32.const 0) + ) + (nop) + ) + ) + ) + (nop) + ) + (func $1 (; 1 ;) (type $0) + (call $0 + (i64.const 1125899906842624) + (f32.const -nan:0x7fc91a) + (i32.const -46) + ) + (nop) + ) + (func $2 (; 2 ;) (type $1) (param $var$0 i32) (param $var$1 f64) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (block + (block + (set_local $2 + (get_global $global$0) + ) + (if + (get_local $2) + (block + (unreachable) + (unreachable) + ) + ) + ) + (nop) + (set_local $3 + (i32.const 0) + ) + ) + (set_local $4 + (get_local $3) + ) + (return + (get_local $3) + ) + ) +) +(module + (type $FUNCSIG$vi (func (param i32))) + (import "env" "out" (func $out (param i32))) + (func $each-pass-must-clear (; 1 ;) (type $FUNCSIG$vi) (param $var$0 i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (block + (set_local $1 + (get_local $var$0) + ) + (set_local $2 + (i32.eqz + (get_local $var$0) + ) + ) + (call $out + (get_local $2) + ) + (nop) + (set_local $3 + (get_local $var$0) + ) + (set_local $4 + (get_local $2) + ) + (call $out + (get_local $2) + ) + (nop) + ) + (nop) + ) +) diff --git a/test/passes/local-cse.wast b/test/passes/flatten_local-cse.wast index 31b49cbb1..4a33900aa 100644 --- a/test/passes/local-cse.wast +++ b/test/passes/flatten_local-cse.wast @@ -161,4 +161,101 @@ (i32.const 0) ) ) + (func $loop1 (param $x i32) (param $y i32) (result i32) + (set_local $x (get_local $y)) + (set_local $y (get_local 
$x)) + (set_local $x (get_local $y)) + (set_local $y (get_local $x)) + (return (get_local $y)) + ) + (func $loop2 (param $x i32) (param $y i32) (param $z i32) (result i32) + (set_local $x (get_local $y)) + (set_local $y (get_local $z)) + (set_local $z (get_local $x)) + (set_local $x (get_local $y)) + (set_local $y (get_local $z)) + (set_local $z (get_local $x)) + (return (get_local $x)) + ) + (func $loop3 (param $x i32) (param $y i32) (param $z i32) (result i32) + (set_local $x (get_local $y)) + (set_local $y (get_local $z)) + (set_local $z (get_local $y)) + (set_local $y (get_local $z)) + (set_local $z (get_local $y)) + (return (get_local $y)) + ) + (func $handle-removing (param $var$0 f64) (param $var$1 f64) (param $var$2 i32) (result f32) + (set_local $var$2 + (select + (tee_local $var$2 + (i32.const 32767) + ) + (tee_local $var$2 + (i32.const 1024) + ) + (i32.const -2147483648) + ) + ) + (f32.const 1) + ) +) +;; a testcase that fails if we don't handle equivalent local canonicalization properly +(module + (type $0 (func)) + (type $1 (func (param i32 f64) (result i32))) + (type $2 (func (param i64 f32 i32))) + (global $global$0 (mut i32) (i32.const 10)) + (table 23 23 anyfunc) + (export "func_1_invoker" (func $1)) + (export "func_6" (func $2)) + (func $0 (; 0 ;) (type $2) (param $var$0 i64) (param $var$1 f32) (param $var$2 i32) + (if + (block $label$1 (result i32) + (drop + (br_if $label$1 + (i32.const 0) + (br_if $label$1 + (i32.const 128) + (i32.const 0) + ) + ) + ) + (i32.const -14051) + ) + (set_global $global$0 + (i32.const 0) + ) + ) + ) + (func $1 (; 1 ;) (type $0) + (call $0 + (i64.const 1125899906842624) + (f32.const -nan:0x7fc91a) + (i32.const -46) + ) + ) + (func $2 (; 2 ;) (type $1) (param $var$0 i32) (param $var$1 f64) (result i32) + (if + (get_global $global$0) + (unreachable) + ) + (i32.const 0) + ) +) +(module + (import "env" "out" (func $out (param i32))) + (func $each-pass-must-clear (param $var$0 i32) + (call $out + (i32.eqz + (get_local 
$var$0) + ) + ) + (call $out + (i32.eqz + (get_local $var$0) + ) + ) + ) ) + diff --git a/test/passes/flatten_local-cse_Os.txt b/test/passes/flatten_local-cse_Os.txt new file mode 100644 index 000000000..85bc4d6f5 --- /dev/null +++ b/test/passes/flatten_local-cse_Os.txt @@ -0,0 +1,27 @@ +(module + (type $0 (func (param i32 i32) (result i32))) + (export "div16_internal" (func $0)) + (func $0 (; 0 ;) (type $0) (param $0 i32) (param $1 i32) (result i32) + (i32.add + (tee_local $0 + (i32.xor + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.shr_s + (i32.shl + (get_local $1) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + (get_local $0) + ) + ) +) diff --git a/test/passes/flatten_local-cse_Os.wast b/test/passes/flatten_local-cse_Os.wast new file mode 100644 index 000000000..bc981b1b9 --- /dev/null +++ b/test/passes/flatten_local-cse_Os.wast @@ -0,0 +1,39 @@ +(module + ;; testcase from AssemblyScript + (func "div16_internal" (param $0 i32) (param $1 i32) (result i32) + (i32.add + (i32.xor + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.shr_s + (i32.shl + (get_local $1) + (i32.const 16) + ) + (i32.const 16) + ) + ) + (i32.xor + (i32.shr_s + (i32.shl + (get_local $0) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.shr_s + (i32.shl + (get_local $1) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + ) +) diff --git a/test/passes/local-cse.txt b/test/passes/local-cse.txt deleted file mode 100644 index c15b1a95c..000000000 --- a/test/passes/local-cse.txt +++ /dev/null @@ -1,184 +0,0 @@ -(module - (type $0 (func)) - (type $1 (func (param i32) (result i32))) - (memory $0 100 100) - (func $basics (; 0 ;) (type $0) - (local $x i32) - (local $y i32) - (local $2 i32) - (local $3 i32) - (drop - (tee_local $2 - (i32.add - (i32.const 1) - (i32.const 2) - ) - ) - ) - (drop - (get_local $2) - ) - (if - (i32.const 0) - (nop) - ) - (drop - (i32.add - (i32.const 1) - (i32.const 2) - ) - ) - (drop - 
(tee_local $3 - (i32.add - (get_local $x) - (get_local $y) - ) - ) - ) - (drop - (get_local $3) - ) - (drop - (get_local $3) - ) - (call $basics) - (drop - (get_local $3) - ) - (set_local $x - (i32.const 100) - ) - (drop - (i32.add - (get_local $x) - (get_local $y) - ) - ) - ) - (func $recursive1 (; 1 ;) (type $0) - (local $x i32) - (local $y i32) - (local $2 i32) - (drop - (i32.add - (i32.const 1) - (tee_local $2 - (i32.add - (i32.const 2) - (i32.const 3) - ) - ) - ) - ) - (drop - (i32.add - (i32.const 1) - (get_local $2) - ) - ) - (drop - (get_local $2) - ) - ) - (func $recursive2 (; 2 ;) (type $0) - (local $x i32) - (local $y i32) - (local $2 i32) - (drop - (i32.add - (i32.const 1) - (tee_local $2 - (i32.add - (i32.const 2) - (i32.const 3) - ) - ) - ) - ) - (drop - (get_local $2) - ) - (drop - (i32.add - (i32.const 1) - (get_local $2) - ) - ) - ) - (func $self (; 3 ;) (type $0) - (local $x i32) - (local $y i32) - (local $2 i32) - (drop - (i32.add - (tee_local $2 - (i32.add - (i32.const 2) - (i32.const 3) - ) - ) - (get_local $2) - ) - ) - (drop - (get_local $2) - ) - ) - (func $loads (; 4 ;) (type $0) - (drop - (i32.load - (i32.const 10) - ) - ) - (drop - (i32.load - (i32.const 10) - ) - ) - ) - (func $8 (; 5 ;) (type $1) (param $var$0 i32) (result i32) - (local $var$1 i32) - (local $var$2 i32) - (local $var$3 i32) - (local $4 i32) - (block $label$0 (result i32) - (i32.store - (tee_local $var$2 - (tee_local $4 - (i32.add - (get_local $var$1) - (i32.const 4) - ) - ) - ) - (i32.and - (i32.load - (get_local $var$2) - ) - (i32.xor - (tee_local $var$2 - (i32.const 74) - ) - (i32.const -1) - ) - ) - ) - (i32.store - (tee_local $var$1 - (get_local $4) - ) - (i32.or - (i32.load - (get_local $var$1) - ) - (i32.and - (get_local $var$2) - (i32.const 8) - ) - ) - ) - (i32.const 0) - ) - ) -) diff --git a/test/passes/local-cse_ignore-implicit-traps.txt b/test/passes/local-cse_ignore-implicit-traps.txt deleted file mode 100644 index 8d5720143..000000000 --- 
a/test/passes/local-cse_ignore-implicit-traps.txt +++ /dev/null @@ -1,30 +0,0 @@ -(module - (type $0 (func)) - (memory $0 100 100) - (func $loads (; 0 ;) (type $0) - (local $0 i32) - (drop - (tee_local $0 - (i32.load - (i32.const 10) - ) - ) - ) - (drop - (get_local $0) - ) - (drop - (i32.load offset=5 - (i32.const 10) - ) - ) - (drop - (i32.load - (i32.const 11) - ) - ) - (drop - (get_local $0) - ) - ) -) diff --git a/test/passes/local-cse_ignore-implicit-traps.wast b/test/passes/local-cse_ignore-implicit-traps.wast deleted file mode 100644 index 0f22084c6..000000000 --- a/test/passes/local-cse_ignore-implicit-traps.wast +++ /dev/null @@ -1,20 +0,0 @@ -(module - (memory 100 100) - (func $loads - (drop - (i32.load (i32.const 10)) - ) - (drop - (i32.load (i32.const 10)) - ) - (drop - (i32.load offset=5 (i32.const 10)) - ) - (drop - (i32.load (i32.const 11)) - ) - (drop - (i32.load (i32.const 10)) - ) - ) -) diff --git a/test/passes/simplify-locals.wast b/test/passes/simplify-locals.wast index 70eb79faa..60531717c 100644 --- a/test/passes/simplify-locals.wast +++ b/test/passes/simplify-locals.wast @@ -1129,7 +1129,6 @@ ) (i32.const 0) ) - (func $pick (local $x i32) (local $y i32) |