diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/LocalCSE.cpp | 111 | ||||
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 2 | ||||
-rw-r--r-- | src/passes/pass.cpp | 4 | ||||
-rw-r--r-- | src/tools/wasm-reduce.cpp | 37 |
4 files changed, 96 insertions, 58 deletions
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 7524ad0fa..a458937db 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -17,6 +17,15 @@ // // Local CSE // +// This requires --flatten to be run before in order to be effective, +// and preserves flatness. The reason flatness is required is that +// this pass assumes everything is stored in a local, and all it does +// is alter set_locals to do get_locals of an existing value when +// possible, replacing a recomputing of that value. That design means that +// if there are block and if return values, nested expressions not stored +// to a local, etc., then it can't operate on them (and will just not +// do anything for them). +// // In each linear area of execution, // * track each relevant (big enough) expression // * if already seen, write to a local if not already, and reuse @@ -25,17 +34,19 @@ // TODO: global, inter-block gvn etc. // +#include <algorithm> +#include <memory> + #include <wasm.h> #include <wasm-builder.h> #include <wasm-traversal.h> #include <pass.h> #include <ir/effects.h> +#include <ir/equivalent_sets.h> #include <ir/hashed.h> namespace wasm { -const Index UNUSED = -1; - struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { bool isFunctionParallel() override { return true; } @@ -43,11 +54,11 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { // information for an expression we can reuse struct UsableInfo { - Expression** item; - Index index; // if not UNUSED, then the local we are assigned to, use that to reuse us + Expression* value; // the value we can reuse + Index index; // the local we are assigned to, get_local that to reuse us EffectAnalyzer effects; - UsableInfo(Expression** item, PassOptions& passOptions) : item(item), index(UNUSED), effects(passOptions, *item) {} + UsableInfo(Expression* value, Index index, PassOptions& passOptions) : value(value), index(index), effects(passOptions, value) {} }; // a list of usables in a linear execution trace @@ -56,8 +67,29 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { // locals in current linear execution trace, which we try to sink Usables usables; + // We track locals containing the same value - the value is what matters, not + // the index. + EquivalentSets equivalences; + + bool anotherPass; + + void doWalkFunction(Function* func) { + anotherPass = true; + // we may need multiple rounds + while (anotherPass) { + anotherPass = false; + clear(); + super::doWalkFunction(func); + } + } + static void doNoteNonLinear(LocalCSE* self, Expression** currp) { - self->usables.clear(); + self->clear(); + } + + void clear() { + usables.clear(); + equivalences.clear(); } void checkInvalidations(EffectAnalyzer& effects) { @@ -91,9 +123,7 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { auto* curr = *currp; // main operations - if (self->isRelevant(curr)) { - self->handle(currp, curr); - } + self->handle(curr); // post operations @@ -114,38 +144,51 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { self->pushTask(visitPre, currp); } - bool isRelevant(Expression* curr) { - if (curr->is<GetLocal>()) { + void handle(Expression* curr) { + if (auto* set = curr->dynCast<SetLocal>()) { + // Calculate equivalences + equivalences.reset(set->index); + if (auto* get = set->value->dynCast<GetLocal>()) { + equivalences.add(set->index, get->index); + } + // consider the value + auto* value = set->value; + if (isRelevant(value)) { + HashedExpression hashed(value); + auto iter = usables.find(hashed); + if (iter != usables.end()) { + // already exists in the table, this is good to reuse + auto& info = iter->second; + set->value = Builder(*getModule()).makeGetLocal(info.index, value->type); + anotherPass = true; + } else { + // not in table, add this, maybe we can help others later + usables.emplace(std::make_pair(hashed, UsableInfo(value, set->index, getPassOptions()))); + } + } + } else if (auto* get = curr->dynCast<GetLocal>()) { + if (auto* set = equivalences.getEquivalents(get->index)) { + // Canonicalize to the lowest index. This lets hashing and comparisons + // "just work". + get->index = *std::min_element(set->begin(), set->end()); + } + } + } + + // A relevant value is a non-trivial one, something we may want to reuse + // and are able to. + bool isRelevant(Expression* value) { + if (value->is<GetLocal>()) { return false; // trivial, this is what we optimize to! } - if (!isConcreteType(curr->type)) { + if (!isConcreteType(value->type)) { return false; // don't bother with unreachable etc. } - if (EffectAnalyzer(getPassOptions(), curr).hasSideEffects()) { + if (EffectAnalyzer(getPassOptions(), value).hasSideEffects()) { return false; // we can't combine things with side effects } // check what we care about TODO: use optimize/shrink levels? - return Measurer::measure(curr) > 1; - } - - void handle(Expression** currp, Expression* curr) { - HashedExpression hashed(curr); - auto iter = usables.find(hashed); - if (iter != usables.end()) { - // already exists in the table, this is good to reuse - auto& info = iter->second; - if (info.index == UNUSED) { - // we need to assign to a local. create a new one - auto index = info.index = Builder::addVar(getFunction(), curr->type); - (*info.item) = Builder(*getModule()).makeTeeLocal(index, *info.item); - } - replaceCurrent( - Builder(*getModule()).makeGetLocal(info.index, curr->type) - ); - } else { - // not in table, add this, maybe we can help others later - usables.emplace(std::make_pair(hashed, UsableInfo(currp, getPassOptions()))); - } + return Measurer::measure(value) > 1; } }; diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index d941ce0d4..aadf766ac 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -705,7 +705,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a void visitGetLocal(GetLocal *curr) { // Canonicalize gets: if some are equivalent, then we can pick more // then one, and other passes may benefit from having more uniformity. - if (auto *set = equivalences.getEquivalents(curr->index)) { + if (auto* set = equivalences.getEquivalents(curr->index)) { // Pick the index with the most uses - maximizing the chance to // lower one's uses to zero. // Helper method that returns the # of gets *ignoring the current get*, diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 6c2aa3731..2f917d779 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -175,10 +175,6 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { } else { add("precompute"); } - if (options.shrinkLevel >= 2) { - add("local-cse"); // TODO: run this early, before first coalesce-locals. right now doing so uncovers some deficiencies we need to fix first - add("coalesce-locals"); // just for localCSE - } if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { add("rse"); // after all coalesce-locals, and before a final vacuum } diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp index 0dfb97907..6b19e6216 100644 --- a/src/tools/wasm-reduce.cpp +++ b/src/tools/wasm-reduce.cpp @@ -236,13 +236,15 @@ struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor< "-O1", "-O2", "-O3", + "--flatten -Os", + "--flatten -O3", + "--flatten --local-cse -Os", "--coalesce-locals --vacuum", "--dce", "--duplicate-function-elimination", "--inlining", "--inlining-optimizing", "--optimize-level=3 --inlining-optimizing", - "--local-cse --vacuum", "--memory-packing", "--remove-unused-names --merge-blocks --vacuum", "--optimize-instructions", @@ -263,24 +265,21 @@ struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor< //std::cerr << "| starting passes loop iteration\n"; more = false; // try both combining with a generic shrink (so minor pass overhead is compensated for), and without - for (auto shrinking : { false, true }) { - for (auto pass : passes) { - std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " "; - if (shrinking) currCommand += " --dce --vacuum "; - currCommand += working + " -o " + test + " " + pass; - if (debugInfo) currCommand += " -g "; - if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n"; - if (!ProgramResult(currCommand).failed()) { - auto newSize = file_size(test); - if (newSize < oldSize) { - // the pass didn't fail, and the size looks smaller, so promising - // see if it is still has the property we are preserving - if (ProgramResult(command) == expected) { - std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n"; - copy_file(test, working); - more = true; - oldSize = newSize; - } + for (auto pass : passes) { + std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " "; + currCommand += working + " -o " + test + " " + pass; + if (debugInfo) currCommand += " -g "; + if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n"; + if (!ProgramResult(currCommand).failed()) { + auto newSize = file_size(test); + if (newSize < oldSize) { + // the pass didn't fail, and the size looks smaller, so promising + // see if it is still has the property we are preserving + if (ProgramResult(command) == expected) { + std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n"; + copy_file(test, working); + more = true; + oldSize = newSize; } } } |