diff options
Diffstat (limited to 'src/passes/SimplifyLocals.cpp')
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 277 |
1 files changed, 205 insertions, 72 deletions
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index af5a9a4c4..27d868da1 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -52,30 +52,12 @@ #include <pass.h> #include <ir/count.h> #include <ir/effects.h> +#include "ir/equivalent_sets.h" #include <ir/find_all.h> #include <ir/manipulation.h> namespace wasm { -// Helper classes - -struct SetLocalRemover : public PostWalker<SetLocalRemover> { - std::vector<Index>* numGetLocals; - - void visitSetLocal(SetLocal *curr) { - if ((*numGetLocals)[curr->index] == 0) { - auto* value = curr->value; - if (curr->isTee()) { - replaceCurrent(value); - } else { - Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr); - drop->value = value; - drop->finalize(); - } - } - } -}; - // Main class template<bool allowTee = true, bool allowStructure = true, bool allowNesting = true> @@ -130,6 +112,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a GetLocalCounter getCounter; static void doNoteNonLinear(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) { + // Main processing. auto* curr = *currp; if (curr->is<Break>()) { auto* br = curr->cast<Break>(); @@ -583,69 +566,219 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a // output patterns. further cycles do fully general sinking. firstCycle = true; do { - anotherCycle = false; - // main operation - WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func); - // enlarge blocks that were marked, for the next round - if (blocksToEnlarge.size() > 0) { - for (auto* block : blocksToEnlarge) { - block->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + anotherCycle = runMainOptimizations(func); + // After the special first cycle, definitely do another. + if (firstCycle) { + firstCycle = false; + anotherCycle = true; + } + // If we are all done, run the final optimizations, which may suggest we + // can do more work. + if (!anotherCycle) { + // Don't run multiple cycles of just the final optimizations - in + // particular, get canonicalization is not guaranteed to converge. + // Instead, if final opts help then see if they enable main + // opts; continue only if they do. In other words, do not end up + // doing final opts again and again when no main opts are being + // enabled. + if (runFinalOptimizations(func) && runMainOptimizations(func)) { + anotherCycle = true; } - blocksToEnlarge.clear(); + } + } while (anotherCycle); + } + + bool runMainOptimizations(Function* func) { + anotherCycle = false; + WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func); + // enlarge blocks that were marked, for the next round + if (blocksToEnlarge.size() > 0) { + for (auto* block : blocksToEnlarge) { + block->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + blocksToEnlarge.clear(); + anotherCycle = true; + } + // enlarge ifs that were marked, for the next round + if (ifsToEnlarge.size() > 0) { + for (auto* iff : ifsToEnlarge) { + auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue); + iff->ifTrue = ifTrue; + if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) { + ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse); + iff->ifFalse = ifFalse; + if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) { + ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + } + ifsToEnlarge.clear(); + anotherCycle = true; + } + // handle loops. note that a lot happens in this pass, and we can't just modify + // set_locals when we see a loop - it might be tracked - and we can't also just + // assume our loop didn't move either (might be in a block now). So we do this + // when all other work is done - as loop return values are rare, that is fine. + if (!anotherCycle) { + for (auto* currp : loops) { + auto* curr = (*currp)->template cast<Loop>(); + assert(canUseLoopReturnValue(curr)); + auto* set = curr->body->template cast<SetLocal>(); + curr->body = set->value; + set->value = curr; + curr->finalize(curr->body->type); + *currp = set; anotherCycle = true; } - // enlarge ifs that were marked, for the next round - if (ifsToEnlarge.size() > 0) { - for (auto* iff : ifsToEnlarge) { - auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue); - iff->ifTrue = ifTrue; - if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) { - ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + loops.clear(); + // clean up + sinkables.clear(); + blockBreaks.clear(); + unoptimizableBlocks.clear(); + return anotherCycle; + } + + bool runFinalOptimizations(Function* func) { + // Finally, after optimizing a function we can do some additional + // optimization. + getCounter.analyze(func); + // Remove equivalent copies - assignment of + // a local to another local that already contains that value. Note that + // we do that at the very end, and only after structure, as removing + // the copy here: + // (if + // (get_local $var$0) + // (set_local $var$0 + // (get_local $var$0) + // ) + // (set_local $var$0 + // (i32.const 208) + // ) + // ) + // will inhibit us creating an if return value. + struct EquivalentOptimizer : public LinearExecutionWalker<EquivalentOptimizer> { + std::vector<Index>* numGetLocals; + bool removeEquivalentSets; + Module* module; + + bool anotherCycle = false; + + // We track locals containing the same value. + EquivalentSets equivalences; + + static void doNoteNonLinear(EquivalentOptimizer* self, Expression** currp) { + // TODO do this across non-linear paths too, in coalesce-locals perhaps? (would inhibit structure + // opts here, though. + self->equivalences.clear(); + } + + void visitSetLocal(SetLocal *curr) { + // Remove trivial copies, even through a tee + auto* value = curr->value; + while (auto* subSet = value->dynCast<SetLocal>()) { + value = subSet->value; + } + if (auto* get = value->dynCast<GetLocal>()) { + if (equivalences.check(curr->index, get->index)) { + // This is an unnecessary copy! + if (removeEquivalentSets) { + if (curr->isTee()) { + this->replaceCurrent(value); + } else if (curr->value->is<GetLocal>()) { + ExpressionManipulator::nop(curr); + } else { + this->replaceCurrent(Builder(*module).makeDrop(value)); + } + anotherCycle = true; + } + } else { + // There is a new equivalence now. + equivalences.reset(curr->index); + equivalences.add(curr->index, get->index); } - auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse); - iff->ifFalse = ifFalse; - if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) { - ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } else { + // A new value is assigned here. + equivalences.reset(curr->index); + } + } + + void visitGetLocal(GetLocal *curr) { + // Canonicalize gets: if some are equivalent, then we can pick more + // then one, and other passes may benefit from having more uniformity. + if (auto *set = equivalences.getEquivalents(curr->index)) { + // Pick the index with the most uses - maximizing the chance to + // lower one's uses to zero. + // Helper method that returns the # of gets *ignoring the current get*, + // as we want to see what is best overall, treating this one as + // to be decided upon. + auto getNumGetsIgnoringCurr = [&](Index index) { + auto ret = (*numGetLocals)[index]; + if (index == curr->index) { + assert(ret >= 1); + ret--; + } + return ret; + }; + Index best = -1; + for (auto index : *set) { + if (best == Index(-1) || + getNumGetsIgnoringCurr(index) > getNumGetsIgnoringCurr(best)) { + best = index; + } + } + assert(best != Index(-1)); + // Due to ordering, the best index may be different from us but have + // the same # of locals - make sure we actually improve. + if (best != curr->index && + getNumGetsIgnoringCurr(best) > getNumGetsIgnoringCurr(curr->index)) { + // Update the get counts. + (*numGetLocals)[best]++; + assert((*numGetLocals)[curr->index] >= 1); + (*numGetLocals)[curr->index]--; + // Make the change. + curr->index = best; + anotherCycle = true; } } - ifsToEnlarge.clear(); - anotherCycle = true; } - // handle loops. note that a lot happens in this pass, and we can't just modify - // set_locals when we see a loop - it might be tracked - and we can't also just - // assume our loop didn't move either (might be in a block now). So we do this - // when all other work is done - as loop return values are rare, that is fine. - if (!anotherCycle) { - for (auto* currp : loops) { - auto* curr = (*currp)->template cast<Loop>(); - assert(canUseLoopReturnValue(curr)); - auto* set = curr->body->template cast<SetLocal>(); - curr->body = set->value; - set->value = curr; - curr->finalize(curr->body->type); - *currp = set; + }; + + EquivalentOptimizer eqOpter; + eqOpter.module = this->getModule(); + eqOpter.numGetLocals = &getCounter.num; + eqOpter.removeEquivalentSets = allowStructure; + eqOpter.walkFunction(func); + + // We may have already had a local with no uses, or we may have just + // gotten there thanks to the EquivalentOptimizer. If there are such + // locals, remove all their sets. + struct UneededSetRemover : public PostWalker<UneededSetRemover> { + std::vector<Index>* numGetLocals; + + bool anotherCycle = false; + + void visitSetLocal(SetLocal *curr) { + if ((*numGetLocals)[curr->index] == 0) { + auto* value = curr->value; + if (curr->isTee()) { + this->replaceCurrent(value); + } else { + Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr); + drop->value = value; + drop->finalize(); + } anotherCycle = true; } } - loops.clear(); - // clean up - sinkables.clear(); - blockBreaks.clear(); - unoptimizableBlocks.clear(); - if (firstCycle) { - firstCycle = false; - anotherCycle = true; - } - } while (anotherCycle); - // Finally, after optimizing a function, we can see if we have set_locals - // for a local with no remaining gets, in which case, we can - // remove the set. - // First, recount get_locals - getCounter.analyze(func); - // Second, remove unneeded sets - SetLocalRemover remover; - remover.numGetLocals = &getCounter.num; - remover.walkFunction(func); + }; + + UneededSetRemover setRemover; + setRemover.numGetLocals = &getCounter.num; + setRemover.walkFunction(func); + + return eqOpter.anotherCycle || setRemover.anotherCycle; } bool canUseLoopReturnValue(Loop* curr) { |