summaryrefslogtreecommitdiff
path: root/src/passes/SimplifyLocals.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/passes/SimplifyLocals.cpp')
-rw-r--r--src/passes/SimplifyLocals.cpp277
1 files changed, 205 insertions, 72 deletions
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp
index af5a9a4c4..27d868da1 100644
--- a/src/passes/SimplifyLocals.cpp
+++ b/src/passes/SimplifyLocals.cpp
@@ -52,30 +52,12 @@
#include <pass.h>
#include <ir/count.h>
#include <ir/effects.h>
+#include "ir/equivalent_sets.h"
#include <ir/find_all.h>
#include <ir/manipulation.h>
namespace wasm {
-// Helper classes
-
-struct SetLocalRemover : public PostWalker<SetLocalRemover> {
- std::vector<Index>* numGetLocals;
-
- void visitSetLocal(SetLocal *curr) {
- if ((*numGetLocals)[curr->index] == 0) {
- auto* value = curr->value;
- if (curr->isTee()) {
- replaceCurrent(value);
- } else {
- Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr);
- drop->value = value;
- drop->finalize();
- }
- }
- }
-};
-
// Main class
template<bool allowTee = true, bool allowStructure = true, bool allowNesting = true>
@@ -130,6 +112,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a
GetLocalCounter getCounter;
static void doNoteNonLinear(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
+ // Main processing.
auto* curr = *currp;
if (curr->is<Break>()) {
auto* br = curr->cast<Break>();
@@ -583,69 +566,219 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a
// output patterns. further cycles do fully general sinking.
firstCycle = true;
do {
- anotherCycle = false;
- // main operation
- WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func);
- // enlarge blocks that were marked, for the next round
- if (blocksToEnlarge.size() > 0) {
- for (auto* block : blocksToEnlarge) {
- block->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ anotherCycle = runMainOptimizations(func);
+ // After the special first cycle, definitely do another.
+ if (firstCycle) {
+ firstCycle = false;
+ anotherCycle = true;
+ }
+ // If we are all done, run the final optimizations, which may suggest we
+ // can do more work.
+ if (!anotherCycle) {
+ // Don't run multiple cycles of just the final optimizations - in
+ // particular, get canonicalization is not guaranteed to converge.
+ // Instead, if final opts help then see if they enable main
+ // opts; continue only if they do. In other words, do not end up
+ // doing final opts again and again when no main opts are being
+ // enabled.
+ if (runFinalOptimizations(func) && runMainOptimizations(func)) {
+ anotherCycle = true;
}
- blocksToEnlarge.clear();
+ }
+ } while (anotherCycle);
+ }
+
+ bool runMainOptimizations(Function* func) {
+ anotherCycle = false;
+ WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func);
+ // enlarge blocks that were marked, for the next round
+ if (blocksToEnlarge.size() > 0) {
+ for (auto* block : blocksToEnlarge) {
+ block->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ blocksToEnlarge.clear();
+ anotherCycle = true;
+ }
+ // enlarge ifs that were marked, for the next round
+ if (ifsToEnlarge.size() > 0) {
+ for (auto* iff : ifsToEnlarge) {
+ auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue);
+ iff->ifTrue = ifTrue;
+ if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) {
+ ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse);
+ iff->ifFalse = ifFalse;
+ if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) {
+ ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ }
+ ifsToEnlarge.clear();
+ anotherCycle = true;
+ }
+ // handle loops. note that a lot happens in this pass, and we can't just modify
+ // set_locals when we see a loop - it might be tracked - and we can't also just
+ // assume our loop didn't move either (might be in a block now). So we do this
+ // when all other work is done - as loop return values are rare, that is fine.
+ if (!anotherCycle) {
+ for (auto* currp : loops) {
+ auto* curr = (*currp)->template cast<Loop>();
+ assert(canUseLoopReturnValue(curr));
+ auto* set = curr->body->template cast<SetLocal>();
+ curr->body = set->value;
+ set->value = curr;
+ curr->finalize(curr->body->type);
+ *currp = set;
anotherCycle = true;
}
- // enlarge ifs that were marked, for the next round
- if (ifsToEnlarge.size() > 0) {
- for (auto* iff : ifsToEnlarge) {
- auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue);
- iff->ifTrue = ifTrue;
- if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) {
- ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ loops.clear();
+ // clean up
+ sinkables.clear();
+ blockBreaks.clear();
+ unoptimizableBlocks.clear();
+ return anotherCycle;
+ }
+
+ bool runFinalOptimizations(Function* func) {
+ // Finally, after optimizing a function we can do some additional
+ // optimization.
+ getCounter.analyze(func);
+ // Remove equivalent copies - assignment of
+ // a local to another local that already contains that value. Note that
+ // we do that at the very end, and only after structure, as removing
+ // the copy here:
+ // (if
+ // (get_local $var$0)
+ // (set_local $var$0
+ // (get_local $var$0)
+ // )
+ // (set_local $var$0
+ // (i32.const 208)
+ // )
+ // )
+ // will inhibit us creating an if return value.
+ struct EquivalentOptimizer : public LinearExecutionWalker<EquivalentOptimizer> {
+ std::vector<Index>* numGetLocals;
+ bool removeEquivalentSets;
+ Module* module;
+
+ bool anotherCycle = false;
+
+ // We track locals containing the same value.
+ EquivalentSets equivalences;
+
+ static void doNoteNonLinear(EquivalentOptimizer* self, Expression** currp) {
+ // TODO do this across non-linear paths too, in coalesce-locals perhaps? (would inhibit structure
+ // opts here, though.
+ self->equivalences.clear();
+ }
+
+ void visitSetLocal(SetLocal *curr) {
+ // Remove trivial copies, even through a tee
+ auto* value = curr->value;
+ while (auto* subSet = value->dynCast<SetLocal>()) {
+ value = subSet->value;
+ }
+ if (auto* get = value->dynCast<GetLocal>()) {
+ if (equivalences.check(curr->index, get->index)) {
+ // This is an unnecessary copy!
+ if (removeEquivalentSets) {
+ if (curr->isTee()) {
+ this->replaceCurrent(value);
+ } else if (curr->value->is<GetLocal>()) {
+ ExpressionManipulator::nop(curr);
+ } else {
+ this->replaceCurrent(Builder(*module).makeDrop(value));
+ }
+ anotherCycle = true;
+ }
+ } else {
+ // There is a new equivalence now.
+ equivalences.reset(curr->index);
+ equivalences.add(curr->index, get->index);
}
- auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse);
- iff->ifFalse = ifFalse;
- if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) {
- ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ } else {
+ // A new value is assigned here.
+ equivalences.reset(curr->index);
+ }
+ }
+
+ void visitGetLocal(GetLocal *curr) {
+ // Canonicalize gets: if some are equivalent, then we can pick more
+ // then one, and other passes may benefit from having more uniformity.
+ if (auto *set = equivalences.getEquivalents(curr->index)) {
+ // Pick the index with the most uses - maximizing the chance to
+ // lower one's uses to zero.
+ // Helper method that returns the # of gets *ignoring the current get*,
+ // as we want to see what is best overall, treating this one as
+ // to be decided upon.
+ auto getNumGetsIgnoringCurr = [&](Index index) {
+ auto ret = (*numGetLocals)[index];
+ if (index == curr->index) {
+ assert(ret >= 1);
+ ret--;
+ }
+ return ret;
+ };
+ Index best = -1;
+ for (auto index : *set) {
+ if (best == Index(-1) ||
+ getNumGetsIgnoringCurr(index) > getNumGetsIgnoringCurr(best)) {
+ best = index;
+ }
+ }
+ assert(best != Index(-1));
+ // Due to ordering, the best index may be different from us but have
+ // the same # of locals - make sure we actually improve.
+ if (best != curr->index &&
+ getNumGetsIgnoringCurr(best) > getNumGetsIgnoringCurr(curr->index)) {
+ // Update the get counts.
+ (*numGetLocals)[best]++;
+ assert((*numGetLocals)[curr->index] >= 1);
+ (*numGetLocals)[curr->index]--;
+ // Make the change.
+ curr->index = best;
+ anotherCycle = true;
}
}
- ifsToEnlarge.clear();
- anotherCycle = true;
}
- // handle loops. note that a lot happens in this pass, and we can't just modify
- // set_locals when we see a loop - it might be tracked - and we can't also just
- // assume our loop didn't move either (might be in a block now). So we do this
- // when all other work is done - as loop return values are rare, that is fine.
- if (!anotherCycle) {
- for (auto* currp : loops) {
- auto* curr = (*currp)->template cast<Loop>();
- assert(canUseLoopReturnValue(curr));
- auto* set = curr->body->template cast<SetLocal>();
- curr->body = set->value;
- set->value = curr;
- curr->finalize(curr->body->type);
- *currp = set;
+ };
+
+ EquivalentOptimizer eqOpter;
+ eqOpter.module = this->getModule();
+ eqOpter.numGetLocals = &getCounter.num;
+ eqOpter.removeEquivalentSets = allowStructure;
+ eqOpter.walkFunction(func);
+
+ // We may have already had a local with no uses, or we may have just
+ // gotten there thanks to the EquivalentOptimizer. If there are such
+ // locals, remove all their sets.
+ struct UneededSetRemover : public PostWalker<UneededSetRemover> {
+ std::vector<Index>* numGetLocals;
+
+ bool anotherCycle = false;
+
+ void visitSetLocal(SetLocal *curr) {
+ if ((*numGetLocals)[curr->index] == 0) {
+ auto* value = curr->value;
+ if (curr->isTee()) {
+ this->replaceCurrent(value);
+ } else {
+ Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr);
+ drop->value = value;
+ drop->finalize();
+ }
anotherCycle = true;
}
}
- loops.clear();
- // clean up
- sinkables.clear();
- blockBreaks.clear();
- unoptimizableBlocks.clear();
- if (firstCycle) {
- firstCycle = false;
- anotherCycle = true;
- }
- } while (anotherCycle);
- // Finally, after optimizing a function, we can see if we have set_locals
- // for a local with no remaining gets, in which case, we can
- // remove the set.
- // First, recount get_locals
- getCounter.analyze(func);
- // Second, remove unneeded sets
- SetLocalRemover remover;
- remover.numGetLocals = &getCounter.num;
- remover.walkFunction(func);
+ };
+
+ UneededSetRemover setRemover;
+ setRemover.numGetLocals = &getCounter.num;
+ setRemover.walkFunction(func);
+
+ return eqOpter.anotherCycle || setRemover.anotherCycle;
}
bool canUseLoopReturnValue(Loop* curr) {