summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2018-05-10 08:58:09 -0700
committerGitHub <noreply@github.com>2018-05-10 08:58:09 -0700
commit111b4f1950467d51a78211af183f4ae6398aac49 (patch)
tree7f53ab6389e2bcaacf761cbf33846ac0210bd098 /src
parent6a9ececa2fc9eca99a12b65ca130612942babdce (diff)
downloadbinaryen-111b4f1950467d51a78211af183f4ae6398aac49.tar.gz
binaryen-111b4f1950467d51a78211af183f4ae6398aac49.tar.bz2
binaryen-111b4f1950467d51a78211af183f4ae6398aac49.zip
Optimize equivalent locals (#1540)
If locals are known to contain the same value, we can * Pick which local to use for a get_local of any of them. Makes sense to prefer the most common, to increase the chance of one dropping to zero uses. * Remove copies between a local and one that we know contains the same value. This is a consistent win, small though, around 0.1-0.2%.
Diffstat (limited to 'src')
-rw-r--r--src/cfg/liveness-traversal.h16
-rw-r--r--src/ir/equivalent_sets.h94
-rw-r--r--src/passes/CoalesceLocals.cpp8
-rw-r--r--src/passes/ConstHoisting.cpp7
-rw-r--r--src/passes/SimplifyLocals.cpp277
-rw-r--r--src/passes/pass.cpp6
-rw-r--r--src/wasm/wasm-validator.cpp1
7 files changed, 329 insertions, 80 deletions
diff --git a/src/cfg/liveness-traversal.h b/src/cfg/liveness-traversal.h
index 8ba80e015..edbe52fd6 100644
--- a/src/cfg/liveness-traversal.h
+++ b/src/cfg/liveness-traversal.h
@@ -26,6 +26,7 @@
#include "wasm-builder.h"
#include "wasm-traversal.h"
#include "cfg-traversal.h"
+#include "ir/utils.h"
namespace wasm {
@@ -60,6 +61,21 @@ struct LivenessAction {
bool isGet() { return what == Get; }
bool isSet() { return what == Set; }
bool isOther() { return what == Other; }
+
+ // Helper to remove a set that is a copy we know is not needed. This
+ // updates both the IR and the action.
+ void removeCopy() {
+ auto* set = (*origin)->cast<SetLocal>();
+ if (set->isTee()) {
+ *origin = set->value->cast<GetLocal>();
+ } else {
+ ExpressionManipulator::nop(set);
+ }
+ // Mark as an other: even if we turned the origin into a get,
+ // we already have another Action for that get, that properly
+ // represents it.
+ what = Other;
+ }
};
// information about liveness in a basic block
diff --git a/src/ir/equivalent_sets.h b/src/ir/equivalent_sets.h
new file mode 100644
index 000000000..d1a957bbd
--- /dev/null
+++ b/src/ir/equivalent_sets.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2018 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_ir_equivalent_sets_h
+#define wasm_ir_equivalent_sets_h
+
+#include <wasm.h>
+
+namespace wasm {
+
+//
+// A map of each index to all those it is equivalent to, and some helpers.
+//
+struct EquivalentSets {
+ // A set of indexes.
+ typedef std::unordered_set<Index> Set;
+
+ std::unordered_map<Index, std::shared_ptr<Set>> indexSets;
+
+ // Clears the state completely, removing all equivalences.
+ void clear() {
+ indexSets.clear();
+ }
+
+ // Resets an index, removing any equivalences between it and others.
+ void reset(Index index) {
+ auto iter = indexSets.find(index);
+ if (iter != indexSets.end()) {
+ auto& set = iter->second;
+ assert(!set->empty()); // can't be empty - we are equal to ourselves!
+ if (set->size() > 1) {
+ // We are not the last item, fix things up
+ set->erase(index);
+ }
+ indexSets.erase(iter);
+ }
+ }
+
+ // Adds a new equivalence between two indexes.
+ // `justReset` is an index that was just reset, and has no
+ // equivalences. `other` may have existing equivalences.
+ void add(Index justReset, Index other) {
+ auto iter = indexSets.find(other);
+ if (iter != indexSets.end()) {
+ auto& set = iter->second;
+ set->insert(justReset);
+ indexSets[justReset] = set;
+ } else {
+ auto set = std::make_shared<Set>();
+ set->insert(justReset);
+ set->insert(other);
+ indexSets[justReset] = set;
+ indexSets[other] = set;
+ }
+ }
+
+ // Checks whether two indexes contain the same data.
+ bool check(Index a, Index b) {
+ if (a == b) return true;
+ if (auto* set = getEquivalents(a)) {
+ if (set->find(b) != set->end()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Returns the equivalent set, or nullptr
+ Set* getEquivalents(Index index) {
+ auto iter = indexSets.find(index);
+ if (iter != indexSets.end()) {
+ return iter->second.get();
+ }
+ return nullptr;
+ }
+};
+
+} // namespace wasm
+
+#endif // wasm_ir_equivalent_sets_h
+
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp
index ef0699436..63912db19 100644
--- a/src/passes/CoalesceLocals.cpp
+++ b/src/passes/CoalesceLocals.cpp
@@ -342,17 +342,13 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root)
if (action.isGet()) {
auto* get = (*action.origin)->cast<GetLocal>();
get->index = indices[get->index];
- } else {
+ } else if (action.isSet()) {
auto* set = (*action.origin)->cast<SetLocal>();
set->index = indices[set->index];
// in addition, we can optimize out redundant copies and ineffective sets
GetLocal* get;
if ((get = set->value->dynCast<GetLocal>()) && get->index == set->index) {
- if (set->isTee()) {
- *action.origin = get;
- } else {
- ExpressionManipulator::nop(set);
- }
+ action.removeCopy();
continue;
}
// remove ineffective actions
diff --git a/src/passes/ConstHoisting.cpp b/src/passes/ConstHoisting.cpp
index e808f7af4..e598a2995 100644
--- a/src/passes/ConstHoisting.cpp
+++ b/src/passes/ConstHoisting.cpp
@@ -22,7 +22,12 @@
// WARNING: this often shrinks code size, but can *increase* gzip
// size. apparently having the constants in their proper
// places lets them be compressed better, across
-// functions, etc.
+// functions, etc. TODO investigate
+// TODO: hoisting a zero does not even require an initial set!
+// TODO: hoisting a float or double zero is especially beneficial as there
+// is no LEB compression for them, and no need for the set, so
+// each f32.const is 5 bytes and f64.const is 9 bytes, while it is
+// <= 1 byte to declare the local and 2-3 to use it!
//
#include <map>
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp
index af5a9a4c4..27d868da1 100644
--- a/src/passes/SimplifyLocals.cpp
+++ b/src/passes/SimplifyLocals.cpp
@@ -52,30 +52,12 @@
#include <pass.h>
#include <ir/count.h>
#include <ir/effects.h>
+#include "ir/equivalent_sets.h"
#include <ir/find_all.h>
#include <ir/manipulation.h>
namespace wasm {
-// Helper classes
-
-struct SetLocalRemover : public PostWalker<SetLocalRemover> {
- std::vector<Index>* numGetLocals;
-
- void visitSetLocal(SetLocal *curr) {
- if ((*numGetLocals)[curr->index] == 0) {
- auto* value = curr->value;
- if (curr->isTee()) {
- replaceCurrent(value);
- } else {
- Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr);
- drop->value = value;
- drop->finalize();
- }
- }
- }
-};
-
// Main class
template<bool allowTee = true, bool allowStructure = true, bool allowNesting = true>
@@ -130,6 +112,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a
GetLocalCounter getCounter;
static void doNoteNonLinear(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
+ // Main processing.
auto* curr = *currp;
if (curr->is<Break>()) {
auto* br = curr->cast<Break>();
@@ -583,69 +566,219 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a
// output patterns. further cycles do fully general sinking.
firstCycle = true;
do {
- anotherCycle = false;
- // main operation
- WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func);
- // enlarge blocks that were marked, for the next round
- if (blocksToEnlarge.size() > 0) {
- for (auto* block : blocksToEnlarge) {
- block->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ anotherCycle = runMainOptimizations(func);
+ // After the special first cycle, definitely do another.
+ if (firstCycle) {
+ firstCycle = false;
+ anotherCycle = true;
+ }
+ // If we are all done, run the final optimizations, which may suggest we
+ // can do more work.
+ if (!anotherCycle) {
+ // Don't run multiple cycles of just the final optimizations - in
+ // particular, get canonicalization is not guaranteed to converge.
+ // Instead, if final opts help then see if they enable main
+ // opts; continue only if they do. In other words, do not end up
+ // doing final opts again and again when no main opts are being
+ // enabled.
+ if (runFinalOptimizations(func) && runMainOptimizations(func)) {
+ anotherCycle = true;
}
- blocksToEnlarge.clear();
+ }
+ } while (anotherCycle);
+ }
+
+ bool runMainOptimizations(Function* func) {
+ anotherCycle = false;
+ WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func);
+ // enlarge blocks that were marked, for the next round
+ if (blocksToEnlarge.size() > 0) {
+ for (auto* block : blocksToEnlarge) {
+ block->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ blocksToEnlarge.clear();
+ anotherCycle = true;
+ }
+ // enlarge ifs that were marked, for the next round
+ if (ifsToEnlarge.size() > 0) {
+ for (auto* iff : ifsToEnlarge) {
+ auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue);
+ iff->ifTrue = ifTrue;
+ if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) {
+ ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse);
+ iff->ifFalse = ifFalse;
+ if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) {
+ ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ }
+ ifsToEnlarge.clear();
+ anotherCycle = true;
+ }
+ // handle loops. note that a lot happens in this pass, and we can't just modify
+ // set_locals when we see a loop - it might be tracked - and we can't also just
+ // assume our loop didn't move either (might be in a block now). So we do this
+ // when all other work is done - as loop return values are rare, that is fine.
+ if (!anotherCycle) {
+ for (auto* currp : loops) {
+ auto* curr = (*currp)->template cast<Loop>();
+ assert(canUseLoopReturnValue(curr));
+ auto* set = curr->body->template cast<SetLocal>();
+ curr->body = set->value;
+ set->value = curr;
+ curr->finalize(curr->body->type);
+ *currp = set;
anotherCycle = true;
}
- // enlarge ifs that were marked, for the next round
- if (ifsToEnlarge.size() > 0) {
- for (auto* iff : ifsToEnlarge) {
- auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue);
- iff->ifTrue = ifTrue;
- if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) {
- ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ }
+ loops.clear();
+ // clean up
+ sinkables.clear();
+ blockBreaks.clear();
+ unoptimizableBlocks.clear();
+ return anotherCycle;
+ }
+
+ bool runFinalOptimizations(Function* func) {
+ // Finally, after optimizing a function we can do some additional
+ // optimization.
+ getCounter.analyze(func);
+ // Remove equivalent copies - assignment of
+ // a local to another local that already contains that value. Note that
+ // we do that at the very end, and only after structure, as removing
+ // the copy here:
+ // (if
+ // (get_local $var$0)
+ // (set_local $var$0
+ // (get_local $var$0)
+ // )
+ // (set_local $var$0
+ // (i32.const 208)
+ // )
+ // )
+ // will inhibit us creating an if return value.
+ struct EquivalentOptimizer : public LinearExecutionWalker<EquivalentOptimizer> {
+ std::vector<Index>* numGetLocals;
+ bool removeEquivalentSets;
+ Module* module;
+
+ bool anotherCycle = false;
+
+ // We track locals containing the same value.
+ EquivalentSets equivalences;
+
+ static void doNoteNonLinear(EquivalentOptimizer* self, Expression** currp) {
+ // TODO do this across non-linear paths too, in coalesce-locals perhaps? (would inhibit structure
+ // opts here, though.
+ self->equivalences.clear();
+ }
+
+ void visitSetLocal(SetLocal *curr) {
+ // Remove trivial copies, even through a tee
+ auto* value = curr->value;
+ while (auto* subSet = value->dynCast<SetLocal>()) {
+ value = subSet->value;
+ }
+ if (auto* get = value->dynCast<GetLocal>()) {
+ if (equivalences.check(curr->index, get->index)) {
+ // This is an unnecessary copy!
+ if (removeEquivalentSets) {
+ if (curr->isTee()) {
+ this->replaceCurrent(value);
+ } else if (curr->value->is<GetLocal>()) {
+ ExpressionManipulator::nop(curr);
+ } else {
+ this->replaceCurrent(Builder(*module).makeDrop(value));
+ }
+ anotherCycle = true;
+ }
+ } else {
+ // There is a new equivalence now.
+ equivalences.reset(curr->index);
+ equivalences.add(curr->index, get->index);
}
- auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse);
- iff->ifFalse = ifFalse;
- if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) {
- ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>());
+ } else {
+ // A new value is assigned here.
+ equivalences.reset(curr->index);
+ }
+ }
+
+ void visitGetLocal(GetLocal *curr) {
+ // Canonicalize gets: if some are equivalent, then we can pick more
+ // then one, and other passes may benefit from having more uniformity.
+ if (auto *set = equivalences.getEquivalents(curr->index)) {
+ // Pick the index with the most uses - maximizing the chance to
+ // lower one's uses to zero.
+ // Helper method that returns the # of gets *ignoring the current get*,
+ // as we want to see what is best overall, treating this one as
+ // to be decided upon.
+ auto getNumGetsIgnoringCurr = [&](Index index) {
+ auto ret = (*numGetLocals)[index];
+ if (index == curr->index) {
+ assert(ret >= 1);
+ ret--;
+ }
+ return ret;
+ };
+ Index best = -1;
+ for (auto index : *set) {
+ if (best == Index(-1) ||
+ getNumGetsIgnoringCurr(index) > getNumGetsIgnoringCurr(best)) {
+ best = index;
+ }
+ }
+ assert(best != Index(-1));
+ // Due to ordering, the best index may be different from us but have
+ // the same # of locals - make sure we actually improve.
+ if (best != curr->index &&
+ getNumGetsIgnoringCurr(best) > getNumGetsIgnoringCurr(curr->index)) {
+ // Update the get counts.
+ (*numGetLocals)[best]++;
+ assert((*numGetLocals)[curr->index] >= 1);
+ (*numGetLocals)[curr->index]--;
+ // Make the change.
+ curr->index = best;
+ anotherCycle = true;
}
}
- ifsToEnlarge.clear();
- anotherCycle = true;
}
- // handle loops. note that a lot happens in this pass, and we can't just modify
- // set_locals when we see a loop - it might be tracked - and we can't also just
- // assume our loop didn't move either (might be in a block now). So we do this
- // when all other work is done - as loop return values are rare, that is fine.
- if (!anotherCycle) {
- for (auto* currp : loops) {
- auto* curr = (*currp)->template cast<Loop>();
- assert(canUseLoopReturnValue(curr));
- auto* set = curr->body->template cast<SetLocal>();
- curr->body = set->value;
- set->value = curr;
- curr->finalize(curr->body->type);
- *currp = set;
+ };
+
+ EquivalentOptimizer eqOpter;
+ eqOpter.module = this->getModule();
+ eqOpter.numGetLocals = &getCounter.num;
+ eqOpter.removeEquivalentSets = allowStructure;
+ eqOpter.walkFunction(func);
+
+ // We may have already had a local with no uses, or we may have just
+ // gotten there thanks to the EquivalentOptimizer. If there are such
+ // locals, remove all their sets.
+ struct UneededSetRemover : public PostWalker<UneededSetRemover> {
+ std::vector<Index>* numGetLocals;
+
+ bool anotherCycle = false;
+
+ void visitSetLocal(SetLocal *curr) {
+ if ((*numGetLocals)[curr->index] == 0) {
+ auto* value = curr->value;
+ if (curr->isTee()) {
+ this->replaceCurrent(value);
+ } else {
+ Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr);
+ drop->value = value;
+ drop->finalize();
+ }
anotherCycle = true;
}
}
- loops.clear();
- // clean up
- sinkables.clear();
- blockBreaks.clear();
- unoptimizableBlocks.clear();
- if (firstCycle) {
- firstCycle = false;
- anotherCycle = true;
- }
- } while (anotherCycle);
- // Finally, after optimizing a function, we can see if we have set_locals
- // for a local with no remaining gets, in which case, we can
- // remove the set.
- // First, recount get_locals
- getCounter.analyze(func);
- // Second, remove unneeded sets
- SetLocalRemover remover;
- remover.numGetLocals = &getCounter.num;
- remover.walkFunction(func);
+ };
+
+ UneededSetRemover setRemover;
+ setRemover.numGetLocals = &getCounter.num;
+ setRemover.walkFunction(func);
+
+ return eqOpter.anotherCycle || setRemover.anotherCycle;
}
bool canUseLoopReturnValue(Loop* curr) {
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 2d93d99ae..5c43aaa4e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -198,7 +198,11 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() {
static void dumpWast(Name name, Module* wasm) {
// write out the wast
static int counter = 0;
- auto fullName = std::string("byn-") + std::to_string(counter++) + "-" + name.str + ".wasm";
+ std::string numstr = std::to_string(counter++);
+ while (numstr.size() < 3) {
+ numstr = '0' + numstr;
+ }
+ auto fullName = std::string("byn-") + numstr + "-" + name.str + ".wasm";
Colors::disable();
ModuleWriter writer;
writer.setBinary(false); // TODO: add an option for binary
diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp
index 07f0a634b..7246a15a6 100644
--- a/src/wasm/wasm-validator.cpp
+++ b/src/wasm/wasm-validator.cpp
@@ -474,6 +474,7 @@ void FunctionValidator::visitCallIndirect(CallIndirect* curr) {
void FunctionValidator::visitGetLocal(GetLocal* curr) {
shouldBeTrue(curr->index < getFunction()->getNumLocals(), curr, "get_local index must be small enough");
shouldBeTrue(isConcreteType(curr->type), curr, "get_local must have a valid type - check what you provided when you constructed the node");
+ shouldBeTrue(curr->type == getFunction()->getLocalType(curr->index), curr, "get_local must have proper type");
}
void FunctionValidator::visitSetLocal(SetLocal* curr) {