summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2018-06-08 15:45:02 -0700
committerGitHub <noreply@github.com>2018-06-08 15:45:02 -0700
commite3d201158d9136d6ffb655f70904dae5f9079317 (patch)
tree93329d0026eab20e5344358a902a6e2cf8b49d62 /src
parent7676221b837bbd20daf1889dbdabf3cb76721658 (diff)
downloadbinaryen-e3d201158d9136d6ffb655f70904dae5f9079317.tar.gz
binaryen-e3d201158d9136d6ffb655f70904dae5f9079317.tar.bz2
binaryen-e3d201158d9136d6ffb655f70904dae5f9079317.zip
Improve local-cse (#1594)
This makes it much more effective, by rewriting it to depend on flatten. In flattened IR, it is very simple to check if an expression is equivalent to one already available for use in a local, and use that one instead, basically we just track values in locals. Helps with #1521
Diffstat (limited to 'src')
-rw-r--r--src/passes/LocalCSE.cpp111
-rw-r--r--src/passes/SimplifyLocals.cpp2
-rw-r--r--src/passes/pass.cpp4
-rw-r--r--src/tools/wasm-reduce.cpp37
4 files changed, 96 insertions, 58 deletions
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp
index 7524ad0fa..a458937db 100644
--- a/src/passes/LocalCSE.cpp
+++ b/src/passes/LocalCSE.cpp
@@ -17,6 +17,15 @@
//
// Local CSE
//
+// This requires --flatten to be run before in order to be effective,
+// and preserves flatness. The reason flatness is required is that
+// this pass assumes everything is stored in a local, and all it does
+// is alter set_locals to do get_locals of an existing value when
+// possible, replacing a recomputing of that value. That design means that
+// if there are block and if return values, nested expressions not stored
+// to a local, etc., then it can't operate on them (and will just not
+// do anything for them).
+//
// In each linear area of execution,
// * track each relevant (big enough) expression
// * if already seen, write to a local if not already, and reuse
@@ -25,17 +34,19 @@
// TODO: global, inter-block gvn etc.
//
+#include <algorithm>
+#include <memory>
+
#include <wasm.h>
#include <wasm-builder.h>
#include <wasm-traversal.h>
#include <pass.h>
#include <ir/effects.h>
+#include <ir/equivalent_sets.h>
#include <ir/hashed.h>
namespace wasm {
-const Index UNUSED = -1;
-
struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
bool isFunctionParallel() override { return true; }
@@ -43,11 +54,11 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
// information for an expression we can reuse
struct UsableInfo {
- Expression** item;
- Index index; // if not UNUSED, then the local we are assigned to, use that to reuse us
+ Expression* value; // the value we can reuse
+ Index index; // the local we are assigned to, get_local that to reuse us
EffectAnalyzer effects;
- UsableInfo(Expression** item, PassOptions& passOptions) : item(item), index(UNUSED), effects(passOptions, *item) {}
+ UsableInfo(Expression* value, Index index, PassOptions& passOptions) : value(value), index(index), effects(passOptions, value) {}
};
// a list of usables in a linear execution trace
@@ -56,8 +67,29 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
// locals in current linear execution trace, which we try to sink
Usables usables;
+ // We track locals containing the same value - the value is what matters, not
+ // the index.
+ EquivalentSets equivalences;
+
+ bool anotherPass;
+
+ void doWalkFunction(Function* func) {
+ anotherPass = true;
+ // we may need multiple rounds
+ while (anotherPass) {
+ anotherPass = false;
+ clear();
+ super::doWalkFunction(func);
+ }
+ }
+
static void doNoteNonLinear(LocalCSE* self, Expression** currp) {
- self->usables.clear();
+ self->clear();
+ }
+
+ void clear() {
+ usables.clear();
+ equivalences.clear();
}
void checkInvalidations(EffectAnalyzer& effects) {
@@ -91,9 +123,7 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
auto* curr = *currp;
// main operations
- if (self->isRelevant(curr)) {
- self->handle(currp, curr);
- }
+ self->handle(curr);
// post operations
@@ -114,38 +144,51 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
self->pushTask(visitPre, currp);
}
- bool isRelevant(Expression* curr) {
- if (curr->is<GetLocal>()) {
+ void handle(Expression* curr) {
+ if (auto* set = curr->dynCast<SetLocal>()) {
+ // Calculate equivalences
+ equivalences.reset(set->index);
+ if (auto* get = set->value->dynCast<GetLocal>()) {
+ equivalences.add(set->index, get->index);
+ }
+ // consider the value
+ auto* value = set->value;
+ if (isRelevant(value)) {
+ HashedExpression hashed(value);
+ auto iter = usables.find(hashed);
+ if (iter != usables.end()) {
+ // already exists in the table, this is good to reuse
+ auto& info = iter->second;
+ set->value = Builder(*getModule()).makeGetLocal(info.index, value->type);
+ anotherPass = true;
+ } else {
+ // not in table, add this, maybe we can help others later
+ usables.emplace(std::make_pair(hashed, UsableInfo(value, set->index, getPassOptions())));
+ }
+ }
+ } else if (auto* get = curr->dynCast<GetLocal>()) {
+ if (auto* set = equivalences.getEquivalents(get->index)) {
+ // Canonicalize to the lowest index. This lets hashing and comparisons
+ // "just work".
+ get->index = *std::min_element(set->begin(), set->end());
+ }
+ }
+ }
+
+ // A relevant value is a non-trivial one, something we may want to reuse
+ // and are able to.
+ bool isRelevant(Expression* value) {
+ if (value->is<GetLocal>()) {
return false; // trivial, this is what we optimize to!
}
- if (!isConcreteType(curr->type)) {
+ if (!isConcreteType(value->type)) {
return false; // don't bother with unreachable etc.
}
- if (EffectAnalyzer(getPassOptions(), curr).hasSideEffects()) {
+ if (EffectAnalyzer(getPassOptions(), value).hasSideEffects()) {
return false; // we can't combine things with side effects
}
// check what we care about TODO: use optimize/shrink levels?
- return Measurer::measure(curr) > 1;
- }
-
- void handle(Expression** currp, Expression* curr) {
- HashedExpression hashed(curr);
- auto iter = usables.find(hashed);
- if (iter != usables.end()) {
- // already exists in the table, this is good to reuse
- auto& info = iter->second;
- if (info.index == UNUSED) {
- // we need to assign to a local. create a new one
- auto index = info.index = Builder::addVar(getFunction(), curr->type);
- (*info.item) = Builder(*getModule()).makeTeeLocal(index, *info.item);
- }
- replaceCurrent(
- Builder(*getModule()).makeGetLocal(info.index, curr->type)
- );
- } else {
- // not in table, add this, maybe we can help others later
- usables.emplace(std::make_pair(hashed, UsableInfo(currp, getPassOptions())));
- }
+ return Measurer::measure(value) > 1;
}
};
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp
index d941ce0d4..aadf766ac 100644
--- a/src/passes/SimplifyLocals.cpp
+++ b/src/passes/SimplifyLocals.cpp
@@ -705,7 +705,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a
void visitGetLocal(GetLocal *curr) {
// Canonicalize gets: if some are equivalent, then we can pick more
// then one, and other passes may benefit from having more uniformity.
- if (auto *set = equivalences.getEquivalents(curr->index)) {
+ if (auto* set = equivalences.getEquivalents(curr->index)) {
// Pick the index with the most uses - maximizing the chance to
// lower one's uses to zero.
// Helper method that returns the # of gets *ignoring the current get*,
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 6c2aa3731..2f917d779 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -175,10 +175,6 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
} else {
add("precompute");
}
- if (options.shrinkLevel >= 2) {
- add("local-cse"); // TODO: run this early, before first coalesce-locals. right now doing so uncovers some deficiencies we need to fix first
- add("coalesce-locals"); // just for localCSE
- }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
add("rse"); // after all coalesce-locals, and before a final vacuum
}
diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp
index 0dfb97907..6b19e6216 100644
--- a/src/tools/wasm-reduce.cpp
+++ b/src/tools/wasm-reduce.cpp
@@ -236,13 +236,15 @@ struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<
"-O1",
"-O2",
"-O3",
+ "--flatten -Os",
+ "--flatten -O3",
+ "--flatten --local-cse -Os",
"--coalesce-locals --vacuum",
"--dce",
"--duplicate-function-elimination",
"--inlining",
"--inlining-optimizing",
"--optimize-level=3 --inlining-optimizing",
- "--local-cse --vacuum",
"--memory-packing",
"--remove-unused-names --merge-blocks --vacuum",
"--optimize-instructions",
@@ -263,24 +265,21 @@ struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<
//std::cerr << "| starting passes loop iteration\n";
more = false;
// try both combining with a generic shrink (so minor pass overhead is compensated for), and without
- for (auto shrinking : { false, true }) {
- for (auto pass : passes) {
- std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " ";
- if (shrinking) currCommand += " --dce --vacuum ";
- currCommand += working + " -o " + test + " " + pass;
- if (debugInfo) currCommand += " -g ";
- if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n";
- if (!ProgramResult(currCommand).failed()) {
- auto newSize = file_size(test);
- if (newSize < oldSize) {
- // the pass didn't fail, and the size looks smaller, so promising
- // see if it is still has the property we are preserving
- if (ProgramResult(command) == expected) {
- std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n";
- copy_file(test, working);
- more = true;
- oldSize = newSize;
- }
+ for (auto pass : passes) {
+ std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " ";
+ currCommand += working + " -o " + test + " " + pass;
+ if (debugInfo) currCommand += " -g ";
+ if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n";
+ if (!ProgramResult(currCommand).failed()) {
+ auto newSize = file_size(test);
+ if (newSize < oldSize) {
+ // the pass didn't fail, and the size looks smaller, so promising
+ // see if it is still has the property we are preserving
+ if (ProgramResult(command) == expected) {
+ std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n";
+ copy_file(test, working);
+ more = true;
+ oldSize = newSize;
}
}
}