summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ir/LocalGraph.cpp7
-rw-r--r--src/ir/local-graph.h3
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/MergeLocals.cpp220
-rw-r--r--src/passes/pass.cpp5
-rw-r--r--src/passes/passes.h1
-rw-r--r--src/tools/fuzzing.h14
-rw-r--r--src/tools/wasm-opt.cpp48
8 files changed, 269 insertions, 30 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp
index cee187c6d..53c6a52e2 100644
--- a/src/ir/LocalGraph.cpp
+++ b/src/ir/LocalGraph.cpp
@@ -127,11 +127,8 @@ void LocalGraph::visitLoop(Loop* curr) {
// the get trivially has fewer sets, so it overrode the loop entry sets
return;
}
- std::vector<SetLocal*> intersection;
- std::set_intersection(beforeSets.begin(), beforeSets.end(),
- getSets.begin(), getSets.end(),
- std::back_inserter(intersection));
- if (intersection.size() < beforeSets.size()) {
+ if (!std::includes(getSets.begin(), getSets.end(),
+ beforeSets.begin(), beforeSets.end())) {
// the get has not the same sets as in the loop entry
return;
}
diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h
index 4c4c1ee0a..ea27fa1fb 100644
--- a/src/ir/local-graph.h
+++ b/src/ir/local-graph.h
@@ -28,8 +28,7 @@ namespace wasm {
// on this).
//
// TODO: the algorithm here is pretty simple, but also pretty slow,
-// we should optimize it. e.g. we rely on set_interaction
-// here, and worse we only use it to compute the size...
+// we should optimize it.
struct LocalGraph : public PostWalker<LocalGraph> {
// main API
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 168af2761..e9a157499 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -17,6 +17,7 @@ SET(passes_SOURCES
InstrumentMemory.cpp
MemoryPacking.cpp
MergeBlocks.cpp
+ MergeLocals.cpp
Metrics.cpp
NameList.cpp
OptimizeInstructions.cpp
diff --git a/src/passes/MergeLocals.cpp b/src/passes/MergeLocals.cpp
new file mode 100644
index 000000000..31be495c1
--- /dev/null
+++ b/src/passes/MergeLocals.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2016 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Merges locals when it is beneficial to do so.
+//
+// An obvious case is when locals are copied. In that case, two locals have the
+// same value in a range, and we can pick which of the two to use. For
+// example, in
+//
+// (if (result i32)
+// (tee_local $x
+// (get_local $y)
+// )
+// (i32.const 100)
+// (get_local $x)
+// )
+//
+// If that assignment of $y is never used again, everything is fine. But if
+// it is, then the live range of $y does not end in that get, and will
+// necessarily overlap with that of $x - making them appear to interfere
+// with each other in coalesce-locals, even though the value is identical.
+//
+// To fix that, we replace uses of $y with uses of $x. This extends $x's
+// live range and shrinks $y's live range. This tradeoff is not always good,
+// but $x and $y definitely overlap already, so trying to shrink the overlap
+// makes sense - if we remove the overlap entirely, we may be able to let
+// $x and $y be coalesced later.
+//
+// If we can remove only some of $y's uses, then we are definitely not
+// removing the overlap, and they do conflict. In that case, it's not clear
+// if this is beneficial or not, and we don't do it for now
+// TODO: investigate more
+//
+
+#include <wasm.h>
+#include <pass.h>
+#include <wasm-builder.h>
+#include <ir/local-graph.h>
+
+namespace wasm {
+
+struct MergeLocals : public WalkerPass<PostWalker<MergeLocals, UnifiedExpressionVisitor<MergeLocals>>> {
+ bool isFunctionParallel() override { return true; }
+
+ Pass* create() override { return new MergeLocals(); }
+
+ void doWalkFunction(Function* func) {
+ // first, instrument the graph by modifying each copy
+ // (set_local $x
+ // (get_local $y)
+ // )
+ // to
+ // (set_local $x
+ // (tee_local $y
+ // (get_local $y)
+ // )
+ // )
+ // That is, we add a trivial assign of $y. This ensures we
+ // have a new assignment of $y at the location of the copy,
+    // which makes it easy for us to see if the value of $y
+ // is still used after that point
+ super::doWalkFunction(func);
+
+ // optimize the copies, merging when we can, and removing
+ // the trivial assigns we added temporarily
+ optimizeCopies();
+ }
+
+ std::vector<SetLocal*> copies;
+
+ void visitSetLocal(SetLocal* curr) {
+ if (auto* get = curr->value->dynCast<GetLocal>()) {
+ if (get->index != curr->index) {
+ Builder builder(*getModule());
+ auto* trivial = builder.makeTeeLocal(get->index, get);
+ curr->value = trivial;
+ copies.push_back(curr);
+ }
+ }
+ }
+
+ void optimizeCopies() {
+ if (copies.empty()) return;
+ // compute all dependencies
+ LocalGraph preGraph(getFunction(), getModule());
+ preGraph.computeInfluences();
+ // optimize each copy
+ std::unordered_map<SetLocal*, SetLocal*> optimizedToCopy, optimizedToTrivial;
+ for (auto* copy : copies) {
+ auto* trivial = copy->value->cast<SetLocal>();
+ bool canOptimizeToCopy = false;
+ auto& trivialInfluences = preGraph.setInfluences[trivial];
+ if (!trivialInfluences.empty()) {
+ canOptimizeToCopy = true;
+ for (auto* influencedGet : trivialInfluences) {
+ // this get uses the trivial write, so it uses the value in the copy.
+ // however, it may depend on other writes too, if there is a merge/phi,
+ // and in that case we can't do anything
+ assert(influencedGet->index == trivial->index);
+ if (preGraph.getSetses[influencedGet].size() == 1) {
+ // this is ok
+ assert(*preGraph.getSetses[influencedGet].begin() == trivial);
+ } else {
+ canOptimizeToCopy = false;
+ break;
+ }
+ }
+ }
+ if (canOptimizeToCopy) {
+ // worth it for this copy, do it
+ for (auto* influencedGet : trivialInfluences) {
+ influencedGet->index = copy->index;
+ }
+ optimizedToCopy[copy] = trivial;
+ } else {
+ // alternatively, we can try to remove the conflict in the opposite way: given
+ // (set_local $x
+ // (get_local $y)
+ // )
+ // we can look for uses of $x that could instead be uses of $y. this extends
+ // $y's live range, but if it removes the conflict between $x and $y, it may be
+ // worth it
+ if (!trivialInfluences.empty()) { // if the trivial set we added has influences, it means $y lives on
+ auto& copyInfluences = preGraph.setInfluences[copy];
+ if (!copyInfluences.empty()) {
+ bool canOptimizeToTrivial = true;
+ for (auto* influencedGet : copyInfluences) {
+ // as above, avoid merges/phis
+ assert(influencedGet->index == copy->index);
+ if (preGraph.getSetses[influencedGet].size() == 1) {
+ // this is ok
+ assert(*preGraph.getSetses[influencedGet].begin() == copy);
+ } else {
+ canOptimizeToTrivial = false;
+ break;
+ }
+ }
+ if (canOptimizeToTrivial) {
+ // worth it for this copy, do it
+ for (auto* influencedGet : copyInfluences) {
+ influencedGet->index = trivial->index;
+ }
+ optimizedToTrivial[copy] = trivial;
+              // note that we don't (TODO: this comment is truncated — complete it)
+ }
+ }
+ }
+ }
+ }
+ if (!optimizedToCopy.empty() || !optimizedToTrivial.empty()) {
+ // finally, we need to verify that the changes work properly, that is,
+ // they use the value from the right place (and are not affected by
+ // another set of the index we changed to).
+ // if one does not work, we need to undo all its siblings (don't extend
+ // the live range unless we are definitely removing a conflict, same
+ // logic as before).
+ LocalGraph postGraph(getFunction(), getModule());
+ postGraph.computeInfluences();
+ for (auto& pair : optimizedToCopy) {
+ auto* copy = pair.first;
+ auto* trivial = pair.second;
+ auto& trivialInfluences = preGraph.setInfluences[trivial];
+ for (auto* influencedGet : trivialInfluences) {
+ // verify the set
+ auto& sets = postGraph.getSetses[influencedGet];
+ if (sets.size() != 1 || *sets.begin() != copy) {
+ // not good, undo all the changes for this copy
+ for (auto* undo : trivialInfluences) {
+ undo->index = trivial->index;
+ }
+ break;
+ }
+ }
+ }
+ for (auto& pair : optimizedToTrivial) {
+ auto* copy = pair.first;
+ auto* trivial = pair.second;
+ auto& copyInfluences = preGraph.setInfluences[copy];
+ for (auto* influencedGet : copyInfluences) {
+ // verify the set
+ auto& sets = postGraph.getSetses[influencedGet];
+ if (sets.size() != 1 || *sets.begin() != trivial) {
+ // not good, undo all the changes for this copy
+ for (auto* undo : copyInfluences) {
+ undo->index = copy->index;
+ }
+ break;
+ }
+ }
+ // if this change was ok, we can probably remove the copy itself,
+ // but we leave that for other passes
+ }
+ }
+ // remove the trivial sets
+ for (auto* copy : copies) {
+ copy->value = copy->value->cast<SetLocal>()->value;
+ }
+ }
+};
+
+Pass *createMergeLocalsPass() {
+ return new MergeLocals();
+}
+
+} // namespace wasm
+
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index f2463691a..89467899c 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -83,6 +83,7 @@ void PassRegistry::registerPasses() {
registerPass("instrument-memory", "instrument the build with code to intercept all loads and stores", createInstrumentMemoryPass);
registerPass("memory-packing", "packs memory into separate segments, skipping zeros", createMemoryPackingPass);
registerPass("merge-blocks", "merges blocks to their parents", createMergeBlocksPass);
+ registerPass("merge-locals", "merges locals when beneficial", createMergeLocalsPass);
registerPass("metrics", "reports metrics", createMetricsPass);
registerPass("nm", "name list", createNameListPass);
registerPass("optimize-instructions", "optimizes instruction combinations", createOptimizeInstructionsPass);
@@ -140,6 +141,10 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
add("vacuum"); // previous pass creates garbage
add("reorder-locals");
add("remove-unused-brs"); // simplify-locals opens opportunities for optimizations
+ // if we are willing to work hard, also optimize copies before coalescing
+ if (options.optimizeLevel >= 3 || options.shrinkLevel >= 2) {
+ add("merge-locals"); // very slow on e.g. sqlite
+ }
add("coalesce-locals");
add("simplify-locals");
add("vacuum"); // previous pass creates garbage
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 957ca2d68..5cefdea17 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -42,6 +42,7 @@ Pass* createInstrumentLocalsPass();
Pass* createInstrumentMemoryPass();
Pass* createMemoryPackingPass();
Pass* createMergeBlocksPass();
+Pass* createMergeLocalsPass();
Pass* createMinifiedPrinterPass();
Pass* createMetricsPass();
Pass* createNameListPass();
diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h
index be12b561a..baaaf1b23 100644
--- a/src/tools/fuzzing.h
+++ b/src/tools/fuzzing.h
@@ -480,6 +480,14 @@ private:
}
Expression* _makeConcrete(WasmType type) {
+ auto choice = upTo(100);
+ if (choice < 10) return makeConst(type);
+ if (choice < 30) return makeSetLocal(type);
+ if (choice < 50) return makeGetLocal(type);
+ if (choice < 60) return makeBlock(type);
+ if (choice < 70) return makeIf(type);
+ if (choice < 80) return makeLoop(type);
+ if (choice < 90) return makeBreak(type);
switch (upTo(15)) {
case 0: return makeBlock(type);
case 1: return makeIf(type);
@@ -501,6 +509,12 @@ private:
}
Expression* _makenone() {
+ auto choice = upTo(100);
+ if (choice < 50) return makeSetLocal(none);
+ if (choice < 60) return makeBlock(none);
+ if (choice < 70) return makeIf(none);
+ if (choice < 80) return makeLoop(none);
+ if (choice < 90) return makeBreak(none);
switch (upTo(11)) {
case 0: return makeBlock(none);
case 1: return makeIf(none);
diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp
index 8fe55aef1..374c1b5bd 100644
--- a/src/tools/wasm-opt.cpp
+++ b/src/tools/wasm-opt.cpp
@@ -182,36 +182,38 @@ int main(int argc, const char* argv[]) {
std::cout << "[extra-fuzz-command first output:]\n" << firstOutput << '\n';
}
+ Module* curr = &wasm;
+ Module other;
+
+ if (fuzzExec && fuzzBinary) {
+ BufferWithRandomAccess buffer(false);
+ // write the binary
+ WasmBinaryWriter writer(&wasm, buffer, false);
+ writer.write();
+ // read the binary
+ auto input = buffer.getAsChars();
+ WasmBinaryBuilder parser(other, input, false);
+ parser.read();
+ bool valid = WasmValidator().validate(other, features);
+ if (!valid) {
+ WasmPrinter::printModule(&other);
+ }
+ assert(valid);
+ curr = &other;
+ }
+
if (options.runningPasses()) {
if (options.debug) std::cerr << "running passes...\n";
- options.runPasses(wasm);
- bool valid = WasmValidator().validate(wasm, features);
+ options.runPasses(*curr);
+ bool valid = WasmValidator().validate(*curr, features);
if (!valid) {
- WasmPrinter::printModule(&wasm);
+ WasmPrinter::printModule(&*curr);
}
assert(valid);
}
if (fuzzExec) {
- auto* compare = &wasm;
- Module second;
- if (fuzzBinary) {
- compare = &second;
- BufferWithRandomAccess buffer(false);
- // write the binary
- WasmBinaryWriter writer(&wasm, buffer, false);
- writer.write();
- // read the binary
- auto input = buffer.getAsChars();
- WasmBinaryBuilder parser(second, input, false);
- parser.read();
- bool valid = WasmValidator().validate(second, features);
- if (!valid) {
- WasmPrinter::printModule(&second);
- }
- assert(valid);
- }
- results.check(*compare);
+ results.check(*curr);
}
if (options.extra.count("output") > 0) {
@@ -220,7 +222,7 @@ int main(int argc, const char* argv[]) {
writer.setDebug(options.debug);
writer.setBinary(emitBinary);
writer.setDebugInfo(debugInfo);
- writer.write(wasm, options.extra["output"]);
+ writer.write(*curr, options.extra["output"]);
if (extraFuzzCommand.size() > 0) {
auto secondOutput = runCommand(extraFuzzCommand);