diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ir/LocalGraph.cpp | 7 | ||||
-rw-r--r-- | src/ir/local-graph.h | 3 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/MergeLocals.cpp | 220 | ||||
-rw-r--r-- | src/passes/pass.cpp | 5 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | src/tools/fuzzing.h | 14 | ||||
-rw-r--r-- | src/tools/wasm-opt.cpp | 48 |
8 files changed, 269 insertions, 30 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp index cee187c6d..53c6a52e2 100644 --- a/src/ir/LocalGraph.cpp +++ b/src/ir/LocalGraph.cpp @@ -127,11 +127,8 @@ void LocalGraph::visitLoop(Loop* curr) { // the get trivially has fewer sets, so it overrode the loop entry sets return; } - std::vector<SetLocal*> intersection; - std::set_intersection(beforeSets.begin(), beforeSets.end(), - getSets.begin(), getSets.end(), - std::back_inserter(intersection)); - if (intersection.size() < beforeSets.size()) { + if (!std::includes(getSets.begin(), getSets.end(), + beforeSets.begin(), beforeSets.end())) { // the get has not the same sets as in the loop entry return; } diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h index 4c4c1ee0a..ea27fa1fb 100644 --- a/src/ir/local-graph.h +++ b/src/ir/local-graph.h @@ -28,8 +28,7 @@ namespace wasm { // on this). // // TODO: the algorithm here is pretty simple, but also pretty slow, -// we should optimize it. e.g. we rely on set_interaction -// here, and worse we only use it to compute the size... +// we should optimize it. struct LocalGraph : public PostWalker<LocalGraph> { // main API diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 168af2761..e9a157499 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -17,6 +17,7 @@ SET(passes_SOURCES InstrumentMemory.cpp MemoryPacking.cpp MergeBlocks.cpp + MergeLocals.cpp Metrics.cpp NameList.cpp OptimizeInstructions.cpp diff --git a/src/passes/MergeLocals.cpp b/src/passes/MergeLocals.cpp new file mode 100644 index 000000000..31be495c1 --- /dev/null +++ b/src/passes/MergeLocals.cpp @@ -0,0 +1,220 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Merges locals when it is beneficial to do so. +// +// An obvious case is when locals are copied. In that case, two locals have the +// same value in a range, and we can pick which of the two to use. For +// example, in +// +// (if (result i32) +// (tee_local $x +// (get_local $y) +// ) +// (i32.const 100) +// (get_local $x) +// ) +// +// If that assignment of $y is never used again, everything is fine. But if +// if is, then the live range of $y does not end in that get, and will +// necessarily overlap with that of $x - making them appear to interfere +// with each other in coalesce-locals, even though the value is identical. +// +// To fix that, we replace uses of $y with uses of $x. This extends $x's +// live range and shrinks $y's live range. This tradeoff is not always good, +// but $x and $y definitely overlap already, so trying to shrink the overlap +// makes sense - if we remove the overlap entirely, we may be able to let +// $x and $y be coalesced later. +// +// If we can remove only some of $y's uses, then we are definitely not +// removing the overlap, and they do conflict. In that case, it's not clear +// if this is beneficial or not, and we don't do it for now +// TODO: investigate more +// + +#include <wasm.h> +#include <pass.h> +#include <wasm-builder.h> +#include <ir/local-graph.h> + +namespace wasm { + +struct MergeLocals : public WalkerPass<PostWalker<MergeLocals, UnifiedExpressionVisitor<MergeLocals>>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new MergeLocals(); } + + void doWalkFunction(Function* func) { + // first, instrument the graph by modifying each copy + // (set_local $x + // (get_local $y) + // ) + // to + // (set_local $x + // (tee_local $y + // (get_local $y) + // ) + // ) + // That is, we add a trivial assign of $y. This ensures we + // have a new assignment of $y at the location of the copy, + // which makes it easy for us to see if the value if $y + // is still used after that point + super::doWalkFunction(func); + + // optimize the copies, merging when we can, and removing + // the trivial assigns we added temporarily + optimizeCopies(); + } + + std::vector<SetLocal*> copies; + + void visitSetLocal(SetLocal* curr) { + if (auto* get = curr->value->dynCast<GetLocal>()) { + if (get->index != curr->index) { + Builder builder(*getModule()); + auto* trivial = builder.makeTeeLocal(get->index, get); + curr->value = trivial; + copies.push_back(curr); + } + } + } + + void optimizeCopies() { + if (copies.empty()) return; + // compute all dependencies + LocalGraph preGraph(getFunction(), getModule()); + preGraph.computeInfluences(); + // optimize each copy + std::unordered_map<SetLocal*, SetLocal*> optimizedToCopy, optimizedToTrivial; + for (auto* copy : copies) { + auto* trivial = copy->value->cast<SetLocal>(); + bool canOptimizeToCopy = false; + auto& trivialInfluences = preGraph.setInfluences[trivial]; + if (!trivialInfluences.empty()) { + canOptimizeToCopy = true; + for (auto* influencedGet : trivialInfluences) { + // this get uses the trivial write, so it uses the value in the copy. + // however, it may depend on other writes too, if there is a merge/phi, + // and in that case we can't do anything + assert(influencedGet->index == trivial->index); + if (preGraph.getSetses[influencedGet].size() == 1) { + // this is ok + assert(*preGraph.getSetses[influencedGet].begin() == trivial); + } else { + canOptimizeToCopy = false; + break; + } + } + } + if (canOptimizeToCopy) { + // worth it for this copy, do it + for (auto* influencedGet : trivialInfluences) { + influencedGet->index = copy->index; + } + optimizedToCopy[copy] = trivial; + } else { + // alternatively, we can try to remove the conflict in the opposite way: given + // (set_local $x + // (get_local $y) + // ) + // we can look for uses of $x that could instead be uses of $y. this extends + // $y's live range, but if it removes the conflict between $x and $y, it may be + // worth it + if (!trivialInfluences.empty()) { // if the trivial set we added has influences, it means $y lives on + auto& copyInfluences = preGraph.setInfluences[copy]; + if (!copyInfluences.empty()) { + bool canOptimizeToTrivial = true; + for (auto* influencedGet : copyInfluences) { + // as above, avoid merges/phis + assert(influencedGet->index == copy->index); + if (preGraph.getSetses[influencedGet].size() == 1) { + // this is ok + assert(*preGraph.getSetses[influencedGet].begin() == copy); + } else { + canOptimizeToTrivial = false; + break; + } + } + if (canOptimizeToTrivial) { + // worth it for this copy, do it + for (auto* influencedGet : copyInfluences) { + influencedGet->index = trivial->index; + } + optimizedToTrivial[copy] = trivial; + // note that we don't + } + } + } + } + } + if (!optimizedToCopy.empty() || !optimizedToTrivial.empty()) { + // finally, we need to verify that the changes work properly, that is, + // they use the value from the right place (and are not affected by + // another set of the index we changed to). + // if one does not work, we need to undo all its siblings (don't extend + // the live range unless we are definitely removing a conflict, same + // logic as before). + LocalGraph postGraph(getFunction(), getModule()); + postGraph.computeInfluences(); + for (auto& pair : optimizedToCopy) { + auto* copy = pair.first; + auto* trivial = pair.second; + auto& trivialInfluences = preGraph.setInfluences[trivial]; + for (auto* influencedGet : trivialInfluences) { + // verify the set + auto& sets = postGraph.getSetses[influencedGet]; + if (sets.size() != 1 || *sets.begin() != copy) { + // not good, undo all the changes for this copy + for (auto* undo : trivialInfluences) { + undo->index = trivial->index; + } + break; + } + } + } + for (auto& pair : optimizedToTrivial) { + auto* copy = pair.first; + auto* trivial = pair.second; + auto& copyInfluences = preGraph.setInfluences[copy]; + for (auto* influencedGet : copyInfluences) { + // verify the set + auto& sets = postGraph.getSetses[influencedGet]; + if (sets.size() != 1 || *sets.begin() != trivial) { + // not good, undo all the changes for this copy + for (auto* undo : copyInfluences) { + undo->index = copy->index; + } + break; + } + } + // if this change was ok, we can probably remove the copy itself, + // but we leave that for other passes + } + } + // remove the trivial sets + for (auto* copy : copies) { + copy->value = copy->value->cast<SetLocal>()->value; + } + } +}; + +Pass *createMergeLocalsPass() { + return new MergeLocals(); +} + +} // namespace wasm + diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index f2463691a..89467899c 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -83,6 +83,7 @@ void PassRegistry::registerPasses() { registerPass("instrument-memory", "instrument the build with code to intercept all loads and stores", createInstrumentMemoryPass); registerPass("memory-packing", "packs memory into separate segments, skipping zeros", createMemoryPackingPass); registerPass("merge-blocks", "merges blocks to their parents", createMergeBlocksPass); + registerPass("merge-locals", "merges locals when beneficial", createMergeLocalsPass); registerPass("metrics", "reports metrics", createMetricsPass); registerPass("nm", "name list", createNameListPass); registerPass("optimize-instructions", "optimizes instruction combinations", createOptimizeInstructionsPass); @@ -140,6 +141,10 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { add("vacuum"); // previous pass creates garbage add("reorder-locals"); add("remove-unused-brs"); // simplify-locals opens opportunities for optimizations + // if we are willing to work hard, also optimize copies before coalescing + if (options.optimizeLevel >= 3 || options.shrinkLevel >= 2) { + add("merge-locals"); // very slow on e.g. sqlite + } add("coalesce-locals"); add("simplify-locals"); add("vacuum"); // previous pass creates garbage diff --git a/src/passes/passes.h b/src/passes/passes.h index 957ca2d68..5cefdea17 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -42,6 +42,7 @@ Pass* createInstrumentLocalsPass(); Pass* createInstrumentMemoryPass(); Pass* createMemoryPackingPass(); Pass* createMergeBlocksPass(); +Pass* createMergeLocalsPass(); Pass* createMinifiedPrinterPass(); Pass* createMetricsPass(); Pass* createNameListPass(); diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index be12b561a..baaaf1b23 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -480,6 +480,14 @@ private: } Expression* _makeConcrete(WasmType type) { + auto choice = upTo(100); + if (choice < 10) return makeConst(type); + if (choice < 30) return makeSetLocal(type); + if (choice < 50) return makeGetLocal(type); + if (choice < 60) return makeBlock(type); + if (choice < 70) return makeIf(type); + if (choice < 80) return makeLoop(type); + if (choice < 90) return makeBreak(type); switch (upTo(15)) { case 0: return makeBlock(type); case 1: return makeIf(type); @@ -501,6 +509,12 @@ private: } Expression* _makenone() { + auto choice = upTo(100); + if (choice < 50) return makeSetLocal(none); + if (choice < 60) return makeBlock(none); + if (choice < 70) return makeIf(none); + if (choice < 80) return makeLoop(none); + if (choice < 90) return makeBreak(none); switch (upTo(11)) { case 0: return makeBlock(none); case 1: return makeIf(none); diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 8fe55aef1..374c1b5bd 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -182,36 +182,38 @@ int main(int argc, const char* argv[]) { std::cout << "[extra-fuzz-command first output:]\n" << firstOutput << '\n'; } + Module* curr = &wasm; + Module other; + + if (fuzzExec && fuzzBinary) { + BufferWithRandomAccess buffer(false); + // write the binary + WasmBinaryWriter writer(&wasm, buffer, false); + writer.write(); + // read the binary + auto input = buffer.getAsChars(); + WasmBinaryBuilder parser(other, input, false); + parser.read(); + bool valid = WasmValidator().validate(other, features); + if (!valid) { + WasmPrinter::printModule(&other); + } + assert(valid); + curr = &other; + } + if (options.runningPasses()) { if (options.debug) std::cerr << "running passes...\n"; - options.runPasses(wasm); - bool valid = WasmValidator().validate(wasm, features); + options.runPasses(*curr); + bool valid = WasmValidator().validate(*curr, features); if (!valid) { - WasmPrinter::printModule(&wasm); + WasmPrinter::printModule(&*curr); } assert(valid); } if (fuzzExec) { - auto* compare = &wasm; - Module second; - if (fuzzBinary) { - compare = &second; - BufferWithRandomAccess buffer(false); - // write the binary - WasmBinaryWriter writer(&wasm, buffer, false); - writer.write(); - // read the binary - auto input = buffer.getAsChars(); - WasmBinaryBuilder parser(second, input, false); - parser.read(); - bool valid = WasmValidator().validate(second, features); - if (!valid) { - WasmPrinter::printModule(&second); - } - assert(valid); - } - results.check(*compare); + results.check(*curr); } if (options.extra.count("output") > 0) { @@ -220,7 +222,7 @@ int main(int argc, const char* argv[]) { writer.setDebug(options.debug); writer.setBinary(emitBinary); writer.setDebugInfo(debugInfo); - writer.write(wasm, options.extra["output"]); + writer.write(*curr, options.extra["output"]); if (extraFuzzCommand.size() > 0) { auto secondOutput = runCommand(extraFuzzCommand); |