diff options
Diffstat (limited to 'src/tools/wasm-reduce.cpp')
-rw-r--r-- | src/tools/wasm-reduce.cpp | 661 |
1 files changed, 661 insertions, 0 deletions
diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp new file mode 100644 index 000000000..355b70278 --- /dev/null +++ b/src/tools/wasm-reduce.cpp @@ -0,0 +1,661 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Tries to reduce the input wasm into the smallest possible wasm +// that still generates the same result on a given command. This is +// useful to reduce bug testcases, for example, if a file crashes +// the optimizer, you can reduce it to find the smallest file that +// also crashes it (which generally will show the same bug, in a +// much more debuggable manner). +// + +#include <memory> +#include <cstdio> +#include <cstdlib> + +#include "pass.h" +#include "support/command-line.h" +#include "support/file.h" +#include "wasm-io.h" +#include "wasm-builder.h" +#include "ast/literal-utils.h" +#include "wasm-validator.h" + +using namespace wasm; + +struct ProgramResult { + int code; + std::string output; + + ProgramResult() {} + ProgramResult(std::string command) { + getFromExecution(command); + } + + // runs the command and notes the output + // TODO: also stderr, not just stdout? + void getFromExecution(std::string command) { + // do this using just core stdio.h and stdlib.h, for portability + // sadly this requires two invokes + code = system(("timeout 2s " + command + " > /dev/null 2> /dev/null").c_str()); + const int MAX_BUFFER = 1024; + char buffer[MAX_BUFFER]; + FILE *stream = popen(("timeout 2s " + command + " 2> /dev/null").c_str(), "r"); + while (fgets(buffer, MAX_BUFFER, stream) != NULL) { + output.append(buffer); + } + pclose(stream); + } + + bool operator==(ProgramResult& other) { + return code == other.code && output == other.output; + } + bool operator!=(ProgramResult& other) { + return !(*this == other); + } + + bool failed() { + return code != 0; + } + + void dump(std::ostream& o) { + o << "[ProgramResult] code: " << code << " stdout: \n" << output << "[/ProgramResult]\n"; + } +}; + +namespace std { + +inline std::ostream& operator<<(std::ostream& o, ProgramResult& result) { + result.dump(o); + return o; +} + +} + +ProgramResult expected; + +struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<Reducer>>> { + std::string command, test, working; + bool verbose, debugInfo; + + // test is the file we write to that the command will operate on + // working is the current temporary state, the reduction so far + Reducer(std::string command, std::string test, std::string working, bool verbose, bool debugInfo) : command(command), test(test), working(working), verbose(verbose), debugInfo(debugInfo) {} + + // runs passes in order to reduce, until we can't reduce any more + // the criterion here is wasm binary size + void reduceUsingPasses() { + // run optimization passes until we can't shrink it any more + std::vector<std::string> passes = { + "-Oz", + "-Os", + "-O1", + "-O2", + "-O3", + "--coalesce-locals --vacuum", + "--dce", + "--duplicate-function-elimination", + "--inlining", + "--inlining-optimizing", + "--optimize-level=3 --inlining-optimizing", + "--local-cse --vacuum", + "--memory-packing", + "--remove-unused-names --merge-blocks --vacuum", + "--optimize-instructions", + "--precompute", + "--remove-imports", + "--remove-memory", + "--remove-unused-names --remove-unused-brs", + "--remove-unused-module-elements", + "--reorder-functions", + "--reorder-locals", + "--simplify-locals --vacuum", + "--vacuum" + }; + auto oldSize = file_size(working); + bool more = true; + while (more) { + //std::cerr << "| starting passes loop iteration\n"; + more = false; + // try both combining with a generic shrink (so minor pass overhead is compensated for), and without + for (auto shrinking : { false, true }) { + for (auto pass : passes) { + std::string currCommand = "bin/wasm-opt "; + if (shrinking) currCommand += " --dce --vacuum "; + currCommand += working + " -o " + test + " " + pass; + if (debugInfo) currCommand += " -g "; + if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n"; + if (!ProgramResult(currCommand).failed()) { + auto newSize = file_size(test); + if (newSize < oldSize) { + // the pass didn't fail, and the size looks smaller, so promising + // see if it is still has the property we are preserving + if (ProgramResult(command) == expected) { + std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n"; + copy_file(test, working); + more = true; + oldSize = newSize; + } + } + } + } + } + } + if (verbose) std::cerr << "| done with passes for now\n"; + } + + // does one pass of slow and destructive reduction. returns whether it + // succeeded or not + // the criterion here is a logical change in the program. this may actually + // increase wasm size in some cases, but it should allow more reduction later. + // @param factor how much to ignore. starting with a high factor skips through + // most of the file, which is often faster than going one by one + // from the start + size_t reduceDestructively(int factor_) { + factor = factor_; + Module wasm; + ModuleReader reader; + reader.read(working, wasm); + // prepare + reduced = 0; + builder = make_unique<Builder>(wasm); + funcsSeen = 0; + // before we do any changes, it should be valid to write out the module: + // size should be as expected, and output should be as expected + setModule(&wasm); + if (!writeAndTestReduction()) { + std::cerr << "\n|! WARNING: writing before destructive reduction fails, very unlikely reduction can work\n\n"; + } + // destroy! + walkModule(&wasm); + return reduced; + } + + // destructive reduction state + + size_t reduced; + Expression* beforeReduction; + std::unique_ptr<Builder> builder; + Index funcsSeen; + int factor; + + // write the module and see if the command still fails on it as expected + bool writeAndTestReduction() { + ProgramResult result; + return writeAndTestReduction(result); + } + + bool writeAndTestReduction(ProgramResult& out) { + // write the module out + ModuleWriter writer; + writer.setBinary(true); + writer.setDebugInfo(debugInfo); + writer.write(*getModule(), test); + // note that it is ok for the destructively-reduced module to be bigger + // than the previous - each destructive reduction removes logical code, + // and so is strictly better, even if the wasm binary format happens to + // encode things slightly less efficiently. + // test it + out.getFromExecution(command); + return out == expected; + } + + bool shouldTryToReduce(size_t bonus = 1) { + static size_t counter = 0; + counter += bonus; + return (counter % factor) <= bonus; + } + + // tests a reduction on the current traversal node, and undos if it failed + bool tryToReplaceCurrent(Expression* with) { + auto* curr = getCurrent(); + //std::cerr << "try " << curr << " => " << with << '\n'; + if (curr->type != with->type) return false; + if (!shouldTryToReduce()) return false; + replaceCurrent(with); + if (!writeAndTestReduction()) { + replaceCurrent(curr); + return false; + } + std::cerr << "| tryToReplaceCurrent succeeded (in " << getLocation() << ")\n"; + noteReduction(); + return true; + } + + void noteReduction() { + reduced++; + copy_file(test, working); + } + + // tests a reduction on an arbitrary child + bool tryToReplaceChild(Expression*& child, Expression* with) { + if (child->type != with->type) return false; + if (!shouldTryToReduce()) return false; + auto* before = child; + child = with; + if (!writeAndTestReduction()) { + child = before; + return false; + } + std::cerr << "| tryToReplaceChild succeeded (in " << getLocation() << ")\n"; + //std::cerr << "| " << before << " => " << with << '\n'; + noteReduction(); + return true; + } + + std::string getLocation() { + if (getFunction()) return getFunction()->name.str; + return "(non-function context)"; + } + + // visitors. in each we try to remove code in a destructive and nontrivial way. + // "nontrivial" means something that optimization passes can't achieve, since we + // don't need to duplicate work that they do + + void visitExpression(Expression* curr) { + if (curr->type == none) { + if (tryToReduceCurrentToNone()) return; + } else if (isConcreteWasmType(curr->type)) { + if (tryToReduceCurrentToConst()) return; + } else { + assert(curr->type == unreachable); + if (tryToReduceCurrentToUnreachable()) return; + } + // specific reductions + if (auto* iff = curr->dynCast<If>()) { + if (iff->type == none) { + // perhaps we need just the condition? + if (tryToReplaceCurrent(builder->makeDrop(iff->condition))) { + return; + } + } + handleCondition(iff->condition); + } else if (auto* br = curr->dynCast<Break>()) { + handleCondition(br->condition); + } else if (auto* select = curr->dynCast<Select>()) { + handleCondition(select->condition); + } else if (auto* sw = curr->dynCast<Switch>()) { + handleCondition(sw->condition); + } else if (auto* set = curr->dynCast<SetLocal>()) { + if (set->isTee()) { + // maybe we don't need the set + tryToReplaceCurrent(set->value); + } + } else if (auto* unary = curr->dynCast<Unary>()) { + // maybe we can pass through + tryToReplaceCurrent(unary->value); + } else if (auto* binary = curr->dynCast<Binary>()) { + // maybe we can pass through + if (!tryToReplaceCurrent(binary->left)) { + tryToReplaceCurrent(binary->right); + } + } else if (auto* call = curr->dynCast<Call>()) { + handleCall(call); + } else if (auto* call = curr->dynCast<CallImport>()) { + handleCall(call); + } else if (auto* call = curr->dynCast<CallIndirect>()) { + if (tryToReplaceCurrent(call->target)) return; + handleCall(call); + } + } + + void visitFunction(Function* curr) { + // extra chance to work on the function toplevel element, as if it can + // be reduced it's great + visitExpression(curr->body); + // finish function + funcsSeen++; + static int last = 0; + int percentage = (100 * funcsSeen) / getModule()->functions.size(); + if (std::abs(percentage - last) >= 5) { + std::cerr << "| " << percentage << "% of funcs complete\n"; + last = percentage; + } + } + + // TODO: bisection on segment shrinking? + + void visitTable(Table* curr) { + std::cerr << "| try to simplify table\n"; + Name first; + for (auto& segment : curr->segments) { + for (auto item : segment.data) { + first = item; + break; + } + if (!first.isNull()) break; + } + visitSegmented(curr, first, 100); + } + + void visitMemory(Memory* curr) { + std::cerr << "| try to simplify memory\n"; + visitSegmented(curr, 0, 2); + } + + template<typename T, typename U> + void visitSegmented(T* curr, U zero, size_t bonus) { + // try to reduce to first function + // shrink segment elements + for (auto& segment : curr->segments) { + auto& data = segment.data; + size_t skip = 1; // when we succeed, try to shrink by more and more, similar to bisection + for (size_t i = 0; i < data.size() && !data.empty(); i++) { + if (!shouldTryToReduce(bonus)) continue; + auto save = data; + for (size_t j = 0; j < skip; j++) { + if (!data.empty()) data.pop_back(); + } + if (writeAndTestReduction()) { + std::cerr << "| shrank segment (skip: " << skip << ")\n"; + noteReduction(); + skip = std::min(size_t(factor), 2 * skip); + } else { + data = save; + break; + } + } + } + // the "opposite" of shrinking: copy a 'zero' element + for (auto& segment : curr->segments) { + if (segment.data.empty()) continue; + for (auto& item : segment.data) { + if (!shouldTryToReduce(bonus)) continue; + if (item == zero) continue; + auto save = item; + item = zero; + if (writeAndTestReduction()) { + std::cerr << "| zeroed segment\n"; + noteReduction(); + } else { + item = save; + } + } + } + } + + void visitModule(Module* curr) { + // try to remove exports + std::cerr << "| try to remove exports\n"; + std::vector<Export> exports; + for (auto& exp : curr->exports) { + if (!shouldTryToReduce(10000)) continue; + exports.push_back(*exp); + } + for (auto& exp : exports) { + curr->removeExport(exp.name); + if (!writeAndTestReduction()) { + curr->addExport(new Export(exp)); + } else { + std::cerr << "| removed export " << exp.name << '\n'; + noteReduction(); + } + } + // try to remove functions + std::cerr << "| try to remove functions\n"; + std::vector<Function> functions; + for (auto& func : curr->functions) { + if (!shouldTryToReduce(10000)) continue; + functions.push_back(*func); + } + for (auto& func : functions) { + curr->removeFunction(func.name); + WasmValidator validator; + validator.quiet = true; + if (validator.validate(*curr) && writeAndTestReduction()) { + std::cerr << "| removed function " << func.name << '\n'; + noteReduction(); + } else { + curr->addFunction(new Function(func)); + } + } + } + + // helpers + + // try to replace condition with always true and always false + void handleCondition(Expression*& condition) { + if (!condition) return; + if (condition->is<Const>()) return; + auto* c = builder->makeConst(Literal(int32_t(0))); + if (!tryToReplaceChild(condition, c)) { + c->value = Literal(int32_t(1)); + tryToReplaceChild(condition, c); + } + } + + template<typename T> + void handleCall(T* call) { + for (auto* op : call->operands) { + if (tryToReplaceCurrent(op)) return; + } + } + + bool tryToReduceCurrentToNone() { + auto* curr = getCurrent(); + if (curr->is<Nop>()) return false; + // try to replace with a trivial value + Nop nop; + if (tryToReplaceCurrent(&nop)) { + replaceCurrent(builder->makeNop()); + return true; + } + return false; + } + + // try to replace a concrete value with a trivial constant + bool tryToReduceCurrentToConst() { + auto* curr = getCurrent(); + if (curr->is<Const>()) return false; + // try to replace with a trivial value + Const* c = builder->makeConst(Literal(int32_t(0))); + if (tryToReplaceCurrent(c)) return true; + c->value = LiteralUtils::makeLiteralFromInt32(1, curr->type); + c->type = curr->type; + return tryToReplaceCurrent(c); + } + + bool tryToReduceCurrentToUnreachable() { + auto* curr = getCurrent(); + if (curr->is<Unreachable>()) return false; + // try to replace with a trivial value + Unreachable un; + if (tryToReplaceCurrent(&un)) { + replaceCurrent(builder->makeUnreachable()); + return true; + } + // maybe a return? TODO + return false; + } +}; + +// +// main +// + +int main(int argc, const char* argv[]) { + std::string input, test, working, command; + bool verbose = false, + debugInfo = false, + force = false; + Options options("wasm-reduce", "Reduce a wasm file to a smaller one that has the same behavior on a given command"); + options + .add("--command", "-cmd", "The command to run on the test, that we want to reduce while keeping the command's output identical. " + "We look at the command's return code and stdout here (TODO: stderr), " + "and we reduce while keeping those unchanged.", + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + command = argument; + }) + .add("--test", "-t", "Test file (this will be written to to test, the given command should read it when we call it)", + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + test = argument; + }) + .add("--working", "-w", "Working file (this will contain the current good state while doing temporary computations, " + "and will contain the final best result at the end)", + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + working = argument; + }) + .add("--verbose", "-v", "Verbose output mode", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + verbose = true; + }) + .add("--debugInfo", "-g", "Keep debug info in binaries", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + debugInfo = true; + }) + .add("--force", "-f", "Force the reduction attempt, ignoring problems that imply it is unlikely to succeed", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + force = true; + }) + .add_positional("INFILE", Options::Arguments::One, + [&](Options* o, const std::string& argument) { + input = argument; + }); + options.parse(argc, argv); + + if (test.size() == 0) Fatal() << "test file not provided\n"; + if (working.size() == 0) Fatal() << "working file not provided\n"; + + std::cerr << "|input: " << input << '\n'; + std::cerr << "|test: " << test << '\n'; + std::cerr << "|working: " << working << '\n'; + + // get the expected output + copy_file(input, test); + expected.getFromExecution(command); + + std::cerr << "|expected result:\n" << expected << '\n'; + + auto stopIfNotForced = [&](std::string message, ProgramResult& result) { + std::cerr << "|! " << message << '\n' << result << '\n'; + if (!force) { + Fatal() << "|! stopping, as it is very unlikely reduction can succeed (use -f to ignore this check)"; + } + }; + + std::cerr << "|checking that command has different behavior on invalid binary\n"; + { + { + std::ofstream dst(test, std::ios::binary); + dst << "waka waka\n"; + } + ProgramResult result(command); + if (result == expected) { + stopIfNotForced("running command on an invalid module should give different results", result); + } + } + + std::cerr << "|checking that command has expected behavior on canonicalized (read-written) binary\n"; + { + // read and write it + ProgramResult readWrite(std::string("bin/wasm-opt ") + input + " -o " + test); + if (readWrite.failed()) { + stopIfNotForced("failed to read and write the binary", readWrite); + } else { + ProgramResult result(command); + if (result != expected) { + stopIfNotForced("running command on the canonicalized module should give the same results", result); + } + } + } + + copy_file(input, working); + std::cerr << "|input size: " << file_size(working) << "\n"; + + std::cerr << "|starting reduction!\n"; + + int factor = 4096; + size_t lastDestructiveReductions = 0; + size_t lastPostPassesSize = 0; + + bool stopping = false; + + while (1) { + Reducer reducer(command, test, working, verbose, debugInfo); + + // run binaryen optimization passes to reduce. passes are fast to run + // and can often reduce large amounts of code efficiently, as opposed + // to detructive reduction (i.e., that doesn't preserve correctness as + // passes do) since destrucive must operate one change at a time + std::cerr << "| reduce using passes...\n"; + auto oldSize = file_size(working); + reducer.reduceUsingPasses(); + auto newSize = file_size(working); + auto passProgress = oldSize - newSize; + std::cerr << "| after pass reduction: " << newSize << "\n"; + + // always stop after a pass reduction attempt, for final cleanup + if (stopping) break; + + // check if the full cycle (destructive/passes) has helped or not + if (lastPostPassesSize && newSize >= lastPostPassesSize) { + std::cerr << "| progress has stopped, skipping to the end\n"; + if (factor == 1) { + // this is after doing work with factor 1, so after the remaining work, stop + stopping = true; + } else { + // just try to remove all we can and finish up + factor = 1; + } + } + lastPostPassesSize = newSize; + + // if destructive reductions lead to useful proportionate pass reductions, keep + // going at the same factor, as pass reductions are far faster + std::cerr << "| pass progress: " << passProgress << ", last destructive: " << lastDestructiveReductions << '\n'; + if (passProgress >= 4 * lastDestructiveReductions) { + // don't change + std::cerr << "| progress is good, do not quickly decrease factor\n"; + } else { + if (factor > 10) { + factor = (factor / 3) + 1; + } else { + factor = (factor + 1) / 2; // stable on 1 + } + } + + // no point in a factor lorger than the size + assert(newSize > 4); // wasm modules are >4 bytes anyhow + factor = std::min(factor, int(newSize) / 4); + + // try to reduce destructively. if a high factor fails to find anything, + // quickly try a lower one (no point in doing passes until we reduce + // destructively at least a little) + while (1) { + std::cerr << "| reduce destructively... (factor: " << factor << ")\n"; + lastDestructiveReductions = reducer.reduceDestructively(factor); + if (lastDestructiveReductions > 0) break; + // we failed to reduce destructively + if (factor == 1) { + stopping = true; + break; + } + factor = std::max(1, factor / 4); // quickly now, try to find *something* we can reduce + } + + std::cerr << "| destructive reduction led to size: " << file_size(working) << '\n'; + } + std::cerr << "|finished, final size: " << file_size(working) << "\n"; + copy_file(working, test); // just to avoid confusion +} + |