summaryrefslogtreecommitdiff
path: root/src/tools/wasm-reduce.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/wasm-reduce.cpp')
-rw-r--r--src/tools/wasm-reduce.cpp661
1 files changed, 661 insertions, 0 deletions
diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp
new file mode 100644
index 000000000..355b70278
--- /dev/null
+++ b/src/tools/wasm-reduce.cpp
@@ -0,0 +1,661 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Tries to reduce the input wasm into the smallest possible wasm
+// that still generates the same result on a given command. This is
+// useful to reduce bug testcases, for example, if a file crashes
+// the optimizer, you can reduce it to find the smallest file that
+// also crashes it (which generally will show the same bug, in a
+// much more debuggable manner).
+//
+
+#include <memory>
+#include <cstdio>
+#include <cstdlib>
+
+#include "pass.h"
+#include "support/command-line.h"
+#include "support/file.h"
+#include "wasm-io.h"
+#include "wasm-builder.h"
+#include "ast/literal-utils.h"
+#include "wasm-validator.h"
+
+using namespace wasm;
+
+struct ProgramResult {
+ int code;
+ std::string output;
+
+ ProgramResult() {}
+ ProgramResult(std::string command) {
+ getFromExecution(command);
+ }
+
+ // runs the command and notes the output
+ // TODO: also stderr, not just stdout?
+ void getFromExecution(std::string command) {
+ // do this using just core stdio.h and stdlib.h, for portability
+ // sadly this requires two invokes
+ code = system(("timeout 2s " + command + " > /dev/null 2> /dev/null").c_str());
+ const int MAX_BUFFER = 1024;
+ char buffer[MAX_BUFFER];
+ FILE *stream = popen(("timeout 2s " + command + " 2> /dev/null").c_str(), "r");
+ while (fgets(buffer, MAX_BUFFER, stream) != NULL) {
+ output.append(buffer);
+ }
+ pclose(stream);
+ }
+
+ bool operator==(ProgramResult& other) {
+ return code == other.code && output == other.output;
+ }
+ bool operator!=(ProgramResult& other) {
+ return !(*this == other);
+ }
+
+ bool failed() {
+ return code != 0;
+ }
+
+ void dump(std::ostream& o) {
+ o << "[ProgramResult] code: " << code << " stdout: \n" << output << "[/ProgramResult]\n";
+ }
+};
+
+namespace std {
+
+inline std::ostream& operator<<(std::ostream& o, ProgramResult& result) {
+ result.dump(o);
+ return o;
+}
+
+}
+
+ProgramResult expected;
+
+struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<Reducer>>> {
+ std::string command, test, working;
+ bool verbose, debugInfo;
+
+ // test is the file we write to that the command will operate on
+ // working is the current temporary state, the reduction so far
+ Reducer(std::string command, std::string test, std::string working, bool verbose, bool debugInfo) : command(command), test(test), working(working), verbose(verbose), debugInfo(debugInfo) {}
+
+ // runs passes in order to reduce, until we can't reduce any more
+ // the criterion here is wasm binary size
+ void reduceUsingPasses() {
+ // run optimization passes until we can't shrink it any more
+ std::vector<std::string> passes = {
+ "-Oz",
+ "-Os",
+ "-O1",
+ "-O2",
+ "-O3",
+ "--coalesce-locals --vacuum",
+ "--dce",
+ "--duplicate-function-elimination",
+ "--inlining",
+ "--inlining-optimizing",
+ "--optimize-level=3 --inlining-optimizing",
+ "--local-cse --vacuum",
+ "--memory-packing",
+ "--remove-unused-names --merge-blocks --vacuum",
+ "--optimize-instructions",
+ "--precompute",
+ "--remove-imports",
+ "--remove-memory",
+ "--remove-unused-names --remove-unused-brs",
+ "--remove-unused-module-elements",
+ "--reorder-functions",
+ "--reorder-locals",
+ "--simplify-locals --vacuum",
+ "--vacuum"
+ };
+ auto oldSize = file_size(working);
+ bool more = true;
+ while (more) {
+ //std::cerr << "| starting passes loop iteration\n";
+ more = false;
+ // try both combining with a generic shrink (so minor pass overhead is compensated for), and without
+ for (auto shrinking : { false, true }) {
+ for (auto pass : passes) {
+ std::string currCommand = "bin/wasm-opt ";
+ if (shrinking) currCommand += " --dce --vacuum ";
+ currCommand += working + " -o " + test + " " + pass;
+ if (debugInfo) currCommand += " -g ";
+ if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n";
+ if (!ProgramResult(currCommand).failed()) {
+ auto newSize = file_size(test);
+ if (newSize < oldSize) {
+ // the pass didn't fail, and the size looks smaller, so promising
+ // see if it is still has the property we are preserving
+ if (ProgramResult(command) == expected) {
+ std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n";
+ copy_file(test, working);
+ more = true;
+ oldSize = newSize;
+ }
+ }
+ }
+ }
+ }
+ }
+ if (verbose) std::cerr << "| done with passes for now\n";
+ }
+
+ // does one pass of slow and destructive reduction. returns whether it
+ // succeeded or not
+ // the criterion here is a logical change in the program. this may actually
+ // increase wasm size in some cases, but it should allow more reduction later.
+ // @param factor how much to ignore. starting with a high factor skips through
+ // most of the file, which is often faster than going one by one
+ // from the start
+ size_t reduceDestructively(int factor_) {
+ factor = factor_;
+ Module wasm;
+ ModuleReader reader;
+ reader.read(working, wasm);
+ // prepare
+ reduced = 0;
+ builder = make_unique<Builder>(wasm);
+ funcsSeen = 0;
+ // before we do any changes, it should be valid to write out the module:
+ // size should be as expected, and output should be as expected
+ setModule(&wasm);
+ if (!writeAndTestReduction()) {
+ std::cerr << "\n|! WARNING: writing before destructive reduction fails, very unlikely reduction can work\n\n";
+ }
+ // destroy!
+ walkModule(&wasm);
+ return reduced;
+ }
+
+ // destructive reduction state
+
+ size_t reduced;
+ Expression* beforeReduction;
+ std::unique_ptr<Builder> builder;
+ Index funcsSeen;
+ int factor;
+
+ // write the module and see if the command still fails on it as expected
+ bool writeAndTestReduction() {
+ ProgramResult result;
+ return writeAndTestReduction(result);
+ }
+
+ bool writeAndTestReduction(ProgramResult& out) {
+ // write the module out
+ ModuleWriter writer;
+ writer.setBinary(true);
+ writer.setDebugInfo(debugInfo);
+ writer.write(*getModule(), test);
+ // note that it is ok for the destructively-reduced module to be bigger
+ // than the previous - each destructive reduction removes logical code,
+ // and so is strictly better, even if the wasm binary format happens to
+ // encode things slightly less efficiently.
+ // test it
+ out.getFromExecution(command);
+ return out == expected;
+ }
+
+ bool shouldTryToReduce(size_t bonus = 1) {
+ static size_t counter = 0;
+ counter += bonus;
+ return (counter % factor) <= bonus;
+ }
+
+ // tests a reduction on the current traversal node, and undos if it failed
+ bool tryToReplaceCurrent(Expression* with) {
+ auto* curr = getCurrent();
+ //std::cerr << "try " << curr << " => " << with << '\n';
+ if (curr->type != with->type) return false;
+ if (!shouldTryToReduce()) return false;
+ replaceCurrent(with);
+ if (!writeAndTestReduction()) {
+ replaceCurrent(curr);
+ return false;
+ }
+ std::cerr << "| tryToReplaceCurrent succeeded (in " << getLocation() << ")\n";
+ noteReduction();
+ return true;
+ }
+
+ void noteReduction() {
+ reduced++;
+ copy_file(test, working);
+ }
+
+ // tests a reduction on an arbitrary child
+ bool tryToReplaceChild(Expression*& child, Expression* with) {
+ if (child->type != with->type) return false;
+ if (!shouldTryToReduce()) return false;
+ auto* before = child;
+ child = with;
+ if (!writeAndTestReduction()) {
+ child = before;
+ return false;
+ }
+ std::cerr << "| tryToReplaceChild succeeded (in " << getLocation() << ")\n";
+ //std::cerr << "| " << before << " => " << with << '\n';
+ noteReduction();
+ return true;
+ }
+
+ std::string getLocation() {
+ if (getFunction()) return getFunction()->name.str;
+ return "(non-function context)";
+ }
+
+ // visitors. in each we try to remove code in a destructive and nontrivial way.
+ // "nontrivial" means something that optimization passes can't achieve, since we
+ // don't need to duplicate work that they do
+
+ void visitExpression(Expression* curr) {
+ if (curr->type == none) {
+ if (tryToReduceCurrentToNone()) return;
+ } else if (isConcreteWasmType(curr->type)) {
+ if (tryToReduceCurrentToConst()) return;
+ } else {
+ assert(curr->type == unreachable);
+ if (tryToReduceCurrentToUnreachable()) return;
+ }
+ // specific reductions
+ if (auto* iff = curr->dynCast<If>()) {
+ if (iff->type == none) {
+ // perhaps we need just the condition?
+ if (tryToReplaceCurrent(builder->makeDrop(iff->condition))) {
+ return;
+ }
+ }
+ handleCondition(iff->condition);
+ } else if (auto* br = curr->dynCast<Break>()) {
+ handleCondition(br->condition);
+ } else if (auto* select = curr->dynCast<Select>()) {
+ handleCondition(select->condition);
+ } else if (auto* sw = curr->dynCast<Switch>()) {
+ handleCondition(sw->condition);
+ } else if (auto* set = curr->dynCast<SetLocal>()) {
+ if (set->isTee()) {
+ // maybe we don't need the set
+ tryToReplaceCurrent(set->value);
+ }
+ } else if (auto* unary = curr->dynCast<Unary>()) {
+ // maybe we can pass through
+ tryToReplaceCurrent(unary->value);
+ } else if (auto* binary = curr->dynCast<Binary>()) {
+ // maybe we can pass through
+ if (!tryToReplaceCurrent(binary->left)) {
+ tryToReplaceCurrent(binary->right);
+ }
+ } else if (auto* call = curr->dynCast<Call>()) {
+ handleCall(call);
+ } else if (auto* call = curr->dynCast<CallImport>()) {
+ handleCall(call);
+ } else if (auto* call = curr->dynCast<CallIndirect>()) {
+ if (tryToReplaceCurrent(call->target)) return;
+ handleCall(call);
+ }
+ }
+
+ void visitFunction(Function* curr) {
+ // extra chance to work on the function toplevel element, as if it can
+ // be reduced it's great
+ visitExpression(curr->body);
+ // finish function
+ funcsSeen++;
+ static int last = 0;
+ int percentage = (100 * funcsSeen) / getModule()->functions.size();
+ if (std::abs(percentage - last) >= 5) {
+ std::cerr << "| " << percentage << "% of funcs complete\n";
+ last = percentage;
+ }
+ }
+
+ // TODO: bisection on segment shrinking?
+
+ void visitTable(Table* curr) {
+ std::cerr << "| try to simplify table\n";
+ Name first;
+ for (auto& segment : curr->segments) {
+ for (auto item : segment.data) {
+ first = item;
+ break;
+ }
+ if (!first.isNull()) break;
+ }
+ visitSegmented(curr, first, 100);
+ }
+
+ void visitMemory(Memory* curr) {
+ std::cerr << "| try to simplify memory\n";
+ visitSegmented(curr, 0, 2);
+ }
+
+ template<typename T, typename U>
+ void visitSegmented(T* curr, U zero, size_t bonus) {
+ // try to reduce to first function
+ // shrink segment elements
+ for (auto& segment : curr->segments) {
+ auto& data = segment.data;
+ size_t skip = 1; // when we succeed, try to shrink by more and more, similar to bisection
+ for (size_t i = 0; i < data.size() && !data.empty(); i++) {
+ if (!shouldTryToReduce(bonus)) continue;
+ auto save = data;
+ for (size_t j = 0; j < skip; j++) {
+ if (!data.empty()) data.pop_back();
+ }
+ if (writeAndTestReduction()) {
+ std::cerr << "| shrank segment (skip: " << skip << ")\n";
+ noteReduction();
+ skip = std::min(size_t(factor), 2 * skip);
+ } else {
+ data = save;
+ break;
+ }
+ }
+ }
+ // the "opposite" of shrinking: copy a 'zero' element
+ for (auto& segment : curr->segments) {
+ if (segment.data.empty()) continue;
+ for (auto& item : segment.data) {
+ if (!shouldTryToReduce(bonus)) continue;
+ if (item == zero) continue;
+ auto save = item;
+ item = zero;
+ if (writeAndTestReduction()) {
+ std::cerr << "| zeroed segment\n";
+ noteReduction();
+ } else {
+ item = save;
+ }
+ }
+ }
+ }
+
+ void visitModule(Module* curr) {
+ // try to remove exports
+ std::cerr << "| try to remove exports\n";
+ std::vector<Export> exports;
+ for (auto& exp : curr->exports) {
+ if (!shouldTryToReduce(10000)) continue;
+ exports.push_back(*exp);
+ }
+ for (auto& exp : exports) {
+ curr->removeExport(exp.name);
+ if (!writeAndTestReduction()) {
+ curr->addExport(new Export(exp));
+ } else {
+ std::cerr << "| removed export " << exp.name << '\n';
+ noteReduction();
+ }
+ }
+ // try to remove functions
+ std::cerr << "| try to remove functions\n";
+ std::vector<Function> functions;
+ for (auto& func : curr->functions) {
+ if (!shouldTryToReduce(10000)) continue;
+ functions.push_back(*func);
+ }
+ for (auto& func : functions) {
+ curr->removeFunction(func.name);
+ WasmValidator validator;
+ validator.quiet = true;
+ if (validator.validate(*curr) && writeAndTestReduction()) {
+ std::cerr << "| removed function " << func.name << '\n';
+ noteReduction();
+ } else {
+ curr->addFunction(new Function(func));
+ }
+ }
+ }
+
+ // helpers
+
+ // try to replace condition with always true and always false
+ void handleCondition(Expression*& condition) {
+ if (!condition) return;
+ if (condition->is<Const>()) return;
+ auto* c = builder->makeConst(Literal(int32_t(0)));
+ if (!tryToReplaceChild(condition, c)) {
+ c->value = Literal(int32_t(1));
+ tryToReplaceChild(condition, c);
+ }
+ }
+
+ template<typename T>
+ void handleCall(T* call) {
+ for (auto* op : call->operands) {
+ if (tryToReplaceCurrent(op)) return;
+ }
+ }
+
+ bool tryToReduceCurrentToNone() {
+ auto* curr = getCurrent();
+ if (curr->is<Nop>()) return false;
+ // try to replace with a trivial value
+ Nop nop;
+ if (tryToReplaceCurrent(&nop)) {
+ replaceCurrent(builder->makeNop());
+ return true;
+ }
+ return false;
+ }
+
+ // try to replace a concrete value with a trivial constant
+ bool tryToReduceCurrentToConst() {
+ auto* curr = getCurrent();
+ if (curr->is<Const>()) return false;
+ // try to replace with a trivial value
+ Const* c = builder->makeConst(Literal(int32_t(0)));
+ if (tryToReplaceCurrent(c)) return true;
+ c->value = LiteralUtils::makeLiteralFromInt32(1, curr->type);
+ c->type = curr->type;
+ return tryToReplaceCurrent(c);
+ }
+
+ bool tryToReduceCurrentToUnreachable() {
+ auto* curr = getCurrent();
+ if (curr->is<Unreachable>()) return false;
+ // try to replace with a trivial value
+ Unreachable un;
+ if (tryToReplaceCurrent(&un)) {
+ replaceCurrent(builder->makeUnreachable());
+ return true;
+ }
+ // maybe a return? TODO
+ return false;
+ }
+};
+
+//
+// main
+//
+
+int main(int argc, const char* argv[]) {
+ std::string input, test, working, command;
+ bool verbose = false,
+ debugInfo = false,
+ force = false;
+ Options options("wasm-reduce", "Reduce a wasm file to a smaller one that has the same behavior on a given command");
+ options
+ .add("--command", "-cmd", "The command to run on the test, that we want to reduce while keeping the command's output identical. "
+ "We look at the command's return code and stdout here (TODO: stderr), "
+ "and we reduce while keeping those unchanged.",
+ Options::Arguments::One,
+ [&](Options* o, const std::string& argument) {
+ command = argument;
+ })
+ .add("--test", "-t", "Test file (this will be written to to test, the given command should read it when we call it)",
+ Options::Arguments::One,
+ [&](Options* o, const std::string& argument) {
+ test = argument;
+ })
+ .add("--working", "-w", "Working file (this will contain the current good state while doing temporary computations, "
+ "and will contain the final best result at the end)",
+ Options::Arguments::One,
+ [&](Options* o, const std::string& argument) {
+ working = argument;
+ })
+ .add("--verbose", "-v", "Verbose output mode",
+ Options::Arguments::Zero,
+ [&](Options* o, const std::string& argument) {
+ verbose = true;
+ })
+ .add("--debugInfo", "-g", "Keep debug info in binaries",
+ Options::Arguments::Zero,
+ [&](Options* o, const std::string& argument) {
+ debugInfo = true;
+ })
+ .add("--force", "-f", "Force the reduction attempt, ignoring problems that imply it is unlikely to succeed",
+ Options::Arguments::Zero,
+ [&](Options* o, const std::string& argument) {
+ force = true;
+ })
+ .add_positional("INFILE", Options::Arguments::One,
+ [&](Options* o, const std::string& argument) {
+ input = argument;
+ });
+ options.parse(argc, argv);
+
+ if (test.size() == 0) Fatal() << "test file not provided\n";
+ if (working.size() == 0) Fatal() << "working file not provided\n";
+
+ std::cerr << "|input: " << input << '\n';
+ std::cerr << "|test: " << test << '\n';
+ std::cerr << "|working: " << working << '\n';
+
+ // get the expected output
+ copy_file(input, test);
+ expected.getFromExecution(command);
+
+ std::cerr << "|expected result:\n" << expected << '\n';
+
+ auto stopIfNotForced = [&](std::string message, ProgramResult& result) {
+ std::cerr << "|! " << message << '\n' << result << '\n';
+ if (!force) {
+ Fatal() << "|! stopping, as it is very unlikely reduction can succeed (use -f to ignore this check)";
+ }
+ };
+
+ std::cerr << "|checking that command has different behavior on invalid binary\n";
+ {
+ {
+ std::ofstream dst(test, std::ios::binary);
+ dst << "waka waka\n";
+ }
+ ProgramResult result(command);
+ if (result == expected) {
+ stopIfNotForced("running command on an invalid module should give different results", result);
+ }
+ }
+
+ std::cerr << "|checking that command has expected behavior on canonicalized (read-written) binary\n";
+ {
+ // read and write it
+ ProgramResult readWrite(std::string("bin/wasm-opt ") + input + " -o " + test);
+ if (readWrite.failed()) {
+ stopIfNotForced("failed to read and write the binary", readWrite);
+ } else {
+ ProgramResult result(command);
+ if (result != expected) {
+ stopIfNotForced("running command on the canonicalized module should give the same results", result);
+ }
+ }
+ }
+
+ copy_file(input, working);
+ std::cerr << "|input size: " << file_size(working) << "\n";
+
+ std::cerr << "|starting reduction!\n";
+
+ int factor = 4096;
+ size_t lastDestructiveReductions = 0;
+ size_t lastPostPassesSize = 0;
+
+ bool stopping = false;
+
+ while (1) {
+ Reducer reducer(command, test, working, verbose, debugInfo);
+
+ // run binaryen optimization passes to reduce. passes are fast to run
+ // and can often reduce large amounts of code efficiently, as opposed
+ // to detructive reduction (i.e., that doesn't preserve correctness as
+ // passes do) since destrucive must operate one change at a time
+ std::cerr << "| reduce using passes...\n";
+ auto oldSize = file_size(working);
+ reducer.reduceUsingPasses();
+ auto newSize = file_size(working);
+ auto passProgress = oldSize - newSize;
+ std::cerr << "| after pass reduction: " << newSize << "\n";
+
+ // always stop after a pass reduction attempt, for final cleanup
+ if (stopping) break;
+
+ // check if the full cycle (destructive/passes) has helped or not
+ if (lastPostPassesSize && newSize >= lastPostPassesSize) {
+ std::cerr << "| progress has stopped, skipping to the end\n";
+ if (factor == 1) {
+ // this is after doing work with factor 1, so after the remaining work, stop
+ stopping = true;
+ } else {
+ // just try to remove all we can and finish up
+ factor = 1;
+ }
+ }
+ lastPostPassesSize = newSize;
+
+ // if destructive reductions lead to useful proportionate pass reductions, keep
+ // going at the same factor, as pass reductions are far faster
+ std::cerr << "| pass progress: " << passProgress << ", last destructive: " << lastDestructiveReductions << '\n';
+ if (passProgress >= 4 * lastDestructiveReductions) {
+ // don't change
+ std::cerr << "| progress is good, do not quickly decrease factor\n";
+ } else {
+ if (factor > 10) {
+ factor = (factor / 3) + 1;
+ } else {
+ factor = (factor + 1) / 2; // stable on 1
+ }
+ }
+
+ // no point in a factor lorger than the size
+ assert(newSize > 4); // wasm modules are >4 bytes anyhow
+ factor = std::min(factor, int(newSize) / 4);
+
+ // try to reduce destructively. if a high factor fails to find anything,
+ // quickly try a lower one (no point in doing passes until we reduce
+ // destructively at least a little)
+ while (1) {
+ std::cerr << "| reduce destructively... (factor: " << factor << ")\n";
+ lastDestructiveReductions = reducer.reduceDestructively(factor);
+ if (lastDestructiveReductions > 0) break;
+ // we failed to reduce destructively
+ if (factor == 1) {
+ stopping = true;
+ break;
+ }
+ factor = std::max(1, factor / 4); // quickly now, try to find *something* we can reduce
+ }
+
+ std::cerr << "| destructive reduction led to size: " << file_size(working) << '\n';
+ }
+ std::cerr << "|finished, final size: " << file_size(working) << "\n";
+ copy_file(working, test); // just to avoid confusion
+}
+