diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 16 | ||||
-rwxr-xr-x | auto_update_tests.py | 18 | ||||
-rwxr-xr-x | check.py | 20 | ||||
-rw-r--r-- | scripts/test/shared.py | 17 | ||||
-rw-r--r-- | src/passes/RemoveImports.cpp | 6 | ||||
-rw-r--r-- | src/support/file.cpp | 12 | ||||
-rw-r--r-- | src/support/file.h | 9 | ||||
-rw-r--r-- | src/tools/wasm-reduce.cpp | 661 | ||||
-rw-r--r-- | src/wasm-validator.h | 4 | ||||
-rw-r--r-- | src/wasm.h | 1 | ||||
-rw-r--r-- | src/wasm/wasm-validator.cpp | 19 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 10 | ||||
-rw-r--r-- | test/passes/remove-imports.txt | 1 | ||||
-rw-r--r-- | test/passes/remove-imports.wast | 1 | ||||
-rw-r--r-- | test/reduce/destructive.wast | 10 | ||||
-rw-r--r-- | test/reduce/destructive.wast.txt | 9 | ||||
-rw-r--r-- | test/reduce/memory_table.wast | 31 | ||||
-rw-r--r-- | test/reduce/memory_table.wast.txt | 38 | ||||
-rw-r--r-- | test/reduce/simple.wast | 15 | ||||
-rw-r--r-- | test/reduce/simple.wast.txt | 9 |
21 files changed, 889 insertions, 19 deletions
diff --git a/.gitignore b/.gitignore index 7e937aa4c..28042929a 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ shared.bc # autogenerated files by check.py a.* b.* +c.* ab.wast actual.out example.o diff --git a/CMakeLists.txt b/CMakeLists.txt index 103bff049..671a6c162 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -294,3 +294,19 @@ TARGET_LINK_LIBRARIES(wasm-ctor-eval emscripten-optimizer passes wasm asmjs ast SET_PROPERTY(TARGET wasm-ctor-eval PROPERTY CXX_STANDARD 11) SET_PROPERTY(TARGET wasm-ctor-eval PROPERTY CXX_STANDARD_REQUIRED ON) INSTALL(TARGETS wasm-ctor-eval DESTINATION bin) + +IF (UNIX) # TODO: port to windows + + SET(wasm-reduce_SOURCES + src/tools/wasm-reduce.cpp + src/wasm-interpreter.cpp + ) + ADD_EXECUTABLE(wasm-reduce + ${wasm-reduce_SOURCES}) + TARGET_LINK_LIBRARIES(wasm-reduce wasm asmjs passes wasm ast cfg support) + SET_PROPERTY(TARGET wasm-reduce PROPERTY CXX_STANDARD 11) + SET_PROPERTY(TARGET wasm-reduce PROPERTY CXX_STANDARD_REQUIRED ON) + INSTALL(TARGETS wasm-reduce DESTINATION ${CMAKE_INSTALL_BINDIR}) + +ENDIF() + diff --git a/auto_update_tests.py b/auto_update_tests.py index 876c162e2..49dce464e 100755 --- a/auto_update_tests.py +++ b/auto_update_tests.py @@ -5,7 +5,8 @@ import os, sys, subprocess, difflib from scripts.test.support import run_command, split_wast from scripts.test.shared import ( ASM2WASM, MOZJS, S2WASM, WASM_SHELL, WASM_OPT, WASM_AS, WASM_DIS, - WASM_CTOR_EVAL, WASM_MERGE, BINARYEN_INSTALL_DIR) + WASM_CTOR_EVAL, WASM_MERGE, WASM_REDUCE, + BINARYEN_INSTALL_DIR, has_shell_timeout) print '[ processing and updating testcases... ]\n' @@ -260,4 +261,17 @@ for t in os.listdir(os.path.join('test', 'ctor-eval')): out = t + '.out' with open(out, 'w') as o: o.write(actual) -print '\n[ success! ]' +if has_shell_timeout(): + print '\n[ checking wasm-reduce ]\n' + + for t in os.listdir(os.path.join('test', 'reduce')): + if t.endswith('.wast'): + print '..', t + t = os.path.join('test', 'reduce', t) + # convert to wasm + run_command(WASM_AS + [t, '-o', 'a.wasm']) + print run_command(WASM_REDUCE + ['a.wasm', '--command=bin/wasm-opt b.wasm --fuzz-exec', '-t', 'b.wasm', '-w', 'c.wasm']) + expected = t + '.txt' + run_command(WASM_DIS + ['c.wasm', '-o', expected]) + +print '\n[ success! ]' @@ -24,10 +24,10 @@ from scripts.test.support import run_command, split_wast from scripts.test.shared import ( ASM2WASM, BIN_DIR, EMCC, MOZJS, NATIVECC, NATIVEXX, NODEJS, S2WASM_EXE, WASM_AS, WASM_CTOR_EVAL, WASM_OPT, WASM_SHELL, WASM_MERGE, WASM_SHELL_EXE, - WASM_DIS, binary_format_check, delete_from_orbit, fail, fail_with_error, + WASM_DIS, WASM_REDUCE, binary_format_check, delete_from_orbit, fail, fail_with_error, fail_if_not_identical, fail_if_not_contained, has_vanilla_emcc, has_vanilla_llvm, minify_check, num_failures, options, tests, - requested, warnings + requested, warnings, has_shell_timeout ) import scripts.test.s2wasm as s2wasm @@ -321,6 +321,22 @@ for t in os.listdir(os.path.join('test', 'ctor-eval')): with open(out) as f: fail_if_not_identical(f.read(), actual) +if has_shell_timeout(): + print '\n[ checking wasm-reduce ]\n' + + for t in os.listdir(os.path.join('test', 'reduce')): + if t.endswith('.wast'): + print '..', t + t = os.path.join('test', 'reduce', t) + # convert to wasm + run_command(WASM_AS + [t, '-o', 'a.wasm']) + print run_command(WASM_REDUCE + ['a.wasm', '--command=bin/wasm-opt b.wasm --fuzz-exec', '-t', 'b.wasm', '-w', 'c.wasm']) + expected = t + '.txt' + run_command(WASM_DIS + ['c.wasm', '-o', 'a.wast']) + with open('a.wast') as seen: + with open(expected) as correct: + fail_if_not_identical(seen.read(), correct.read()) + print '\n[ checking wasm-shell spec testcases... ]\n' if len(requested) == 0: diff --git a/scripts/test/shared.py b/scripts/test/shared.py index 4f72b4c0e..2ff0be355 100644 --- a/scripts/test/shared.py +++ b/scripts/test/shared.py @@ -163,6 +163,7 @@ WASM_CTOR_EVAL = [os.path.join(options.binaryen_bin, 'wasm-ctor-eval')] WASM_SHELL = [os.path.join(options.binaryen_bin, 'wasm-shell')] WASM_MERGE = [os.path.join(options.binaryen_bin, 'wasm-merge')] S2WASM = [os.path.join(options.binaryen_bin, 's2wasm')] +WASM_REDUCE = [os.path.join(options.binaryen_bin, 'wasm-reduce')] S2WASM_EXE = S2WASM[0] WASM_SHELL_EXE = WASM_SHELL[0] @@ -188,12 +189,20 @@ if options.valgrind: os.environ['BINARYEN'] = os.getcwd() +def get_platform(): + return {'linux2': 'linux', + 'darwin': 'mac', + 'win32': 'windows', + 'cygwin': 'windows'}[sys.platform] + + +def has_shell_timeout(): + return get_platform() != 'windows' and os.system('timeout 1s pwd') == 0 + + def fetch_waterfall(): rev = open(os.path.join(options.binaryen_test, 'revision')).read().strip() - buildername = {'linux2': 'linux', - 'darwin': 'mac', - 'win32': 'windows', - 'cygwin': 'windows'}[sys.platform] + buildername = get_platform() local_rev_path = os.path.join(WATERFALL_BUILD_DIR, 'local-revision') if os.path.exists(local_rev_path): with open(local_rev_path) as f: diff --git a/src/passes/RemoveImports.cpp b/src/passes/RemoveImports.cpp index 88fb43520..6de151230 100644 --- a/src/passes/RemoveImports.cpp +++ b/src/passes/RemoveImports.cpp @@ -15,7 +15,7 @@ */ // -// Removeds imports, and replaces them with nops. This is useful +// Removes function imports, and replaces them with nops. This is useful // for running a module through the reference interpreter, which // does not validate imports for a JS environment (by removing // imports, we can at least get the reference interpreter to @@ -42,7 +42,9 @@ struct RemoveImports : public WalkerPass<PostWalker<RemoveImports>> { void visitModule(Module *curr) { std::vector<Name> names; for (auto& import : curr->imports) { - names.push_back(import->name); + if (import->kind == ExternalKind::Function) { + names.push_back(import->name); + } } for (auto& name : names) { curr->removeImport(name); diff --git a/src/support/file.cpp b/src/support/file.cpp index c1d9e6e33..bd6f2d4bd 100644 --- a/src/support/file.cpp +++ b/src/support/file.cpp @@ -73,3 +73,15 @@ wasm::Output::Output(const std::string &filename, Flags::BinaryOption binary, Fl } return buffer; }()) {} + +void wasm::copy_file(std::string input, std::string output) { + std::ifstream src(input, std::ios::binary); + std::ofstream dst(output, std::ios::binary); + dst << src.rdbuf(); +} + +size_t wasm::file_size(std::string filename) { + std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary); + return infile.tellg(); +} + diff --git a/src/support/file.h b/src/support/file.h index 0f0ec15f3..b0df8fd06 100644 --- a/src/support/file.h +++ b/src/support/file.h @@ -66,6 +66,13 @@ class Output { std::ofstream outfile; std::ostream out; }; -} + +// Copies a file to another file +void copy_file(std::string input, std::string output); + +// Retusn the size of a file +size_t file_size(std::string filename); + +} // namespace wasm #endif // wasm_support_file_h diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp new file mode 100644 index 000000000..355b70278 --- /dev/null +++ b/src/tools/wasm-reduce.cpp @@ -0,0 +1,661 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Tries to reduce the input wasm into the smallest possible wasm +// that still generates the same result on a given command. This is +// useful to reduce bug testcases, for example, if a file crashes +// the optimizer, you can reduce it to find the smallest file that +// also crashes it (which generally will show the same bug, in a +// much more debuggable manner). +// + +#include <memory> +#include <cstdio> +#include <cstdlib> + +#include "pass.h" +#include "support/command-line.h" +#include "support/file.h" +#include "wasm-io.h" +#include "wasm-builder.h" +#include "ast/literal-utils.h" +#include "wasm-validator.h" + +using namespace wasm; + +struct ProgramResult { + int code; + std::string output; + + ProgramResult() {} + ProgramResult(std::string command) { + getFromExecution(command); + } + + // runs the command and notes the output + // TODO: also stderr, not just stdout? + void getFromExecution(std::string command) { + // do this using just core stdio.h and stdlib.h, for portability + // sadly this requires two invokes + code = system(("timeout 2s " + command + " > /dev/null 2> /dev/null").c_str()); + const int MAX_BUFFER = 1024; + char buffer[MAX_BUFFER]; + FILE *stream = popen(("timeout 2s " + command + " 2> /dev/null").c_str(), "r"); + while (fgets(buffer, MAX_BUFFER, stream) != NULL) { + output.append(buffer); + } + pclose(stream); + } + + bool operator==(ProgramResult& other) { + return code == other.code && output == other.output; + } + bool operator!=(ProgramResult& other) { + return !(*this == other); + } + + bool failed() { + return code != 0; + } + + void dump(std::ostream& o) { + o << "[ProgramResult] code: " << code << " stdout: \n" << output << "[/ProgramResult]\n"; + } +}; + +namespace std { + +inline std::ostream& operator<<(std::ostream& o, ProgramResult& result) { + result.dump(o); + return o; +} + +} + +ProgramResult expected; + +struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<Reducer>>> { + std::string command, test, working; + bool verbose, debugInfo; + + // test is the file we write to that the command will operate on + // working is the current temporary state, the reduction so far + Reducer(std::string command, std::string test, std::string working, bool verbose, bool debugInfo) : command(command), test(test), working(working), verbose(verbose), debugInfo(debugInfo) {} + + // runs passes in order to reduce, until we can't reduce any more + // the criterion here is wasm binary size + void reduceUsingPasses() { + // run optimization passes until we can't shrink it any more + std::vector<std::string> passes = { + "-Oz", + "-Os", + "-O1", + "-O2", + "-O3", + "--coalesce-locals --vacuum", + "--dce", + "--duplicate-function-elimination", + "--inlining", + "--inlining-optimizing", + "--optimize-level=3 --inlining-optimizing", + "--local-cse --vacuum", + "--memory-packing", + "--remove-unused-names --merge-blocks --vacuum", + "--optimize-instructions", + "--precompute", + "--remove-imports", + "--remove-memory", + "--remove-unused-names --remove-unused-brs", + "--remove-unused-module-elements", + "--reorder-functions", + "--reorder-locals", + "--simplify-locals --vacuum", + "--vacuum" + }; + auto oldSize = file_size(working); + bool more = true; + while (more) { + //std::cerr << "| starting passes loop iteration\n"; + more = false; + // try both combining with a generic shrink (so minor pass overhead is compensated for), and without + for (auto shrinking : { false, true }) { + for (auto pass : passes) { + std::string currCommand = "bin/wasm-opt "; + if (shrinking) currCommand += " --dce --vacuum "; + currCommand += working + " -o " + test + " " + pass; + if (debugInfo) currCommand += " -g "; + if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n"; + if (!ProgramResult(currCommand).failed()) { + auto newSize = file_size(test); + if (newSize < oldSize) { + // the pass didn't fail, and the size looks smaller, so promising + // see if it is still has the property we are preserving + if (ProgramResult(command) == expected) { + std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n"; + copy_file(test, working); + more = true; + oldSize = newSize; + } + } + } + } + } + } + if (verbose) std::cerr << "| done with passes for now\n"; + } + + // does one pass of slow and destructive reduction. returns whether it + // succeeded or not + // the criterion here is a logical change in the program. this may actually + // increase wasm size in some cases, but it should allow more reduction later. + // @param factor how much to ignore. starting with a high factor skips through + // most of the file, which is often faster than going one by one + // from the start + size_t reduceDestructively(int factor_) { + factor = factor_; + Module wasm; + ModuleReader reader; + reader.read(working, wasm); + // prepare + reduced = 0; + builder = make_unique<Builder>(wasm); + funcsSeen = 0; + // before we do any changes, it should be valid to write out the module: + // size should be as expected, and output should be as expected + setModule(&wasm); + if (!writeAndTestReduction()) { + std::cerr << "\n|! WARNING: writing before destructive reduction fails, very unlikely reduction can work\n\n"; + } + // destroy! + walkModule(&wasm); + return reduced; + } + + // destructive reduction state + + size_t reduced; + Expression* beforeReduction; + std::unique_ptr<Builder> builder; + Index funcsSeen; + int factor; + + // write the module and see if the command still fails on it as expected + bool writeAndTestReduction() { + ProgramResult result; + return writeAndTestReduction(result); + } + + bool writeAndTestReduction(ProgramResult& out) { + // write the module out + ModuleWriter writer; + writer.setBinary(true); + writer.setDebugInfo(debugInfo); + writer.write(*getModule(), test); + // note that it is ok for the destructively-reduced module to be bigger + // than the previous - each destructive reduction removes logical code, + // and so is strictly better, even if the wasm binary format happens to + // encode things slightly less efficiently. + // test it + out.getFromExecution(command); + return out == expected; + } + + bool shouldTryToReduce(size_t bonus = 1) { + static size_t counter = 0; + counter += bonus; + return (counter % factor) <= bonus; + } + + // tests a reduction on the current traversal node, and undos if it failed + bool tryToReplaceCurrent(Expression* with) { + auto* curr = getCurrent(); + //std::cerr << "try " << curr << " => " << with << '\n'; + if (curr->type != with->type) return false; + if (!shouldTryToReduce()) return false; + replaceCurrent(with); + if (!writeAndTestReduction()) { + replaceCurrent(curr); + return false; + } + std::cerr << "| tryToReplaceCurrent succeeded (in " << getLocation() << ")\n"; + noteReduction(); + return true; + } + + void noteReduction() { + reduced++; + copy_file(test, working); + } + + // tests a reduction on an arbitrary child + bool tryToReplaceChild(Expression*& child, Expression* with) { + if (child->type != with->type) return false; + if (!shouldTryToReduce()) return false; + auto* before = child; + child = with; + if (!writeAndTestReduction()) { + child = before; + return false; + } + std::cerr << "| tryToReplaceChild succeeded (in " << getLocation() << ")\n"; + //std::cerr << "| " << before << " => " << with << '\n'; + noteReduction(); + return true; + } + + std::string getLocation() { + if (getFunction()) return getFunction()->name.str; + return "(non-function context)"; + } + + // visitors. in each we try to remove code in a destructive and nontrivial way. + // "nontrivial" means something that optimization passes can't achieve, since we + // don't need to duplicate work that they do + + void visitExpression(Expression* curr) { + if (curr->type == none) { + if (tryToReduceCurrentToNone()) return; + } else if (isConcreteWasmType(curr->type)) { + if (tryToReduceCurrentToConst()) return; + } else { + assert(curr->type == unreachable); + if (tryToReduceCurrentToUnreachable()) return; + } + // specific reductions + if (auto* iff = curr->dynCast<If>()) { + if (iff->type == none) { + // perhaps we need just the condition? + if (tryToReplaceCurrent(builder->makeDrop(iff->condition))) { + return; + } + } + handleCondition(iff->condition); + } else if (auto* br = curr->dynCast<Break>()) { + handleCondition(br->condition); + } else if (auto* select = curr->dynCast<Select>()) { + handleCondition(select->condition); + } else if (auto* sw = curr->dynCast<Switch>()) { + handleCondition(sw->condition); + } else if (auto* set = curr->dynCast<SetLocal>()) { + if (set->isTee()) { + // maybe we don't need the set + tryToReplaceCurrent(set->value); + } + } else if (auto* unary = curr->dynCast<Unary>()) { + // maybe we can pass through + tryToReplaceCurrent(unary->value); + } else if (auto* binary = curr->dynCast<Binary>()) { + // maybe we can pass through + if (!tryToReplaceCurrent(binary->left)) { + tryToReplaceCurrent(binary->right); + } + } else if (auto* call = curr->dynCast<Call>()) { + handleCall(call); + } else if (auto* call = curr->dynCast<CallImport>()) { + handleCall(call); + } else if (auto* call = curr->dynCast<CallIndirect>()) { + if (tryToReplaceCurrent(call->target)) return; + handleCall(call); + } + } + + void visitFunction(Function* curr) { + // extra chance to work on the function toplevel element, as if it can + // be reduced it's great + visitExpression(curr->body); + // finish function + funcsSeen++; + static int last = 0; + int percentage = (100 * funcsSeen) / getModule()->functions.size(); + if (std::abs(percentage - last) >= 5) { + std::cerr << "| " << percentage << "% of funcs complete\n"; + last = percentage; + } + } + + // TODO: bisection on segment shrinking? + + void visitTable(Table* curr) { + std::cerr << "| try to simplify table\n"; + Name first; + for (auto& segment : curr->segments) { + for (auto item : segment.data) { + first = item; + break; + } + if (!first.isNull()) break; + } + visitSegmented(curr, first, 100); + } + + void visitMemory(Memory* curr) { + std::cerr << "| try to simplify memory\n"; + visitSegmented(curr, 0, 2); + } + + template<typename T, typename U> + void visitSegmented(T* curr, U zero, size_t bonus) { + // try to reduce to first function + // shrink segment elements + for (auto& segment : curr->segments) { + auto& data = segment.data; + size_t skip = 1; // when we succeed, try to shrink by more and more, similar to bisection + for (size_t i = 0; i < data.size() && !data.empty(); i++) { + if (!shouldTryToReduce(bonus)) continue; + auto save = data; + for (size_t j = 0; j < skip; j++) { + if (!data.empty()) data.pop_back(); + } + if (writeAndTestReduction()) { + std::cerr << "| shrank segment (skip: " << skip << ")\n"; + noteReduction(); + skip = std::min(size_t(factor), 2 * skip); + } else { + data = save; + break; + } + } + } + // the "opposite" of shrinking: copy a 'zero' element + for (auto& segment : curr->segments) { + if (segment.data.empty()) continue; + for (auto& item : segment.data) { + if (!shouldTryToReduce(bonus)) continue; + if (item == zero) continue; + auto save = item; + item = zero; + if (writeAndTestReduction()) { + std::cerr << "| zeroed segment\n"; + noteReduction(); + } else { + item = save; + } + } + } + } + + void visitModule(Module* curr) { + // try to remove exports + std::cerr << "| try to remove exports\n"; + std::vector<Export> exports; + for (auto& exp : curr->exports) { + if (!shouldTryToReduce(10000)) continue; + exports.push_back(*exp); + } + for (auto& exp : exports) { + curr->removeExport(exp.name); + if (!writeAndTestReduction()) { + curr->addExport(new Export(exp)); + } else { + std::cerr << "| removed export " << exp.name << '\n'; + noteReduction(); + } + } + // try to remove functions + std::cerr << "| try to remove functions\n"; + std::vector<Function> functions; + for (auto& func : curr->functions) { + if (!shouldTryToReduce(10000)) continue; + functions.push_back(*func); + } + for (auto& func : functions) { + curr->removeFunction(func.name); + WasmValidator validator; + validator.quiet = true; + if (validator.validate(*curr) && writeAndTestReduction()) { + std::cerr << "| removed function " << func.name << '\n'; + noteReduction(); + } else { + curr->addFunction(new Function(func)); + } + } + } + + // helpers + + // try to replace condition with always true and always false + void handleCondition(Expression*& condition) { + if (!condition) return; + if (condition->is<Const>()) return; + auto* c = builder->makeConst(Literal(int32_t(0))); + if (!tryToReplaceChild(condition, c)) { + c->value = Literal(int32_t(1)); + tryToReplaceChild(condition, c); + } + } + + template<typename T> + void handleCall(T* call) { + for (auto* op : call->operands) { + if (tryToReplaceCurrent(op)) return; + } + } + + bool tryToReduceCurrentToNone() { + auto* curr = getCurrent(); + if (curr->is<Nop>()) return false; + // try to replace with a trivial value + Nop nop; + if (tryToReplaceCurrent(&nop)) { + replaceCurrent(builder->makeNop()); + return true; + } + return false; + } + + // try to replace a concrete value with a trivial constant + bool tryToReduceCurrentToConst() { + auto* curr = getCurrent(); + if (curr->is<Const>()) return false; + // try to replace with a trivial value + Const* c = builder->makeConst(Literal(int32_t(0))); + if (tryToReplaceCurrent(c)) return true; + c->value = LiteralUtils::makeLiteralFromInt32(1, curr->type); + c->type = curr->type; + return tryToReplaceCurrent(c); + } + + bool tryToReduceCurrentToUnreachable() { + auto* curr = getCurrent(); + if (curr->is<Unreachable>()) return false; + // try to replace with a trivial value + Unreachable un; + if (tryToReplaceCurrent(&un)) { + replaceCurrent(builder->makeUnreachable()); + return true; + } + // maybe a return? TODO + return false; + } +}; + +// +// main +// + +int main(int argc, const char* argv[]) { + std::string input, test, working, command; + bool verbose = false, + debugInfo = false, + force = false; + Options options("wasm-reduce", "Reduce a wasm file to a smaller one that has the same behavior on a given command"); + options + .add("--command", "-cmd", "The command to run on the test, that we want to reduce while keeping the command's output identical. " + "We look at the command's return code and stdout here (TODO: stderr), " + "and we reduce while keeping those unchanged.", + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + command = argument; + }) + .add("--test", "-t", "Test file (this will be written to to test, the given command should read it when we call it)", + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + test = argument; + }) + .add("--working", "-w", "Working file (this will contain the current good state while doing temporary computations, " + "and will contain the final best result at the end)", + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + working = argument; + }) + .add("--verbose", "-v", "Verbose output mode", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + verbose = true; + }) + .add("--debugInfo", "-g", "Keep debug info in binaries", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + debugInfo = true; + }) + .add("--force", "-f", "Force the reduction attempt, ignoring problems that imply it is unlikely to succeed", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + force = true; + }) + .add_positional("INFILE", Options::Arguments::One, + [&](Options* o, const std::string& argument) { + input = argument; + }); + options.parse(argc, argv); + + if (test.size() == 0) Fatal() << "test file not provided\n"; + if (working.size() == 0) Fatal() << "working file not provided\n"; + + std::cerr << "|input: " << input << '\n'; + std::cerr << "|test: " << test << '\n'; + std::cerr << "|working: " << working << '\n'; + + // get the expected output + copy_file(input, test); + expected.getFromExecution(command); + + std::cerr << "|expected result:\n" << expected << '\n'; + + auto stopIfNotForced = [&](std::string message, ProgramResult& result) { + std::cerr << "|! " << message << '\n' << result << '\n'; + if (!force) { + Fatal() << "|! stopping, as it is very unlikely reduction can succeed (use -f to ignore this check)"; + } + }; + + std::cerr << "|checking that command has different behavior on invalid binary\n"; + { + { + std::ofstream dst(test, std::ios::binary); + dst << "waka waka\n"; + } + ProgramResult result(command); + if (result == expected) { + stopIfNotForced("running command on an invalid module should give different results", result); + } + } + + std::cerr << "|checking that command has expected behavior on canonicalized (read-written) binary\n"; + { + // read and write it + ProgramResult readWrite(std::string("bin/wasm-opt ") + input + " -o " + test); + if (readWrite.failed()) { + stopIfNotForced("failed to read and write the binary", readWrite); + } else { + ProgramResult result(command); + if (result != expected) { + stopIfNotForced("running command on the canonicalized module should give the same results", result); + } + } + } + + copy_file(input, working); + std::cerr << "|input size: " << file_size(working) << "\n"; + + std::cerr << "|starting reduction!\n"; + + int factor = 4096; + size_t lastDestructiveReductions = 0; + size_t lastPostPassesSize = 0; + + bool stopping = false; + + while (1) { + Reducer reducer(command, test, working, verbose, debugInfo); + + // run binaryen optimization passes to reduce. passes are fast to run + // and can often reduce large amounts of code efficiently, as opposed + // to detructive reduction (i.e., that doesn't preserve correctness as + // passes do) since destrucive must operate one change at a time + std::cerr << "| reduce using passes...\n"; + auto oldSize = file_size(working); + reducer.reduceUsingPasses(); + auto newSize = file_size(working); + auto passProgress = oldSize - newSize; + std::cerr << "| after pass reduction: " << newSize << "\n"; + + // always stop after a pass reduction attempt, for final cleanup + if (stopping) break; + + // check if the full cycle (destructive/passes) has helped or not + if (lastPostPassesSize && newSize >= lastPostPassesSize) { + std::cerr << "| progress has stopped, skipping to the end\n"; + if (factor == 1) { + // this is after doing work with factor 1, so after the remaining work, stop + stopping = true; + } else { + // just try to remove all we can and finish up + factor = 1; + } + } + lastPostPassesSize = newSize; + + // if destructive reductions lead to useful proportionate pass reductions, keep + // going at the same factor, as pass reductions are far faster + std::cerr << "| pass progress: " << passProgress << ", last destructive: " << lastDestructiveReductions << '\n'; + if (passProgress >= 4 * lastDestructiveReductions) { + // don't change + std::cerr << "| progress is good, do not quickly decrease factor\n"; + } else { + if (factor > 10) { + factor = (factor / 3) + 1; + } else { + factor = (factor + 1) / 2; // stable on 1 + } + } + + // no point in a factor lorger than the size + assert(newSize > 4); // wasm modules are >4 bytes anyhow + factor = std::min(factor, int(newSize) / 4); + + // try to reduce destructively. if a high factor fails to find anything, + // quickly try a lower one (no point in doing passes until we reduce + // destructively at least a little) + while (1) { + std::cerr << "| reduce destructively... (factor: " << factor << ")\n"; + lastDestructiveReductions = reducer.reduceDestructively(factor); + if (lastDestructiveReductions > 0) break; + // we failed to reduce destructively + if (factor == 1) { + stopping = true; + break; + } + factor = std::max(1, factor / 4); // quickly now, try to find *something* we can reduce + } + + std::cerr << "| destructive reduction led to size: " << file_size(working) << '\n'; + } + std::cerr << "|finished, final size: " << file_size(working) << "\n"; + copy_file(working, test); // just to avoid confusion +} + diff --git a/src/wasm-validator.h b/src/wasm-validator.h index d78b54a47..35304d1c9 100644 --- a/src/wasm-validator.h +++ b/src/wasm-validator.h @@ -67,6 +67,8 @@ struct WasmValidator : public PostWalker<WasmValidator> { bool validateWeb = false; bool validateGlobally = true; + bool quiet = false; // whether to log errors verbosely + struct BreakInfo { WasmType type; Index arity; @@ -98,7 +100,7 @@ public: validateBinaryenIR(module); } // print if an error occurred - if (!valid) { + if (!valid && !quiet) { WasmPrinter::printModule(&module, std::cerr); } return valid; diff --git a/src/wasm.h b/src/wasm.h index 78aa1577b..0cd5b05e8 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -770,6 +770,7 @@ public: void removeImport(Name name); void removeExport(Name name); + void removeFunction(Name name); // TODO: remove* for other elements void updateMaps(); diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 819602608..e30661e8f 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -62,7 +62,7 @@ void WasmValidator::visitBlock(Block *curr) { } if (curr->list.size() > 1) { for (Index i = 0; i < curr->list.size() - 1; i++) { - if (!shouldBeTrue(!isConcreteWasmType(curr->list[i]->type), curr, "non-final block elements returning a value must be drop()ed (binaryen's autodrop option might help you)")) { + if (!shouldBeTrue(!isConcreteWasmType(curr->list[i]->type), curr, "non-final block elements returning a value must be drop()ed (binaryen's autodrop option might help you)") && !quiet) { std::cerr << "(on index " << i << ":\n" << curr->list[i] << "\n), type: " << curr->list[i]->type << "\n"; } } @@ -170,14 +170,14 @@ void WasmValidator::visitCall(Call *curr) { if (!validateGlobally) return; auto* target = getModule()->getFunctionOrNull(curr->target); if (!shouldBeTrue(!!target, curr, "call target must exist")) { - if (getModule()->getImportOrNull(curr->target)) { + if (getModule()->getImportOrNull(curr->target) && !quiet) { std::cerr << "(perhaps it should be a CallImport instead of Call?)\n"; } return; } if (!shouldBeTrue(curr->operands.size() == target->params.size(), curr, "call param number must match")) return; for (size_t i = 0; i < curr->operands.size(); i++) { - if (!shouldBeEqualOrFirstIsUnreachable(curr->operands[i]->type, target->params[i], curr, "call param types must match")) { + if (!shouldBeEqualOrFirstIsUnreachable(curr->operands[i]->type, target->params[i], curr, "call param types must match") && !quiet) { std::cerr << "(on argument " << i << ")\n"; } } @@ -190,7 +190,7 @@ void WasmValidator::visitCallImport(CallImport *curr) { auto* type = getModule()->getFunctionType(import->functionType); if (!shouldBeTrue(curr->operands.size() == type->params.size(), curr, "call param number must match")) return; for (size_t i = 0; i < curr->operands.size(); i++) { - if (!shouldBeEqualOrFirstIsUnreachable(curr->operands[i]->type, type->params[i], curr, "call param types must match")) { + if (!shouldBeEqualOrFirstIsUnreachable(curr->operands[i]->type, type->params[i], curr, "call param types must match") && !quiet) { std::cerr << "(on argument " << i << ")\n"; } } @@ -202,7 +202,7 @@ void WasmValidator::visitCallIndirect(CallIndirect *curr) { shouldBeEqualOrFirstIsUnreachable(curr->target->type, i32, curr, "indirect call target must be an i32"); if (!shouldBeTrue(curr->operands.size() == type->params.size(), curr, "call param number must match")) return; for (size_t i = 0; i < curr->operands.size(); i++) { - if (!shouldBeEqualOrFirstIsUnreachable(curr->operands[i]->type, type->params[i], curr, "call param types must match")) { + if (!shouldBeEqualOrFirstIsUnreachable(curr->operands[i]->type, type->params[i], curr, "call param types must match") && !quiet) { std::cerr << "(on argument " << i << ")\n"; } } @@ -534,7 +534,7 @@ void WasmValidator::visitGlobal(Global* curr) { if (!validateGlobally) return; shouldBeTrue(curr->init != nullptr, curr->name, "global init must be non-null"); shouldBeTrue(curr->init->is<Const>() || curr->init->is<GetGlobal>(), curr->name, "global init must be valid"); - if (!shouldBeEqual(curr->type, curr->init->type, curr->init, "global init must have correct type")) { + if (!shouldBeEqual(curr->type, curr->init->type, curr->init, "global init must have correct type") && !quiet) { std::cerr << "(on global " << curr->name << ")\n"; } } @@ -548,7 +548,7 @@ void WasmValidator::visitFunction(Function *curr) { if (returnType != unreachable) { shouldBeEqual(curr->result, returnType, curr->body, "function result must match, if function has returns"); } - if (!shouldBeTrue(namedBreakTargets.empty(), curr->body, "all named break targets must exist (even if not taken)")) { + if (!shouldBeTrue(namedBreakTargets.empty(), curr->body, "all named break targets must exist (even if not taken)") && !quiet) { std::cerr << "(on label " << *namedBreakTargets.begin() << ")\n"; } returnType = unreachable; @@ -592,6 +592,9 @@ void WasmValidator::visitTable(Table* curr) { for (auto& segment : curr->segments) { shouldBeEqual(segment.offset->type, i32, segment.offset, "segment offset should be i32"); shouldBeTrue(checkOffset(segment.offset, segment.data.size(), getModule()->table.initial * Table::kPageSize), segment.offset, "segment offset should be reasonable"); + for (auto name : segment.data) { + shouldBeTrue(getModule()->getFunctionOrNull(name) || getModule()->getImportOrNull(name), name, "segment name should be valid"); + } } } void WasmValidator::visitModule(Module *curr) { @@ -698,11 +701,13 @@ void WasmValidator::validateBinaryenIR(Module& wasm) { template <typename T, typename S> std::ostream& WasmValidator::fail(S text, T curr) { valid = false; + if (quiet) return std::cerr; auto& ret = printFailureHeader() << text << ", on \n"; return printModuleComponent(curr, ret); } std::ostream& WasmValidator::printFailureHeader() { + if (quiet) return std::cerr; Colors::red(std::cerr); if (getFunction()) { std::cerr << "[wasm-validator error in function "; diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index ab2db6bd5..4594eddd1 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -716,6 +716,16 @@ void Module::removeExport(Name name) { exportsMap.erase(name); } +void Module::removeFunction(Name name) { + for (size_t i = 0; i < functions.size(); i++) { + if (functions[i]->name == name) { + functions.erase(functions.begin() + i); + break; + } + } + functionsMap.erase(name); +} + // TODO: remove* for other elements void Module::updateMaps() { diff --git a/test/passes/remove-imports.txt b/test/passes/remove-imports.txt index f195df7e7..da188e3aa 100644 --- a/test/passes/remove-imports.txt +++ b/test/passes/remove-imports.txt @@ -2,6 +2,7 @@ (type $FUNCSIG$v (func)) (type $FUNCSIG$i (func (result i32))) (type $FUNCSIG$d (func (result f64))) + (import "env" "memBase" (global $import$global0 i32)) (memory $0 1024 1024) (func $nada (type $FUNCSIG$v) (nop) diff --git a/test/passes/remove-imports.wast b/test/passes/remove-imports.wast index bae2e2fc6..d7c883d67 100644 --- a/test/passes/remove-imports.wast +++ b/test/passes/remove-imports.wast @@ -6,6 +6,7 @@ (import $waka "somewhere" "waka") (import $waka-ret "somewhere" "waka-ret" (result i32)) (import $waka-ret-d "somewhere" "waka-ret-d" (result f64)) + (import "env" "memBase" (global i32)) (func $nada (type $FUNCSIG$v) (call $waka) (drop diff --git a/test/reduce/destructive.wast b/test/reduce/destructive.wast new file mode 100644 index 000000000..65786502f --- /dev/null +++ b/test/reduce/destructive.wast @@ -0,0 +1,10 @@ +(module + (export "x" (func $x)) + (func $x (param $x i32) (result i32) + (if (i32.eq (get_local $x) (i32.const 98658746)) + (unreachable) ;; this can be removed destructively, since we do not sent this param + ) + (i32.const 100) + ) +) + diff --git a/test/reduce/destructive.wast.txt b/test/reduce/destructive.wast.txt new file mode 100644 index 000000000..b5776b35e --- /dev/null +++ b/test/reduce/destructive.wast.txt @@ -0,0 +1,9 @@ +(module + (type $0 (func (param i32) (result i32))) + (memory $0 0) + (export "x" (func $0)) + (func $0 (type $0) (param $var$0 i32) (result i32) + (i32.const 100) + ) +) + diff --git a/test/reduce/memory_table.wast b/test/reduce/memory_table.wast new file mode 100644 index 000000000..9e8ad4445 --- /dev/null +++ b/test/reduce/memory_table.wast @@ -0,0 +1,31 @@ +(module + (type $i (func (result i32))) + (memory $0 256 256) + (table 481 481 anyfunc) + (elem (i32.const 0) $f0 $f0 $f1 $f2 $f0 $f3 $f0) + (data (i32.const 0) "p\0bflkj") + (data (i32.const 10960) "1234hello") + (export "f1" (func $f1)) + (export "f2" (func $f2)) + (export "f4" (func $f4)) + (func $f0 (result i32) + (i32.const 1234) + ) + (func $f1 + (i32.store (i32.const 1000) (i32.const 65530)) ;; dead store + ) + (func $f2 (result i32) + (i32.store (i32.const 0) (i32.const 65530)) + (i32.load (i32.const 0)) ;; load the written stuff + ) + (func $f3 (result i32) + (i32.load (i32.const 10964)) ;; load the 'hell' + ) + (func $f4 (result i32) + (i32.add + (call_indirect $i (i32.const 3)) + (call_indirect $i (i32.const 0)) + ) + ) +) + diff --git a/test/reduce/memory_table.wast.txt b/test/reduce/memory_table.wast.txt new file mode 100644 index 000000000..08918c9f3 --- /dev/null +++ b/test/reduce/memory_table.wast.txt @@ -0,0 +1,38 @@ +(module + (type $0 (func (result i32))) + (type $1 (func)) + (table 481 481 anyfunc) + (elem (i32.const 0) $1 $1 $1 $3) + (memory $0 256 256) + (export "f1" (func $2)) + (export "f2" (func $3)) + (export "f4" (func $0)) + (func $0 (type $0) (result i32) + (i32.add + (call_indirect $0 + (i32.const 3) + ) + (call_indirect $0 + (i32.const 0) + ) + ) + ) + (func $1 (type $0) (result i32) + (i32.const 1234) + ) + (func $2 (type $1) + (nop) + ) + (func $3 (type $0) (result i32) + (block $label$0 (result i32) + (i32.store + (i32.const 0) + (i32.const 65530) + ) + (i32.load + (i32.const 0) + ) + ) + ) +) + diff --git a/test/reduce/simple.wast b/test/reduce/simple.wast new file mode 100644 index 000000000..e54424190 --- /dev/null +++ b/test/reduce/simple.wast @@ -0,0 +1,15 @@ +(module + (export "x" (func $x)) + (func $x (result i32) + (nop) + (nop) + (nop) + (drop (i32.const 1234)) + (i32.const 5678) ;; easily reducible + ) + (func $not-exported + (nop) + (unreachable) + ) +) + diff --git a/test/reduce/simple.wast.txt b/test/reduce/simple.wast.txt new file mode 100644 index 000000000..b5209d1bc --- /dev/null +++ b/test/reduce/simple.wast.txt @@ -0,0 +1,9 @@ +(module + (type $0 (func (result i32))) + (memory $0 0) + (export "x" (func $0)) + (func $0 (type $0) (result i32) + (i32.const 5678) + ) +) + |