diff options
author | Alon Zakai <azakai@google.com> | 2024-11-19 09:28:01 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-19 09:28:01 -0800 |
commit | b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7 (patch) | |
tree | 55f1d24ca38d3a0c9b6e9197f0e1a28493c50f50 /src/tools | |
parent | 25b8e6a714d2217e8735a925bc751900bce09d53 (diff) | |
download | binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.tar.gz binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.tar.bz2 binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.zip |
Fuzzing: ClusterFuzz integration (#7079)
The main addition here is a bundle_clusterfuzz.py script which will package up
the exact files that should be uploaded to ClusterFuzz. It also documents the
process of bundling and testing. You can run
bundle_clusterfuzz.py OUTPUT_FILE.tgz
That bundles wasm-opt from ./bin, which is enough for local testing. For
actually uploading to ClusterFuzz, we need a portable build, and @dschuff
had the idea to reuse the emsdk build, which works nicely. Doing
bundle_clusterfuzz.py OUTPUT_FILE.tgz --build-dir=/path/to/emsdk/upstream/
will bundle wasm-opt (+libs) from the emsdk. I verified that those builds
work on ClusterFuzz.
I added several forms of testing here. First, our main fuzzer fuzz_opt.py now
has a ClusterFuzz testcase handler, which simulates a ClusterFuzz environment.
Second, there are smoke tests that run in the unit test suite, and can also be
run separately:
python -m unittest test/unit/test_cluster_fuzz.py
Those unit tests can also run on a given bundle, e.g. one created from an
emsdk build, for testing right before upload:
BINARYEN_CLUSTER_FUZZ_BUNDLE=/path/to/bundle.tgz python -m unittest test/unit/test_cluster_fuzz.py
A third piece of testing is to add a --fuzz-passes test. That is a mode for
-ttf (translate random data into a valid wasm fuzz testcase) that uses random
data to pick and run a set of passes, to further shape the wasm. (--fuzz-passes
had no previous testing, and this PR fixes it and tidies it up a little, adding some
newer passes too).
Otherwise this PR includes the key run.py script that is bundled and then
executed by ClusterFuzz, basically a python script that runs wasm-opt -ttf [..]
to generate testcases, sets up their JS, and emits them.
fuzz_shell.js, which is the JS to execute testcases, will now check if it is
provided binary data of a wasm file. If so, it does not read a wasm file from
argv[1]. (This is needed because ClusterFuzz expects a single file for the
testcase, so we make a JS file with bundled wasm inside it.)
Diffstat (limited to 'src/tools')
-rw-r--r-- | src/tools/fuzzing/fuzzing.cpp | 133 | ||||
-rw-r--r-- | src/tools/wasm-opt.cpp | 4 |
2 files changed, 123 insertions, 14 deletions
diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index cbdbff3ca..ed653ef6b 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -55,16 +55,23 @@ TranslateToFuzzReader::TranslateToFuzzReader(Module& wasm, wasm, read_file<std::vector<char>>(filename, Flags::Binary)) {} void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) { + // Pick random passes to further shape the wasm. This is similar to how we + // pick random passes in fuzz_opt.py, but the goal there is to find problems + // in the passes, while the goal here is more to shape the wasm, so that + // translate-to-fuzz emits interesting outputs (the latter is important for + // things like ClusterFuzz, where we are using Binaryen to fuzz other things + // than itself). As a result, the list of passes here is different from + // fuzz_opt.py. while (options.passes.size() < 20 && !random.finished() && !oneIn(3)) { - switch (upTo(32)) { + switch (upTo(42)) { case 0: case 1: case 2: case 3: case 4: { - options.passes.push_back("O"); options.passOptions.optimizeLevel = upTo(4); - options.passOptions.shrinkLevel = upTo(4); + options.passOptions.shrinkLevel = upTo(3); + options.addDefaultOptPasses(); break; } case 5: @@ -83,7 +90,14 @@ void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) { options.passes.push_back("duplicate-function-elimination"); break; case 10: - options.passes.push_back("flatten"); + // Some features do not support flatten yet. 
+ if (!wasm.features.hasReferenceTypes() && + !wasm.features.hasExceptionHandling() && !wasm.features.hasGC()) { + options.passes.push_back("flatten"); + if (oneIn(2)) { + options.passes.push_back("rereloop"); + } + } break; case 11: options.passes.push_back("inlining"); @@ -127,11 +141,9 @@ void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) { case 24: options.passes.push_back("reorder-locals"); break; - case 25: { - options.passes.push_back("flatten"); - options.passes.push_back("rereloop"); + case 25: + options.passes.push_back("directize"); break; - } case 26: options.passes.push_back("simplify-locals"); break; @@ -150,18 +162,115 @@ void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) { case 31: options.passes.push_back("vacuum"); break; + case 32: + options.passes.push_back("merge-locals"); + break; + case 33: + options.passes.push_back("licm"); + break; + case 34: + options.passes.push_back("tuple-optimization"); + break; + case 35: + options.passes.push_back("rse"); + break; + case 36: + options.passes.push_back("monomorphize"); + break; + case 37: + options.passes.push_back("monomorphize-always"); + break; + case 38: + case 39: + case 40: + case 41: + // GC specific passes. + if (wasm.features.hasGC()) { + // Most of these depend on closed world, so just set that. 
+ options.passOptions.closedWorld = true; + + switch (upTo(16)) { + case 0: + options.passes.push_back("abstract-type-refining"); + break; + case 1: + options.passes.push_back("cfp"); + break; + case 2: + options.passes.push_back("gsi"); + break; + case 3: + options.passes.push_back("gto"); + break; + case 4: + options.passes.push_back("heap2local"); + break; + case 5: + options.passes.push_back("heap-store-optimization"); + break; + case 6: + options.passes.push_back("minimize-rec-groups"); + break; + case 7: + options.passes.push_back("remove-unused-types"); + break; + case 8: + options.passes.push_back("signature-pruning"); + break; + case 9: + options.passes.push_back("signature-refining"); + break; + case 10: + options.passes.push_back("type-finalizing"); + break; + case 11: + options.passes.push_back("type-refining"); + break; + case 12: + options.passes.push_back("type-merging"); + break; + case 13: + options.passes.push_back("type-ssa"); + break; + case 14: + options.passes.push_back("type-unfinalizing"); + break; + case 15: + options.passes.push_back("unsubtyping"); + break; + default: + WASM_UNREACHABLE("unexpected value"); + } + } + break; default: WASM_UNREACHABLE("unexpected value"); } } + if (oneIn(2)) { + // We randomize these when we pick -O?, but sometimes do so even without, as + // they affect some passes. options.passOptions.optimizeLevel = upTo(4); + options.passOptions.shrinkLevel = upTo(3); } - if (oneIn(2)) { - options.passOptions.shrinkLevel = upTo(4); + + if (!options.passOptions.closedWorld && oneIn(2)) { + options.passOptions.closedWorld = true; + } + + // Usually DCE at the very end, to ensure that our binaries validate in other + // VMs, due to how non-nullable local validation and unreachable code + // interact. 
See fuzz_opt.py and + // https://github.com/WebAssembly/binaryen/pull/5665 + // https://github.com/WebAssembly/binaryen/issues/5599 + if (wasm.features.hasGC() && !oneIn(10)) { + options.passes.push_back("dce"); } - std::cout << "opt level: " << options.passOptions.optimizeLevel << '\n'; - std::cout << "shrink level: " << options.passOptions.shrinkLevel << '\n'; + + // TODO: We could in theory run some function-level passes on particular + // functions, but then we'd need to do this after generation, not + // before (and random data no longer remains then). } void TranslateToFuzzReader::build() { diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 3e1152179..3e429a976 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -161,8 +161,8 @@ int main(int argc, const char* argv[]) { }) .add("--fuzz-passes", "-fp", - "Pick a random set of passes to run, useful for fuzzing. this depends " - "on translate-to-fuzz (it picks the passes from the input)", + "When doing translate-to-fuzz, pick a set of random passes from the " + "input to further shape the wasm", WasmOptOption, Options::Arguments::Zero, [&](Options* o, const std::string& arguments) { fuzzPasses = true; }) |