summaryrefslogtreecommitdiff
path: root/src/tools
diff options
context:
space:
mode:
author Alon Zakai <azakai@google.com> 2024-11-19 09:28:01 -0800
committer GitHub <noreply@github.com> 2024-11-19 09:28:01 -0800
commit b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7 (patch)
tree 55f1d24ca38d3a0c9b6e9197f0e1a28493c50f50 /src/tools
parent 25b8e6a714d2217e8735a925bc751900bce09d53 (diff)
download binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.tar.gz
binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.tar.bz2
binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.zip
Fuzzing: ClusterFuzz integration (#7079)
The main addition here is a bundle_clusterfuzz.py script which will package up the exact files that should be uploaded to ClusterFuzz. It also documents the process of bundling and testing. You can do `bundle.py OUTPUT_FILE.tgz`. That bundles wasm-opt from ./bin, which is enough for local testing. For actually uploading to ClusterFuzz, we need a portable build, and @dschuff had the idea to reuse the emsdk build, which works nicely. Doing `bundle.py OUTPUT_FILE.tgz --build-dir=/path/to/emsdk/upstream/` will bundle wasm-opt (+libs) from the emsdk. I verified that those builds work on ClusterFuzz. I added several forms of testing here. First, our main fuzzer fuzz_opt.py now has a ClusterFuzz testcase handler, which simulates a ClusterFuzz environment. Second, there are smoke tests that run in the unit test suite, and can also be run separately: `python -m unittest test/unit/test_cluster_fuzz.py`. Those unit tests can also run on a given bundle, e.g. one created from an emsdk build, for testing right before upload: `BINARYEN_CLUSTER_FUZZ_BUNDLE=/path/to/bundle.tgz python -m unittest test/unit/test_cluster_fuzz.py`. A third piece of testing is to add a --fuzz-passes test. That is a mode for -ttf (translate random data into a valid wasm fuzz testcase) that uses random data to pick and run a set of passes, to further shape the wasm. (--fuzz-passes had no previous testing, and this PR fixes it and tidies it up a little, adding some newer passes too.) Otherwise, this PR includes the key run.py script that is bundled and then executed by ClusterFuzz: basically a Python script that runs `wasm-opt -ttf [..]` to generate testcases, sets up their JS, and emits them. fuzz_shell.js, which is the JS to execute testcases, will now check if it is provided binary data of a wasm file. If so, it does not read a wasm file from argv[1]. (This is needed because ClusterFuzz expects a single file for the testcase, so we make a JS file with bundled wasm inside it.)
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/fuzzing/fuzzing.cpp 133
-rw-r--r-- src/tools/wasm-opt.cpp 4
2 files changed, 123 insertions, 14 deletions
diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp
index cbdbff3ca..ed653ef6b 100644
--- a/src/tools/fuzzing/fuzzing.cpp
+++ b/src/tools/fuzzing/fuzzing.cpp
@@ -55,16 +55,23 @@ TranslateToFuzzReader::TranslateToFuzzReader(Module& wasm,
wasm, read_file<std::vector<char>>(filename, Flags::Binary)) {}
void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) {
+ // Pick random passes to further shape the wasm. This is similar to how we
+ // pick random passes in fuzz_opt.py, but the goal there is to find problems
+ // in the passes, while the goal here is more to shape the wasm, so that
+ // translate-to-fuzz emits interesting outputs (the latter is important for
+ // things like ClusterFuzz, where we are using Binaryen to fuzz other things
+ // than itself). As a result, the list of passes here is different from
+ // fuzz_opt.py.
while (options.passes.size() < 20 && !random.finished() && !oneIn(3)) {
- switch (upTo(32)) {
+ switch (upTo(42)) {
case 0:
case 1:
case 2:
case 3:
case 4: {
- options.passes.push_back("O");
options.passOptions.optimizeLevel = upTo(4);
- options.passOptions.shrinkLevel = upTo(4);
+ options.passOptions.shrinkLevel = upTo(3);
+ options.addDefaultOptPasses();
break;
}
case 5:
@@ -83,7 +90,14 @@ void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) {
options.passes.push_back("duplicate-function-elimination");
break;
case 10:
- options.passes.push_back("flatten");
+ // Some features do not support flatten yet.
+ if (!wasm.features.hasReferenceTypes() &&
+ !wasm.features.hasExceptionHandling() && !wasm.features.hasGC()) {
+ options.passes.push_back("flatten");
+ if (oneIn(2)) {
+ options.passes.push_back("rereloop");
+ }
+ }
break;
case 11:
options.passes.push_back("inlining");
@@ -127,11 +141,9 @@ void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) {
case 24:
options.passes.push_back("reorder-locals");
break;
- case 25: {
- options.passes.push_back("flatten");
- options.passes.push_back("rereloop");
+ case 25:
+ options.passes.push_back("directize");
break;
- }
case 26:
options.passes.push_back("simplify-locals");
break;
@@ -150,18 +162,115 @@ void TranslateToFuzzReader::pickPasses(OptimizationOptions& options) {
case 31:
options.passes.push_back("vacuum");
break;
+ case 32:
+ options.passes.push_back("merge-locals");
+ break;
+ case 33:
+ options.passes.push_back("licm");
+ break;
+ case 34:
+ options.passes.push_back("tuple-optimization");
+ break;
+ case 35:
+ options.passes.push_back("rse");
+ break;
+ case 36:
+ options.passes.push_back("monomorphize");
+ break;
+ case 37:
+ options.passes.push_back("monomorphize-always");
+ break;
+ case 38:
+ case 39:
+ case 40:
+ case 41:
+ // GC specific passes.
+ if (wasm.features.hasGC()) {
+ // Most of these depend on closed world, so just set that.
+ options.passOptions.closedWorld = true;
+
+ switch (upTo(16)) {
+ case 0:
+ options.passes.push_back("abstract-type-refining");
+ break;
+ case 1:
+ options.passes.push_back("cfp");
+ break;
+ case 2:
+ options.passes.push_back("gsi");
+ break;
+ case 3:
+ options.passes.push_back("gto");
+ break;
+ case 4:
+ options.passes.push_back("heap2local");
+ break;
+ case 5:
+ options.passes.push_back("heap-store-optimization");
+ break;
+ case 6:
+ options.passes.push_back("minimize-rec-groups");
+ break;
+ case 7:
+ options.passes.push_back("remove-unused-types");
+ break;
+ case 8:
+ options.passes.push_back("signature-pruning");
+ break;
+ case 9:
+ options.passes.push_back("signature-refining");
+ break;
+ case 10:
+ options.passes.push_back("type-finalizing");
+ break;
+ case 11:
+ options.passes.push_back("type-refining");
+ break;
+ case 12:
+ options.passes.push_back("type-merging");
+ break;
+ case 13:
+ options.passes.push_back("type-ssa");
+ break;
+ case 14:
+ options.passes.push_back("type-unfinalizing");
+ break;
+ case 15:
+ options.passes.push_back("unsubtyping");
+ break;
+ default:
+ WASM_UNREACHABLE("unexpected value");
+ }
+ }
+ break;
default:
WASM_UNREACHABLE("unexpected value");
}
}
+
if (oneIn(2)) {
+ // We randomize these when we pick -O?, but sometimes do so even without, as
+ // they affect some passes.
options.passOptions.optimizeLevel = upTo(4);
+ options.passOptions.shrinkLevel = upTo(3);
}
- if (oneIn(2)) {
- options.passOptions.shrinkLevel = upTo(4);
+
+ if (!options.passOptions.closedWorld && oneIn(2)) {
+ options.passOptions.closedWorld = true;
+ }
+
+ // Usually DCE at the very end, to ensure that our binaries validate in other
+ // VMs, due to how non-nullable local validation and unreachable code
+ // interact. See fuzz_opt.py and
+ // https://github.com/WebAssembly/binaryen/pull/5665
+ // https://github.com/WebAssembly/binaryen/issues/5599
+ if (wasm.features.hasGC() && !oneIn(10)) {
+ options.passes.push_back("dce");
}
- std::cout << "opt level: " << options.passOptions.optimizeLevel << '\n';
- std::cout << "shrink level: " << options.passOptions.shrinkLevel << '\n';
+
+ // TODO: We could in theory run some function-level passes on particular
+ // functions, but then we'd need to do this after generation, not
+ // before (and random data no longer remains then).
}
void TranslateToFuzzReader::build() {
diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp
index 3e1152179..3e429a976 100644
--- a/src/tools/wasm-opt.cpp
+++ b/src/tools/wasm-opt.cpp
@@ -161,8 +161,8 @@ int main(int argc, const char* argv[]) {
})
.add("--fuzz-passes",
"-fp",
- "Pick a random set of passes to run, useful for fuzzing. this depends "
- "on translate-to-fuzz (it picks the passes from the input)",
+ "When doing translate-to-fuzz, pick a set of random passes from the "
+ "input to further shape the wasm",
WasmOptOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& arguments) { fuzzPasses = true; })