From b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7 Mon Sep 17 00:00:00 2001
From: Alon Zakai
Date: Tue, 19 Nov 2024 09:28:01 -0800
Subject: Fuzzing: ClusterFuzz integration (#7079)

The main addition here is a bundle_clusterfuzz.py script which will package up the exact files that should be uploaded to ClusterFuzz. It also documents the process of bundling and testing. You can do

    bundle_clusterfuzz.py OUTPUT_FILE.tgz

That bundles wasm-opt from ./bin, which is enough for local testing. For actually uploading to ClusterFuzz, we need a portable build, and @dschuff had the idea to reuse the emsdk build, which works nicely. Doing

    bundle_clusterfuzz.py OUTPUT_FILE.tgz --build-dir=/path/to/emsdk/upstream/

will bundle wasm-opt (+libs) from the emsdk. I verified that those builds work on ClusterFuzz.

I added several forms of testing here. First, our main fuzzer fuzz_opt.py now has a ClusterFuzz testcase handler, which simulates a ClusterFuzz environment. Second, there are smoke tests that run in the unit test suite, and can also be run separately:

    python -m unittest test/unit/test_cluster_fuzz.py

Those unit tests can also run on a given bundle, e.g. one created from an emsdk build, for testing right before upload:

    BINARYEN_CLUSTER_FUZZ_BUNDLE=/path/to/bundle.tgz python -m unittest test/unit/test_cluster_fuzz.py

A third piece of testing is to add a --fuzz-passes test. That is a mode for -ttf (translate random data into a valid wasm fuzz testcase) that uses random data to pick and run a set of passes, to further shape the wasm. (--fuzz-passes had no previous testing, and this PR fixes it and tidies it up a little, adding some newer passes too.)

Otherwise this PR includes the key run.py script that is bundled and then executed by ClusterFuzz, basically a python script that runs wasm-opt -ttf [..] to generate testcases, sets up their JS, and emits them.

fuzz_shell.js, which is the JS to execute testcases, will now check if it is provided binary data of a wasm file.
# Fuzz in a near-identical manner to how we fuzz on ClusterFuzz. This is mainly
# to see that fuzzing that way works properly (it likely won't catch anything
# the other fuzzers here catch, though it is possible). That is, running this
# script continuously will give continuous coverage that ClusterFuzz should be
# running ok.
#
# Note that this is *not* deterministic like the other fuzzers: it runs run.py
# like ClusterFuzz does, and that generates its own random data. If a bug is
# caught here, it must be reduced manually.
class ClusterFuzz(TestCaseHandler):
    frequency = 0.1

    # Generate one testcase using the unpacked bundle's run.py and execute it
    # in V8. The |wasm| argument is not used: run.py creates its own testcase
    # rather than consuming the one the fuzzer passes in.
    def handle(self, wasm):
        self.ensure()

        # run.py should emit these two files. Delete them to make sure they
        # are created by run.py in the next step, and are not stale leftovers.
        fuzz_file = 'fuzz-binaryen-1.js'
        flags_file = 'flags-binaryen-1.js'
        for f in [fuzz_file, flags_file]:
            if os.path.exists(f):
                os.unlink(f)

        # Call run.py, similarly to how ClusterFuzz does.
        run([sys.executable,
             os.path.join(self.clusterfuzz_dir, 'run.py'),
             '--output_dir=' + os.getcwd(),
             '--no_of_files=1'])

        # We should see the two files.
        assert os.path.exists(fuzz_file)
        assert os.path.exists(flags_file)

        # Run the testcase in V8, similarly to how ClusterFuzz does.
        cmd = [shared.V8]
        # The flags are given in the flags file - we do *not* use our normal
        # flags here!
        with open(flags_file, 'r') as f:
            flags = f.read()
        # Split on whitespace so that each flag becomes its own argument.
        # Appending the raw file contents would hand V8 a single argument
        # containing every flag (and any trailing newline), which it would
        # not parse as separate flags.
        cmd.extend(flags.split())
        # Run the fuzz file, which contains a modified fuzz_shell.js - we do
        # *not* run fuzz_shell.js normally.
        cmd.append(os.path.abspath(fuzz_file))
        # No wasm file needs to be provided: it is hardcoded into the JS. Note
        # that we use run_vm(), which will ignore known issues in our output and
        # in V8. Those issues may cause V8 to e.g. reject a binary we emit that
        # is invalid, but that should not be a problem for ClusterFuzz (it isn't
        # a crash).
        output = run_vm(cmd)

        # Verify that we called something. The fuzzer should always emit at
        # least one exported function (unless we've decided to ignore the entire
        # run).
        if output != IGNORE:
            assert FUZZ_EXEC_CALL_PREFIX in output

    # One-time setup: make a bundle like the one ClusterFuzz receives, and
    # unpack it for execution into a dir. Later calls return immediately.
    def ensure(self):
        # The attribute is only set once setup has fully completed, so its
        # existence shows we've ensured all we need.
        if hasattr(self, 'clusterfuzz_dir'):
            return

        # Start from a fresh, empty directory.
        clusterfuzz_dir = 'clusterfuzz'
        if os.path.exists(clusterfuzz_dir):
            shutil.rmtree(clusterfuzz_dir)
        os.mkdir(clusterfuzz_dir)

        print('Bundling for ClusterFuzz')
        bundle = 'fuzz_opt_clusterfuzz_bundle.tgz'
        run([in_binaryen('scripts', 'bundle_clusterfuzz.py'), bundle])

        print('Unpacking for ClusterFuzz')
        # Use a context manager so the archive is closed even if extraction
        # raises.
        with tarfile.open(bundle, "r:gz") as tar:
            tar.extractall(path=clusterfuzz_dir)

        # Record success last: if any step above raised, the next call will
        # redo the setup instead of using a partially-populated directory.
        self.clusterfuzz_dir = clusterfuzz_dir