diff options
author | Alon Zakai <azakai@google.com> | 2024-11-19 09:28:01 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-19 09:28:01 -0800 |
commit | b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7 (patch) | |
tree | 55f1d24ca38d3a0c9b6e9197f0e1a28493c50f50 /scripts/clusterfuzz | |
parent | 25b8e6a714d2217e8735a925bc751900bce09d53 (diff) | |
download | binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.tar.gz binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.tar.bz2 binaryen-b0e999a2b8841d8be21cbcdc84cbc1d6469e36d7.zip |
Fuzzing: ClusterFuzz integration (#7079)
The main addition here is a bundle_clusterfuzz.py script which will package up
the exact files that should be uploaded to ClusterFuzz. It also documents the
process of bundling and testing. You can do
bundle_clusterfuzz.py OUTPUT_FILE.tgz
That bundles wasm-opt from ./bin, which is enough for local testing. For
actually uploading to ClusterFuzz, we need a portable build, and @dschuff
had the idea to reuse the emsdk build, which works nicely. Doing
bundle_clusterfuzz.py OUTPUT_FILE.tgz --build-dir=/path/to/emsdk/upstream/
will bundle wasm-opt (+libs) from the emsdk. I verified that those builds
work on ClusterFuzz.
I added several forms of testing here. First, our main fuzzer fuzz_opt.py now
has a ClusterFuzz testcase handler, which simulates a ClusterFuzz environment.
Second, there are smoke tests that run in the unit test suite, and can also be
run separately:
python -m unittest test/unit/test_cluster_fuzz.py
Those unit tests can also run on a given bundle, e.g. one created from an
emsdk build, for testing right before upload:
BINARYEN_CLUSTER_FUZZ_BUNDLE=/path/to/bundle.tgz python -m unittest test/unit/test_cluster_fuzz.py
A third piece of testing is to add a --fuzz-passes test. That is a mode for
-ttf (translate random data into a valid wasm fuzz testcase) that uses random
data to pick and run a set of passes, to further shape the wasm. (--fuzz-passes
had no previous testing, and this PR fixes it and tidies it up a little, adding some
newer passes too).
Otherwise this PR includes the key run.py script that is bundled and then
executed by ClusterFuzz, basically a python script that runs wasm-opt -ttf [..]
to generate testcases, sets up their JS, and emits them.
fuzz_shell.js, which is the JS to execute testcases, will now check if it is
provided binary data of a wasm file. If so, it does not read a wasm file from
argv[1]. (This is needed because ClusterFuzz expects a single file for the
testcase, so we make a JS file with bundled wasm inside it.)
Diffstat (limited to 'scripts/clusterfuzz')
-rwxr-xr-x | scripts/clusterfuzz/run.py | 163 |
1 files changed, 163 insertions, 0 deletions
# Source: scripts/clusterfuzz/run.py (new file, mode 100755)
#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate
a fixed number of testcases. This is a "blackbox fuzzer", see

https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/

This file should be bundled up together with the other files it needs, see
bundle_clusterfuzz.py.
'''

import os
import getopt
import random
import subprocess
import sys

# The V8 flags we put in the "fuzzer flags" files, which tell ClusterFuzz how to
# run V8. By default we apply all staging flags.
FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging'

# Maximum size of the random data that we feed into wasm-opt -ttf. This is
# smaller than fuzz_opt.py's INPUT_SIZE_MAX because that script is tuned for
# fuzzing large wasm files (to reduce the overhead we have of launching many
# processes per file), which is less of an issue on ClusterFuzz.
MAX_RANDOM_SIZE = 15 * 1024

# The prefix for fuzz files.
FUZZ_FILENAME_PREFIX = 'fuzz-'

# The prefix for flags files.
FLAGS_FILENAME_PREFIX = 'flags-'

# The name of the fuzzer (appears after FUZZ_FILENAME_PREFIX /
# FLAGS_FILENAME_PREFIX).
FUZZER_NAME_PREFIX = 'binaryen-'

# The root directory of the bundle this will be in, which is the directory of
# this very file.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# The path to the wasm-opt binary that we run to generate testcases.
FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt')

# The path to the fuzz_shell.js script that will execute the wasm in each
# testcase.
JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')

# The arguments we provide to wasm-opt to generate wasm files.
FUZZER_ARGS = [
    # Generate a wasm from random data.
    '--translate-to-fuzz',
    # Run some random passes, to further shape the random wasm we emit.
    '--fuzz-passes',
    # Enable all features but disable ones not yet ready for fuzzing. This may
    # be a smaller set than fuzz_opt.py, as that enables a few experimental
    # flags, while here we just fuzz with d8's --wasm-staging.
    '-all',
    '--disable-shared-everything',
    '--disable-fp16',
]

# How many times we run wasm-opt for a single testcase before giving up.
MAX_WASM_OPT_ATTEMPTS = 100


def get_file_name(prefix, index):
    '''
    Returns the file name for a fuzz or flags file.

    prefix is FUZZ_FILENAME_PREFIX or FLAGS_FILENAME_PREFIX, and index is the
    number of the testcase.
    '''
    return f'{prefix}{FUZZER_NAME_PREFIX}{index}.js'


def get_js_file_contents(wasm_contents, js_shell_path=None):
    '''
    Returns the contents of a .js fuzz file, given particular wasm contents
    that we want to be executed.

    wasm_contents is the bytes of the wasm binary. js_shell_path is the JS
    shell to append after the binary; it defaults to JS_SHELL_PATH.
    '''
    if js_shell_path is None:
        js_shell_path = JS_SHELL_PATH

    # Start with the standard JS shell.
    with open(js_shell_path) as file:
        js = file.read()

    # Prepend the wasm contents, so they are used (rather than the normal
    # mechanism where the wasm file's name is provided in argv). Iterating
    # over bytes yields integers, which is the form the JS array literal needs.
    wasm_bytes = ','.join(map(str, wasm_contents))
    return f'var binary = new Uint8Array([{wasm_bytes}]);\n\n' + js


def parse_args(argv):
    '''
    Parses the ClusterFuzz command line and returns (output_dir, num).

    See
    https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer

    --input_dir is accepted but ignored. Raises getopt.GetoptError on an
    unknown flag, and ValueError if --no_of_files is not an integer.
    '''
    output_dir = '.'
    num = 100
    expected_flags = ['input_dir=', 'output_dir=', 'no_of_files=']
    optlist, _ = getopt.getopt(argv[1:], '', expected_flags)
    for option, value in optlist:
        if option == '--output_dir':
            output_dir = value
        elif option == '--no_of_files':
            num = int(value)
    return output_dir, num


def generate_wasm(input_data_file_path, wasm_file_path, system_random):
    '''
    Runs wasm-opt on fresh random data until it emits a wasm file.

    Random bytes are written to input_data_file_path and the wasm is written
    to wasm_file_path. system_random is the random.SystemRandom instance used
    to pick the size of the random data.

    wasm-opt may fail to run in rare cases (when the fuzzer emits code it
    detects as invalid), so we just try again with new random data. After
    MAX_WASM_OPT_ATTEMPTS failures something is very wrong, and the last
    subprocess.CalledProcessError is re-raised.
    '''
    # The command does not depend on the attempt, so build it once.
    cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
    cmd += ['-o', wasm_file_path, input_data_file_path]

    for attempt in range(MAX_WASM_OPT_ATTEMPTS):
        # Generate random data.
        random_size = system_random.randint(1, MAX_RANDOM_SIZE)
        with open(input_data_file_path, 'wb') as file:
            file.write(os.urandom(random_size))

        # Generate wasm from the random data.
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError:
            if attempt == MAX_WASM_OPT_ATTEMPTS - 1:
                # Something is very wrong!
                raise
            # Try again.
            print('(oops, retrying wasm-opt)')
        else:
            # Success, leave the loop.
            return


def main(argv):
    '''
    Generates the requested number of testcases in the output directory.

    Each testcase is a fuzz .js file (the JS shell with the wasm binary
    embedded in it) together with a matching flags file for V8.
    '''
    output_dir, num = parse_args(argv)
    system_random = random.SystemRandom()

    for i in range(1, num + 1):
        input_data_file_path = os.path.join(output_dir, f'{i}.input')
        wasm_file_path = os.path.join(output_dir, f'{i}.wasm')

        generate_wasm(input_data_file_path, wasm_file_path, system_random)

        # Generate a testcase from the wasm
        with open(wasm_file_path, 'rb') as file:
            wasm_contents = file.read()
        testcase_file_path = os.path.join(output_dir,
                                          get_file_name(FUZZ_FILENAME_PREFIX, i))
        js_file_contents = get_js_file_contents(wasm_contents)
        with open(testcase_file_path, 'w') as file:
            file.write(js_file_contents)

        # Emit a corresponding flags file.
        flags_file_path = os.path.join(output_dir,
                                       get_file_name(FLAGS_FILENAME_PREFIX, i))
        with open(flags_file_path, 'w') as file:
            file.write(FUZZER_FLAGS_FILE_CONTENTS)

        print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes')

        # Remove temporary files.
        os.remove(input_data_file_path)
        os.remove(wasm_file_path)

    print(f'Created {num} testcases.')


if __name__ == '__main__':
    main(sys.argv)