diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/bundle_clusterfuzz.py | 135 | ||||
-rwxr-xr-x | scripts/clusterfuzz/run.py | 163 | ||||
-rwxr-xr-x | scripts/fuzz_opt.py | 82 | ||||
-rw-r--r-- | scripts/fuzz_shell.js | 10 |
4 files changed, 386 insertions, 4 deletions
diff --git a/scripts/bundle_clusterfuzz.py b/scripts/bundle_clusterfuzz.py new file mode 100755 index 000000000..a03553837 --- /dev/null +++ b/scripts/bundle_clusterfuzz.py @@ -0,0 +1,135 @@ +#!/usr/bin/python3 + +''' +Bundle files for uploading to ClusterFuzz. + +Usage: + +bundle_clusterfuzz.py OUTPUT_FILE.tgz [--build-dir=BUILD_DIR] + +The output file will be a .tgz file. + +If a build directory is provided, we will look under there to find bin/wasm-opt +and lib/libbinaryen.so. A useful place to get builds from is the Emscripten SDK, +as you can do + + ./emsdk install tot + +after which ./upstream/ (from the emsdk dir) will contain builds of wasm-opt and +libbinaryen.so (that are designed to run on as many systems as possible, by not +depending on newer libc symbols, etc., as opposed to a normal local build). +Thus, the full workflow could be + + cd emsdk + ./emsdk install tot + cd ../binaryen + python3 scripts/bundle_clusterfuzz.py binaryen_wasm_fuzzer.tgz --build-dir=../emsdk/upstream + +When using --build-dir in this way, you are responsible for ensuring that the +wasm-opt in the build dir is compatible with the scripts in the current dir +(e.g., if run.py here passes a flag that is only in a newer/older version of +wasm-opt, a problem can happen). + +Before uploading to ClusterFuzz, it is worth doing the following: + + 1. Run the local fuzzer (scripts/fuzz_opt.py). That includes a ClusterFuzz + testcase handler, which simulates what ClusterFuzz does. + + 2. Run the unit tests, which include smoke tests for our ClusterFuzz support: + + python -m unittest test/unit/test_cluster_fuzz.py + + Look at the logs, which will contain statistics on the wasm files the + fuzzer emits, and see that they look reasonable. 
+ + You should run the unit tests on the bundle you are about to upload, by + setting the proper env var like this (using the same filename as above): + + BINARYEN_CLUSTER_FUZZ_BUNDLE=`pwd`/binaryen_wasm_fuzzer.tgz python -m unittest test/unit/test_cluster_fuzz.py + + Note that you must pass an absolute filename (e.g. using pwd as shown). + + The unittest logs should reflect that that bundle is being used at the + very start ("Using existing bundle: ..." rather than "Making a new + bundle"). Note that some of the unittests also create their own bundles, to + test the bundling script itself, so later down you will see logging of + bundle creation even if you provide a bundle. + +After uploading to ClusterFuzz, you can wait a while for it to run, and then: + + 1. Inspect the log to see that we generate all the testcases properly, and + their sizes look reasonably random, etc. + + 2. Inspect the sample testcase and run it locally, to see that + + d8 --wasm-staging testcase.js + + properly runs the testcase, emitting logging etc. + + 3. Check the stats and crashes page (known crashes should at least be showing + up). Note that these may take longer to show up than 1 and 2. +''' + +import os +import sys +import tarfile + +# Read the filenames first, as importing |shared| changes the directory. +output_file = os.path.abspath(sys.argv[1]) +print(f'Bundling to: {output_file}') +assert output_file.endswith('.tgz'), 'Can only generate a .tgz' + +build_dir = None +if len(sys.argv) >= 3: + assert sys.argv[2].startswith('--build-dir=') + build_dir = sys.argv[2].split('=')[1] + build_dir = os.path.abspath(build_dir) + # Delete the argument, as importing |shared| scans it. 
+ sys.argv.pop() + +from test import shared # noqa + +# Pick where to get the builds +if build_dir: + binaryen_bin = os.path.join(build_dir, 'bin') + binaryen_lib = os.path.join(build_dir, 'lib') +else: + binaryen_bin = shared.options.binaryen_bin + binaryen_lib = shared.options.binaryen_lib + +with tarfile.open(output_file, "w:gz") as tar: + # run.py + run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py') + print(f' .. run: {run}') + tar.add(run, arcname='run.py') + + # fuzz_shell.js + fuzz_shell = os.path.join(shared.options.binaryen_root, 'scripts', 'fuzz_shell.js') + print(f' .. fuzz_shell: {fuzz_shell}') + tar.add(fuzz_shell, arcname='scripts/fuzz_shell.js') + + # wasm-opt binary + wasm_opt = os.path.join(binaryen_bin, 'wasm-opt') + print(f' .. wasm-opt: {wasm_opt}') + tar.add(wasm_opt, arcname='bin/wasm-opt') + + # For a dynamic build we also need libbinaryen.so and possibly other files. + # Try both .so and .dylib suffixes for more OS coverage. + for suffix in ['.so', '.dylib']: + libbinaryen = os.path.join(binaryen_lib, f'libbinaryen{suffix}') + if os.path.exists(libbinaryen): + print(f' .. libbinaryen: {libbinaryen}') + tar.add(libbinaryen, arcname=f'lib/libbinaryen{suffix}') + + # The emsdk build also includes some more necessary files. + for name in [f'libc++{suffix}', f'libc++{suffix}.2', f'libc++{suffix}.2.0']: + path = os.path.join(binaryen_lib, name) + if os.path.exists(path): + print(f' ......... 
: {path}') + tar.add(path, arcname=f'lib/{name}') + +print('Done.') +print('To run the tests on this bundle, do:') +print() +print(f'BINARYEN_CLUSTER_FUZZ_BUNDLE={output_file} python -m unittest test/unit/test_cluster_fuzz.py') +print() diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py new file mode 100755 index 000000000..efddfc2d4 --- /dev/null +++ b/scripts/clusterfuzz/run.py @@ -0,0 +1,163 @@ +# +# Copyright 2024 WebAssembly Community Group participants +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' +ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate +a fixed number of testcases. This is a "blackbox fuzzer", see + +https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/ + +This file should be bundled up together with the other files it needs, see +bundle_clusterfuzz.py. +''' + +import os +import getopt +import random +import subprocess +import sys + +# The V8 flags we put in the "fuzzer flags" files, which tell ClusterFuzz how to +# run V8. By default we apply all staging flags. +FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging' + +# Maximum size of the random data that we feed into wasm-opt -ttf. This is +# smaller than fuzz_opt.py's INPUT_SIZE_MAX because that script is tuned for +# fuzzing large wasm files (to reduce the overhead we have of launching many +# processes per file), which is less of an issue on ClusterFuzz. +MAX_RANDOM_SIZE = 15 * 1024 + +# The prefix for fuzz files. 
+FUZZ_FILENAME_PREFIX = 'fuzz-' + +# The prefix for flags files. +FLAGS_FILENAME_PREFIX = 'flags-' + +# The name of the fuzzer (appears after FUZZ_FILENAME_PREFIX / +# FLAGS_FILENAME_PREFIX). +FUZZER_NAME_PREFIX = 'binaryen-' + +# The root directory of the bundle this will be in, which is the directory of +# this very file. +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# The path to the wasm-opt binary that we run to generate testcases. +FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt') + +# The path to the fuzz_shell.js script that will execute the wasm in each +# testcase. +JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js') + +# The arguments we provide to wasm-opt to generate wasm files. +FUZZER_ARGS = [ + # Generate a wasm from random data. + '--translate-to-fuzz', + # Run some random passes, to further shape the random wasm we emit. + '--fuzz-passes', + # Enable all features but disable ones not yet ready for fuzzing. This may + # be a smaller set than fuzz_opt.py, as that enables a few experimental + # flags, while here we just fuzz with d8's --wasm-staging. + '-all', + '--disable-shared-everything', + '--disable-fp16', +] + + +# Returns the file name for fuzz or flags files. +def get_file_name(prefix, index): + return f'{prefix}{FUZZER_NAME_PREFIX}{index}.js' + + +# Returns the contents of a .js fuzz file, given particular wasm contents that +# we want to be executed. +def get_js_file_contents(wasm_contents): + # Start with the standard JS shell. + with open(JS_SHELL_PATH) as file: + js = file.read() + + # Prepend the wasm contents, so they are used (rather than the normal + # mechanism where the wasm file's name is provided in argv). + wasm_contents = ','.join([str(c) for c in wasm_contents]) + js = f'var binary = new Uint8Array([{wasm_contents}]);\n\n' + js + return js + + +def main(argv): + # Parse the options. 
See + # https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer + output_dir = '.' + num = 100 + expected_flags = ['input_dir=', 'output_dir=', 'no_of_files='] + optlist, _ = getopt.getopt(argv[1:], '', expected_flags) + for option, value in optlist: + if option == '--output_dir': + output_dir = value + elif option == '--no_of_files': + num = int(value) + + for i in range(1, num + 1): + input_data_file_path = os.path.join(output_dir, f'{i}.input') + wasm_file_path = os.path.join(output_dir, f'{i}.wasm') + + # wasm-opt may fail to run in rare cases (when the fuzzer emits code it + # detects as invalid). Just try again in such a case. + for attempt in range(0, 100): + # Generate random data. + random_size = random.SystemRandom().randint(1, MAX_RANDOM_SIZE) + with open(input_data_file_path, 'wb') as file: + file.write(os.urandom(random_size)) + + # Generate wasm from the random data. + cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS + cmd += ['-o', wasm_file_path, input_data_file_path] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + # Try again. + print('(oops, retrying wasm-opt)') + attempt += 1 + if attempt == 99: + # Something is very wrong! + raise + continue + # Success, leave the loop. + break + + # Generate a testcase from the wasm + with open(wasm_file_path, 'rb') as file: + wasm_contents = file.read() + testcase_file_path = os.path.join(output_dir, + get_file_name(FUZZ_FILENAME_PREFIX, i)) + js_file_contents = get_js_file_contents(wasm_contents) + with open(testcase_file_path, 'w') as file: + file.write(js_file_contents) + + # Emit a corresponding flags file. + flags_file_path = os.path.join(output_dir, + get_file_name(FLAGS_FILENAME_PREFIX, i)) + with open(flags_file_path, 'w') as file: + file.write(FUZZER_FLAGS_FILE_CONTENTS) + + print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes') + + # Remove temporary files. 
+ os.remove(input_data_file_path) + os.remove(wasm_file_path) + + print(f'Created {num} testcases.') + + +if __name__ == '__main__': + main(sys.argv) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index bf712c821..cd583e026 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -36,6 +36,7 @@ import subprocess import random import re import sys +import tarfile import time import traceback from os.path import abspath @@ -1574,6 +1575,84 @@ class RoundtripText(TestCaseHandler): run([in_bin('wasm-opt'), abspath('a.wast')] + FEATURE_OPTS) +# Fuzz in a near-identical manner to how we fuzz on ClusterFuzz. This is mainly +# to see that fuzzing that way works properly (it likely won't catch anything +# the other fuzzers here catch, though it is possible). That is, running this +# script continuously will give continuous cover that ClusterFuzz should be +# running ok. +# +# Note that this is *not* deterministic like the other fuzzers: it runs run.py +# like ClusterFuzz does, and that generates its own random data. If a bug is +# caught here, it must be reduced manually. +class ClusterFuzz(TestCaseHandler): + frequency = 0.1 + + def handle(self, wasm): + self.ensure() + + # run.py() should emit these two files. Delete them to make sure they + # are created by run.py() in the next step. + fuzz_file = 'fuzz-binaryen-1.js' + flags_file = 'flags-binaryen-1.js' + for f in [fuzz_file, flags_file]: + if os.path.exists(f): + os.unlink(f) + + # Call run.py(), similarly to how ClusterFuzz does. + run([sys.executable, + os.path.join(self.clusterfuzz_dir, 'run.py'), + '--output_dir=' + os.getcwd(), + '--no_of_files=1']) + + # We should see the two files. + assert os.path.exists(fuzz_file) + assert os.path.exists(flags_file) + + # Run the testcase in V8, similarly to how ClusterFuzz does. + cmd = [shared.V8] + # The flags are given in the flags file - we do *not* use our normal + # flags here! 
+ with open(flags_file, 'r') as f: + flags = f.read() + cmd.append(flags) + # Run the fuzz file, which contains a modified fuzz_shell.js - we do + # *not* run fuzz_shell.js normally. + cmd.append(os.path.abspath(fuzz_file)) + # No wasm file needs to be provided: it is hardcoded into the JS. Note + # that we use run_vm(), which will ignore known issues in our output and + # in V8. Those issues may cause V8 to e.g. reject a binary we emit that + # is invalid, but that should not be a problem for ClusterFuzz (it isn't + # a crash). + output = run_vm(cmd) + + # Verify that we called something. The fuzzer should always emit at + # least one exported function (unless we've decided to ignore the entire + # run). + if output != IGNORE: + assert FUZZ_EXEC_CALL_PREFIX in output + + def ensure(self): + # The first time we actually run, set things up: make a bundle like the + # one ClusterFuzz receives, and unpack it for execution into a dir. The + # existence of that dir shows we've ensured all we need. 
+ if hasattr(self, 'clusterfuzz_dir'): + return + + self.clusterfuzz_dir = 'clusterfuzz' + if os.path.exists(self.clusterfuzz_dir): + shutil.rmtree(self.clusterfuzz_dir) + os.mkdir(self.clusterfuzz_dir) + + print('Bundling for ClusterFuzz') + bundle = 'fuzz_opt_clusterfuzz_bundle.tgz' + run([in_binaryen('scripts', 'bundle_clusterfuzz.py'), bundle]) + + print('Unpacking for ClusterFuzz') + tar = tarfile.open(bundle, "r:gz") + tar.extractall(path=self.clusterfuzz_dir) + tar.close() + + # The global list of all test case handlers testcase_handlers = [ FuzzExec(), @@ -1585,7 +1664,8 @@ testcase_handlers = [ Merge(), # TODO: enable when stable enough, and adjust |frequency| (see above) # Split(), - RoundtripText() + RoundtripText(), + ClusterFuzz(), ] diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index d9a994896..ce817646e 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -25,14 +25,18 @@ if (typeof process === 'object' && typeof require === 'function') { }; } -// We are given the binary to run as a parameter. -var binary = readBinary(argv[0]); +// The binary to be run. This may be set already (by code that runs before this +// script), and if not, we get the filename from argv. +var binary; +if (!binary) { + binary = readBinary(argv[0]); +} // Normally we call all the exports of the given wasm file. But, if we are // passed a final parameter in the form of "exports:X,Y,Z" then we call // specifically the exports X, Y, and Z. var exportsToCall; -if (argv[argv.length - 1].startsWith('exports:')) { +if (argv.length > 0 && argv[argv.length - 1].startsWith('exports:')) { exportsToCall = argv[argv.length - 1].substr('exports:'.length).split(','); argv.pop(); } |