summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/bundle_clusterfuzz.py135
-rwxr-xr-xscripts/clusterfuzz/run.py163
-rwxr-xr-xscripts/fuzz_opt.py82
-rw-r--r--scripts/fuzz_shell.js10
4 files changed, 386 insertions, 4 deletions
diff --git a/scripts/bundle_clusterfuzz.py b/scripts/bundle_clusterfuzz.py
new file mode 100755
index 000000000..a03553837
--- /dev/null
+++ b/scripts/bundle_clusterfuzz.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+
+'''
+Bundle files for uploading to ClusterFuzz.
+
+Usage:
+
+bundle_clusterfuzz.py OUTPUT_FILE.tgz [--build-dir=BUILD_DIR]
+
+The output file will be a .tgz file.
+
+If a build directory is provided, we will look under there to find bin/wasm-opt
+and lib/libbinaryen.so. A useful place to get builds from is the Emscripten SDK,
+as you can do
+
+ ./emsdk install tot
+
+after which ./upstream/ (from the emsdk dir) will contain builds of wasm-opt and
+libbinaryen.so (that are designed to run on as many systems as possible, by not
+depending on newer libc symbols, etc., as opposed to a normal local build).
+Thus, the full workflow could be
+
+ cd emsdk
+ ./emsdk install tot
+ cd ../binaryen
+ python3 scripts/bundle_clusterfuzz.py binaryen_wasm_fuzzer.tgz --build-dir=../emsdk/upstream
+
+When using --build-dir in this way, you are responsible for ensuring that the
+wasm-opt in the build dir is compatible with the scripts in the current dir
+(e.g., if run.py here passes a flag that is only in a newer/older version of
+wasm-opt, a problem can happen).
+
+Before uploading to ClusterFuzz, it is worth doing the following:
+
+ 1. Run the local fuzzer (scripts/fuzz_opt.py). That includes a ClusterFuzz
+ testcase handler, which simulates what ClusterFuzz does.
+
+ 2. Run the unit tests, which include smoke tests for our ClusterFuzz support:
+
+ python -m unittest test/unit/test_cluster_fuzz.py
+
+ Look at the logs, which will contain statistics on the wasm files the
+ fuzzer emits, and see that they look reasonable.
+
+ You should run the unit tests on the bundle you are about to upload, by
+ setting the proper env var like this (using the same filename as above):
+
+ BINARYEN_CLUSTER_FUZZ_BUNDLE=`pwd`/binaryen_wasm_fuzzer.tgz python -m unittest test/unit/test_cluster_fuzz.py
+
+ Note that you must pass an absolute filename (e.g. using pwd as shown).
+
+ The unittest logs should reflect that that bundle is being used at the
+ very start ("Using existing bundle: ..." rather than "Making a new
+ bundle"). Note that some of the unittests also create their own bundles, to
+ test the bundling script itself, so later down you will see logging of
+ bundle creation even if you provide a bundle.
+
+After uploading to ClusterFuzz, you can wait a while for it to run, and then:
+
+ 1. Inspect the log to see that we generate all the testcases properly, and
+ their sizes look reasonably random, etc.
+
+ 2. Inspect the sample testcase and run it locally, to see that
+
+ d8 --wasm-staging testcase.js
+
+ properly runs the testcase, emitting logging etc.
+
+ 3. Check the stats and crashes page (known crashes should at least be showing
+ up). Note that these may take longer to show up than 1 and 2.
+'''
+
+import os
+import sys
+import tarfile
+
+# Read the filenames first, as importing |shared| changes the directory.
+output_file = os.path.abspath(sys.argv[1])
+print(f'Bundling to: {output_file}')
+assert output_file.endswith('.tgz'), 'Can only generate a .tgz'
+
+build_dir = None
+if len(sys.argv) >= 3:
+    assert sys.argv[2].startswith('--build-dir=')
+    # Split only on the first '=', so that a path containing '=' survives.
+    build_dir = os.path.abspath(sys.argv[2].split('=', 1)[1])
+    # Delete exactly this argument, as importing |shared| scans argv.
+    del sys.argv[2]
+
+from test import shared # noqa
+
+# Pick where to get the builds: under --build-dir if given, else from |shared|.
+if build_dir:
+ binaryen_bin = os.path.join(build_dir, 'bin')
+ binaryen_lib = os.path.join(build_dir, 'lib')
+else:
+ binaryen_bin = shared.options.binaryen_bin
+ binaryen_lib = shared.options.binaryen_lib
+
+with tarfile.open(output_file, "w:gz") as tar:
+ # run.py, which is stored at the root of the bundle.
+ run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py')
+ print(f' .. run: {run}')
+ tar.add(run, arcname='run.py')
+
+ # fuzz_shell.js, which is stored under scripts/ in the bundle.
+ fuzz_shell = os.path.join(shared.options.binaryen_root, 'scripts', 'fuzz_shell.js')
+ print(f' .. fuzz_shell: {fuzz_shell}')
+ tar.add(fuzz_shell, arcname='scripts/fuzz_shell.js')
+
+ # wasm-opt binary, which is stored under bin/ in the bundle.
+ wasm_opt = os.path.join(binaryen_bin, 'wasm-opt')
+ print(f' .. wasm-opt: {wasm_opt}')
+ tar.add(wasm_opt, arcname='bin/wasm-opt')
+
+ # For a dynamic build we also need libbinaryen.so and possibly other files.
+ # Try both .so and .dylib suffixes for more OS coverage.
+ for suffix in ['.so', '.dylib']:
+ libbinaryen = os.path.join(binaryen_lib, f'libbinaryen{suffix}')
+ if os.path.exists(libbinaryen):
+ print(f' .. libbinaryen: {libbinaryen}')
+ tar.add(libbinaryen, arcname=f'lib/libbinaryen{suffix}')
+
+ # The emsdk build also includes some more necessary files (libc++ variants).
+ for name in [f'libc++{suffix}', f'libc++{suffix}.2', f'libc++{suffix}.2.0']:
+ path = os.path.join(binaryen_lib, name)
+ if os.path.exists(path):
+ print(f' ......... : {path}')
+ tar.add(path, arcname=f'lib/{name}')
+
+print('Done.')
+print('To run the tests on this bundle, do:')
+print()
+print(f'BINARYEN_CLUSTER_FUZZ_BUNDLE={output_file} python -m unittest test/unit/test_cluster_fuzz.py')
+print()
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
new file mode 100755
index 000000000..efddfc2d4
--- /dev/null
+++ b/scripts/clusterfuzz/run.py
@@ -0,0 +1,163 @@
+#
+# Copyright 2024 WebAssembly Community Group participants
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate
+a fixed number of testcases. This is a "blackbox fuzzer", see
+
+https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/
+
+This file should be bundled up together with the other files it needs, see
+bundle_clusterfuzz.py.
+'''
+
+import os
+import getopt
+import random
+import subprocess
+import sys
+
+# The V8 flags we put in the "fuzzer flags" files, which tell ClusterFuzz how to
+# run V8. By default we apply all staging flags.
+FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging'
+
+# Maximum size of the random data that we feed into wasm-opt -ttf. This is
+# smaller than fuzz_opt.py's INPUT_SIZE_MAX because that script is tuned for
+# fuzzing large wasm files (to reduce the overhead we have of launching many
+# processes per file), which is less of an issue on ClusterFuzz.
+MAX_RANDOM_SIZE = 15 * 1024
+
+# The prefix for fuzz files.
+FUZZ_FILENAME_PREFIX = 'fuzz-'
+
+# The prefix for flags files.
+FLAGS_FILENAME_PREFIX = 'flags-'
+
+# The name of the fuzzer (appears after FUZZ_FILENAME_PREFIX /
+# FLAGS_FILENAME_PREFIX).
+FUZZER_NAME_PREFIX = 'binaryen-'
+
+# The root directory of the bundle this will be in, which is the directory of
+# this very file.
+ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# The path to the wasm-opt binary that we run to generate testcases.
+FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt')
+
+# The path to the fuzz_shell.js script that will execute the wasm in each
+# testcase.
+JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')
+
+# The arguments we provide to wasm-opt to generate wasm files.
+FUZZER_ARGS = [
+ # Generate a wasm from random data.
+ '--translate-to-fuzz',
+ # Run some random passes, to further shape the random wasm we emit.
+ '--fuzz-passes',
+ # Enable all features but disable ones not yet ready for fuzzing. This may
+ # be a smaller set than fuzz_opt.py, as that enables a few experimental
+ # flags, while here we just fuzz with d8's --wasm-staging.
+ '-all',
+ '--disable-shared-everything',
+ '--disable-fp16',
+]
+
+
+# Builds the name of file number |index|, for either fuzz or flags files.
+def get_file_name(prefix, index):
+    return '{}{}{}.js'.format(prefix, FUZZER_NAME_PREFIX, index)
+
+
+# Builds the text of a .js fuzz file that executes the given wasm contents
+# using the standard JS shell.
+def get_js_file_contents(wasm_contents):
+    # Read the standard JS shell, which forms the tail of the testcase.
+    with open(JS_SHELL_PATH) as shell_file:
+        shell = shell_file.read()
+
+    # Define |binary| up front as a typed array of the wasm's byte values, so
+    # that it is used (rather than the normal mechanism where the wasm file's
+    # name is provided in argv).
+    byte_list = ','.join(map(str, wasm_contents))
+    return f'var binary = new Uint8Array([{byte_list}]);\n\n' + shell
+
+
+def main(argv):
+    # Emit --no_of_files testcases (each a fuzz JS file plus a flags file) into
+    # --output_dir. First, parse the options (--input_dir is ignored). See
+    # https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer
+    output_dir = '.'
+    num = 100
+    expected_flags = ['input_dir=', 'output_dir=', 'no_of_files=']
+    optlist, _ = getopt.getopt(argv[1:], '', expected_flags)
+    for option, value in optlist:
+        if option == '--output_dir':
+            output_dir = value
+        elif option == '--no_of_files':
+            num = int(value)
+
+    for i in range(1, num + 1):
+        input_data_file_path = os.path.join(output_dir, f'{i}.input')
+        wasm_file_path = os.path.join(output_dir, f'{i}.wasm')
+
+        # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
+        # detects as invalid). Just try again in such a case.
+        for attempt in range(0, 100):
+            # Generate random data.
+            random_size = random.SystemRandom().randint(1, MAX_RANDOM_SIZE)
+            with open(input_data_file_path, 'wb') as file:
+                file.write(os.urandom(random_size))
+
+            # Generate wasm from the random data.
+            cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
+            cmd += ['-o', wasm_file_path, input_data_file_path]
+            try:
+                subprocess.check_call(cmd)
+            except subprocess.CalledProcessError:
+                if attempt == 99:
+                    # Out of attempts: something is very wrong!
+                    raise
+                # Try again, with fresh random data.
+                print('(oops, retrying wasm-opt)')
+            else:
+                # Success, leave the loop.
+                break
+
+        # Generate a testcase from the wasm
+        with open(wasm_file_path, 'rb') as file:
+            wasm_contents = file.read()
+        testcase_file_path = os.path.join(output_dir,
+                                          get_file_name(FUZZ_FILENAME_PREFIX, i))
+        js_file_contents = get_js_file_contents(wasm_contents)
+        with open(testcase_file_path, 'w') as file:
+            file.write(js_file_contents)
+
+        # Emit a corresponding flags file.
+        flags_file_path = os.path.join(output_dir,
+                                       get_file_name(FLAGS_FILENAME_PREFIX, i))
+        with open(flags_file_path, 'w') as file:
+            file.write(FUZZER_FLAGS_FILE_CONTENTS)
+
+        print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes')
+
+        # Remove temporary files.
+        os.remove(input_data_file_path)
+        os.remove(wasm_file_path)
+
+    print(f'Created {num} testcases.')
+
+
+if __name__ == '__main__':
+ main(sys.argv)
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index bf712c821..cd583e026 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -36,6 +36,7 @@ import subprocess
import random
import re
import sys
+import tarfile
import time
import traceback
from os.path import abspath
@@ -1574,6 +1575,84 @@ class RoundtripText(TestCaseHandler):
run([in_bin('wasm-opt'), abspath('a.wast')] + FEATURE_OPTS)
+# Fuzz in a near-identical manner to how we fuzz on ClusterFuzz. This is mainly
+# to see that fuzzing that way works properly (it likely won't catch anything
+# the other fuzzers here catch, though it is possible). That is, running this
+# script continuously will give continuous cover that ClusterFuzz should be
+# running ok.
+#
+# Note that this is *not* deterministic like the other fuzzers: it runs run.py
+# like ClusterFuzz does, and that generates its own random data. If a bug is
+# caught here, it must be reduced manually.
+class ClusterFuzz(TestCaseHandler):
+    frequency = 0.1
+
+    def handle(self, wasm):
+        self.ensure()
+
+        # run.py() should emit these two files. Delete them to make sure they
+        # are created by run.py() in the next step.
+        fuzz_file = 'fuzz-binaryen-1.js'
+        flags_file = 'flags-binaryen-1.js'
+        for f in [fuzz_file, flags_file]:
+            if os.path.exists(f):
+                os.unlink(f)
+
+        # Call run.py(), similarly to how ClusterFuzz does.
+        run([sys.executable,
+             os.path.join(self.clusterfuzz_dir, 'run.py'),
+             '--output_dir=' + os.getcwd(),
+             '--no_of_files=1'])
+
+        # We should see the two files.
+        assert os.path.exists(fuzz_file)
+        assert os.path.exists(flags_file)
+
+        # Run the testcase in V8, similarly to how ClusterFuzz does.
+        cmd = [shared.V8]
+        # The flags are given in the flags file - we do *not* use our normal
+        # flags here! Each whitespace-separated flag is a separate argument.
+        with open(flags_file, 'r') as f:
+            flags = f.read()
+        cmd += flags.split()
+        # Run the fuzz file, which contains a modified fuzz_shell.js - we do
+        # *not* run fuzz_shell.js normally.
+        cmd.append(os.path.abspath(fuzz_file))
+        # No wasm file needs to be provided: it is hardcoded into the JS. Note
+        # that we use run_vm(), which will ignore known issues in our output and
+        # in V8. Those issues may cause V8 to e.g. reject a binary we emit that
+        # is invalid, but that should not be a problem for ClusterFuzz (it isn't
+        # a crash).
+        output = run_vm(cmd)
+
+        # Verify that we called something. The fuzzer should always emit at
+        # least one exported function (unless we've decided to ignore the entire
+        # run).
+        if output != IGNORE:
+            assert FUZZ_EXEC_CALL_PREFIX in output
+
+    def ensure(self):
+        # The first time we actually run, set things up: make a bundle like the
+        # one ClusterFuzz receives, and unpack it for execution into a dir. The
+        # existence of that dir shows we've ensured all we need.
+        if hasattr(self, 'clusterfuzz_dir'):
+            return
+
+        self.clusterfuzz_dir = 'clusterfuzz'
+        if os.path.exists(self.clusterfuzz_dir):
+            shutil.rmtree(self.clusterfuzz_dir)
+        os.mkdir(self.clusterfuzz_dir)
+
+        print('Bundling for ClusterFuzz')
+        bundle = 'fuzz_opt_clusterfuzz_bundle.tgz'
+        run([in_binaryen('scripts', 'bundle_clusterfuzz.py'), bundle])
+
+        print('Unpacking for ClusterFuzz')
+        # Use a context manager so the archive is closed even on an error.
+        with tarfile.open(bundle, "r:gz") as tar:
+            tar.extractall(path=self.clusterfuzz_dir)
+
+
# The global list of all test case handlers
testcase_handlers = [
FuzzExec(),
@@ -1585,7 +1664,8 @@ testcase_handlers = [
Merge(),
# TODO: enable when stable enough, and adjust |frequency| (see above)
# Split(),
- RoundtripText()
+ RoundtripText(),
+ ClusterFuzz(),
]
diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js
index d9a994896..ce817646e 100644
--- a/scripts/fuzz_shell.js
+++ b/scripts/fuzz_shell.js
@@ -25,14 +25,18 @@ if (typeof process === 'object' && typeof require === 'function') {
};
}
-// We are given the binary to run as a parameter.
-var binary = readBinary(argv[0]);
+// The binary to be run. This may be set already (by code that runs before this
+// script), and if not, we get the filename from argv.
+var binary;
+if (!binary) {
+ binary = readBinary(argv[0]);
+}
// Normally we call all the exports of the given wasm file. But, if we are
// passed a final parameter in the form of "exports:X,Y,Z" then we call
// specifically the exports X, Y, and Z.
var exportsToCall;
-if (argv[argv.length - 1].startsWith('exports:')) {
+if (argv.length > 0 && argv[argv.length - 1].startsWith('exports:')) {
exportsToCall = argv[argv.length - 1].substr('exports:'.length).split(',');
argv.pop();
}