#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate
a fixed number of testcases. This is a "blackbox fuzzer", see

https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/

This file should be bundled up together with the other files it needs, see
bundle_clusterfuzz.py.
'''

import os
import getopt
import math
import random
import subprocess
import sys

# The V8 flags we put in the "fuzzer flags" files, which tell ClusterFuzz how to
# run V8. By default we apply all staging flags.
FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging'

# Maximum size of the random data that we feed into wasm-opt -ttf. This is
# smaller than fuzz_opt.py's INPUT_SIZE_MAX because that script is tuned for
# fuzzing large wasm files (to reduce the overhead we have of launching many
# processes per file), which is less of an issue on ClusterFuzz.
MAX_RANDOM_SIZE = 15 * 1024

# Max and median amount of extra JS operations we append, like extra compiles or
# runs of the wasm. We allow a high max, but the median is far lower, so that
# typical testcases are not long-running.
MAX_EXTRA_JS_OPERATIONS = 40
MEDIAN_EXTRA_JS_OPERATIONS = 2

# How many times we run wasm-opt for a single wasm file before giving up.
MAX_WASM_OPT_ATTEMPTS = 100

# The prefix for fuzz files.
FUZZ_FILENAME_PREFIX = 'fuzz-'

# The prefix for flags files.
FLAGS_FILENAME_PREFIX = 'flags-'

# The name of the fuzzer (appears after FUZZ_FILENAME_PREFIX /
# FLAGS_FILENAME_PREFIX).
FUZZER_NAME_PREFIX = 'binaryen-'

# The root directory of the bundle this will be in, which is the directory of
# this very file.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# The path to the wasm-opt binary that we run to generate testcases.
FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt')

# The path to the fuzz_shell.js script that will execute the wasm in each
# testcase.
JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')

# The arguments we provide to wasm-opt to generate wasm files.
FUZZER_ARGS = [
    # Generate a wasm from random data.
    '--translate-to-fuzz',
    # Run some random passes, to further shape the random wasm we emit.
    '--fuzz-passes',
    # Enable all features but disable ones not yet ready for fuzzing. This may
    # be a smaller set than fuzz_opt.py, as that enables a few experimental
    # flags, while here we just fuzz with d8's --wasm-staging.
    '-all',
    '--disable-shared-everything',
    '--disable-fp16',
]


# Returns the file name for fuzz or flags files: the given prefix, then the
# fuzzer name, then the testcase index, with a .js suffix.
def get_file_name(prefix, index):
    return f'{prefix}{FUZZER_NAME_PREFIX}{index}.js'


# We should only use the system's random number generation, which is the best.
# (We also use urandom below, which uses this under the hood.)
system_random = random.SystemRandom()


# Generate a random wasm file, and return a string that creates a typed array of
# those bytes, suitable for use in a JS file, in the form
#
#   new Uint8Array([..wasm_contents..])
#
# Receives the testcase index and the output dir. Temporary "<i>.input" and
# "<i>.wasm" files are created in the output dir and removed again on success.
#
# Raises subprocess.CalledProcessError if wasm-opt fails on every one of
# MAX_WASM_OPT_ATTEMPTS attempts.
def get_wasm_contents(i, output_dir):
    input_data_file_path = os.path.join(output_dir, f'{i}.input')
    wasm_file_path = os.path.join(output_dir, f'{i}.wasm')

    # The command is the same on every attempt; only the contents of the input
    # file change.
    cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
    cmd += ['-o', wasm_file_path, input_data_file_path]

    # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
    # detects as invalid). Just try again in such a case.
    for attempt in range(1, MAX_WASM_OPT_ATTEMPTS + 1):
        # Generate random data.
        random_size = system_random.randint(1, MAX_RANDOM_SIZE)
        with open(input_data_file_path, 'wb') as file:
            file.write(os.urandom(random_size))

        # Generate wasm from the random data.
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError:
            if attempt == MAX_WASM_OPT_ATTEMPTS:
                # Something is very wrong!
                raise
            # Try again.
            print('(oops, retrying wasm-opt)')
            continue

        # Success, leave the loop.
        break

    # Generate a testcase from the wasm
    with open(wasm_file_path, 'rb') as file:
        wasm_contents = file.read()

    # Clean up temp files.
    os.remove(wasm_file_path)
    os.remove(input_data_file_path)

    # Convert to a string, and wrap into a typed array. Iterating over a bytes
    # object yields ints, so this emits comma-separated decimal byte values.
    wasm_contents = ','.join(str(c) for c in wasm_contents)
    return f'new Uint8Array([{wasm_contents}])'


# Returns the contents of a .js fuzz file, given the index of the testcase and
# the output dir. The result is the JS shell with one (or sometimes two) wasm
# binaries defined before it and a random number of extra operations appended.
# Also prints the total number of wasm bytes that were generated.
def get_js_file_contents(i, output_dir):
    # Start with the standard JS shell.
    with open(JS_SHELL_PATH) as file:
        js = file.read()

    # Prepend the wasm contents, so they are used (rather than the normal
    # mechanism where the wasm file's name is provided in argv).
    wasm_contents = get_wasm_contents(i, output_dir)
    pre = f'var binary = {wasm_contents};\n'
    # The byte values are comma-separated, so there is one more byte than there
    # are commas.
    num_bytes = wasm_contents.count(',') + 1

    # Sometimes add a second wasm file as well.
    has_second = False
    if system_random.random() < 0.333:
        has_second = True
        wasm_contents = get_wasm_contents(i, output_dir)
        pre += f'var secondBinary = {wasm_contents};\n'
        num_bytes += wasm_contents.count(',') + 1

    js = pre + '\n' + js

    # The default JS builds and runs the wasm. Append some random additional
    # operations as well, as more compiles and executions can find things. To
    # approximate a number in the range [0, MAX_EXTRA_JS_OPERATIONS) but with a
    # median of MEDIAN_EXTRA_JS_OPERATIONS, start in the range [0, 1) and then
    # raise it to the proper power, as multiplying by itself keeps the range
    # unchanged, but lowers the median. Specifically, the median begins at 0.5,
    # so
    #
    #   0.5^power = MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS
    #
    # is what we want, and if we take log2 of each side, gives us
    #
    #   power = log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS) / log2(0.5)
    #         = -log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS)
    power = -math.log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS)
    x = system_random.random()
    x = math.pow(x, power)
    num = math.floor(x * MAX_EXTRA_JS_OPERATIONS)
    assert 0 <= num <= MAX_EXTRA_JS_OPERATIONS

    extra_js_operations = [
        # Compile and link the wasm again. Each link adds more to the total
        # exports that we can call.
        'build(binary);\n',
        # Run all the exports we've accumulated.
        'callExports();\n',
    ]
    if has_second:
        extra_js_operations += [
            'build(secondBinary);\n',
        ]

    for _ in range(num):
        js += system_random.choice(extra_js_operations)

    print(f'Created {num_bytes} wasm bytes')

    # Some of the time, fuzz JSPI (similar to fuzz_opt.py, see details there).
    if system_random.random() < 0.25:
        # Prepend the flag to enable JSPI.
        js = 'var JSPI = 1;\n\n' + js

        # Un-comment the async and await keywords.
        js = js.replace('/* async */', 'async')
        js = js.replace('/* await */', 'await')

    return js


# Entry point: parses the ClusterFuzz command line in argv and writes the
# requested number of testcases (a fuzz .js file plus a matching flags file for
# each) into the output dir.
def main(argv):
    # Parse the options. See
    # https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer
    output_dir = '.'
    num = 100
    # --input_dir is accepted so that parsing does not fail when it is passed,
    # but its value is not used.
    expected_flags = ['input_dir=', 'output_dir=', 'no_of_files=']
    optlist, _ = getopt.getopt(argv[1:], '', expected_flags)
    for option, value in optlist:
        if option == '--output_dir':
            output_dir = value
        elif option == '--no_of_files':
            num = int(value)

    for i in range(1, num + 1):
        testcase_file_path = os.path.join(output_dir,
                                          get_file_name(FUZZ_FILENAME_PREFIX, i))

        # Emit the JS file.
        js_file_contents = get_js_file_contents(i, output_dir)
        with open(testcase_file_path, 'w') as file:
            file.write(js_file_contents)

        # Emit a corresponding flags file.
        flags_file_path = os.path.join(output_dir,
                                       get_file_name(FLAGS_FILENAME_PREFIX, i))
        with open(flags_file_path, 'w') as file:
            file.write(FUZZER_FLAGS_FILE_CONTENTS)

        print(f'Created testcase: {testcase_file_path}')

    print(f'Created {num} testcases.')


if __name__ == '__main__':
    main(sys.argv)