diff options
author | Alon Zakai <azakai@google.com> | 2024-11-21 15:04:29 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-21 15:04:29 -0800 |
commit | 4488a3e351214e038600f58e5806c31ad0bfae46 (patch) | |
tree | 1306b6a89a5fd54ec77e9ed7ce6cdcb4fd5fde1c | |
parent | 901ba6024f3ca9117c5720be3cf19ab75034070a (diff) | |
download | binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.gz binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.bz2 binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.zip |
[NFC] Refactor ClusterFuzz run.py (#7101)
This just moves code around. It will allow more code reuse in a later PR.
Also add a bit of test logging.
-rwxr-xr-x | scripts/clusterfuzz/run.py | 99 | ||||
-rw-r--r-- | test/unit/test_cluster_fuzz.py | 16 |
2 files changed, 66 insertions, 49 deletions
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py index 4b5e67fde..6bbb74ef8 100755 --- a/scripts/clusterfuzz/run.py +++ b/scripts/clusterfuzz/run.py @@ -93,17 +93,64 @@ def get_file_name(prefix, index): system_random = random.SystemRandom() -# Returns the contents of a .js fuzz file, given particular wasm contents that -# we want to be executed. -def get_js_file_contents(wasm_contents): +# Generate a random wasm file, and return a string that creates a typed array of +# those bytes, suitable for use in a JS file, in the form +# +# new Uint8Array([..wasm_contents..]) +# +# Receives the testcase index and the output dir. +def get_wasm_contents(i, output_dir): + input_data_file_path = os.path.join(output_dir, f'{i}.input') + wasm_file_path = os.path.join(output_dir, f'{i}.wasm') + + # wasm-opt may fail to run in rare cases (when the fuzzer emits code it + # detects as invalid). Just try again in such a case. + for attempt in range(0, 100): + # Generate random data. + random_size = system_random.randint(1, MAX_RANDOM_SIZE) + with open(input_data_file_path, 'wb') as file: + file.write(os.urandom(random_size)) + + # Generate wasm from the random data. + cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS + cmd += ['-o', wasm_file_path, input_data_file_path] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + # Try again. + print('(oops, retrying wasm-opt)') + attempt += 1 + if attempt == 99: + # Something is very wrong! + raise + continue + # Success, leave the loop. + break + + # Generate a testcase from the wasm + with open(wasm_file_path, 'rb') as file: + wasm_contents = file.read() + + # Clean up temp files. + os.remove(wasm_file_path) + os.remove(input_data_file_path) + + # Convert to a string, and wrap into a typed array. + wasm_contents = ','.join([str(c) for c in wasm_contents]) + return f'new Uint8Array([{wasm_contents}])' + + +# Returns the contents of a .js fuzz file, given the index of the testcase and +# the output dir. +def get_js_file_contents(i, output_dir): # Start with the standard JS shell. with open(JS_SHELL_PATH) as file: js = file.read() # Prepend the wasm contents, so they are used (rather than the normal # mechanism where the wasm file's name is provided in argv). - wasm_contents = ','.join([str(c) for c in wasm_contents]) - js = f'var binary = new Uint8Array([{wasm_contents}]);\n\n' + js + wasm_contents = get_wasm_contents(i, output_dir) + js = f'var binary = {wasm_contents};\n\n' + js # The default JS builds and runs the wasm. Append some random additional # operations as well, as more compiles and executions can find things. To @@ -133,6 +180,8 @@ def get_js_file_contents(wasm_contents): 'callExports();\n', ]) + print(f'Created {wasm_contents.count(",")} wasm bytes') + return js @@ -150,39 +199,11 @@ def main(argv): num = int(value) for i in range(1, num + 1): - input_data_file_path = os.path.join(output_dir, f'{i}.input') - wasm_file_path = os.path.join(output_dir, f'{i}.wasm') - - # wasm-opt may fail to run in rare cases (when the fuzzer emits code it - # detects as invalid). Just try again in such a case. - for attempt in range(0, 100): - # Generate random data. - random_size = system_random.randint(1, MAX_RANDOM_SIZE) - with open(input_data_file_path, 'wb') as file: - file.write(os.urandom(random_size)) - - # Generate wasm from the random data. - cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS - cmd += ['-o', wasm_file_path, input_data_file_path] - try: - subprocess.check_call(cmd) - except subprocess.CalledProcessError: - # Try again. - print('(oops, retrying wasm-opt)') - attempt += 1 - if attempt == 99: - # Something is very wrong! - raise - continue - # Success, leave the loop. - break - - # Generate a testcase from the wasm - with open(wasm_file_path, 'rb') as file: - wasm_contents = file.read() testcase_file_path = os.path.join(output_dir, get_file_name(FUZZ_FILENAME_PREFIX, i)) - js_file_contents = get_js_file_contents(wasm_contents) + + # Emit the JS file. + js_file_contents = get_js_file_contents(i, output_dir) with open(testcase_file_path, 'w') as file: file.write(js_file_contents) @@ -192,11 +213,7 @@ def main(argv): with open(flags_file_path, 'w') as file: file.write(FUZZER_FLAGS_FILE_CONTENTS) - print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes') - - # Remove temporary files. - os.remove(input_data_file_path) - os.remove(wasm_file_path) + print(f'Created testcase: {testcase_file_path}') print(f'Created {num} testcases.') diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py index 8ec1d8928..387f65fd1 100644 --- a/test/unit/test_cluster_fuzz.py +++ b/test/unit/test_cluster_fuzz.py @@ -217,10 +217,10 @@ class ClusterFuzz(utils.BinaryenTestCase): print() - # struct.news appear to be distributed as mean 15, stddev 24, median 10, - # so over 100 samples we are incredibly likely to see an interesting - # number at least once. It is also incredibly unlikely for the stdev to - # be zero. + print('struct.news are distributed as ~ mean 15, stddev 24, median 10') + # Given that, with 100 samples we are incredibly likely to see an + # interesting number at least once. It is also incredibly unlikely for + # the stdev to be zero. print(f'mean struct.news: {statistics.mean(seen_struct_news)}') print(f'stdev struct.news: {statistics.stdev(seen_struct_news)}') print(f'median struct.news: {statistics.median(seen_struct_news)}') @@ -229,7 +229,7 @@ class ClusterFuzz(utils.BinaryenTestCase): print() - # sizes appear to be distributed as mean 2933, stddev 2011, median 2510. + print('sizes are distributed as ~ mean 2933, stddev 2011, median 2510') print(f'mean sizes: {statistics.mean(seen_sizes)}') print(f'stdev sizes: {statistics.stdev(seen_sizes)}') print(f'median sizes: {statistics.median(seen_sizes)}') @@ -238,7 +238,7 @@ class ClusterFuzz(utils.BinaryenTestCase): print() - # exports appear to be distributed as mean 9, stddev 6, median 8. + print('exports are distributed as ~ mean 9, stddev 6, median 8') print(f'mean exports: {statistics.mean(seen_exports)}') print(f'stdev exports: {statistics.stdev(seen_exports)}') print(f'median exports: {statistics.median(seen_exports)}') @@ -264,8 +264,7 @@ class ClusterFuzz(utils.BinaryenTestCase): # probability to be a build or a call, so over the 100 testcases here we # have an overwhelming probability to see at least one extra build and # one extra call. - # - # builds and calls are distributed as mean 4, stddev 5, median 2. + print('JS builds are distributed as ~ mean 4, stddev 5, median 2') print(f'mean JS builds: {statistics.mean(seen_builds)}') print(f'stdev JS builds: {statistics.stdev(seen_builds)}') print(f'median JS builds: {statistics.median(seen_builds)}') @@ -276,6 +275,7 @@ class ClusterFuzz(utils.BinaryenTestCase): print() + print('JS calls are distributed as ~ mean 4, stddev 5, median 2') print(f'mean JS calls: {statistics.mean(seen_calls)}') print(f'stdev JS calls: {statistics.stdev(seen_calls)}') print(f'median JS calls: {statistics.median(seen_calls)}') |