summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alon Zakai <azakai@google.com> 2024-11-21 15:04:29 -0800
committer: GitHub <noreply@github.com> 2024-11-21 15:04:29 -0800
commit: 4488a3e351214e038600f58e5806c31ad0bfae46 (patch)
tree: 1306b6a89a5fd54ec77e9ed7ce6cdcb4fd5fde1c
parent: 901ba6024f3ca9117c5720be3cf19ab75034070a (diff)
download: binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.gz
binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.bz2
binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.zip
[NFC] Refactor ClusterFuzz run.py (#7101)
This just moves code around. It will allow more code reuse in a later PR. Also add a bit of test logging.
-rwxr-xr-x scripts/clusterfuzz/run.py 99
-rw-r--r-- test/unit/test_cluster_fuzz.py 16
2 files changed, 66 insertions, 49 deletions
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
index 4b5e67fde..6bbb74ef8 100755
--- a/scripts/clusterfuzz/run.py
+++ b/scripts/clusterfuzz/run.py
@@ -93,17 +93,64 @@ def get_file_name(prefix, index):
system_random = random.SystemRandom()
-# Returns the contents of a .js fuzz file, given particular wasm contents that
-# we want to be executed.
-def get_js_file_contents(wasm_contents):
+# Generate a random wasm file, and return a string that creates a typed array of
+# those bytes, suitable for use in a JS file, in the form
+#
+# new Uint8Array([..wasm_contents..])
+#
+# Receives the testcase index and the output dir.
+def get_wasm_contents(i, output_dir):
+ input_data_file_path = os.path.join(output_dir, f'{i}.input')
+ wasm_file_path = os.path.join(output_dir, f'{i}.wasm')
+
+ # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
+ # detects as invalid). Just try again in such a case.
+ for attempt in range(0, 100):
+ # Generate random data.
+ random_size = system_random.randint(1, MAX_RANDOM_SIZE)
+ with open(input_data_file_path, 'wb') as file:
+ file.write(os.urandom(random_size))
+
+ # Generate wasm from the random data.
+ cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
+ cmd += ['-o', wasm_file_path, input_data_file_path]
+ try:
+ subprocess.check_call(cmd)
+ except subprocess.CalledProcessError:
+ # Try again.
+ print('(oops, retrying wasm-opt)')
+ attempt += 1
+ if attempt == 99:
+ # Something is very wrong!
+ raise
+ continue
+ # Success, leave the loop.
+ break
+
+ # Generate a testcase from the wasm
+ with open(wasm_file_path, 'rb') as file:
+ wasm_contents = file.read()
+
+ # Clean up temp files.
+ os.remove(wasm_file_path)
+ os.remove(input_data_file_path)
+
+ # Convert to a string, and wrap into a typed array.
+ wasm_contents = ','.join([str(c) for c in wasm_contents])
+ return f'new Uint8Array([{wasm_contents}])'
+
+
+# Returns the contents of a .js fuzz file, given the index of the testcase and
+# the output dir.
+def get_js_file_contents(i, output_dir):
# Start with the standard JS shell.
with open(JS_SHELL_PATH) as file:
js = file.read()
# Prepend the wasm contents, so they are used (rather than the normal
# mechanism where the wasm file's name is provided in argv).
- wasm_contents = ','.join([str(c) for c in wasm_contents])
- js = f'var binary = new Uint8Array([{wasm_contents}]);\n\n' + js
+ wasm_contents = get_wasm_contents(i, output_dir)
+ js = f'var binary = {wasm_contents};\n\n' + js
# The default JS builds and runs the wasm. Append some random additional
# operations as well, as more compiles and executions can find things. To
@@ -133,6 +180,8 @@ def get_js_file_contents(wasm_contents):
'callExports();\n',
])
+ print(f'Created {wasm_contents.count(",")} wasm bytes')
+
return js
@@ -150,39 +199,11 @@ def main(argv):
num = int(value)
for i in range(1, num + 1):
- input_data_file_path = os.path.join(output_dir, f'{i}.input')
- wasm_file_path = os.path.join(output_dir, f'{i}.wasm')
-
- # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
- # detects as invalid). Just try again in such a case.
- for attempt in range(0, 100):
- # Generate random data.
- random_size = system_random.randint(1, MAX_RANDOM_SIZE)
- with open(input_data_file_path, 'wb') as file:
- file.write(os.urandom(random_size))
-
- # Generate wasm from the random data.
- cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
- cmd += ['-o', wasm_file_path, input_data_file_path]
- try:
- subprocess.check_call(cmd)
- except subprocess.CalledProcessError:
- # Try again.
- print('(oops, retrying wasm-opt)')
- attempt += 1
- if attempt == 99:
- # Something is very wrong!
- raise
- continue
- # Success, leave the loop.
- break
-
- # Generate a testcase from the wasm
- with open(wasm_file_path, 'rb') as file:
- wasm_contents = file.read()
testcase_file_path = os.path.join(output_dir,
get_file_name(FUZZ_FILENAME_PREFIX, i))
- js_file_contents = get_js_file_contents(wasm_contents)
+
+ # Emit the JS file.
+ js_file_contents = get_js_file_contents(i, output_dir)
with open(testcase_file_path, 'w') as file:
file.write(js_file_contents)
@@ -192,11 +213,7 @@ def main(argv):
with open(flags_file_path, 'w') as file:
file.write(FUZZER_FLAGS_FILE_CONTENTS)
- print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes')
-
- # Remove temporary files.
- os.remove(input_data_file_path)
- os.remove(wasm_file_path)
+ print(f'Created testcase: {testcase_file_path}')
print(f'Created {num} testcases.')
diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py
index 8ec1d8928..387f65fd1 100644
--- a/test/unit/test_cluster_fuzz.py
+++ b/test/unit/test_cluster_fuzz.py
@@ -217,10 +217,10 @@ class ClusterFuzz(utils.BinaryenTestCase):
print()
- # struct.news appear to be distributed as mean 15, stddev 24, median 10,
- # so over 100 samples we are incredibly likely to see an interesting
- # number at least once. It is also incredibly unlikely for the stdev to
- # be zero.
+ print('struct.news are distributed as ~ mean 15, stddev 24, median 10')
+ # Given that, with 100 samples we are incredibly likely to see an
+ # interesting number at least once. It is also incredibly unlikely for
+ # the stdev to be zero.
print(f'mean struct.news: {statistics.mean(seen_struct_news)}')
print(f'stdev struct.news: {statistics.stdev(seen_struct_news)}')
print(f'median struct.news: {statistics.median(seen_struct_news)}')
@@ -229,7 +229,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
print()
- # sizes appear to be distributed as mean 2933, stddev 2011, median 2510.
+ print('sizes are distributed as ~ mean 2933, stddev 2011, median 2510')
print(f'mean sizes: {statistics.mean(seen_sizes)}')
print(f'stdev sizes: {statistics.stdev(seen_sizes)}')
print(f'median sizes: {statistics.median(seen_sizes)}')
@@ -238,7 +238,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
print()
- # exports appear to be distributed as mean 9, stddev 6, median 8.
+ print('exports are distributed as ~ mean 9, stddev 6, median 8')
print(f'mean exports: {statistics.mean(seen_exports)}')
print(f'stdev exports: {statistics.stdev(seen_exports)}')
print(f'median exports: {statistics.median(seen_exports)}')
@@ -264,8 +264,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
# probability to be a build or a call, so over the 100 testcases here we
# have an overwhelming probability to see at least one extra build and
# one extra call.
- #
- # builds and calls are distributed as mean 4, stddev 5, median 2.
+ print('JS builds are distributed as ~ mean 4, stddev 5, median 2')
print(f'mean JS builds: {statistics.mean(seen_builds)}')
print(f'stdev JS builds: {statistics.stdev(seen_builds)}')
print(f'median JS builds: {statistics.median(seen_builds)}')
@@ -276,6 +275,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
print()
+ print('JS calls are distributed as ~ mean 4, stddev 5, median 2')
print(f'mean JS calls: {statistics.mean(seen_calls)}')
print(f'stdev JS calls: {statistics.stdev(seen_calls)}')
print(f'median JS calls: {statistics.median(seen_calls)}')