[NFC] Refactor ClusterFuzz run.py (#7101)

This just moves code around, which will allow more code reuse in a later PR. It also adds a bit of test logging.
author: Alon Zakai <azakai@google.com> 2024-11-21 15:04:29 -0800
committer: GitHub <noreply@github.com> 2024-11-21 15:04:29 -0800
commit: 4488a3e351214e038600f58e5806c31ad0bfae46 (patch)
tree: 1306b6a89a5fd54ec77e9ed7ce6cdcb4fd5fde1c
parent: 901ba6024f3ca9117c5720be3cf19ab75034070a (diff)
download: binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.gz
binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.bz2
binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.zip
2 files changed, 66 insertions, 49 deletions
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
index 4b5e67fde..6bbb74ef8 100755
--- a/scripts/clusterfuzz/run.py
+++ b/scripts/clusterfuzz/run.py
@@ -93,17 +93,64 @@ def get_file_name(prefix, index):
 system_random = random.SystemRandom()
 
 
-# Returns the contents of a .js fuzz file, given particular wasm contents that
-# we want to be executed.
-def get_js_file_contents(wasm_contents):
+# Generate a random wasm file, and return a string that creates a typed array of
+# those bytes, suitable for use in a JS file, in the form
+#
+#   new Uint8Array([..wasm_contents..])
+#
+# Receives the testcase index and the output dir.
+def get_wasm_contents(i, output_dir):
+    input_data_file_path = os.path.join(output_dir, f'{i}.input')
+    wasm_file_path = os.path.join(output_dir, f'{i}.wasm')
+
+    # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
+    # detects as invalid). Just try again in such a case.
+    for attempt in range(0, 100):
+        # Generate random data.
+        random_size = system_random.randint(1, MAX_RANDOM_SIZE)
+        with open(input_data_file_path, 'wb') as file:
+            file.write(os.urandom(random_size))
+
+        # Generate wasm from the random data.
+        cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
+        cmd += ['-o', wasm_file_path, input_data_file_path]
+        try:
+            subprocess.check_call(cmd)
+        except subprocess.CalledProcessError:
+            # Try again.
+            print('(oops, retrying wasm-opt)')
+            attempt += 1
+            if attempt == 99:
+                # Something is very wrong!
+                raise
+            continue
+        # Success, leave the loop.
+        break
+
+    # Generate a testcase from the wasm
+    with open(wasm_file_path, 'rb') as file:
+        wasm_contents = file.read()
+
+    # Clean up temp files.
+    os.remove(wasm_file_path)
+    os.remove(input_data_file_path)
+
+    # Convert to a string, and wrap into a typed array.
+    wasm_contents = ','.join([str(c) for c in wasm_contents])
+    return f'new Uint8Array([{wasm_contents}])'
+
+
+# Returns the contents of a .js fuzz file, given the index of the testcase and
+# the output dir.
+def get_js_file_contents(i, output_dir):
     # Start with the standard JS shell.
     with open(JS_SHELL_PATH) as file:
         js = file.read()
 
     # Prepend the wasm contents, so they are used (rather than the normal
     # mechanism where the wasm file's name is provided in argv).
-    wasm_contents = ','.join([str(c) for c in wasm_contents])
-    js = f'var binary = new Uint8Array([{wasm_contents}]);\n\n' + js
+    wasm_contents = get_wasm_contents(i, output_dir)
+    js = f'var binary = {wasm_contents};\n\n' + js
 
     # The default JS builds and runs the wasm. Append some random additional
     # operations as well, as more compiles and executions can find things. To
@@ -133,6 +180,8 @@ def get_js_file_contents(wasm_contents):
             'callExports();\n',
         ])
 
+    print(f'Created {wasm_contents.count(",")} wasm bytes')
+
     return js
 
 
@@ -150,39 +199,11 @@ def main(argv):
             num = int(value)
 
     for i in range(1, num + 1):
-        input_data_file_path = os.path.join(output_dir, f'{i}.input')
-        wasm_file_path = os.path.join(output_dir, f'{i}.wasm')
-
-        # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
-        # detects as invalid). Just try again in such a case.
-        for attempt in range(0, 100):
-            # Generate random data.
-            random_size = system_random.randint(1, MAX_RANDOM_SIZE)
-            with open(input_data_file_path, 'wb') as file:
-                file.write(os.urandom(random_size))
-
-            # Generate wasm from the random data.
-            cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
-            cmd += ['-o', wasm_file_path, input_data_file_path]
-            try:
-                subprocess.check_call(cmd)
-            except subprocess.CalledProcessError:
-                # Try again.
-                print('(oops, retrying wasm-opt)')
-                attempt += 1
-                if attempt == 99:
-                    # Something is very wrong!
-                    raise
-                continue
-            # Success, leave the loop.
-            break
-
-        # Generate a testcase from the wasm
-        with open(wasm_file_path, 'rb') as file:
-            wasm_contents = file.read()
         testcase_file_path = os.path.join(output_dir,
                                           get_file_name(FUZZ_FILENAME_PREFIX, i))
-        js_file_contents = get_js_file_contents(wasm_contents)
+
+        # Emit the JS file.
+        js_file_contents = get_js_file_contents(i, output_dir)
         with open(testcase_file_path, 'w') as file:
             file.write(js_file_contents)
 
@@ -192,11 +213,7 @@ def main(argv):
         with open(flags_file_path, 'w') as file:
             file.write(FUZZER_FLAGS_FILE_CONTENTS)
 
-        print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes')
-
-        # Remove temporary files.
-        os.remove(input_data_file_path)
-        os.remove(wasm_file_path)
+        print(f'Created testcase: {testcase_file_path}')
 
     print(f'Created {num} testcases.')
 
diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py
index 8ec1d8928..387f65fd1 100644
--- a/test/unit/test_cluster_fuzz.py
+++ b/test/unit/test_cluster_fuzz.py
@@ -217,10 +217,10 @@ class ClusterFuzz(utils.BinaryenTestCase):
 
         print()
 
-        # struct.news appear to be distributed as mean 15, stddev 24, median 10,
-        # so over 100 samples we are incredibly likely to see an interesting
-        # number at least once. It is also incredibly unlikely for the stdev to
-        # be zero.
+        print('struct.news are distributed as ~ mean 15, stddev 24, median 10')
+        # Given that, with 100 samples we are incredibly likely to see an
+        # interesting number at least once. It is also incredibly unlikely for
+        # the stdev to be zero.
         print(f'mean struct.news:   {statistics.mean(seen_struct_news)}')
         print(f'stdev struct.news:  {statistics.stdev(seen_struct_news)}')
         print(f'median struct.news: {statistics.median(seen_struct_news)}')
@@ -229,7 +229,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
 
         print()
 
-        # sizes appear to be distributed as mean 2933, stddev 2011, median 2510.
+        print('sizes are distributed as ~ mean 2933, stddev 2011, median 2510')
         print(f'mean sizes:   {statistics.mean(seen_sizes)}')
         print(f'stdev sizes:  {statistics.stdev(seen_sizes)}')
         print(f'median sizes: {statistics.median(seen_sizes)}')
@@ -238,7 +238,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
 
         print()
 
-        # exports appear to be distributed as mean 9, stddev 6, median 8.
+        print('exports are distributed as ~ mean 9, stddev 6, median 8')
         print(f'mean exports:   {statistics.mean(seen_exports)}')
         print(f'stdev exports:  {statistics.stdev(seen_exports)}')
         print(f'median exports: {statistics.median(seen_exports)}')
@@ -264,8 +264,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
         # probability to be a build or a call, so over the 100 testcases here we
         # have an overwhelming probability to see at least one extra build and
         # one extra call.
-        #
-        # builds and calls are distributed as mean 4, stddev 5, median 2.
+        print('JS builds are distributed as ~ mean 4, stddev 5, median 2')
         print(f'mean JS builds:   {statistics.mean(seen_builds)}')
         print(f'stdev JS builds:  {statistics.stdev(seen_builds)}')
         print(f'median JS builds: {statistics.median(seen_builds)}')
@@ -276,6 +275,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
 
         print()
 
+        print('JS calls are distributed as ~ mean 4, stddev 5, median 2')
         print(f'mean JS calls:   {statistics.mean(seen_calls)}')
         print(f'stdev JS calls:  {statistics.stdev(seen_calls)}')
         print(f'median JS calls: {statistics.median(seen_calls)}')
author	Alon Zakai <azakai@google.com>	2024-11-21 15:04:29 -0800
committer	GitHub <noreply@github.com>	2024-11-21 15:04:29 -0800
commit	4488a3e351214e038600f58e5806c31ad0bfae46 (patch)
tree	1306b6a89a5fd54ec77e9ed7ce6cdcb4fd5fde1c
parent	901ba6024f3ca9117c5720be3cf19ab75034070a (diff)
download	binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.gz binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.tar.bz2 binaryen-4488a3e351214e038600f58e5806c31ad0bfae46.zip