summaryrefslogtreecommitdiff
path: root/test/unit/test_cluster_fuzz.py
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2024-11-26 15:12:36 -0800
committerGitHub <noreply@github.com>2024-11-26 15:12:36 -0800
commit73971d78e5355e8f08b4026b741992d78bd77476 (patch)
treee1f3b8761cb2c5a226e9b87daac954eeb5e91ed7 /test/unit/test_cluster_fuzz.py
parent4ffe27255ce99d452d05d4b352e3f6e1e9ca7d83 (diff)
downloadbinaryen-73971d78e5355e8f08b4026b741992d78bd77476.tar.gz
binaryen-73971d78e5355e8f08b4026b741992d78bd77476.tar.bz2
binaryen-73971d78e5355e8f08b4026b741992d78bd77476.zip
[Fuzzing] Emit secondary wasm files in ClusterFuzz testcases (#7122)
The two files are then linked and run by fuzz_shell.js (we already had this functionality in order to fuzz wasm-split). By adding multiple build and run commands for both the primary and secondary wasm files, we can end up with multiple instances of two different wasm files that call each other. To help with testing, add a script that extracts the wasm files from the testcase. This may also be useful in the future for testcase reduction.
Diffstat (limited to 'test/unit/test_cluster_fuzz.py')
-rw-r--r--test/unit/test_cluster_fuzz.py73
1 files changed, 56 insertions, 17 deletions
diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py
index 387f65fd1..56250d46a 100644
--- a/test/unit/test_cluster_fuzz.py
+++ b/test/unit/test_cluster_fuzz.py
@@ -1,3 +1,4 @@
+import glob
import os
import platform
import re
@@ -159,6 +160,9 @@ class ClusterFuzz(utils.BinaryenTestCase):
seen_sizes = []
seen_exports = []
+ # Second wasm files are also emitted sometimes.
+ seen_second_sizes = []
+
# The number of struct.news appears in the metrics report like this:
#
# StructNew : 18
@@ -179,23 +183,16 @@ class ClusterFuzz(utils.BinaryenTestCase):
with open(flags_file) as f:
self.assertEqual(f.read(), '--wasm-staging')
- # The fuzz files begin with
- #
- # var binary = new Uint8Array([..binary data as numbers..]);
- #
- with open(fuzz_file) as f:
- first_line = f.readline().strip()
- start = 'var binary = new Uint8Array(['
- end = ']);'
- self.assertTrue(first_line.startswith(start))
- self.assertTrue(first_line.endswith(end))
- numbers = first_line[len(start):-len(end)]
-
- # Convert to binary, and see that it is a valid file.
- numbers_array = [int(x) for x in numbers.split(',')]
- binary_file = os.path.join(temp_dir.name, 'file.wasm')
- with open(binary_file, 'wb') as f:
- f.write(bytes(numbers_array))
+ # Extract the wasm file(s) from the JS. First delete any stale
+ # extracted files so that we do not pick them up by mistake.
+ for f in glob.glob('extracted*'):
+ os.unlink(f)
+ extractor = shared.in_binaryen('scripts', 'clusterfuzz', 'extract_wasms.py')
+ subprocess.check_call([sys.executable, extractor, fuzz_file, 'extracted'])
+
+ # One wasm file must always exist, and must be valid.
+ binary_file = 'extracted.0.wasm'
+ assert os.path.exists(binary_file)
metrics = subprocess.check_output(
shared.WASM_OPT + ['-all', '--metrics', binary_file, '-q'], text=True)
@@ -215,6 +212,19 @@ class ClusterFuzz(utils.BinaryenTestCase):
self.assertEqual(len(exports), 1)
seen_exports.append(int(exports[0]))
+ # Sometimes a second wasm file should exist, and it must be valid
+ # too.
+ second_binary_file = 'extracted.1.wasm'
+ if os.path.exists(second_binary_file):
+ subprocess.check_call(
+ shared.WASM_OPT + ['-all', second_binary_file, '-q'])
+
+ # Note its size (we leave detailed metrics for the first one;
+ # they are generated by the same logic in run.py, so just
+ # verifying some valid second wasms are emitted, of random
+ # sizes, is enough).
+ seen_second_sizes.append(os.path.getsize(second_binary_file))
+
print()
print('struct.news are distributed as ~ mean 15, stddev 24, median 10')
@@ -247,10 +257,27 @@ class ClusterFuzz(utils.BinaryenTestCase):
print()
+ # Second files appear in ~ 1/3 of testcases.
+ print('number of second wasms should be around 33 +- 8')
+ print(f'number of second wasms: {len(seen_second_sizes)}')
+ assert seen_second_sizes, 'must see at least one second wasm'
+ print('second sizes are distributed as ~ mean 2933, stddev 2011, median 2510')
+ print(f'mean sizes: {statistics.mean(seen_second_sizes)}')
+ print(f'stdev sizes: {statistics.stdev(seen_second_sizes)}')
+ print(f'median sizes: {statistics.median(seen_second_sizes)}')
+ # Relax the assert on the max seen second size compared to the max seen
+ # primary size, as we see fewer of these. 500 is still proof of an
+ # interesting wasm file.
+ self.assertGreaterEqual(max(seen_second_sizes), 500)
+ self.assertGreater(statistics.stdev(seen_second_sizes), 0)
+
+ print()
+
# To check for interesting JS file contents, we'll note how many times
# we build and run the wasm.
seen_builds = []
seen_calls = []
+ seen_second_builds = []
for i in range(1, N + 1):
fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
@@ -258,6 +285,7 @@ class ClusterFuzz(utils.BinaryenTestCase):
js = f.read()
seen_builds.append(js.count('build(binary);'))
seen_calls.append(js.count('callExports();'))
+ seen_second_builds.append(js.count('build(secondBinary);'))
# There is always one build and one call (those are in the default
# fuzz_shell.js), and we add a couple of operations, each with equal
@@ -284,6 +312,17 @@ class ClusterFuzz(utils.BinaryenTestCase):
print()
+ # Second wasm files are more rarely added, only 1/3 of the time or so,
+ # but over 100 samples we are still overwhelmingly likely to see one.
+ print('JS second builds are distributed as ~ mean 1.8, stddev 2.2, median 1')
+ print(f'mean JS second builds: {statistics.mean(seen_second_builds)}')
+ print(f'stdev JS second builds: {statistics.stdev(seen_second_builds)}')
+ print(f'median JS second builds: {statistics.median(seen_second_builds)}')
+ self.assertGreaterEqual(max(seen_second_builds), 2)
+ self.assertGreater(statistics.stdev(seen_second_builds), 0)
+
+ print()
+
# "zzz" in test name so that this runs last. If it runs first, it can be
# confusing as it appears next to the logging of which bundle we use (see
# setUpClass).