diff options
author | Alon Zakai <azakai@google.com> | 2024-11-26 15:12:36 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-26 15:12:36 -0800 |
commit | 73971d78e5355e8f08b4026b741992d78bd77476 (patch) | |
tree | e1f3b8761cb2c5a226e9b87daac954eeb5e91ed7 /test/unit/test_cluster_fuzz.py | |
parent | 4ffe27255ce99d452d05d4b352e3f6e1e9ca7d83 (diff) | |
download | binaryen-73971d78e5355e8f08b4026b741992d78bd77476.tar.gz binaryen-73971d78e5355e8f08b4026b741992d78bd77476.tar.bz2 binaryen-73971d78e5355e8f08b4026b741992d78bd77476.zip |
[Fuzzing] Emit secondary wasm files in ClusterFuzz testcases (#7122)
The two files are then linked and run by fuzz_shell.js (this functionality already
existed in order to fuzz wasm-split). By adding multiple build and run commands
for both the primary and secondary wasm files, we can end up with multiple
instances of two different wasm files that call into each other.
To help with testing, add a script that extracts the wasm files from a testcase.
This may also be useful in the future for testcase reduction.
Diffstat (limited to 'test/unit/test_cluster_fuzz.py')
-rw-r--r-- | test/unit/test_cluster_fuzz.py | 73 |
1 file changed, 56 insertions, 17 deletions
diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py index 387f65fd1..56250d46a 100644 --- a/test/unit/test_cluster_fuzz.py +++ b/test/unit/test_cluster_fuzz.py @@ -1,3 +1,4 @@ +import glob import os import platform import re @@ -159,6 +160,9 @@ class ClusterFuzz(utils.BinaryenTestCase): seen_sizes = [] seen_exports = [] + # Second wasm files are also emitted sometimes. + seen_second_sizes = [] + # The number of struct.news appears in the metrics report like this: # # StructNew : 18 @@ -179,23 +183,16 @@ class ClusterFuzz(utils.BinaryenTestCase): with open(flags_file) as f: self.assertEqual(f.read(), '--wasm-staging') - # The fuzz files begin with - # - # var binary = new Uint8Array([..binary data as numbers..]); - # - with open(fuzz_file) as f: - first_line = f.readline().strip() - start = 'var binary = new Uint8Array([' - end = ']);' - self.assertTrue(first_line.startswith(start)) - self.assertTrue(first_line.endswith(end)) - numbers = first_line[len(start):-len(end)] - - # Convert to binary, and see that it is a valid file. - numbers_array = [int(x) for x in numbers.split(',')] - binary_file = os.path.join(temp_dir.name, 'file.wasm') - with open(binary_file, 'wb') as f: - f.write(bytes(numbers_array)) + # Extract the wasm file(s) from the JS. Make sure to not notice + # stale files. + for f in glob.glob('extracted*'): + os.unlink(f) + extractor = shared.in_binaryen('scripts', 'clusterfuzz', 'extract_wasms.py') + subprocess.check_call([sys.executable, extractor, fuzz_file, 'extracted']) + + # One wasm file must always exist, and must be valid. + binary_file = 'extracted.0.wasm' + assert os.path.exists(binary_file) metrics = subprocess.check_output( shared.WASM_OPT + ['-all', '--metrics', binary_file, '-q'], text=True) @@ -215,6 +212,19 @@ class ClusterFuzz(utils.BinaryenTestCase): self.assertEqual(len(exports), 1) seen_exports.append(int(exports[0])) + # Sometimes a second wasm file should exist, and it must be valid + # too. 
+ second_binary_file = 'extracted.1.wasm' + if os.path.exists(second_binary_file): + subprocess.check_call( + shared.WASM_OPT + ['-all', second_binary_file, '-q']) + + # Note its size (we leave detailed metrics for the first one; + # they are generated by the same logic in run.py, so just + # verifying some valid second wasms are emitted, of random + # sizes, is enough). + seen_second_sizes.append(os.path.getsize(second_binary_file)) + print() print('struct.news are distributed as ~ mean 15, stddev 24, median 10') @@ -247,10 +257,27 @@ class ClusterFuzz(utils.BinaryenTestCase): print() + # Second files appear in ~ 1/3 of testcases. + print('number of second wasms should be around 33 +- 8') + print(f'number of second wasms: {len(seen_second_sizes)}') + assert seen_second_sizes, 'must see at least one second wasm' + print('second sizes are distributed as ~ mean 2933, stddev 2011, median 2510') + print(f'mean sizes: {statistics.mean(seen_second_sizes)}') + print(f'stdev sizes: {statistics.stdev(seen_second_sizes)}') + print(f'median sizes: {statistics.median(seen_second_sizes)}') + # Relax the assert on the max seen second size compared to the max seen + # primary size, as we see fewer of these. 500 is still proof of an + # interesting wasm file. + self.assertGreaterEqual(max(seen_second_sizes), 500) + self.assertGreater(statistics.stdev(seen_second_sizes), 0) + + print() + # To check for interesting JS file contents, we'll note how many times # we build and run the wasm. 
seen_builds = [] seen_calls = [] + seen_second_builds = [] for i in range(1, N + 1): fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js') @@ -258,6 +285,7 @@ class ClusterFuzz(utils.BinaryenTestCase): js = f.read() seen_builds.append(js.count('build(binary);')) seen_calls.append(js.count('callExports();')) + seen_second_builds.append(js.count('build(secondBinary);')) # There is always one build and one call (those are in the default # fuzz_shell.js), and we add a couple of operations, each with equal @@ -284,6 +312,17 @@ class ClusterFuzz(utils.BinaryenTestCase): print() + # Second wasm files are more rarely added, only 1/3 of the time or so, + # but over 100 samples we are still overwhelmingly likely to see one. + print('JS second builds are distributed as ~ mean 1.8, stddev 2.2, median 1') + print(f'mean JS second builds: {statistics.mean(seen_second_builds)}') + print(f'stdev JS second builds: {statistics.stdev(seen_second_builds)}') + print(f'median JS second builds: {statistics.median(seen_second_builds)}') + self.assertGreaterEqual(max(seen_second_builds), 2) + self.assertGreater(statistics.stdev(seen_second_builds), 0) + + print() + # "zzz" in test name so that this runs last. If it runs first, it can be # confusing as it appears next to the logging of which bundle we use (see # setUpClass). |