summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Alon Zakai <azakai@google.com> 2024-11-26 15:12:36 -0800
committer GitHub <noreply@github.com> 2024-11-26 15:12:36 -0800
commit 73971d78e5355e8f08b4026b741992d78bd77476 (patch)
tree e1f3b8761cb2c5a226e9b87daac954eeb5e91ed7 /scripts
parent 4ffe27255ce99d452d05d4b352e3f6e1e9ca7d83 (diff)
download binaryen-73971d78e5355e8f08b4026b741992d78bd77476.tar.gz
binaryen-73971d78e5355e8f08b4026b741992d78bd77476.tar.bz2
binaryen-73971d78e5355e8f08b4026b741992d78bd77476.zip
[Fuzzing] Emit secondary wasm files in ClusterFuzz testcases (#7122)
The two files are then linked and run by fuzz_shell.js (we had this functionality already in order to fuzz wasm-split). By adding multiple build and run commands of both the primary and secondary wasm files, we can end up with multiple instances of two different wasm files that call between themselves. To help testing, add a script that extracts the wasm files from the testcase. This may also be useful in the future for testcase reduction.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/clusterfuzz/extract_wasms.py 74
-rwxr-xr-x scripts/clusterfuzz/run.py 38
2 files changed, 102 insertions, 10 deletions
diff --git a/scripts/clusterfuzz/extract_wasms.py b/scripts/clusterfuzz/extract_wasms.py
new file mode 100644
index 000000000..bb727810d
--- /dev/null
+++ b/scripts/clusterfuzz/extract_wasms.py
@@ -0,0 +1,74 @@
+#
+# Copyright 2024 WebAssembly Community Group participants
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Wasm extractor for testcases generated by the ClusterFuzz run.py script. Usage:
+
+extract_wasms.py INFILE.js OUTFILE
+
+That will find embedded wasm files in INFILE.js, of the form
+
+ var .. = new Uint8Array([..wasm_contents..]);
+
+and extract them into OUTFILE.0.wasm, OUTFILE.1.wasm, etc. It also emits
+OUTFILE.js which will no longer contain the embedded contents, after which the
+script can be run as
+
+ d8 OUTFILE.js -- OUTFILE.0.wasm
+
+That is, the embedded file can now be provided as a filename argument.
+'''
+
+import re
+import sys
+
+file_counter = 0  # Number of wasm filenames handed out so far.
+
+
+def get_wasm_filename():  # Return the next name: {out}.0.wasm, {out}.1.wasm, ...
+ global file_counter
+ file_counter += 1
+ return f'{out}.{file_counter - 1}.wasm'  # -1 because the counter was already bumped.
+
+
+in_js = sys.argv[1]  # INFILE.js: the testcase to read.
+out = sys.argv[2]  # OUTFILE: prefix for the emitted .N.wasm files and the .js file.
+
+with open(in_js) as f:
+ js = f.read()  # The whole testcase as one string.
+
+
+def repl(text):
+ # Callback for re.sub(): |text| is the match object for one declaration
+ #
+ # var binary = new Uint8Array([..binary data as numbers..]);
+ #
+ # Group 1 is the comma-separated byte values; write them to the next wasm file.
+ numbers = text.groups()[0]
+ numbers = numbers.split(',')
+ numbers = [int(n) for n in numbers]
+ with open(get_wasm_filename(), 'wb') as f:
+ f.write(bytes(numbers))
+
+ # Return '' so that re.sub() removes the matched declaration from the JS.
+ return ''
+
+
+# Find each embedded wasm array (digits and commas only); repl() writes it out and removes it.
+js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\);', repl, js)
+
+# Write out the new JS, without the embedded arrays, as {out}.js.
+with open(f'{out}.js', 'w') as f:
+ f.write(js)
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
index 6bbb74ef8..8ac880e0d 100755
--- a/scripts/clusterfuzz/run.py
+++ b/scripts/clusterfuzz/run.py
@@ -150,7 +150,18 @@ def get_js_file_contents(i, output_dir):
# Prepend the wasm contents, so they are used (rather than the normal
# mechanism where the wasm file's name is provided in argv).
wasm_contents = get_wasm_contents(i, output_dir)
- js = f'var binary = {wasm_contents};\n\n' + js
+ pre = f'var binary = {wasm_contents};\n'
+ bytes = wasm_contents.count(',')
+
+ # Sometimes add a second wasm file as well.
+ has_second = False
+ if system_random.random() < 0.333:
+ has_second = True
+ wasm_contents = get_wasm_contents(i, output_dir)
+ pre += f'var secondBinary = {wasm_contents};\n'
+ bytes += wasm_contents.count(',')
+
+ js = pre + '\n' + js
# The default JS builds and runs the wasm. Append some random additional
# operations as well, as more compiles and executions can find things. To
@@ -171,16 +182,23 @@ def get_js_file_contents(i, output_dir):
x = math.pow(x, power)
num = math.floor(x * MAX_EXTRA_JS_OPERATIONS)
assert num >= 0 and num <= MAX_EXTRA_JS_OPERATIONS
+
+ extra_js_operations = [
+ # Compile and link the wasm again. Each link adds more to the total
+ # exports that we can call.
+ 'build(binary);\n',
+ # Run all the exports we've accumulated.
+ 'callExports();\n',
+ ]
+ if has_second:
+ extra_js_operations += [
+ 'build(secondBinary);\n',
+ ]
+
for i in range(num):
- js += system_random.choice([
- # Compile and link the wasm again. Each link adds more to the total
- # exports that we can call.
- 'build(binary);\n',
- # Run all the exports we've accumulated.
- 'callExports();\n',
- ])
-
- print(f'Created {wasm_contents.count(",")} wasm bytes')
+ js += system_random.choice(extra_js_operations)
+
+ print(f'Created {bytes} wasm bytes')
return js