diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/clusterfuzz/extract_wasms.py | 74 | ||||
-rwxr-xr-x | scripts/clusterfuzz/run.py | 38 |
2 files changed, 102 insertions, 10 deletions
diff --git a/scripts/clusterfuzz/extract_wasms.py b/scripts/clusterfuzz/extract_wasms.py
new file mode 100644
index 000000000..bb727810d
--- /dev/null
+++ b/scripts/clusterfuzz/extract_wasms.py
@@ -0,0 +1,74 @@
+#
+# Copyright 2024 WebAssembly Community Group participants
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Wasm extractor for testcases generated by the ClusterFuzz run.py script. Usage:
+
+extract_wasms.py INFILE.js OUTFILE
+
+That will find embedded wasm files in INFILE.js, of the form
+
+  var .. = new Uint8Array([..wasm_contents..]);
+
+and extract them into OUTFILE.0.wasm, OUTFILE.1.wasm, etc. It also emits
+OUTFILE.js which will no longer contain the embedded contents, after which the
+script can be run as
+
+  d8 OUTFILE.js -- OUTFILE.0.wasm
+
+That is, the embedded file can now be provided as a filename argument.
+'''
+
+import re
+import sys
+
+file_counter = 0
+
+
+def get_wasm_filename():
+    global file_counter
+    file_counter += 1
+    return f'{out}.{file_counter - 1}.wasm'
+
+
+in_js = sys.argv[1]
+out = sys.argv[2]
+
+with open(in_js) as f:
+    js = f.read()
+
+
+def repl(text):
+    # We found something of the form
+    #
+    #   var binary = new Uint8Array([..binary data as numbers..]);
+    #
+    # Parse out the numbers into a binary wasm file.
+    numbers = text.groups()[0]
+    numbers = numbers.split(',')
+    numbers = [int(n) for n in numbers]
+    with open(get_wasm_filename(), 'wb') as f:
+        f.write(bytes(numbers))
+
+    # Replace it with nothing.
+    return ''
+
+
+# Replace the wasm files and write them out.
+js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\);', repl, js)
+
+# Write out the new JS.
+with open(f'{out}.js', 'w') as f:
+    f.write(js)
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
index 6bbb74ef8..8ac880e0d 100755
--- a/scripts/clusterfuzz/run.py
+++ b/scripts/clusterfuzz/run.py
@@ -150,7 +150,18 @@ def get_js_file_contents(i, output_dir):
     # Prepend the wasm contents, so they are used (rather than the normal
     # mechanism where the wasm file's name is provided in argv).
     wasm_contents = get_wasm_contents(i, output_dir)
-    js = f'var binary = {wasm_contents};\n\n' + js
+    pre = f'var binary = {wasm_contents};\n'
+    bytes = wasm_contents.count(',')
+
+    # Sometimes add a second wasm file as well.
+    has_second = False
+    if system_random.random() < 0.333:
+        has_second = True
+        wasm_contents = get_wasm_contents(i, output_dir)
+        pre += f'var secondBinary = {wasm_contents};\n'
+        bytes += wasm_contents.count(',')
+
+    js = pre + '\n' + js
 
     # The default JS builds and runs the wasm. Append some random additional
     # operations as well, as more compiles and executions can find things. To
@@ -171,16 +182,23 @@ def get_js_file_contents(i, output_dir):
     x = math.pow(x, power)
     num = math.floor(x * MAX_EXTRA_JS_OPERATIONS)
     assert num >= 0 and num <= MAX_EXTRA_JS_OPERATIONS
+
+    extra_js_operations = [
+        # Compile and link the wasm again. Each link adds more to the total
+        # exports that we can call.
+        'build(binary);\n',
+        # Run all the exports we've accumulated.
+        'callExports();\n',
+    ]
+    if has_second:
+        extra_js_operations += [
+            'build(secondBinary);\n',
+        ]
+
     for i in range(num):
-        js += system_random.choice([
-            # Compile and link the wasm again. Each link adds more to the total
-            # exports that we can call.
-            'build(binary);\n',
-            # Run all the exports we've accumulated.
-            'callExports();\n',
-        ])
-
-    print(f'Created {wasm_contents.count(",")} wasm bytes')
+        js += system_random.choice(extra_js_operations)
+
+    print(f'Created {bytes} wasm bytes')
     return js
 
 