#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
Wasm extractor for testcases generated by the ClusterFuzz run.py script. Usage:

extract_wasms.py INFILE.js OUTFILE

That will find embedded wasm files in INFILE.js, of the form

  var .. = new Uint8Array([..wasm_contents..]);

and extract them into OUTFILE.0.wasm, OUTFILE.1.wasm, etc. It also emits
OUTFILE.js which will no longer contain the embedded contents, after which the
script can be run as

  d8 OUTFILE.js -- OUTFILE.0.wasm

That is, the embedded file can now be provided as a filename argument.
'''

import re
import sys

# Number of wasm files written so far; doubles as the index of the next one.
file_counter = 0


def get_wasm_filename():
    '''Return the name for the next extracted wasm file and advance the counter.

    The name is OUTFILE.<index>.wasm, where <index> starts at 0 and OUTFILE is
    the module-level `out` value taken from the command line.
    '''
    global file_counter
    index = file_counter
    file_counter = index + 1
    return f'{out}.{index}.wasm'


in_js = sys.argv[1]
out = sys.argv[2]

with open(in_js) as f:
    js = f.read()


def repl(text):
    '''Substitution callback: write one embedded binary to disk, return ''.

    `text` is the regex match object for a statement of the form

      var binary = new Uint8Array([..binary data as numbers..]);

    whose single capture group holds the comma-separated byte values.
    '''
    # Turn the captured "1,2,3" text into a list of integers before touching
    # the filesystem, so a malformed list fails before any file is created.
    values = list(map(int, text.group(1).split(',')))

    with open(get_wasm_filename(), 'wb') as wasm_file:
        wasm_file.write(bytes(values))

    # The matched statement is dropped from the JS entirely.
    return ''


# Strip every embedded wasm array out of the JS, writing each one to its own
# file as a side effect of the substitution.
js = re.compile(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\);').sub(repl, js)

# Emit the JS that remains once the embedded binaries are gone.
with open(f'{out}.js', 'w') as f:
    f.write(js)