summary | refs | log | tree | commit | diff
path: root/scripts/clusterfuzz/extract_wasms.py
blob: bb727810d7593e36c08bdbdad282ef7d68786675 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
Wasm extractor for testcases generated by the ClusterFuzz run.py script. Usage:

extract_wasms.py INFILE.js OUTFILE

That will find embedded wasm files in INFILE.js, of the form

  var .. = new Uint8Array([..wasm_contents..]);

and extract them into OUTFILE.0.wasm, OUTFILE.1.wasm, etc. It also emits
OUTFILE.js which will no longer contain the embedded contents, after which the
script can be run as

  d8 OUTFILE.js -- OUTFILE.0.wasm

That is, the embedded file can now be provided as a filename argument.
'''

import re
import sys

# Number of wasm files handed out so far; doubles as the index of the next one.
file_counter = 0


def get_wasm_filename():
    '''
    Return the name for the next extracted wasm file.

    Names are built from the module-level output prefix `out` and a running
    index that starts at 0, giving OUTFILE.0.wasm, OUTFILE.1.wasm, and so on.
    Each call advances the module-level `file_counter`.
    '''
    global file_counter
    index = file_counter
    file_counter = index + 1
    return f'{out}.{index}.wasm'


# Both positional arguments are required. Fail with a usage hint instead of a
# bare IndexError traceback when the script is invoked without them. Any
# additional arguments are ignored, as before.
if len(sys.argv) < 3:
    sys.exit('usage: extract_wasms.py INFILE.js OUTFILE')

# Path of the JS testcase to read, and the prefix shared by every output file
# (OUTFILE.js, OUTFILE.0.wasm, ...).
in_js = sys.argv[1]
out = sys.argv[2]

# Read the whole testcase into memory so it can be processed as one string.
with open(in_js) as f:
    js = f.read()


def repl(text):
    '''
    Substitution callback: save one embedded wasm binary and erase it.

    `text` is the regex match object for a statement of the form

      var binary = new Uint8Array([..binary data as numbers..]);

    Its first capture group holds the comma-separated decimal byte values.
    They are written out as a binary file whose name comes from
    get_wasm_filename(). The returned empty string is the replacement text,
    so the matched statement disappears from the JS.
    '''
    # Turn "0,97,115,109,..." into a list of ints.
    byte_values = list(map(int, text.group(1).split(',')))

    with open(get_wasm_filename(), 'wb') as wasm_file:
        wasm_file.write(bytes(byte_values))

    # Nothing is left behind in place of the embedded array.
    return ''


# Strip every embedded wasm array out of the JS. The repl callback saves each
# one to its own file as the substitution proceeds.
embedded_wasm = re.compile(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\);')
js = embedded_wasm.sub(repl, js)

# Emit the remaining JS, which no longer contains the embedded contents.
with open(f'{out}.js', 'w') as js_file:
    js_file.write(js)