scripts/clusterfuzz/run.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163

#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate
a fixed number of testcases. This is a "blackbox fuzzer", see

https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/

This file should be bundled up together with the other files it needs, see
bundle_clusterfuzz.py.
'''

import os
import getopt
import random
import subprocess
import sys

# The V8 flags we put in the "fuzzer flags" files, which tell ClusterFuzz how to
# run V8. By default we apply all staging flags. This string is written
# verbatim as the entire contents of each flags file that main() emits.
FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging'

# Maximum size, in bytes, of the random data that we feed into wasm-opt -ttf
# (each input's size is picked at random between 1 and this value, inclusive).
# This is smaller than fuzz_opt.py's INPUT_SIZE_MAX because that script is
# tuned for fuzzing large wasm files (to reduce the overhead we have of
# launching many processes per file), which is less of an issue on ClusterFuzz.
MAX_RANDOM_SIZE = 15 * 1024

# The prefix for fuzz files (the .js testcases themselves).
FUZZ_FILENAME_PREFIX = 'fuzz-'

# The prefix for flags files (one is emitted alongside each fuzz file).
FLAGS_FILENAME_PREFIX = 'flags-'

# The name of the fuzzer (appears after FUZZ_FILENAME_PREFIX /
# FLAGS_FILENAME_PREFIX, and before the testcase index; see get_file_name()).
FUZZER_NAME_PREFIX = 'binaryen-'

# The root directory of the bundle this will be in, which is the directory of
# this very file. The paths below are resolved relative to it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# The path to the wasm-opt binary that we run to generate testcases.
FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt')

# The path to the fuzz_shell.js script that will execute the wasm in each
# testcase. Its contents are embedded in every fuzz file we emit.
JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')

# The arguments we provide to wasm-opt to generate wasm files. main() appends
# the output (-o) and input file paths after these.
FUZZER_ARGS = [
    # Generate a wasm from random data.
    '--translate-to-fuzz',
    # Run some random passes, to further shape the random wasm we emit.
    '--fuzz-passes',
    # Enable all features but disable ones not yet ready for fuzzing. This may
    # be a smaller set than fuzz_opt.py, as that enables a few experimental
    # flags, while here we just fuzz with d8's --wasm-staging.
    '-all',
    '--disable-shared-everything',
    '--disable-fp16',
]


# Builds the name of a fuzz or flags file: the given prefix, then the fuzzer
# name, then the testcase index, with a .js suffix (for example, a prefix of
# FUZZ_FILENAME_PREFIX and an index of 1 give 'fuzz-binaryen-1.js').
def get_file_name(prefix, index):
    return '{}{}{}.js'.format(prefix, FUZZER_NAME_PREFIX, index)


# Builds the text of a .js fuzz file that executes the given wasm. The wasm
# contents are an iterable of byte values (main() passes the bytes read from a
# wasm file); the result is a definition of `binary` holding those bytes,
# followed by the standard JS shell.
def get_js_file_contents(wasm_contents):
    # The standard JS shell forms the bulk of the testcase.
    with open(JS_SHELL_PATH) as shell_file:
        shell_source = shell_file.read()

    # Spell the bytes out as a comma-separated list of numbers, and define
    # `binary` from them ahead of the shell, so that the shell uses these
    # contents (rather than the normal mechanism where the wasm file's name is
    # provided in argv).
    byte_list = ','.join(map(str, wasm_contents))
    return f'var binary = new Uint8Array([{byte_list}]);\n\n{shell_source}'


def main(argv):
    '''
    Generates testcases for ClusterFuzz and writes them to the output directory.

    argv is the full command line, including the program name at index 0. The
    options are those that ClusterFuzz passes to a blackbox fuzzer:

      --output_dir=DIR   Where to write the testcases (default: '.').
      --no_of_files=N    How many testcases to create (default: 100).
      --input_dir=DIR    Accepted, but ignored.

    Each testcase is a fuzz file (the JS shell with a wasm binary embedded in
    it) plus a flags file with the same index. The intermediate N.input and
    N.wasm files are created in the output directory and removed once the
    testcase has been written.

    Raises getopt.GetoptError for an unrecognized option, ValueError if
    --no_of_files is not an integer, and subprocess.CalledProcessError if
    wasm-opt fails on every attempt for some testcase.
    '''
    # Parse the options. See
    # https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer
    output_dir = '.'
    num = 100
    expected_flags = ['input_dir=', 'output_dir=', 'no_of_files=']
    optlist, _ = getopt.getopt(argv[1:], '', expected_flags)
    for option, value in optlist:
        if option == '--output_dir':
            output_dir = value
        elif option == '--no_of_files':
            num = int(value)

    # How many times we try to run wasm-opt for a single testcase before giving
    # up.
    max_attempts = 100

    # One source of randomness is enough for the entire run.
    system_random = random.SystemRandom()

    for i in range(1, num + 1):
        input_data_file_path = os.path.join(output_dir, f'{i}.input')
        wasm_file_path = os.path.join(output_dir, f'{i}.wasm')

        # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
        # detects as invalid). Just try again in such a case, with new random
        # data. This loop always ends in either the break or the raise below.
        for attempt in range(1, max_attempts + 1):
            # Generate random data.
            random_size = system_random.randint(1, MAX_RANDOM_SIZE)
            with open(input_data_file_path, 'wb') as file:
                file.write(os.urandom(random_size))

            # Generate wasm from the random data.
            cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
            cmd += ['-o', wasm_file_path, input_data_file_path]
            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError:
                if attempt == max_attempts:
                    # Something is very wrong! Let the last error propagate.
                    raise
                # Try again.
                print('(oops, retrying wasm-opt)')
                continue
            # Success, leave the loop.
            break

        # Generate a testcase from the wasm
        with open(wasm_file_path, 'rb') as file:
            wasm_contents = file.read()
        testcase_file_path = os.path.join(output_dir,
                                          get_file_name(FUZZ_FILENAME_PREFIX, i))
        js_file_contents = get_js_file_contents(wasm_contents)
        with open(testcase_file_path, 'w') as file:
            file.write(js_file_contents)

        # Emit a corresponding flags file.
        flags_file_path = os.path.join(output_dir,
                                       get_file_name(FLAGS_FILENAME_PREFIX, i))
        with open(flags_file_path, 'w') as file:
            file.write(FUZZER_FLAGS_FILE_CONTENTS)

        print(f'Created testcase: {testcase_file_path}, {len(wasm_contents)} bytes')

        # Remove temporary files.
        os.remove(input_data_file_path)
        os.remove(wasm_file_path)

    print(f'Created {num} testcases.')


# When run as a script, pass the full command line (including the program
# name, which main() skips) to main().
if __name__ == '__main__':
    main(sys.argv)