# summary | refs | log | tree | commit | diff
# path: root/scripts/clusterfuzz/run.py
# blob: 8ac880e0de07d6b94e673b3104715609c98b043c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate
a fixed number of testcases. This is a "blackbox fuzzer", see

https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/

This file should be bundled up together with the other files it needs, see
bundle_clusterfuzz.py.
'''

import os
import getopt
import math
import random
import subprocess
import sys


# The V8 flags we put in the "fuzzer flags" files, which tell ClusterFuzz how to
# run V8. By default we apply all staging flags. main() writes this string
# verbatim into the flags file of every testcase.
FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging'

# Maximum size, in bytes, of the random data that we feed into wasm-opt -ttf.
# The actual size of each input is picked at random between 1 and this value
# (inclusive). This is smaller than fuzz_opt.py's INPUT_SIZE_MAX because that
# script is tuned for fuzzing large wasm files (to reduce the overhead we have
# of launching many processes per file), which is less of an issue on
# ClusterFuzz.
MAX_RANDOM_SIZE = 15 * 1024

# Max and median amount of extra JS operations we append, like extra compiles or
# runs of the wasm. We allow a high max, but the median is far lower, so that
# typical testcases are not long-running. See get_js_file_contents() for how
# these two values shape the random distribution.
MAX_EXTRA_JS_OPERATIONS = 40
MEDIAN_EXTRA_JS_OPERATIONS = 2

# The prefix for fuzz files (the JS testcases themselves).
FUZZ_FILENAME_PREFIX = 'fuzz-'

# The prefix for flags files (one is emitted next to each fuzz file).
FLAGS_FILENAME_PREFIX = 'flags-'

# The name of the fuzzer (appears after FUZZ_FILENAME_PREFIX /
# FLAGS_FILENAME_PREFIX). Together with get_file_name() this gives names such
# as fuzz-binaryen-1.js and flags-binaryen-1.js.
FUZZER_NAME_PREFIX = 'binaryen-'

# The root directory of the bundle this will be in, which is the directory of
# this very file. All other bundled files are located relative to it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# The path to the wasm-opt binary that we run to generate testcases.
FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt')

# The path to the fuzz_shell.js script that will execute the wasm in each
# testcase. Its contents are copied into every emitted JS file.
JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')

# The arguments we provide to wasm-opt to generate wasm files. The output and
# input file arguments are appended separately for each invocation.
FUZZER_ARGS = [
    # Generate a wasm from random data.
    '--translate-to-fuzz',
    # Run some random passes, to further shape the random wasm we emit.
    '--fuzz-passes',
    # Enable all features but disable ones not yet ready for fuzzing. This may
    # be a smaller set than fuzz_opt.py, as that enables a few experimental
    # flags, while here we just fuzz with d8's --wasm-staging.
    '-all',
    '--disable-shared-everything',
    '--disable-fp16',
]


# Builds the name of a fuzz or flags file: the given prefix, followed by the
# fuzzer name and the testcase index, with a .js extension.
def get_file_name(prefix, index):
    return '{}{}{}.js'.format(prefix, FUZZER_NAME_PREFIX, index)


# We should only use the system's random number generation, which is the best.
# random.SystemRandom draws its randomness from os.urandom, the same source
# that get_wasm_contents() reads directly when it creates random input data.
system_random = random.SystemRandom()


# Generate a random wasm file, and return a string that creates a typed array of
# those bytes, suitable for use in a JS file, in the form
#
#   new Uint8Array([..wasm_contents..])
#
# Receives the testcase index and the output dir. The index is only used to
# name two temporary files in the output dir (<index>.input and <index>.wasm),
# which are removed again before returning, whether we succeed or fail.
#
# Raises subprocess.CalledProcessError if wasm-opt fails on every attempt.
def get_wasm_contents(i, output_dir):
    input_data_file_path = os.path.join(output_dir, f'{i}.input')
    wasm_file_path = os.path.join(output_dir, f'{i}.wasm')

    # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
    # detects as invalid). Just try again in such a case, with fresh random
    # data, up to this many times in total.
    max_attempts = 100

    try:
        for attempt in range(1, max_attempts + 1):
            # Generate random data.
            random_size = system_random.randint(1, MAX_RANDOM_SIZE)
            with open(input_data_file_path, 'wb') as file:
                file.write(os.urandom(random_size))

            # Generate wasm from the random data.
            cmd = [FUZZER_BINARY_PATH, *FUZZER_ARGS]
            cmd += ['-o', wasm_file_path, input_data_file_path]
            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError:
                if attempt == max_attempts:
                    # Something is very wrong! Let the caller see the error
                    # from the final attempt.
                    raise
                # Try again.
                print('(oops, retrying wasm-opt)')
                continue
            # Success, leave the loop.
            break

        # Read back the wasm so that we can embed it in a testcase.
        with open(wasm_file_path, 'rb') as file:
            wasm_contents = file.read()
    finally:
        # Clean up temp files, even when we fail, so that nothing but real
        # testcases is left behind in the output dir. Either file may be
        # missing if we failed before it was created.
        for temp_path in (wasm_file_path, input_data_file_path):
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

    # Convert to a string of comma-separated byte values, and wrap into a typed
    # array.
    wasm_contents = ','.join(map(str, wasm_contents))
    return f'new Uint8Array([{wasm_contents}])'


# Returns the number of elements in the source text of a typed array as
# produced by get_wasm_contents(), that is, a string of the form
#
#   new Uint8Array([1,2,3])
#
# Elements are separated by single commas, so a non-empty array has one more
# element than it has commas.
def _count_typed_array_elements(typed_array_source):
    start = typed_array_source.index('[') + 1
    end = typed_array_source.rindex(']')
    elements = typed_array_source[start:end]
    if not elements:
        return 0
    return elements.count(',') + 1


# Returns the contents of a .js fuzz file, given the index of the testcase and
# the output dir. The result is, in order: the definition of one (or sometimes
# two) wasm binaries as typed arrays, the JS shell, and a random number of
# extra build/run operations. Also prints the total number of wasm bytes that
# were embedded.
def get_js_file_contents(i, output_dir):
    # Start with the standard JS shell.
    with open(JS_SHELL_PATH) as file:
        js = file.read()

    # Prepend the wasm contents, so they are used (rather than the normal
    # mechanism where the wasm file's name is provided in argv).
    wasm_contents = get_wasm_contents(i, output_dir)
    pre = f'var binary = {wasm_contents};\n'
    wasm_bytes = _count_typed_array_elements(wasm_contents)

    # Sometimes add a second wasm file as well.
    has_second = False
    if system_random.random() < 0.333:
        has_second = True
        wasm_contents = get_wasm_contents(i, output_dir)
        pre += f'var secondBinary = {wasm_contents};\n'
        wasm_bytes += _count_typed_array_elements(wasm_contents)

    js = pre + '\n' + js

    # The default JS builds and runs the wasm. Append some random additional
    # operations as well, as more compiles and executions can find things. To
    # approximate a number in the range [0, MAX_EXTRA_JS_OPERATIONS) but with a
    # median of MEDIAN_EXTRA_JS_OPERATIONS, start in the range [0, 1) and then
    # raise it to the proper power, as multiplying by itself keeps the range
    # unchanged, but lowers the median. Specifically, the median begins at 0.5,
    # so
    #
    #   0.5^power = MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS
    #
    # is what we want, and if we take log2 of each side, gives us
    #
    #   power =  log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS) / log2(0.5)
    #         = -log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS)
    power = -math.log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS)
    x = system_random.random()
    x = math.pow(x, power)
    num = math.floor(x * MAX_EXTRA_JS_OPERATIONS)
    assert 0 <= num <= MAX_EXTRA_JS_OPERATIONS

    extra_js_operations = [
        # Compile and link the wasm again. Each link adds more to the total
        # exports that we can call.
        'build(binary);\n',
        # Run all the exports we've accumulated.
        'callExports();\n',
    ]
    if has_second:
        # The second binary only exists in some testcases, so only refer to it
        # when we actually emitted it above.
        extra_js_operations += [
            'build(secondBinary);\n',
        ]

    # Pick each extra operation independently at random.
    js += ''.join(system_random.choice(extra_js_operations)
                  for _ in range(num))

    print(f'Created {wasm_bytes} wasm bytes')

    return js


# Entry point: parses the flags that ClusterFuzz passes, then emits the
# requested number of testcases into the output dir. Each testcase is a pair of
# files, a JS fuzz file and a flags file. argv is the full argument list,
# including the program name in argv[0].
def main(argv):
    # Defaults, used when the matching flag is not passed.
    num = 100
    output_dir = '.'

    # Parse the options. See
    # https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer
    # Note that --input_dir is accepted, but its value is not used.
    long_options = ['input_dir=', 'output_dir=', 'no_of_files=']
    parsed_options, _ = getopt.getopt(argv[1:], '', long_options)
    for name, argument in parsed_options:
        if name == '--no_of_files':
            num = int(argument)
        elif name == '--output_dir':
            output_dir = argument

    # Testcase indexes are 1-based.
    for index in range(1, num + 1):
        fuzz_name = get_file_name(FUZZ_FILENAME_PREFIX, index)
        flags_name = get_file_name(FLAGS_FILENAME_PREFIX, index)
        testcase_file_path = os.path.join(output_dir, fuzz_name)
        flags_file_path = os.path.join(output_dir, flags_name)

        # Emit the JS file. The contents are generated before the file is
        # opened, so a failure to generate does not leave an empty testcase.
        js_file_contents = get_js_file_contents(index, output_dir)
        with open(testcase_file_path, 'w') as js_file:
            js_file.write(js_file_contents)

        # Emit a corresponding flags file.
        with open(flags_file_path, 'w') as flags_file:
            flags_file.write(FUZZER_FLAGS_FILE_CONTENTS)

        print(f'Created testcase: {testcase_file_path}')

    print(f'Created {num} testcases.')


# When run as a script, generate testcases according to the command-line flags.
# main() receives the full argv and itself skips the program name in argv[0].
if __name__ == '__main__':
    main(sys.argv)