diff options
author | Alon Zakai <azakai@google.com> | 2023-05-16 11:03:45 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-16 11:03:45 -0700 |
commit | 972e659bf59740c3ee44129812f95bec143d01a6 (patch) | |
tree | f86d70fa692a45e3dfbf951b0d1af06204d4ecf7 /scripts | |
parent | 44cd751d9feda7c4b4b6c9d6af1e71541b90abac (diff) | |
download | binaryen-972e659bf59740c3ee44129812f95bec143d01a6.tar.gz binaryen-972e659bf59740c3ee44129812f95bec143d01a6.tar.bz2 binaryen-972e659bf59740c3ee44129812f95bec143d01a6.zip |
Reintroduce wasm-merge (#5709)
We used to have a wasm-merge tool but removed it for a lack of use cases. Recently
use cases have been showing up in the wasm GC space and elsewhere, as people are
using more diverse toolchains together, for example a project might build some C++
code alongside some wasm GC code. Merging those wasm files together can allow
for nice optimizations like inlining and better DCE etc., so it makes sense to have a
tool for merging.
Background:
* Removal: #1969
* Requests:
* wasm-merge - why it has been deleted #2174
* Compiling and linking wat files #2276
* wasm-link? #2767
This PR is a compete rewrite of wasm-merge, not a restoration of the original
codebase. The original code was quite messy (my fault), and also, since then
we've added multi-memory and multi-table which makes things a lot simpler.
The linking semantics are as described in the "wasm-link" issue #2767 : all we do
is merge normal wasm files together and connect imports and export. That is, we
have a graph of modules and their names, and each import to a module name can
be resolved to that module. Basically, like a JS bundler would do for JS, or, in other
words, we do the same operations as JS code would do to glue wasm modules
together at runtime, but at compile time. See the README update in this PR for a
concrete example.
There are no plans to do more than that simple bundling, so this should not
really overlap with wasm-ld's use cases.
This should be fairly fast as it works in linear time on the total input code. However,
it won't be as fast as wasm-ld, of course, as it does build Binaryen IR for each
module. An advantage to working on Binaryen IR is that we can easily do some
global DCE after merging, and further optimizations are possible later.
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/fuzz_opt.py | 69 | ||||
-rwxr-xr-x | scripts/update_help_checks.py | 2 |
2 files changed, 66 insertions, 5 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8dc3041bd..452ab84f8 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -82,6 +82,11 @@ def random_size(): return random.randint(INPUT_SIZE_MIN, 2 * INPUT_SIZE_MEAN - INPUT_SIZE_MIN) +def make_random_input(input_size, raw_input_data): + with open(raw_input_data, 'wb') as f: + f.write(bytes([random.randint(0, 255) for x in range(input_size)])) + + def run(cmd, stderr=None, silent=False): if not silent: print(' '.join(cmd)) @@ -1284,6 +1289,62 @@ class CtorEval(TestCaseHandler): compare_between_vms(fix_output(wasm_exec), fix_output(evalled_wasm_exec), 'CtorEval') +# Tests wasm-merge +class Merge(TestCaseHandler): + frequency = 0.15 + + def handle(self, wasm): + # generate a second wasm file to merge. note that we intentionally pick + # a smaller size than the main wasm file, so that reduction is + # effective (i.e., as we reduce the main wasm to small sizes, we also + # end up with small secondary wasms) + # TODO: add imports and exports that connect between the two + wasm_size = os.stat(wasm).st_size + second_size = min(wasm_size, random_size()) + second_input = abspath('second_input.dat') + make_random_input(second_size, second_input) + second_wasm = abspath('second.wasm') + run([in_bin('wasm-opt'), second_input, '-ttf', '-o', second_wasm] + FUZZ_OPTS + FEATURE_OPTS) + + # sometimes also optimize the second module + if random.random() < 0.5: + opts = get_random_opts() + run([in_bin('wasm-opt'), second_wasm, '-o', second_wasm, '-all'] + FEATURE_OPTS + opts) + + # merge the wasm files. note that we must pass -all, as even if the two + # inputs are MVP, the output may have multiple tables and multiple + # memories (and we must also do that in the commands later down). + # + # Use --skip-export-conflicts as we only look at the first module's + # exports for now - we don't care about the second module's. + # TODO: compare the second module's exports as well, but we'd need + # to handle renaming of conflicting exports. + merged = abspath('merged.wasm') + run([in_bin('wasm-merge'), wasm, 'first', + abspath('second.wasm'), 'second', '-o', merged, + '--skip-export-conflicts'] + FEATURE_OPTS + ['-all']) + + # sometimes also optimize the merged module + if random.random() < 0.5: + opts = get_random_opts() + run([in_bin('wasm-opt'), merged, '-o', merged, '-all'] + FEATURE_OPTS + opts) + + # verify that merging in the second module did not alter the output. + output = run_bynterp(wasm, ['--fuzz-exec-before', '-all']) + output = fix_output(output) + merged_output = run_bynterp(merged, ['--fuzz-exec-before', '-all']) + merged_output = fix_output(merged_output) + + # a complication is that the second module's exports are appended, so we + # have extra output. to handle that, just prune the tail, so that we + # only compare the original exports from the first module. + # TODO: compare the second module's exports to themselves as well, but + # they may have been renamed due to overlaps... + merged_output = merged_output[:len(output)] + + compare_between_vms(output, merged_output, 'Merge') + + # Check that the text format round-trips without error. class RoundtripText(TestCaseHandler): frequency = 0.05 @@ -1306,6 +1367,7 @@ testcase_handlers = [ Asyncify(), TrapsNeverHappen(), CtorEval(), + Merge(), # FIXME: Re-enable after https://github.com/WebAssembly/binaryen/issues/3989 # RoundtripText() ] @@ -1329,7 +1391,7 @@ def test_one(random_input, given_wasm): randomize_fuzz_settings() pick_initial_contents() - opts = randomize_opt_flags() + opts = get_random_opts() print('randomized opts:', '\n ' + '\n '.join(opts)) print() @@ -1503,7 +1565,7 @@ requires_closed_world = {("--type-refining",), ("--type-merging",)} -def randomize_opt_flags(): +def get_random_opts(): flag_groups = [] has_flatten = False @@ -1643,8 +1705,7 @@ if __name__ == '__main__': 'iters/sec, ', total_wasm_size / elapsed, 'wasm_bytes/sec, ', ignored_vm_runs, 'ignored\n') - with open(raw_input_data, 'wb') as f: - f.write(bytes([random.randint(0, 255) for x in range(input_size)])) + make_random_input(input_size, raw_input_data) assert os.path.getsize(raw_input_data) == input_size # remove the generated wasm file, so that we can tell if the fuzzer # fails to create one diff --git a/scripts/update_help_checks.py b/scripts/update_help_checks.py index e439b2ecc..1756a0686 100755 --- a/scripts/update_help_checks.py +++ b/scripts/update_help_checks.py @@ -27,7 +27,7 @@ test_dir = os.path.join(root_dir, 'test', 'lit', 'help') TOOLS = ['wasm-opt', 'wasm-as', 'wasm-dis', 'wasm2js', 'wasm-ctor-eval', 'wasm-shell', 'wasm-reduce', 'wasm-metadce', 'wasm-split', - 'wasm-fuzz-types', 'wasm-emscripten-finalize'] + 'wasm-fuzz-types', 'wasm-emscripten-finalize', 'wasm-merge'] def main(): |