Fuzzing: Compare wasm2js to the interpreter (#3026)

Comparing to the interpreter, and not just wasm2js to itself (which we've done on the same file before and after opts) ensures wasm2js has the right semantics. To do this, we need to make sure the wasm doesn't contain things where wasm2js semantics diverge from normal wasm, which includes: * Legalize so that there are no i64 exports. * Remove operations JS can't handle with full precision, like i64 -> f32. * Force all loads/stores to be 1-byte, as unexpectedly-unaligned operations fail in wasm2js. This also requires ignoring subnormals when comparing between JS VMs and the interpreter.
author: Alon Zakai <azakai@google.com> 2020-08-06 19:17:13 -0700
committer: GitHub <noreply@github.com> 2020-08-06 19:17:13 -0700
commit: cdc38d3162eb62cb313704f264f17d230989084b (patch)
tree: 9bc8ba453e5030ee9371f28af6782ea20e1649f5 /scripts/fuzz_opt.py
parent: 1b102f2ca87d54781b10110c375ea7d0cf5da837 (diff)
download: binaryen-cdc38d3162eb62cb313704f264f17d230989084b.tar.gz
binaryen-cdc38d3162eb62cb313704f264f17d230989084b.tar.bz2
binaryen-cdc38d3162eb62cb313704f264f17d230989084b.zip
1 files changed, 76 insertions, 14 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index 9cf65d541..d9477a146 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -192,9 +192,9 @@ def numbers_are_close_enough(x, y):
         pass
     # otherwise, try a full eval which can handle i64s too
     try:
-        x = eval(x)
-        y = eval(y)
-        return x == y or float(x) == float(y)
+        ex = eval(x)
+        ey = eval(y)
+        return ex == ey or float(ex) == float(ey)
     except Exception as e:
         print('failed to check if numbers are close enough:', e)
         return False
@@ -521,9 +521,34 @@ class Wasm2JS(TestCaseHandler):
     frequency = 0.6
 
     def handle_pair(self, input, before_wasm, after_wasm, opts):
+        before_wasm_temp = before_wasm + '.temp.wasm'
+        after_wasm_temp = after_wasm + '.temp.wasm'
+        # legalize the before wasm, so that comparisons to the interpreter
+        # later make sense (if we don't do this, the wasm may have i64 exports).
+        # after applying other necessary fixes, we'll recreate the after wasm
+        # from scratch.
+        run([in_bin('wasm-opt'), before_wasm, '--legalize-js-interface', '-o', before_wasm_temp] + FEATURE_OPTS)
+        compare_before_to_after = random.random() < 0.5
+        compare_to_interpreter = compare_before_to_after and random.random() < 0.5
+        if compare_before_to_after:
+            # to compare the wasm before and after optimizations, we must
+            # remove operations that wasm2js does not support with full
+            # precision, such as i64-to-f32, as the optimizer can give different
+            # results.
+            simplification_passes = ['--stub-unsupported-js']
+            if compare_to_interpreter:
+                # unexpectedly-unaligned loads/stores work fine in wasm in general but
+                # not in wasm2js, since typed arrays silently round down, effectively.
+                # if we want to compare to the interpreter, remove unaligned
+                # operations (by forcing alignment 1, then lowering those into aligned
+                # components, which means all loads and stores are of a single byte).
+                simplification_passes += ['--dealign', '--alignment-lowering']
+            run([in_bin('wasm-opt'), before_wasm_temp, '-o', before_wasm_temp] + simplification_passes + FEATURE_OPTS)
+        # now that the before wasm is fixed up, generate a proper after wasm
+        run([in_bin('wasm-opt'), before_wasm_temp, '-o', after_wasm_temp] + opts + FEATURE_OPTS)
         # always check for compiler crashes
-        before = self.run(before_wasm)
-        after = self.run(after_wasm)
+        before = self.run(before_wasm_temp)
+        after = self.run(after_wasm_temp)
         if NANS:
             # with NaNs we can't compare the output, as a reinterpret through
             # memory might end up different in JS than wasm
@@ -532,18 +557,55 @@ class Wasm2JS(TestCaseHandler):
         # trap on many things wasm would, and in those cases it can do weird
         # undefined things. in such a case, at least compare up until before
         # the trap, which lets us compare at least some results in some cases.
-        interpreter_output = run([in_bin('wasm-opt'), before_wasm, '--fuzz-exec-before'])
-        if TRAP_PREFIX in interpreter_output:
-            trap_index = interpreter_output.index(TRAP_PREFIX)
+        # (this is why wasm2js is not in CompareVMs, which does full
+        # comparisons - we need to limit the comparison in a special way here)
+        interpreter = run([in_bin('wasm-opt'), before_wasm_temp, '--fuzz-exec-before'])
+        if TRAP_PREFIX in interpreter:
+            trap_index = interpreter.index(TRAP_PREFIX)
             # we can't test this function, which the trap is in the middle of.
             # erase everything from this function's output and onward, so we
             # only compare the previous trap-free code
-            call_start = interpreter_output.rindex(FUZZ_EXEC_CALL_PREFIX, 0, trap_index)
-            call_end = interpreter_output.index('\n', call_start)
-            call_line = interpreter_output[call_start:call_end]
+            call_start = interpreter.rindex(FUZZ_EXEC_CALL_PREFIX, 0, trap_index)
+            call_end = interpreter.index('\n', call_start)
+            call_line = interpreter[call_start:call_end]
             before = before[:before.index(call_line)]
             after = after[:after.index(call_line)]
-        compare(before, after, 'Wasm2JS')
+            interpreter = interpreter[:interpreter.index(call_line)]
+
+        def fix_output_for_js(x):
+            # start with the normal output fixes that all VMs need
+            x = fix_output(x)
+
+            # check if a number is 0 or a subnormal, which is basically zero
+            def is_basically_zero(x):
+                # to check if something is a subnormal, compare it to the largest one
+                return x >= 0 and x <= 2.22507385850720088902e-308
+
+            def fix_number(x):
+                x = x.group(1)
+                try:
+                    x = float(x)
+                    # There appear to be some cases where JS VMs will print
+                    # subnormals in full detail while other VMs do not, and vice
+                    # versa. Ignore such really tiny numbers.
+                    if is_basically_zero(x):
+                        x = 0
+                except ValueError:
+                    # not a floating-point number, nothing to do
+                    pass
+                return ' => ' + str(x)
+
+            # logging notation is "function_name => result", look for that with
+            # a floating-point result that may need to be fixed up
+            return re.sub(r' => (-?[\d+-.e\-+]+)', fix_number, x)
+
+        before = fix_output_for_js(before)
+        after = fix_output_for_js(after)
+        if compare_before_to_after:
+            compare_between_vms(before, after, 'Wasm2JS (before/after)')
+            if compare_to_interpreter:
+                interpreter = fix_output_for_js(interpreter)
+                compare_between_vms(before, interpreter, 'Wasm2JS (vs interpreter)')
 
     def run(self, wasm):
         wrapper = run([in_bin('wasm-opt'), wasm, '--emit-js-wrapper=/dev/stdout'] + FEATURE_OPTS)
@@ -568,7 +630,7 @@ class Wasm2JS(TestCaseHandler):
             f.write(glue)
             f.write(main)
             f.write(wrapper)
-        return fix_output(run_vm([shared.NODEJS, js_file, 'a.wasm']))
+        return run_vm([shared.NODEJS, js_file, 'a.wasm'])
 
     def can_run_on_feature_opts(self, feature_opts):
         return all([x in feature_opts for x in ['--disable-exception-handling', '--disable-simd', '--disable-threads', '--disable-bulk-memory', '--disable-nontrapping-float-to-int', '--disable-tail-call', '--disable-sign-ext', '--disable-reference-types', '--disable-multivalue']])
@@ -650,7 +712,7 @@ def test_one(random_input, opts, given_wasm):
         # apply properties like not having any NaNs, which the original fuzz
         # wasm had applied. that is, we need to preserve properties like not
         # having nans through reduction.
-        run([in_bin('wasm-opt'), given_wasm, '-o', 'a.wasm'] + FUZZ_OPTS)
+        run([in_bin('wasm-opt'), given_wasm, '-o', 'a.wasm'] + FUZZ_OPTS + FEATURE_OPTS)
     else:
         # emit the target features section so that reduction can work later,
         # without needing to specify the features
author	Alon Zakai <azakai@google.com>	2020-08-06 19:17:13 -0700
committer	GitHub <noreply@github.com>	2020-08-06 19:17:13 -0700
commit	cdc38d3162eb62cb313704f264f17d230989084b (patch)
tree	9bc8ba453e5030ee9371f28af6782ea20e1649f5 /scripts/fuzz_opt.py
parent	1b102f2ca87d54781b10110c375ea7d0cf5da837 (diff)
download	binaryen-cdc38d3162eb62cb313704f264f17d230989084b.tar.gz binaryen-cdc38d3162eb62cb313704f264f17d230989084b.tar.bz2 binaryen-cdc38d3162eb62cb313704f264f17d230989084b.zip