[Fuzzer] Fuzz TrapsNeverHappen mode (#4936)

This mode is tricky to fuzz because the mode is basically "assume traps never happen; if a trap does happen, that is undefined behavior". So if any trap occurs in the random fuzz testcase, we can't optimize with -tnh and assume the results stay the same. To avoid that, we ignore all functions from the first one that traps onward, that is, we only compare the code that ran without trapping. That often is a small subset of the total functions, sadly, but I do see that this ends up with some useful coverage despite the drawback. This also requires some fixes to the comparison of references: specifically, funcrefs are printed with the function name/index, but that can change during opts, so we ignore that. This wasn't noticed before because this new fuzzer mode compares --fuzz-exec-before output, instead of using --fuzz-exec, which runs the module before and after and compares the results internally in wasm-opt. Here we are comparing the output externally, which we didn't do before.
author: Alon Zakai <azakai@google.com> 2022-08-22 16:03:00 -0700
committer: GitHub <noreply@github.com> 2022-08-22 23:03:00 +0000
commit: 195c4e1804d5a4530c8216d4c9a138b56f676d10 (patch)
tree: 5aeca907c334f2673865868ce30ff5f111cbae4d
parent: 0e0c2d9d45068c450ad5df5de47948532dd12c53 (diff)
download: binaryen-195c4e1804d5a4530c8216d4c9a138b56f676d10.tar.gz
binaryen-195c4e1804d5a4530c8216d4c9a138b56f676d10.tar.bz2
binaryen-195c4e1804d5a4530c8216d4c9a138b56f676d10.zip
1 files changed, 85 insertions, 2 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index e8c497306..57b1c0400 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -472,6 +472,9 @@ def numbers_are_close_enough(x, y):
         return False
 
 
+FUZZ_EXEC_NOTE_RESULT = '[fuzz-exec] note result'
+
+
 # compare between vms, which may slightly change how numbers are printed
 def compare_between_vms(x, y, context):
     x_lines = x.splitlines()
@@ -491,8 +494,7 @@ def compare_between_vms(x, y, context):
                 y_val = y_line[len(LEI_LOGGING) + 1:-1]
                 if numbers_are_close_enough(x_val, y_val):
                     continue
-            NOTE_RESULT = '[fuzz-exec] note result'
-            if x_line.startswith(NOTE_RESULT) and y_line.startswith(NOTE_RESULT):
+            if x_line.startswith(FUZZ_EXEC_NOTE_RESULT) and y_line.startswith(FUZZ_EXEC_NOTE_RESULT):
                 x_val = x_line.split(' ')[-1]
                 y_val = y_line.split(' ')[-1]
                 if numbers_are_close_enough(x_val, y_val):
@@ -516,8 +518,14 @@ def fix_output(out):
             x = str(float(x))
         return 'f64.const ' + x
     out = re.sub(r'f64\.const (-?[nanN:abcdefxIity\d+-.]+)', fix_double, out)
+
     # mark traps from wasm-opt as exceptions, even though they didn't run in a vm
     out = out.replace(TRAP_PREFIX, 'exception: ' + TRAP_PREFIX)
+
+    # a printed funcref(..) contains the function's name or index, and
+    # optimizations can change that, so ignore it
+    out = re.sub(r'funcref\([\d\w$+-_:]+\)', 'funcref()', out)
+
     lines = out.splitlines()
     for i in range(len(lines)):
         line = lines[i]
@@ -1036,6 +1044,80 @@ class Asyncify(TestCaseHandler):
         return all_disallowed(['exception-handling', 'simd', 'tail-call', 'reference-types', 'multivalue', 'gc'])
 
 
+# Fuzz -tnh by comparing --fuzz-exec-before output. The tricky thing with traps-
+# never-happen mode is that if a trap *does* happen then that is undefined
+# behavior, and the optimizer is free to change observable behavior there. The
+# fuzzer therefore needs to ignore code that traps.
+class TrapsNeverHappen(TestCaseHandler):
+    frequency = 1
+
+    def handle_pair(self, input, before_wasm, after_wasm, opts):
+        before = run_bynterp(before_wasm, ['--fuzz-exec-before'])
+        after_wasm_tnh = after_wasm + '.tnh.wasm'
+        run([in_bin('wasm-opt'), before_wasm, '-o', after_wasm_tnh, '-tnh'] + opts + FEATURE_OPTS)
+        after = run_bynterp(after_wasm_tnh, ['--fuzz-exec-before'])
+
+        # if a trap happened, we must stop comparing from that.
+        if TRAP_PREFIX in before:
+            trap_index = before.index(TRAP_PREFIX)
+            # we can't test this function, which the trap is in the middle of
+            # (tnh could move the trap around, so even things before the trap
+            # are unsafe). erase everything from this function's output and
+            # onward, so we only compare the previous trap-free code. first,
+            # find the function call during which the trap happened, by finding
+            # the call line right before us. that is, the output looks like
+            # this:
+            #
+            #   [fuzz-exec] calling foo
+            #   .. stuff happening during foo ..
+            #   [fuzz-exec] calling bar
+            #   .. stuff happening during bar ..
+            #
+            # if the trap happened during bar, the relevant call line is
+            # "[fuzz-exec] calling bar".
+            call_start = before.rfind(FUZZ_EXEC_CALL_PREFIX, 0, trap_index)
+            if call_start < 0:
+                # the trap happened before we called an export, so it occurred
+                # during startup (the start function, or memory segment
+                # operations, etc.). in that case there is nothing for us to
+                # compare here; just leave.
+                return
+            call_end = before.index('\n', call_start)
+            # we now know the contents of the call line after which the trap
+            # happens, which is something like "[fuzz-exec] calling bar", and
+            # it is unique since it contains the function being called.
+            call_line = before[call_start:call_end]
+            # find that call line, and remove everything from it onward.
+            before_index = before.index(call_line)
+            lines_pre = before.count(os.linesep)
+            before = before[:before_index]
+            lines_post = before.count(os.linesep)
+            print(f'ignoring code due to trap (from "{call_line}"), lines to compare goes {lines_pre} => {lines_post} ')
+
+            # also remove the relevant lines from after.
+            after_index = after.index(call_line)
+            after = after[:after_index]
+
+        # some results cannot be compared, so we must filter them out here.
+        def ignore_references(out):
+            ret = []
+            for line in out.splitlines():
+                # only result lines are relevant here, which look like
+                # [fuzz-exec] note result: foo => [...]
+                if FUZZ_EXEC_NOTE_RESULT in line:
+                    # we want to filter out things like "anyref(null)" or
+                    # "[ref null data]".
+                    if 'ref(' in line or 'ref ' in line:
+                        line = line[:line.index('=>') + 2] + ' ?'
+                ret.append(line)
+            return '\n'.join(ret)
+
+        before = fix_output(ignore_references(before))
+        after = fix_output(ignore_references(after))
+
+        compare_between_vms(before, after, 'TrapsNeverHappen')
+
+
 # Check that the text format round-trips without error.
 class RoundtripText(TestCaseHandler):
     frequency = 0.05
@@ -1056,6 +1138,7 @@ testcase_handlers = [
     CheckDeterminism(),
     Wasm2JS(),
     Asyncify(),
+    TrapsNeverHappen(),
     # FIXME: Re-enable after https://github.com/WebAssembly/binaryen/issues/3989
     # RoundtripText()
 ]
author	Alon Zakai <azakai@google.com>	2022-08-22 16:03:00 -0700
committer	GitHub <noreply@github.com>	2022-08-22 23:03:00 +0000
commit	195c4e1804d5a4530c8216d4c9a138b56f676d10 (patch)
tree	5aeca907c334f2673865868ce30ff5f111cbae4d
parent	0e0c2d9d45068c450ad5df5de47948532dd12c53 (diff)
download	binaryen-195c4e1804d5a4530c8216d4c9a138b56f676d10.tar.gz binaryen-195c4e1804d5a4530c8216d4c9a138b56f676d10.tar.bz2 binaryen-195c4e1804d5a4530c8216d4c9a138b56f676d10.zip