wasm-opt fuzz script (#1682) [ci skip]

A small fuzz script I've been using locally. Runs wasm-opt on random inputs and random passes, looking for breakage or the passes changing something. Can also run VMs before and after the passes, and compare the VMs.
author: Alon Zakai <alonzakai@gmail.com> 2018-09-19 08:24:20 -0700
committer: GitHub <noreply@github.com> 2018-09-19 08:24:20 -0700
commit: a53356ab155a7d8c2f334dc9a3c1432bacbc78fe (patch)
tree: 03e8bb153cc0bd0505a4a1c1987c4d1c54c227a0 /scripts
parent: cefbbfadae87135ff37fb4fc1058c6baf8de0140 (diff)
download: binaryen-a53356ab155a7d8c2f334dc9a3c1432bacbc78fe.tar.gz
binaryen-a53356ab155a7d8c2f334dc9a3c1432bacbc78fe.tar.bz2
binaryen-a53356ab155a7d8c2f334dc9a3c1432bacbc78fe.zip
1 files changed, 226 insertions, 0 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
new file mode 100644
index 000000000..3330000c9
--- /dev/null
+++ b/scripts/fuzz_opt.py
@@ -0,0 +1,226 @@
+'''
+Runs random passes and options on random inputs, using wasm-opt.
+
+Can be configured to run just wasm-opt itself (using --fuzz-exec)
+or also run VMs on it.
+
+For afl-fuzz integration, you probably don't want this, and can use
+something like
+
+BINARYEN_CORES=1 BINARYEN_PASS_DEBUG=1 afl-fuzz -i afl-testcases/ -o afl-findings/ -m 100 -d -- bin/wasm-opt -ttf --fuzz-exec --Os @@
+
+(that is on a fixed set of arguments to wasm-opt, though - this
+script covers different options being passed)
+'''
+
+import os
+import sys
+import difflib
+import subprocess
+import random
+import shutil
+import time
+
+# parameters
+
+# maximum number of characters of a command line that run() and
+# run_unchecked() echo before executing it
+LOG_LIMIT = 125
+# inclusive upper bound used by random_size() when picking the size of a
+# random input
+INPUT_SIZE_LIMIT = 250 * 1024
+
+
+def random_size():
+  '''Pick a random input size between 1 and INPUT_SIZE_LIMIT, inclusive.'''
+  smallest, largest = 1, INPUT_SIZE_LIMIT
+  return random.randint(smallest, largest)
+
+
+def run(cmd):
+  '''
+  Echo cmd (a list of arguments), truncated to LOG_LIMIT characters, then
+  execute it with subprocess.check_output and return what that returns.
+  '''
+  command_line = ' '.join(cmd)
+  print(command_line[:LOG_LIMIT])
+  return subprocess.check_output(cmd)
+
+
+def run_unchecked(cmd):
+  '''
+  Echo cmd (a list of arguments), truncated to LOG_LIMIT characters, then
+  execute it without checking its exit status. stderr is merged into stdout
+  and the combined output is returned.
+  '''
+  command_line = ' '.join(cmd)
+  print(command_line[:LOG_LIMIT])
+  process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  output, _ = process.communicate()
+  return output
+
+
+def randomize_pass_debug():
+  '''
+  With probability 0.125 set BINARYEN_PASS_DEBUG to '1' in os.environ (and
+  announce it); otherwise make sure the variable is not set at all.
+  '''
+  enable = random.random() < 0.125
+  if not enable:
+    # remove the variable whether or not it was set before
+    os.environ.pop('BINARYEN_PASS_DEBUG', None)
+    return
+  print('[pass-debug]')
+  os.environ['BINARYEN_PASS_DEBUG'] = '1'
+
+
+def test_one(infile, opts):
+  '''
+  Run one fuzz iteration on infile using the wasm-opt options in opts.
+
+  infile is translated to a wasm module with `wasm-opt -ttf`, which writes
+  a.wasm plus the a.js and a.wat wrappers. a.wasm is then processed with opts
+  into b.wasm, and the results of run_vms() before and after processing are
+  compared. Finally wasm-opt is run on a.wasm with --fuzz-exec --fuzz-binary
+  and opts.
+
+  Returns the combined size in bytes of a.wasm and b.wasm. Raises Exception
+  if two compared results differ; a failing wasm-opt invocation propagates
+  whatever run() raises.
+  '''
+  def compare(x, y, comment):
+    # raise, with a unified diff in the message, if x and y differ
+    if x == y:
+      return
+    diff = difflib.unified_diff(x.split('\n'), y.split('\n'), fromfile='expected', tofile='actual')
+    message = ''.join([a.rstrip() + '\n' for a in diff])
+    raise Exception(str(comment) + ": Expected to have '%s' == '%s', diff:\n\n%s" % (
+      x, y,
+      message
+    ))
+
+  def run_vms(prefix):
+    # run the configured VMs on the files named prefix + 'js' / 'wasm' / 'wat',
+    # check that all VMs agree with each other, and return their outputs
+    def fix_output(out):
+      # exceptions may differ when optimizing, but an exception should occur. so ignore their types
+      # also js engines print them out slightly differently
+      return '\n'.join(map(lambda x: '   *exception*' if 'exception' in x else x, out.split('\n')))
+
+    def fix_spec_output(out):
+      out = fix_output(out)
+      # spec shows a pointer when it traps, remove that
+      out = '\n'.join(map(lambda x: x if 'runtime trap' not in x else x[x.find('runtime trap'):], out.split('\n')))
+      # https://github.com/WebAssembly/spec/issues/543 , float consts are messed up
+      out = '\n'.join(map(lambda x: x if 'f32' not in x and 'f64' not in x else '', out.split('\n')))
+      return out
+
+    results = []
+    # append to this list to add results from VMs
+    # results += [fix_output(run([os.path.expanduser('d8'), '--', prefix + 'js', prefix + 'wasm']))]
+    # spec has no mechanism to not halt on a trap. so we just check until the first trap, basically
+    # run(['../spec/interpreter/wasm', prefix + 'wasm'])
+    # results += [fix_spec_output(run_unchecked(['../spec/interpreter/wasm', prefix + 'wasm', '-e', open(prefix + 'wat').read()]))]
+
+    if len(results) == 0:
+      # no VM is enabled; use a placeholder so the comparisons trivially pass
+      results = [0]
+
+    first = results[0]
+    for i, result in enumerate(results):
+      compare(first, result, 'comparing between vms at ' + str(i))
+
+    return results
+
+  randomize_pass_debug()
+
+  total_bytes = 0
+
+  # fuzz vms
+  # gather VM outputs on input file
+  run(['bin/wasm-opt', infile, '-ttf', '--emit-js-wrapper=a.js', '--emit-spec-wrapper=a.wat', '-o', 'a.wasm'])
+  wasm_size = os.stat('a.wasm').st_size
+  total_bytes += wasm_size
+  print('pre js size :', os.stat('a.js').st_size, ' wasm size:', wasm_size)
+  before = run_vms('a.')
+  print('----------------')
+  # gather VM outputs on processed file
+  run(['bin/wasm-opt', 'a.wasm', '-o', 'b.wasm'] + opts)
+  wasm_size = os.stat('b.wasm').st_size
+  total_bytes += wasm_size
+  # the js wrapper is not regenerated for b.wasm; a.js is reused as b.js
+  print('post js size:', os.stat('a.js').st_size, ' wasm size:', wasm_size)
+  shutil.copyfile('a.js', 'b.js')
+  after = run_vms('b.')
+  for i, (pre, post) in enumerate(zip(before, after)):
+    compare(pre, post, 'comparing between builds at ' + str(i))
+  # fuzz binaryen interpreter itself. separate invocation so result is easily fuzzable
+  run(['bin/wasm-opt', 'a.wasm', '--fuzz-exec', '--fuzz-binary'] + opts)
+
+  return total_bytes
+
+
+# the option sets fuzzing picks from; each entry is a list of wasm-opt flags
+# that are passed together
+
+opt_choices = [
+  [],
+  ['-O1'], ['-O2'], ['-O3'], ['-O4'], ['-Os'], ['-Oz'],
+  ['--coalesce-locals'],
+  # XXX slow, non-default ['--coalesce-locals-learning'],
+  ['--code-pushing'],
+  ['--code-folding'],
+  ['--const-hoisting'],
+  ['--dae'],
+  ['--dae-optimizing'],
+  ['--dce'],
+  ['--flatten', '--dfo'],
+  ['--duplicate-function-elimination'],
+  ['--flatten'],
+  # ['--fpcast-emu'], # removes indirect call failures as it makes them go through regardless of type
+  ['--inlining'],
+  ['--inlining-optimizing'],
+  ['--flatten', '--local-cse'],
+  ['--generate-stack-ir'],
+  ['--licm'],
+  ['--memory-packing'],
+  ['--merge-blocks'],
+  ['--merge-locals'],
+  ['--optimize-instructions'],
+  ['--optimize-stack-ir'],
+  ['--generate-stack-ir', '--optimize-stack-ir'],
+  ['--pick-load-signs'],
+  ['--precompute'],
+  ['--precompute-propagate'],
+  ['--remove-unused-brs'],
+  ['--remove-unused-nonfunction-module-elements'],
+  ['--remove-unused-module-elements'],
+  ['--remove-unused-names'],
+  ['--reorder-functions'],
+  ['--reorder-locals'],
+  ['--flatten', '--rereloop'],
+  ['--rse'],
+  ['--simplify-locals'],
+  ['--simplify-locals-nonesting'],
+  ['--simplify-locals-nostructure'],
+  ['--simplify-locals-notee'],
+  ['--simplify-locals-notee-nostructure'],
+  ['--ssa'],
+  ['--vacuum'],
+]
+
+
+def get_multiple_opt_choices():
+  '''
+  Build a random list of wasm-opt flags.
+
+  Entries of opt_choices are appended one after another. After each one the
+  loop ends if more than 20 flags have been collected, and otherwise ends
+  with probability 0.3. Unless some chosen flag contains '-O',
+  --optimize-level and --shrink-level (each with a random value from 0 to 3)
+  are each added with probability 0.5.
+  '''
+  chosen = []
+  # core opts
+  done = False
+  while not done:
+    chosen += random.choice(opt_choices)
+    done = len(chosen) > 20 or random.random() < 0.3
+  # modifiers (if not already implied by a -O? option)
+  if not any('-O' in flag for flag in chosen):
+    for modifier in ('--optimize-level=', '--shrink-level='):
+      if random.random() < 0.5:
+        chosen += [modifier + str(random.randint(0, 3))]
+  return chosen
+
+
+# main
+
+if len(sys.argv) >= 2:
+  # an input file was given: test it with the options from the command line,
+  # or with every entry of opt_choices in turn if no options were given
+  print('checking given input')
+  if len(sys.argv) >= 3:
+    test_one(sys.argv[1], sys.argv[2:])
+  else:
+    for opts in opt_choices:
+      print(opts)
+      test_one(sys.argv[1], opts)
+else:
+  # no input given: keep generating random inputs and random options
+  print('checking infinite random inputs')
+  random.seed(time.time())
+  temp = 'input.dat'
+  counter = 0
+  bytes = 0  # wasm bytes tested
+  start_time = time.time()
+  while True:
+    counter += 1
+    size = random_size()
+    # clamp so a clock with coarse resolution cannot cause a division by zero
+    elapsed = max(time.time() - start_time, 1e-9)
+    print('\nITERATION:', counter, 'size:', size, 'speed:', counter / elapsed, 'iters/sec, ', bytes / elapsed, 'bytes/sec\n')
+    # write raw bytes in binary mode: in text mode values above 127 may be
+    # encoded as multi-byte sequences and newlines may be translated, so the
+    # file would not hold exactly `size` random bytes
+    with open(temp, 'wb') as f:
+      f.write(bytearray(random.randint(0, 255) for _ in range(size)))
+    opts = get_multiple_opt_choices()
+    print('opts:', ' '.join(opts))
+    bytes += test_one(temp, opts)
author	Alon Zakai <alonzakai@gmail.com>	2018-09-19 08:24:20 -0700
committer	GitHub <noreply@github.com>	2018-09-19 08:24:20 -0700
commit	a53356ab155a7d8c2f334dc9a3c1432bacbc78fe (patch)
tree	03e8bb153cc0bd0505a4a1c1987c4d1c54c227a0 /scripts
parent	cefbbfadae87135ff37fb4fc1058c6baf8de0140 (diff)
download	binaryen-a53356ab155a7d8c2f334dc9a3c1432bacbc78fe.tar.gz binaryen-a53356ab155a7d8c2f334dc9a3c1432bacbc78fe.tar.bz2 binaryen-a53356ab155a7d8c2f334dc9a3c1432bacbc78fe.zip