Fuzzer: Add an option to fuzz with initial wasm contents (#3276)

Previously the fuzzer constructed a new random valid wasm file from scratch. The new --initial-fuzz=FILENAME option makes it start from an existing wasm file, and then add random contents on top of that. It also randomly modifies the existing contents, for example tweaking a Const, replacing some nodes with other things of the same type, etc. It also has a chance to replace a drop with a logging (as some of our tests just drop a result, and we match the optimized output's wasm instead of the result; by logging, the fuzzer can check things). The goal is to find bugs by using existing hand-written testcases as a basis. This PR uses the test suite's testcases as initial fuzz contents. This can find issues as they often check for corner cases - they are designed to be "interesting", which random data may be less likely to find. This has found several bugs already, see recent fuzz fixes. I mentioned the first few on Twitter but past 4 I stopped counting... https://twitter.com/kripken/status/1314323318036602880 This required various changes to the fuzzer's generation to account for the fact that there can be existing functions and so forth before it starts to run, so it needs to avoid collisions and so forth.
author: Alon Zakai <azakai@google.com> 2020-10-27 10:30:24 -0700
committer: GitHub <noreply@github.com> 2020-10-27 10:30:24 -0700
commit: 9a174f0ce9dc44b74db0e04728de62e6556176f9 (patch)
tree: 6f8a331eb2a23f795df6e12373bff22d28c1dfdd /src
parent: ca16f7cdc57549333474d7042128d2007bc35fad (diff)
download: binaryen-9a174f0ce9dc44b74db0e04728de62e6556176f9.tar.gz
binaryen-9a174f0ce9dc44b74db0e04728de62e6556176f9.tar.bz2
binaryen-9a174f0ce9dc44b74db0e04728de62e6556176f9.zip
3 files changed, 240 insertions, 29 deletions
diff --git a/src/ir/names.h b/src/ir/names.h
index 3d49a4a36..a725c515a 100644
--- a/src/ir/names.h
+++ b/src/ir/names.h
@@ -72,6 +72,10 @@ inline Name getValidFunctionName(Module& module, Name root) {
   return getValidName(
     module, root, [&](Name test) { return !module.getFunctionOrNull(test); });
 }
+inline Name getValidEventName(Module& module, Name root) {
+  return getValidName(
+    module, root, [&](Name test) { return !module.getEventOrNull(test); });
+}
 
 } // namespace Names
 
diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h
index 709ebe1e8..f4c29be55 100644
--- a/src/tools/fuzzing.h
+++ b/src/tools/fuzzing.h
@@ -31,6 +31,7 @@ high chance for set at start of loop
 #include <ir/find_all.h>
 #include <ir/literal-utils.h>
 #include <ir/manipulation.h>
+#include <ir/names.h>
 #include <ir/utils.h>
 #include <support/file.h>
 #include <tools/optimization-options.h>
@@ -188,6 +189,9 @@ public:
   void setAllowOOB(bool allowOOB_) { allowOOB = allowOOB_; }
 
   void build() {
+    if (HANG_LIMIT > 0) {
+      prepareHangLimitSupport();
+    }
     if (allowMemory) {
       setupMemory();
     }
@@ -196,6 +200,7 @@ public:
     if (wasm.features.hasExceptionHandling()) {
       setupEvents();
     }
+    modifyInitialFunctions();
     addImportLoggingSupport();
     // keep adding functions until we run out of input
     while (!finishedInput) {
@@ -205,6 +210,9 @@ public:
     if (HANG_LIMIT > 0) {
       addHangLimitSupport();
     }
+    if (allowMemory) {
+      finalizeMemory();
+    }
     finalizeTable();
   }
 
@@ -241,7 +249,7 @@ private:
   // the memory that we use, a small portion so that we have a good chance of
   // looking at writes (we also look outside of this region with small
   // probability) this should be a power of 2
-  static const int USABLE_MEMORY = 16;
+  const Address USABLE_MEMORY = 16;
 
   // the number of runtime iterations (function calls, loop backbranches) we
   // allow before we stop execution with a trap, to prevent hangs. 0 means
@@ -406,21 +414,44 @@ private:
     wasm.addExport(
       builder.makeExport(hasher->name, hasher->name, ExternalKind::Function));
     // Export memory so JS fuzzing can use it
-    wasm.addExport(builder.makeExport("memory", "0", ExternalKind::Memory));
+    if (!wasm.getExportOrNull("memory")) {
+      wasm.addExport(builder.makeExport("memory", "0", ExternalKind::Memory));
+    }
   }
 
   void setupTable() {
     wasm.table.exists = true;
+    wasm.table.initial = wasm.table.max = 0;
     wasm.table.segments.emplace_back(builder.makeConst(int32_t(0)));
   }
 
   std::map<Type, std::vector<Name>> globalsByType;
 
   void setupGlobals() {
+    // If there were initial wasm contents, there may be imported globals. That
+    // would be a problem in the fuzzer harness as we'd error if we do not
+    // provide them (and provide the proper type, etc.).
+    // Avoid that, so that all the standard fuzzing infrastructure can always
+    // run the wasm.
+    for (auto& global : wasm.globals) {
+      if (global->imported()) {
+        // Remove import info from imported globals, and give them a simple
+        // initializer.
+        global->module = global->base = Name();
+        global->init = makeConst(global->type);
+      } else {
+        // If the initialization referred to an imported global, it no longer
+        // can point to the same global after we make it a non-imported global
+        // (as wasm doesn't allow that - you can only use an imported one).
+        if (global->init->is<GlobalGet>()) {
+          global->init = makeConst(global->type);
+        }
+      }
+    }
     for (size_t index = upTo(MAX_GLOBALS); index > 0; --index) {
       auto type = getConcreteType();
       auto* global =
-        builder.makeGlobal(std::string("global$") + std::to_string(index),
+        builder.makeGlobal(Names::getValidGlobalName(wasm, "global$"),
                            type,
                            makeConst(type),
                            Builder::Mutable);
@@ -433,20 +464,87 @@ private:
     Index num = upTo(3);
     for (size_t i = 0; i < num; i++) {
       auto* event =
-        builder.makeEvent(std::string("event$") + std::to_string(i),
+        builder.makeEvent(Names::getValidEventName(wasm, "event$"),
                           WASM_EVENT_ATTRIBUTE_EXCEPTION,
                           Signature(getControlFlowType(), Type::none));
       wasm.addEvent(event);
     }
   }
 
+  void finalizeMemory() {
+    for (auto& segment : wasm.memory.segments) {
+      Address maxOffset = segment.data.size();
+      if (!segment.isPassive) {
+        if (auto* offset = segment.offset->dynCast<GlobalGet>()) {
+          // Using a non-imported global in a segment offset is not valid in
+          // wasm. This can occur due to us making what used to be an imported
+          // global, in initial contents, be not imported any more. To fix that,
+          // replace such invalid things with a constant.
+          // Note that it is still possible in theory to have imported globals
+          // here, as we only do the above for initial contents. While the
+          // fuzzer doesn't do so as of the time of this comment, do a check
+          // for full generality, so that this code essentially does "if this
+          // is invalid wasm, fix it up."
+          if (!wasm.getGlobal(offset->name)->imported()) {
+            // TODO: It would be better to avoid segment overlap so that
+            //       MemoryPacking can run.
+            segment.offset =
+              builder.makeConst(Literal::makeFromInt32(0, Type::i32));
+          }
+        }
+        if (auto* offset = segment.offset->dynCast<Const>()) {
+          maxOffset = maxOffset + offset->value.getInteger();
+        }
+      }
+      wasm.memory.initial = std::max(
+        wasm.memory.initial,
+        Address((maxOffset + Memory::kPageSize - 1) / Memory::kPageSize));
+    }
+    wasm.memory.initial = std::max(wasm.memory.initial, USABLE_MEMORY);
+    // Avoid an unlimited memory size, which would make fuzzing very difficult
+    // as different VMs will run out of system memory in different ways.
+    if (wasm.memory.max == Memory::kUnlimitedSize) {
+      wasm.memory.max = wasm.memory.initial;
+    }
+    if (wasm.memory.max <= wasm.memory.initial) {
+      // To allow growth to work (which a testcase may assume), try to make the
+      // maximum larger than the initial.
+      // TODO: scan the wasm for grow instructions?
+      wasm.memory.max =
+        std::min(Address(wasm.memory.initial + 1), Address(Memory::kMaxSize32));
+    }
+    // Avoid an imported memory (which the fuzz harness would need to handle).
+    wasm.memory.module = wasm.memory.base = Name();
+  }
+
   void finalizeTable() {
-    wasm.table.initial = wasm.table.segments[0].data.size();
+    for (auto& segment : wasm.table.segments) {
+      // If the offset is a global that was imported (which is ok) but no
+      // longer is (not ok) we need to change that.
+      if (auto* offset = segment.offset->dynCast<GlobalGet>()) {
+        if (!wasm.getGlobal(offset->name)->imported()) {
+          // TODO: the segments must not overlap...
+          segment.offset =
+            builder.makeConst(Literal::makeFromInt32(0, Type::i32));
+        }
+      }
+      Address maxOffset = segment.data.size();
+      if (auto* offset = segment.offset->dynCast<Const>()) {
+        maxOffset = maxOffset + offset->value.getInteger();
+      }
+      wasm.table.initial = std::max(wasm.table.initial, maxOffset);
+    }
     wasm.table.max =
       oneIn(2) ? Address(Table::kUnlimitedSize) : wasm.table.initial;
+    // Avoid an imported table (which the fuzz harness would need to handle).
+    wasm.table.module = wasm.table.base = Name();
   }
 
-  const Name HANG_LIMIT_GLOBAL = "hangLimit";
+  Name HANG_LIMIT_GLOBAL;
+
+  void prepareHangLimitSupport() {
+    HANG_LIMIT_GLOBAL = Names::getValidGlobalName(wasm, "hangLimit");
+  }
 
   void addHangLimitSupport() {
     auto* glob = builder.makeGlobal(HANG_LIMIT_GLOBAL,
@@ -455,15 +553,22 @@ private:
                                     Builder::Mutable);
     wasm.addGlobal(glob);
 
+    Name exportName = "hangLimitInitializer";
+    auto funcName = Names::getValidFunctionName(wasm, exportName);
     auto* func = new Function;
-    func->name = "hangLimitInitializer";
+    func->name = funcName;
     func->sig = Signature(Type::none, Type::none);
-    func->body =
-      builder.makeGlobalSet(glob->name, builder.makeConst(int32_t(HANG_LIMIT)));
+    func->body = builder.makeGlobalSet(HANG_LIMIT_GLOBAL,
+                                       builder.makeConst(int32_t(HANG_LIMIT)));
     wasm.addFunction(func);
 
+    if (wasm.getExportOrNull(exportName)) {
+      // We must export our actual hang limit function - remove anything
+      // previously existing.
+      wasm.removeExport(exportName);
+    }
     auto* export_ = new Export;
-    export_->name = func->name;
+    export_->name = exportName;
     export_->value = func->name;
     export_->kind = ExternalKind::Function;
     wasm.addExport(export_);
@@ -507,11 +612,28 @@ private:
   std::map<Type, std::vector<Index>>
     typeLocals; // type => list of locals with that type
 
+  void prepareToCreateFunctionContents(Function* func_) {
+    func = func_;
+    labelIndex = 0;
+    assert(breakableStack.empty());
+    assert(hangStack.empty());
+  }
+
+  void finishCreatingFunctionContents() {
+    if (HANG_LIMIT > 0) {
+      addHangLimitChecks(func);
+    }
+    typeLocals.clear();
+    assert(breakableStack.empty());
+    assert(hangStack.empty());
+  }
+
+  Index numAddedFunctions = 0;
+
   Function* addFunction() {
     LOGGING_PERCENT = upToSquared(100);
-    Index num = wasm.functions.size();
     func = new Function;
-    func->name = std::string("func_") + std::to_string(num);
+    func->name = Names::getValidFunctionName(wasm, "func");
     assert(typeLocals.empty());
     Index numParams = upToSquared(MAX_PARAMS);
     std::vector<Type> params;
@@ -528,9 +650,7 @@ private:
       typeLocals[type].push_back(params.size() + func->vars.size());
       func->vars.push_back(type);
     }
-    labelIndex = 0;
-    assert(breakableStack.empty());
-    assert(hangStack.empty());
+    prepareToCreateFunctionContents(func);
     // with small chance, make the body unreachable
     auto bodyType = func->sig.results;
     if (oneIn(10)) {
@@ -558,15 +678,11 @@ private:
       fixLabels(func);
     }
     // Add hang limit checks after all other operations on the function body.
-    if (HANG_LIMIT > 0) {
-      addHangLimitChecks(func);
-    }
-    assert(breakableStack.empty());
-    assert(hangStack.empty());
     wasm.addFunction(func);
     // export some, but not all (to allow inlining etc.). make sure to
     // export at least one, though, to keep each testcase interesting
-    if (num == 0 || oneIn(2)) {
+    if ((numAddedFunctions == 0 || oneIn(2)) &&
+        !wasm.getExportOrNull(func->name)) {
       auto* export_ = new Export;
       export_->name = func->name;
       export_->value = func->name;
@@ -578,7 +694,8 @@ private:
       wasm.table.segments[0].data.push_back(func->name);
     }
     // cleanup
-    typeLocals.clear();
+    finishCreatingFunctionContents();
+    numAddedFunctions++;
     return func;
   }
 
@@ -783,10 +900,86 @@ private:
     ReFinalize().walkFunctionInModule(func, &wasm);
   }
 
+  void modifyInitialFunctions() {
+    if (wasm.functions.empty()) {
+      return;
+    }
+    // Pick a chance to fuzz the contents of a function.
+    const int RESOLUTION = 10;
+    auto chance = upTo(RESOLUTION + 1);
+    for (auto& ref : wasm.functions) {
+      auto* func = ref.get();
+      prepareToCreateFunctionContents(func);
+      if (func->imported()) {
+        // We can't allow extra imports, as the fuzzing infrastructure wouldn't
+        // know what to provide.
+        func->module = func->base = Name();
+        func->body = make(func->sig.results);
+      }
+      // Optionally, fuzz the function contents.
+      if (upTo(RESOLUTION) >= chance) {
+        dropToLog(func);
+        // TODO add some locals? and the rest of addFunction's operations?
+        // TODO: interposition, replace initial a(b) with a(RANDOM_THING(b))
+        // TODO: if we add OOB checks after creation, then we can do it on
+        //       initial contents too, and it may be nice to *not* run these
+        //       passes, like we don't run them on new functions. But, we may
+        //       still want to run them some of the time, at least, so that we
+        //       check variations on initial testcases even at the risk of OOB.
+        recombine(func);
+        mutate(func);
+        fixLabels(func);
+      }
+      // Note that even if we don't fuzz the contents we still need to call
+      // finish so that we add hang limit protection and other general things.
+      finishCreatingFunctionContents();
+    }
+    // Remove a start function - the fuzzing harness expects code to run only
+    // from exports.
+    wasm.start = Name();
+  }
+
+  // Initial wasm contents may have come from a test that uses the drop pattern:
+  //
+  //  (drop ..something interesting..)
+  //
+  // The dropped interesting thing is optimized to some other interesting thing
+  // by a pass, and we verify it is the expected one. But this does not use the
+  // value in a way the fuzzer can notice. Replace some drops with a logging
+  // operation instead.
+  void dropToLog(Function* func) {
+    // Don't always do this.
+    if (oneIn(2)) {
+      return;
+    }
+    struct Modder : public PostWalker<Modder> {
+      Module& wasm;
+      TranslateToFuzzReader& parent;
+
+      Modder(Module& wasm, TranslateToFuzzReader& parent)
+        : wasm(wasm), parent(parent) {}
+
+      void visitDrop(Drop* curr) {
+        if (parent.isLoggableType(curr->value->type) && parent.oneIn(2)) {
+          replaceCurrent(parent.builder.makeCall(std::string("log-") +
+                                                   curr->value->type.toString(),
+                                                 {curr->value},
+                                                 Type::none));
+        }
+      }
+    };
+    Modder modder(wasm, *this);
+    modder.walk(func->body);
+  }
+
   // the fuzzer external interface sends in zeros (simpler to compare
   // across invocations from JS or wasm-opt etc.). Add invocations in
   // the wasm, so they run everywhere
   void addInvocations(Function* func) {
+    Name name = func->name.str + std::string("_invoker");
+    if (wasm.getFunctionOrNull(name) || wasm.getExportOrNull(name)) {
+      return;
+    }
     std::vector<Expression*> invocations;
     while (oneIn(2) && !finishedInput) {
       std::vector<Expression*> args;
@@ -808,13 +1001,13 @@ private:
       return;
     }
     auto* invoker = new Function;
-    invoker->name = func->name.str + std::string("_invoker");
+    invoker->name = name;
     invoker->sig = Signature(Type::none, Type::none);
     invoker->body = builder.makeBlock(invocations);
     wasm.addFunction(invoker);
     auto* export_ = new Export;
-    export_->name = invoker->name;
-    export_->value = invoker->name;
+    export_->name = name;
+    export_->value = name;
     export_->kind = ExternalKind::Function;
     wasm.addExport(export_);
   }
diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp
index ecf4f1c7c..56a26ccfb 100644
--- a/src/tools/wasm-opt.cpp
+++ b/src/tools/wasm-opt.cpp
@@ -81,6 +81,7 @@ int main(int argc, const char* argv[]) {
   bool fuzzExecAfter = false;
   std::string extraFuzzCommand;
   bool translateToFuzz = false;
+  std::string initialFuzz;
   bool fuzzPasses = false;
   bool fuzzMemory = true;
   bool fuzzOOB = true;
@@ -141,6 +142,13 @@ int main(int argc, const char* argv[]) {
       "fuzzing",
       Options::Arguments::Zero,
       [&](Options* o, const std::string& arguments) { translateToFuzz = true; })
+    .add("--initial-fuzz",
+         "-if",
+         "Initial wasm content in translate-to-fuzz (-ttf) mode",
+         Options::Arguments::One,
+         [&initialFuzz](Options* o, const std::string& argument) {
+           initialFuzz = argument;
+         })
     .add("--fuzz-passes",
          "-fp",
          "Pick a random set of passes to run, useful for fuzzing. this depends "
@@ -224,7 +232,13 @@ int main(int argc, const char* argv[]) {
     Fatal() << message;
   };
 
-  if (!translateToFuzz) {
+  // In normal (non-translate-to-fuzz) mode we read the input file. In
+  // translate-to-fuzz mode the input file is the random data, and used later
+  // down in TranslateToFuzzReader, but there is also an optional initial fuzz
+  // file that if it exists we read it, then add more fuzz on top.
+  if (!translateToFuzz || initialFuzz.size()) {
+    std::string inputFile =
+      translateToFuzz ? initialFuzz : options.extra["infile"];
     ModuleReader reader;
     // Enable DWARF parsing if we were asked for debug info, and were not
     // asked to remove it.
@@ -232,7 +246,7 @@ int main(int argc, const char* argv[]) {
                     !willRemoveDebugInfo(options.passes));
     reader.setProfile(options.profile);
     try {
-      reader.read(options.extra["infile"], wasm, inputSourceMapFilename);
+      reader.read(inputFile, wasm, inputSourceMapFilename);
     } catch (ParseException& p) {
       p.dump(std::cerr);
       std::cerr << '\n';
@@ -253,8 +267,8 @@ int main(int argc, const char* argv[]) {
         exitOnInvalidWasm("error validating input");
       }
     }
-  } else {
-    // translate-to-fuzz
+  }
+  if (translateToFuzz) {
     options.applyFeatures(wasm);
     TranslateToFuzzReader reader(wasm, options.extra["infile"]);
     if (fuzzPasses) {
author	Alon Zakai <azakai@google.com>	2020-10-27 10:30:24 -0700
committer	GitHub <noreply@github.com>	2020-10-27 10:30:24 -0700
commit	9a174f0ce9dc44b74db0e04728de62e6556176f9 (patch)
tree	6f8a331eb2a23f795df6e12373bff22d28c1dfdd /src
parent	ca16f7cdc57549333474d7042128d2007bc35fad (diff)
download	binaryen-9a174f0ce9dc44b74db0e04728de62e6556176f9.tar.gz binaryen-9a174f0ce9dc44b74db0e04728de62e6556176f9.tar.bz2 binaryen-9a174f0ce9dc44b74db0e04728de62e6556176f9.zip