[ctor-eval] Add an option to keep some exports (#4441)

By default wasm-ctor-eval removes exports that it manages to completely eval (if it just partially evals then the export remains, but points to a function with partially-evalled contents). However, in some cases we do want to keep the export around even so, for example during fuzzing (as the fuzzer wants to call the same exports before and after wasm-ctor-eval runs) and also if there is an ABI we need to preserve (like if we manage to eval all of main()), or if the function returns a value (which we don't support yet, but this is a PR to prepare for that). Specifically, there is now a new option: --kept-exports foo,bar That is a list of exports to keep around. Note that when we keep around an export after evalling the ctor we make the export point to a new function. That new function just contains a nop, so that nothing happens when it is called. But the original function is kept around as it may have other callers, who we do not want to modify.
author: Alon Zakai <azakai@google.com> 2022-01-11 13:29:10 -0800
committer: GitHub <noreply@github.com> 2022-01-11 13:29:10 -0800
commit: cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de (patch)
tree: 48557d4f56f512d2a07f82c4b27a28f052d2eff7
parent: 1e7e248d890d27a40f3dc287bd82834930f81e5c (diff)
download: binaryen-cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de.tar.gz
binaryen-cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de.tar.bz2
binaryen-cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de.zip
7 files changed, 132 insertions, 20 deletions
diff --git a/auto_update_tests.py b/auto_update_tests.py
index 9f93142a8..7277f5e77 100755
--- a/auto_update_tests.py
+++ b/auto_update_tests.py
@@ -89,6 +89,8 @@ def update_ctor_eval_tests():
         cmd = shared.WASM_CTOR_EVAL + [t, '-all', '-o', 'a.wast', '-S', '--ctors', ctors]
         if 'ignore-external-input' in t:
             cmd += ['--ignore-external-input']
+        if 'results' in t:
+            cmd += ['--kept-exports', 'test1,test3']
         support.run_command(cmd)
         actual = open('a.wast').read()
         out = t + '.out'
diff --git a/check.py b/check.py
index 6b272f4b4..6b4d1833a 100755
--- a/check.py
+++ b/check.py
@@ -118,6 +118,8 @@ def run_ctor_eval_tests():
         cmd = shared.WASM_CTOR_EVAL + [t, '-all', '-o', 'a.wat', '-S', '--ctors', ctors]
         if 'ignore-external-input' in t:
             cmd += ['--ignore-external-input']
+        if 'results' in t:
+            cmd += ['--kept-exports', 'test1,test3']
         support.run_command(cmd)
         actual = open('a.wat').read()
         out = t + '.out'
diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp
index 867335a20..e6883e351 100644
--- a/src/tools/wasm-ctor-eval.cpp
+++ b/src/tools/wasm-ctor-eval.cpp
@@ -33,6 +33,7 @@
 #include "pass.h"
 #include "support/colors.h"
 #include "support/file.h"
+#include "support/string.h"
 #include "tool-options.h"
 #include "wasm-builder.h"
 #include "wasm-interpreter.h"
@@ -645,7 +646,12 @@ bool evalCtor(EvallingModuleInstance& instance,
 }
 
 // Eval all ctors in a module.
-void evalCtors(Module& wasm, std::vector<std::string> ctors) {
+void evalCtors(Module& wasm,
+               std::vector<std::string>& ctors,
+               std::vector<std::string>& keptExports) {
+  std::unordered_set<std::string> keptExportsSet(keptExports.begin(),
+                                                 keptExports.end());
+
   std::map<Name, std::shared_ptr<EvallingModuleInstance>> linkedInstances;
 
   // build and link the env module
@@ -683,7 +689,20 @@ void evalCtors(Module& wasm, std::vector<std::string> ctors) {
 
       // Success! Remove the export, and continue.
       std::cout << "  ...success on " << ctor << ".\n";
-      wasm.removeExport(ctor);
+
+      // Remove the export if we should.
+      auto* exp = wasm.getExport(ctor);
+      if (!keptExportsSet.count(ctor)) {
+        wasm.removeExport(exp->name);
+      } else {
+        // We are keeping around the export, which should now refer to an
+        // empty function since calling the export should do nothing.
+        auto* func = wasm.getFunction(exp->value);
+        auto copyName = Names::getValidFunctionName(wasm, func->name);
+        auto* copyFunc = ModuleUtils::copyFunction(func, wasm, copyName);
+        copyFunc->body = Builder(wasm).makeNop();
+        wasm.getExport(exp->name)->value = copyName;
+      }
     }
   } catch (FailToEvalException& fail) {
     // that's it, we failed to even create the instance
@@ -704,7 +723,8 @@ int main(int argc, const char* argv[]) {
   std::vector<std::string> passes;
   bool emitBinary = true;
   bool debugInfo = false;
-  std::string ctorsString;
+  String::Split ctors;
+  String::Split keptExports;
 
   const std::string WasmCtorEvalOption = "wasm-ctor-eval options";
 
@@ -732,13 +752,24 @@ int main(int argc, const char* argv[]) {
          WasmCtorEvalOption,
          Options::Arguments::Zero,
          [&](Options* o, const std::string& arguments) { debugInfo = true; })
+    .add("--ctors",
+         "-c",
+         "Comma-separated list of global constructor functions to evaluate",
+         WasmCtorEvalOption,
+         Options::Arguments::One,
+         [&](Options* o, const std::string& argument) {
+           ctors = String::Split(argument, ",");
+         })
     .add(
-      "--ctors",
-      "-c",
-      "Comma-separated list of global constructor functions to evaluate",
+      "--kept-exports",
+      "-ke",
+      "Comma-separated list of ctors whose exports we keep around even if we "
+      "eval those ctors",
       WasmCtorEvalOption,
       Options::Arguments::One,
-      [&](Options* o, const std::string& argument) { ctorsString = argument; })
+      [&](Options* o, const std::string& argument) {
+        keptExports = String::Split(argument, ",");
+      })
     .add("--ignore-external-input",
          "-ipi",
          "Assumes no env vars are to be read, stdin is empty, etc.",
@@ -777,14 +808,7 @@ int main(int argc, const char* argv[]) {
     Fatal() << "error in validating input";
   }
 
-  // get list of ctors, and eval them
-  std::vector<std::string> ctors;
-  std::istringstream stream(ctorsString);
-  std::string temp;
-  while (std::getline(stream, temp, ',')) {
-    ctors.push_back(temp);
-  }
-  evalCtors(wasm, ctors);
+  evalCtors(wasm, ctors, keptExports);
 
   // Do some useful optimizations after the evalling
   {
diff --git a/test/ctor-eval/results.wast b/test/ctor-eval/results.wast
index 83fc245fe..a01904149 100644
--- a/test/ctor-eval/results.wast
+++ b/test/ctor-eval/results.wast
@@ -1,7 +1,54 @@
 (module
-  (func "test1" (result i32)
+  (global $global1 (mut i32) (i32.const 1))
+  (global $global2 (mut i32) (i32.const 2))
+  (global $global3 (mut i32) (i32.const 3))
+
+  (func $test1 (export "test1")
+    ;; This function can be evalled. But in this test we keep this export,
+    ;; so we should still see an export, but the export should do nothing since
+    ;; the code has already run.
+    ;;
+    ;; In comparison, test3 below, with a result, will still contain a
+    ;; (constant) result in the remaining export once we can handle results.
+
+    (global.set $global1
+      (i32.const 11)
+    )
+  )
+
+  (func $test2 (export "test2")
+    ;; As the above function, but the export is *not* kept.
+    (global.set $global2
+      (i32.const 12)
+    )
+  )
+
+  (func $test3 (export "test3") (result i32)
     ;; The presence of a result stops us from evalling this function (at least
-    ;; for now).
+    ;; for now). Not even the global set will be evalled.
+    (global.set $global3
+      (i32.const 13)
+    )
     (i32.const 42)
   )
+
+  (func "keepalive" (result i32)
+    ;; Keep everything alive to see the changes.
+
+    ;; These should call the original $test1, not the one that is nopped out
+    ;; after evalling.
+    (call $test1)
+    (call $test2)
+
+    (drop
+      (call $test3)
+    )
+
+    ;; Keeping these alive should show the changes to the globals (that should
+    ;; contain 11, 12, and 3).
+    (i32.add
+      (global.get $global1)
+      (global.get $global2)
+    )
+  )
 )
diff --git a/test/ctor-eval/results.wast.ctors b/test/ctor-eval/results.wast.ctors
index a5bce3fd2..c7060ede5 100644
--- a/test/ctor-eval/results.wast.ctors
+++ b/test/ctor-eval/results.wast.ctors
@@ -1 +1 @@
-test1
+test1,test2,test3
diff --git a/test/ctor-eval/results.wast.out b/test/ctor-eval/results.wast.out
index b9dc2cf57..b4c947eb7 100644
--- a/test/ctor-eval/results.wast.out
+++ b/test/ctor-eval/results.wast.out
@@ -1,7 +1,40 @@
 (module
+ (type $none_=>_none (func))
  (type $none_=>_i32 (func (result i32)))
- (export "test1" (func $0))
- (func $0 (result i32)
+ (global $global1 (mut i32) (i32.const 11))
+ (global $global2 (mut i32) (i32.const 12))
+ (global $global3 (mut i32) (i32.const 3))
+ (export "test1" (func $test1_0))
+ (export "test3" (func $test3))
+ (export "keepalive" (func $3))
+ (func $test1
+  (global.set $global1
+   (i32.const 11)
+  )
+ )
+ (func $test2
+  (global.set $global2
+   (i32.const 12)
+  )
+ )
+ (func $test3 (result i32)
+  (global.set $global3
+   (i32.const 13)
+  )
   (i32.const 42)
  )
+ (func $3 (result i32)
+  (call $test1)
+  (call $test2)
+  (drop
+   (call $test3)
+  )
+  (i32.add
+   (global.get $global1)
+   (global.get $global2)
+  )
+ )
+ (func $test1_0
+  (nop)
+ )
 )
diff --git a/test/lit/help/wasm-ctor-eval.test b/test/lit/help/wasm-ctor-eval.test
index 364ac5d4c..b3b577250 100644
--- a/test/lit/help/wasm-ctor-eval.test
+++ b/test/lit/help/wasm-ctor-eval.test
@@ -19,6 +19,10 @@
 ;; CHECK-NEXT:   --ctors,-c                           Comma-separated list of global
 ;; CHECK-NEXT:                                        constructor functions to evaluate
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --kept-exports,-ke                   Comma-separated list of ctors whose
+;; CHECK-NEXT:                                        exports we keep around even if we eval
+;; CHECK-NEXT:                                        those ctors
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --ignore-external-input,-ipi         Assumes no env vars are to be read, stdin
 ;; CHECK-NEXT:                                        is empty, etc.
 ;; CHECK-NEXT:
author	Alon Zakai <azakai@google.com>	2022-01-11 13:29:10 -0800
committer	GitHub <noreply@github.com>	2022-01-11 13:29:10 -0800
commit	cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de (patch)
tree	48557d4f56f512d2a07f82c4b27a28f052d2eff7
parent	1e7e248d890d27a40f3dc287bd82834930f81e5c (diff)
download	binaryen-cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de.tar.gz binaryen-cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de.tar.bz2 binaryen-cc36ffd13f6794cc5212a1f0ba7f58e816e8a0de.zip