[NFC] Parallelize the actual inlining part of the Inlining pass (#6966)

We already parallelized collecting info about functions and finding inlining opportunities, but the actual inlining - copying the code into the target function - was done sequentially. It turns out that a lot of work happens there: this PR makes the pass over 2x faster. Details: * Move nameHint to InliningAction, so it is there when we apply the actions. * Add a DoInlining internal pass which calls doInlining(). * Refactor OptUtils a little to make it easy to run DoInlining before opts. * Also remove the return value of doInlining which was not used.
author: Alon Zakai <azakai@google.com> 2024-09-24 14:28:26 -0700
committer: GitHub <noreply@github.com> 2024-09-24 14:28:26 -0700
commit: 4d906204ebd3ad88a48350f008c7b72a0844e1da (patch)
tree: 101c4e3231098c49c05bc206c9c3fae032f3267a
parent: 480f5ba352a9f89afe72779c81f8a16fd3c8ba4a (diff)
download: binaryen-4d906204ebd3ad88a48350f008c7b72a0844e1da.tar.gz
binaryen-4d906204ebd3ad88a48350f008c7b72a0844e1da.tar.bz2
binaryen-4d906204ebd3ad88a48350f008c7b72a0844e1da.zip
2 files changed, 97 insertions, 30 deletions
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index e67a0f35b..30bb5c23f 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -244,8 +244,18 @@ struct InliningAction {
   Function* contents;
   bool insideATry;
 
-  InliningAction(Expression** callSite, Function* contents, bool insideATry)
-    : callSite(callSite), contents(contents), insideATry(insideATry) {}
+  // An optional name hint can be provided, which will then be used in the name
+  // of the block we put the inlined code in. Using a unique name hint in each
+  // inlining can reduce the risk of name overlaps (which cause fixup work in
+  // UniqueNameMapper::uniquify).
+  Index nameHint = 0;
+
+  InliningAction(Expression** callSite,
+                 Function* contents,
+                 bool insideATry,
+                 Index nameHint = 0)
+    : callSite(callSite), contents(contents), insideATry(insideATry),
+      nameHint(nameHint) {}
 };
 
 struct InliningState {
@@ -436,17 +446,11 @@ struct Updater : public TryDepthWalker<Updater> {
 };
 
 // Core inlining logic. Modifies the outside function (adding locals as
-// needed), and returns the inlined code.
-//
-// An optional name hint can be provided, which will then be used in the name of
-// the block we put the inlined code in. Using a unique name hint in each call
-// of this function can reduce the risk of name overlaps (which cause fixup work
-// in UniqueNameMapper::uniquify).
-static Expression* doInlining(Module* module,
-                              Function* into,
-                              const InliningAction& action,
-                              PassOptions& options,
-                              Index nameHint = 0) {
+// needed).
+static void doInlining(Module* module,
+                       Function* into,
+                       const InliningAction& action,
+                       PassOptions& options) {
   Function* from = action.contents;
   auto* call = (*action.callSite)->cast<Call>();
 
@@ -457,8 +461,8 @@ static Expression* doInlining(Module* module,
   Builder builder(*module);
   auto* block = builder.makeBlock();
   auto name = std::string("__inlined_func$") + from->name.toString();
-  if (nameHint) {
-    name += '$' + std::to_string(nameHint);
+  if (action.nameHint) {
+    name += '$' + std::to_string(action.nameHint);
   }
   block->name = Name(name);
 
@@ -629,9 +633,39 @@ static Expression* doInlining(Module* module,
   // New locals we added may require fixups for nondefaultability.
   // FIXME Is this not done automatically?
   TypeUpdating::handleNonDefaultableLocals(into, *module);
-  return block;
 }
 
+// A map of function names to the inlining actions we've decided to actually
+// perform in them.
+using ChosenActions = std::unordered_map<Name, std::vector<InliningAction>>;
+
+// A pass that calls doInlining() on a bunch of actions that were chosen to
+// perform.
+struct DoInlining : public Pass {
+  bool isFunctionParallel() override { return true; }
+
+  std::unique_ptr<Pass> create() override {
+    return std::make_unique<DoInlining>(chosenActions);
+  }
+
+  DoInlining(const ChosenActions& chosenActions)
+    : chosenActions(chosenActions) {}
+
+  void runOnFunction(Module* module, Function* func) override {
+    auto iter = chosenActions.find(func->name);
+    // We must be called on a function that we actually want to inline into.
+    assert(iter != chosenActions.end());
+    const auto& actions = iter->second;
+    assert(!actions.empty());
+    for (auto action : actions) {
+      doInlining(module, func, action, getPassOptions());
+    }
+  }
+
+private:
+  const ChosenActions& chosenActions;
+};
+
 //
 // Function splitting / partial inlining / inlining of conditions.
 //
@@ -1260,11 +1294,20 @@ struct Inlining : public Pass {
       state.actionsForFunction[func->name];
       funcNames.push_back(func->name);
     }
-    // find and plan inlinings
+
+    // Find and plan inlinings in parallel. This discovers inlining
+    // opportunities, by themselves, but does not yet take into account
+    // interactions between them (e.g. we don't want to both inline into a
+    // function and then inline it as well).
     Planner(&state).run(getPassRunner(), module);
-    // perform inlinings TODO: parallelize
-    std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined
-    // which functions were inlined into
+
+    // Choose which inlinings to perform. We do this sequentially so that we
+    // can consider interactions between them, and avoid nondeterminism.
+    ChosenActions chosenActions;
+
+    // How many uses (calls of the function) we inlined.
+    std::unordered_map<Name, Index> inlinedUses;
+
     for (auto name : funcNames) {
       auto* func = module->getFunction(name);
       // if we've inlined a function, don't inline into it in this iteration,
@@ -1293,24 +1336,40 @@ struct Inlining : public Pass {
         std::cout << "inline " << inlinedName << " into " << func->name << '\n';
 #endif
 
-        // Update the action for the actual inlining we are about to perform
+        // Update the action for the actual inlining we have chosen to perform
         // (when splitting, we will actually inline one of the split pieces and
         // not the original function itself; note how even if we do that then
         // we are still removing a call to the original function here, and so
         // we do not need to change anything else lower down - we still want to
         // note that we got rid of one use of the original function).
         action.contents = getActuallyInlinedFunction(action.contents);
-
-        // Perform the inlining and update counts.
-        doInlining(module, func, action, getPassOptions(), inlinedNameHint++);
+        action.nameHint = inlinedNameHint++;
+        chosenActions[func->name].push_back(action);
         inlinedUses[inlinedName]++;
         inlinedInto.insert(func);
         assert(inlinedUses[inlinedName] <= infos[inlinedName].refs);
       }
     }
-    if (optimize && inlinedInto.size() > 0) {
-      OptUtils::optimizeAfterInlining(inlinedInto, module, getPassRunner());
+
+    if (chosenActions.empty()) {
+      // We found nothing to do.
+      return;
+    }
+
+    // Perform the inlinings in parallel (sequentially inside each function we
+    // inline into, but in parallel between them). If we are optimizing, do so
+    // as well.
+    {
+      PassUtils::FilteredPassRunner runner(
+        module, inlinedInto, getPassRunner()->options);
+      runner.setIsNested(true);
+      runner.add(std::make_unique<DoInlining>(chosenActions));
+      if (optimize) {
+        OptUtils::addUsefulPassesAfterInlining(runner);
+      }
+      runner.run();
     }
+
     // remove functions that we no longer need after inlining
     module->removeFunctions([&](Function* func) {
       auto name = func->name;
@@ -1320,7 +1379,7 @@ struct Inlining : public Pass {
     });
   }
 
-  // See explanation in doInlining() for the parameter nameHint.
+  // See explanation in InliningAction.
   Index inlinedNameHint = 0;
 
   // Decide for a given function whether to inline, and if so in what mode.
diff --git a/src/passes/opt-utils.h b/src/passes/opt-utils.h
index 7bb733b03..69552f717 100644
--- a/src/passes/opt-utils.h
+++ b/src/passes/opt-utils.h
@@ -28,15 +28,23 @@
 
 namespace wasm::OptUtils {
 
+// Given a PassRunner, applies a set of useful passes that make sense to run
+// after inlining.
+inline void addUsefulPassesAfterInlining(PassRunner& runner) {
+  // Propagating constants makes a lot of sense after inlining, as new constants
+  // may have arrived.
+  runner.add("precompute-propagate");
+  // Do all the usual stuff.
+  runner.addDefaultFunctionOptimizationPasses();
+}
+
 // Run useful optimizations after inlining new code into a set of functions.
 inline void optimizeAfterInlining(const PassUtils::FuncSet& funcs,
                                   Module* module,
                                   PassRunner* parentRunner) {
   PassUtils::FilteredPassRunner runner(module, funcs, parentRunner->options);
   runner.setIsNested(true);
-  // this is especially useful after inlining
-  runner.add("precompute-propagate");
-  runner.addDefaultFunctionOptimizationPasses(); // do all the usual stuff
+  addUsefulPassesAfterInlining(runner);
   runner.run();
 }
author	Alon Zakai <azakai@google.com>	2024-09-24 14:28:26 -0700
committer	GitHub <noreply@github.com>	2024-09-24 14:28:26 -0700
commit	4d906204ebd3ad88a48350f008c7b72a0844e1da (patch)
tree	101c4e3231098c49c05bc206c9c3fae032f3267a
parent	480f5ba352a9f89afe72779c81f8a16fd3c8ba4a (diff)
download	binaryen-4d906204ebd3ad88a48350f008c7b72a0844e1da.tar.gz binaryen-4d906204ebd3ad88a48350f008c7b72a0844e1da.tar.bz2 binaryen-4d906204ebd3ad88a48350f008c7b72a0844e1da.zip