summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2018-02-02 18:47:39 -0800
committerGitHub <noreply@github.com>2018-02-02 18:47:39 -0800
commit874e89eec8ee4c56ecdb9e6cd68f8366fe983b79 (patch)
treeb2893c1106c8f8263c1932e70fbacce96418f1f9 /src
parent6b05f000e8b9249afd0838774b6bdaf64fcaf90a (diff)
downloadbinaryen-874e89eec8ee4c56ecdb9e6cd68f8366fe983b79.tar.gz
binaryen-874e89eec8ee4c56ecdb9e6cd68f8366fe983b79.tar.bz2
binaryen-874e89eec8ee4c56ecdb9e6cd68f8366fe983b79.zip
Inlining improvements (#1397)
Simplify inlining logic: don't special case the first iteration or change behavior based on when we are optimizing or not. Instead, use one simpler set of heuristics for both inlining and inlining-optimizing. We only run inlining-optimizing by default anyhow, no point to try to make inlining without optimizations useful by itself, it's not a realistic use case. (inlining is still useful for debugging, and if you will run optimizations anyhow later on everything, in which case inlining-optimizing might add some redundancy.) The simpler heuristics after this let us do a somewhat better job as we are no longer paranoid about inlining in multiple iterations. Also raise limit on inlining things that are obviously worth it from size 1 to size 2: things of size 2 will never lead to an increase in code size after we optimize (it takes at least 3 nodes to generate something that reads two locals and reverses their order, which would require a temp local in the outside scope etc.). Also fix infinite recursion of inlining an infinitely recursive set of calls.
Diffstat (limited to 'src')
-rw-r--r--src/passes/Inlining.cpp65
-rw-r--r--src/passes/pass.cpp4
2 files changed, 42 insertions, 27 deletions
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index 3a244948a..0c211e786 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -17,14 +17,17 @@
//
// Inlining.
//
-// By default, this does a conservative inlining of all functions that have
-// exactly one use, and are fairly small. That should not increase code
-// size, and may have speed benefits.
+// This uses some simple heuristics to decide when to inline.
//
-// When opt level is 3+ (-O3 or above), we more aggressively inline
-// even functions with more than one use, that seem to be "lightweight"
-// (no loops or calls etc.), so inlining them may get rid of call overhead
-// that would be noticeable otherwise
+// Two versions are provided: inlining and inlining-optimizing. You
+// probably want the optimizing version, which will optimize locations
+// we inlined into, as inlining by itself creates a block to house the
+// inlined code, some temp locals, etc., which can usually be removed
+// by optimizations. Note that the two versions use the same heuristics,
+// so we don't take into account the overhead if you don't optimize
+// afterwards. The non-optimizing version is mainly useful for debugging,
+// or if you intend to run a full set of optimizations anyhow on
+// everything later.
//
#include <atomic>
@@ -47,8 +50,16 @@ static const int CAREFUL_SIZE_LIMIT = 15;
static const int FLEXIBLE_SIZE_LIMIT = 20;
// A size so small that after optimizations, the inlined code will be
-// smaller than the call instruction itself.
-static const int INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT = 1;
+// smaller than the call instruction itself. 2 is a safe number because
+// there is no risk of things like
+// (func $reverse (param $x i32) (param $y i32)
+// (call $something (get_local $y) (get_local $x))
+// )
+// in which case the reversing of the params means we'll possibly need
+// a block and a temp local. But that takes at least 3 nodes, and 2 < 3.
+// More generally, with 2 items we may have a get_local, but no way to
+// require it to be saved instead of directly consumed.
+static const int INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT = 2;
// Useful into on a function, helping us decide if we can inline it
struct FunctionInfo {
@@ -64,23 +75,20 @@ struct FunctionInfo {
usedGlobally = false;
}
- bool worthInlining(PassOptions& options,
- bool allowMultipleInliningsPerFunction,
- bool optimizing) {
+ bool worthInlining(PassOptions& options) {
// if it's big, it's just not worth doing (TODO: investigate more)
if (size > FLEXIBLE_SIZE_LIMIT) return false;
// if it's so small we have a guarantee that after we optimize the
// size will not increase, inline it
- if (optimizing && size <= INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT) return true;
+ if (size <= INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT) return true;
// if it has one use, then inlining it would likely reduce code size
// since we are just moving code around, + optimizing, so worth it
// if small enough that we are pretty sure its ok
if (calls == 1 && !usedGlobally && size <= CAREFUL_SIZE_LIMIT) return true;
- if (!allowMultipleInliningsPerFunction) return false;
// more than one use, so we can't eliminate it after inlining,
// so only worth it if we really care about speed and don't care
// about size, and if it's lightweight so a good candidate for
- // speeding us up
+ // speeding us up.
return options.optimizeLevel >= 3 && options.shrinkLevel == 0 && lightweight;
}
};
@@ -221,19 +229,28 @@ struct Inlining : public Pass {
// whether to optimize where we inline
bool optimize = false;
+ // the information for each function. recomputed in each iteraction
NameInfoMap infos;
- bool firstIteration;
+ Index iterationNumber;
void run(PassRunner* runner, Module* module) override {
+ Index numFunctions = module->functions.size();
// keep going while we inline, to handle nesting. TODO: optimize
- firstIteration = true;
- while (1) {
+ iterationNumber = 0;
+ // no point to do more iterations than the number of functions, as
+ // it means we infinitely recursing (which should
+ // be very rare in practice, but it is possible that a recursive call
+ // can look like it is worth inlining)
+ while (iterationNumber <= numFunctions) {
+#ifdef INLINING_DEBUG
+ std::cout << "inlining loop iter " << iterationNumber << " (numFunctions: " << numFunctions << ")\n";
+#endif
calculateInfos(module);
if (!iteration(runner, module)) {
return;
}
- firstIteration = false;
+ iterationNumber++;
}
}
@@ -268,9 +285,7 @@ struct Inlining : public Pass {
InliningState state;
for (auto& func : module->functions) {
// on the first iteration, allow multiple inlinings per function
- if (infos[func->name].worthInlining(runner->options,
- firstIteration /* allowMultipleInliningsPerFunction */,
- optimize)) {
+ if (infos[func->name].worthInlining(runner->options)) {
state.worthInlining.insert(func->name);
}
}
@@ -291,14 +306,14 @@ struct Inlining : public Pass {
std::unordered_set<Function*> inlinedInto; // which functions were inlined into
for (auto& func : module->functions) {
// if we've inlined a function, don't inline into it in this iteration,
- // avoid risk of loops
+ // avoid risk of races
// note that we do not risk stalling progress, as each iteration() will
// inline at least one call before hitting this
if (inlinedUses.count(func->name)) continue;
for (auto& action : state.actionsForFunction[func->name]) {
auto* inlinedFunction = action.contents;
// if we've inlined into a function, don't inline it in this iteration,
- // avoid risk of loops
+ // avoid risk of races
// note that we do not risk stalling progress, as each iteration() will
// inline at least one call before hitting this
if (inlinedInto.count(inlinedFunction)) continue;
@@ -326,7 +341,7 @@ struct Inlining : public Pass {
auto& info = infos[name];
bool canRemove = inlinedUses.count(name) && inlinedUses[name] == info.calls && !info.usedGlobally;
#ifdef INLINING_DEBUG
- std::cout << "removing " << name << '\n';
+ if (canRemove) std::cout << "removing " << name << '\n';
#endif
return canRemove;
}), funcs.end());
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 11575b1df..c9fc7c40e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -74,8 +74,8 @@ void PassRegistry::registerPasses() {
registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass);
registerPass("flatten", "flattens out code, removing nesting", createFlattenPass);
registerPass("func-metrics", "reports function metrics", createFunctionMetricsPass);
- registerPass("inlining", "inlines functions", createInliningPass);
- registerPass("inlining-optimizing", "inlines functions and optimizes where we inlined", createInliningOptimizingPass);
+ registerPass("inlining", "inline functions (you probably want inlining-optimizing)", createInliningPass);
+ registerPass("inlining-optimizing", "inline functions and optimizes where we inlined", createInliningOptimizingPass);
registerPass("legalize-js-interface", "legalizes i64 types on the import/export boundary", createLegalizeJSInterfacePass);
registerPass("local-cse", "common subexpression elimination inside basic blocks", createLocalCSEPass);
registerPass("log-execution", "instrument the build with logging of where execution goes", createLogExecutionPass);