diff options
author | Alon Zakai <alonzakai@gmail.com> | 2018-02-02 18:47:39 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-02 18:47:39 -0800 |
commit | 874e89eec8ee4c56ecdb9e6cd68f8366fe983b79 (patch) | |
tree | b2893c1106c8f8263c1932e70fbacce96418f1f9 /src | |
parent | 6b05f000e8b9249afd0838774b6bdaf64fcaf90a (diff) | |
download | binaryen-874e89eec8ee4c56ecdb9e6cd68f8366fe983b79.tar.gz binaryen-874e89eec8ee4c56ecdb9e6cd68f8366fe983b79.tar.bz2 binaryen-874e89eec8ee4c56ecdb9e6cd68f8366fe983b79.zip |
Inlining improvements (#1397)
Simplify inlining logic: don't special case the first iteration or change behavior based on when we are optimizing or not. Instead, use one simpler set of heuristics for both inlining and inlining-optimizing. We only run inlining-optimizing by default anyhow, no point to try to make inlining without optimizations useful by itself, it's not a realistic use case. (inlining is still useful for debugging, and if you will run optimizations anyhow later on everything, in which case inlining-optimizing might add some redundancy.) The simpler heuristics after this let us do a somewhat better job as we are no longer paranoid about inlining in multiple iterations.
Also raise limit on inlining things that are obviously worth it from size 1 to size 2: things of size 2 will never lead to an increase in code size after we optimize (it takes at least 3 nodes to generate something that reads two locals and reverses their order, which would require a temp local in the outside scope etc.).
Also fix infinite recursion of inlining an infinitely recursive set of calls.
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/Inlining.cpp | 65 | ||||
-rw-r--r-- | src/passes/pass.cpp | 4 |
2 files changed, 42 insertions, 27 deletions
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 3a244948a..0c211e786 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -17,14 +17,17 @@ // // Inlining. // -// By default, this does a conservative inlining of all functions that have -// exactly one use, and are fairly small. That should not increase code -// size, and may have speed benefits. +// This uses some simple heuristics to decide when to inline. // -// When opt level is 3+ (-O3 or above), we more aggressively inline -// even functions with more than one use, that seem to be "lightweight" -// (no loops or calls etc.), so inlining them may get rid of call overhead -// that would be noticeable otherwise +// Two versions are provided: inlining and inlining-optimizing. You +// probably want the optimizing version, which will optimize locations +// we inlined into, as inlining by itself creates a block to house the +// inlined code, some temp locals, etc., which can usually be removed +// by optimizations. Note that the two versions use the same heuristics, +// so we don't take into account the overhead if you don't optimize +// afterwards. The non-optimizing version is mainly useful for debugging, +// or if you intend to run a full set of optimizations anyhow on +// everything later. // #include <atomic> @@ -47,8 +50,16 @@ static const int CAREFUL_SIZE_LIMIT = 15; static const int FLEXIBLE_SIZE_LIMIT = 20; // A size so small that after optimizations, the inlined code will be -// smaller than the call instruction itself. -static const int INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT = 1; +// smaller than the call instruction itself. 2 is a safe number because +// there is no risk of things like +// (func $reverse (param $x i32) (param $y i32) +// (call $something (get_local $y) (get_local $x)) +// ) +// in which case the reversing of the params means we'll possibly need +// a block and a temp local. But that takes at least 3 nodes, and 2 < 3. +// More generally, with 2 items we may have a get_local, but no way to +// require it to be saved instead of directly consumed. +static const int INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT = 2; // Useful into on a function, helping us decide if we can inline it struct FunctionInfo { @@ -64,23 +75,20 @@ struct FunctionInfo { usedGlobally = false; } - bool worthInlining(PassOptions& options, - bool allowMultipleInliningsPerFunction, - bool optimizing) { + bool worthInlining(PassOptions& options) { // if it's big, it's just not worth doing (TODO: investigate more) if (size > FLEXIBLE_SIZE_LIMIT) return false; // if it's so small we have a guarantee that after we optimize the // size will not increase, inline it - if (optimizing && size <= INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT) return true; + if (size <= INLINING_OPTIMIZING_WILL_DECREASE_SIZE_LIMIT) return true; // if it has one use, then inlining it would likely reduce code size // since we are just moving code around, + optimizing, so worth it // if small enough that we are pretty sure its ok if (calls == 1 && !usedGlobally && size <= CAREFUL_SIZE_LIMIT) return true; - if (!allowMultipleInliningsPerFunction) return false; // more than one use, so we can't eliminate it after inlining, // so only worth it if we really care about speed and don't care // about size, and if it's lightweight so a good candidate for - // speeding us up + // speeding us up. return options.optimizeLevel >= 3 && options.shrinkLevel == 0 && lightweight; } }; @@ -221,19 +229,28 @@ struct Inlining : public Pass { // whether to optimize where we inline bool optimize = false; + // the information for each function. recomputed in each iteraction NameInfoMap infos; - bool firstIteration; + Index iterationNumber; void run(PassRunner* runner, Module* module) override { + Index numFunctions = module->functions.size(); // keep going while we inline, to handle nesting. TODO: optimize - firstIteration = true; - while (1) { + iterationNumber = 0; + // no point to do more iterations than the number of functions, as + // it means we infinitely recursing (which should + // be very rare in practice, but it is possible that a recursive call + // can look like it is worth inlining) + while (iterationNumber <= numFunctions) { +#ifdef INLINING_DEBUG + std::cout << "inlining loop iter " << iterationNumber << " (numFunctions: " << numFunctions << ")\n"; +#endif calculateInfos(module); if (!iteration(runner, module)) { return; } - firstIteration = false; + iterationNumber++; } } @@ -268,9 +285,7 @@ struct Inlining : public Pass { InliningState state; for (auto& func : module->functions) { // on the first iteration, allow multiple inlinings per function - if (infos[func->name].worthInlining(runner->options, - firstIteration /* allowMultipleInliningsPerFunction */, - optimize)) { + if (infos[func->name].worthInlining(runner->options)) { state.worthInlining.insert(func->name); } } @@ -291,14 +306,14 @@ struct Inlining : public Pass { std::unordered_set<Function*> inlinedInto; // which functions were inlined into for (auto& func : module->functions) { // if we've inlined a function, don't inline into it in this iteration, - // avoid risk of loops + // avoid risk of races // note that we do not risk stalling progress, as each iteration() will // inline at least one call before hitting this if (inlinedUses.count(func->name)) continue; for (auto& action : state.actionsForFunction[func->name]) { auto* inlinedFunction = action.contents; // if we've inlined into a function, don't inline it in this iteration, - // avoid risk of loops + // avoid risk of races // note that we do not risk stalling progress, as each iteration() will // inline at least one call before hitting this if (inlinedInto.count(inlinedFunction)) continue; @@ -326,7 +341,7 @@ struct Inlining : public Pass { auto& info = infos[name]; bool canRemove = inlinedUses.count(name) && inlinedUses[name] == info.calls && !info.usedGlobally; #ifdef INLINING_DEBUG - std::cout << "removing " << name << '\n'; + if (canRemove) std::cout << "removing " << name << '\n'; #endif return canRemove; }), funcs.end()); diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 11575b1df..c9fc7c40e 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -74,8 +74,8 @@ void PassRegistry::registerPasses() { registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass); registerPass("flatten", "flattens out code, removing nesting", createFlattenPass); registerPass("func-metrics", "reports function metrics", createFunctionMetricsPass); - registerPass("inlining", "inlines functions", createInliningPass); - registerPass("inlining-optimizing", "inlines functions and optimizes where we inlined", createInliningOptimizingPass); + registerPass("inlining", "inline functions (you probably want inlining-optimizing)", createInliningPass); + registerPass("inlining-optimizing", "inline functions and optimizes where we inlined", createInliningOptimizingPass); registerPass("legalize-js-interface", "legalizes i64 types on the import/export boundary", createLegalizeJSInterfacePass); registerPass("local-cse", "common subexpression elimination inside basic blocks", createLocalCSEPass); registerPass("log-execution", "instrument the build with logging of where execution goes", createLogExecutionPass); |