diff options
Diffstat (limited to 'src/passes/Inlining.cpp')
-rw-r--r-- | src/passes/Inlining.cpp | 628 |
1 files changed, 588 insertions, 40 deletions
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 11e9c5a2c..18b3d562a 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -30,10 +30,12 @@ #include <atomic> +#include "ir/branch-utils.h" #include "ir/debug.h" #include "ir/element-utils.h" #include "ir/literal-utils.h" #include "ir/module-utils.h" +#include "ir/names.h" #include "ir/type-updating.h" #include "ir/utils.h" #include "parsing.h" @@ -44,6 +46,8 @@ namespace wasm { +namespace { + // Useful into on a function, helping us decide if we can inline it struct FunctionInfo { std::atomic<Index> refs; @@ -54,7 +58,9 @@ struct FunctionInfo { bool usedGlobally; // in a table or export bool uninlineable; - FunctionInfo() { + FunctionInfo() { clear(); } + + void clear() { refs = 0; size = 0; hasCalls = false; @@ -64,6 +70,18 @@ struct FunctionInfo { uninlineable = false; } + // Provide an explicit = operator as the |refs| field lacks one by default. + FunctionInfo& operator=(FunctionInfo& other) { + refs = other.refs.load(); + size = other.size; + hasCalls = other.hasCalls; + hasLoops = other.hasLoops; + hasTryDelegate = other.hasTryDelegate; + usedGlobally = other.usedGlobally; + uninlineable = other.uninlineable; + return *this; + } + // See pass.h for how defaults for these options were chosen. bool worthInlining(PassOptions& options) { if (uninlineable) { @@ -102,6 +120,17 @@ struct FunctionInfo { } }; +static bool canHandleParams(Function* func) { + // We cannot inline a function if we cannot handle placing its params in a + // locals, as all params become locals. + for (auto param : func->getParams()) { + if (!TypeUpdating::canHandleAsLocal(param)) { + return false; + } + } + return true; +} + typedef std::unordered_map<Name, FunctionInfo> NameInfoMap; struct FunctionInfoScanner @@ -149,12 +178,8 @@ struct FunctionInfoScanner void visitFunction(Function* curr) { auto& info = (*infos)[curr->name]; - // We cannot inline a function if we cannot handle placing it in a local, as - // all params become locals. - for (auto param : curr->getParams()) { - if (!TypeUpdating::canHandleAsLocal(param)) { - info.uninlineable = true; - } + if (!canHandleParams(curr)) { + info.uninlineable = true; } info.size = Measurer::measure(curr->body); @@ -332,6 +357,439 @@ doInlining(Module* module, Function* into, const InliningAction& action) { return block; } +// +// Function splitting / partial inlining / inlining of conditions. +// +// A function may be too costly to inline, but it may be profitable to +// *partially* inline it. The specific cases optimized here are functions with a +// condition, +// +// function foo(x) { +// if (x) return; +// ..lots and lots of other code.. +// } +// +// If the other code after the if is large enough or costly enough, then we will +// not inline the entire function. But it is useful to inline the condition. +// Consider this caller: +// +// function caller(x) { +// foo(0); +// foo(x); +// } +// +// If we inline the condition, we end up with this: +// +// function caller(x) { +// if (0) foo(0); +// if (x) foo(x); +// } +// +// By inlining the condition out of foo() we gain two benefits: +// +// * In the first call here the condition is zero, which means we can +// statically optimize out the call entirely. +// * Even if we can't do that (as in the second call) if at runtime we see the +// condition is false then we avoid the call. That means we perform what is +// hopefully a cheap branch instead of a call and then a branch. +// +// The cost to doing this is an increase in code size, and so this is only done +// when optimizing heavily for speed. +// +// To implement partial inlining we split the function to be inlined. Starting +// with +// +// function foo(x) { +// if (x) return; +// ..lots and lots of other code.. +// } +// +// We create the "inlineable" part of it, and the code that is "outlined": +// +// function foo$inlineable(x) { +// if (x) return; +// foo$outlined(x); +// } +// function foo$outlined(x) { +// ..lots and lots of other code.. +// } +// +// (Note how if the second function were inlined into the first, we would end +// up where we started, with the original function.) After splitting the +// function in this way, we simply inline the inlineable part using the normal +// mechanism for that. That ends up replacing foo(x); with +// +// if (x) foo$outlined(x); +// +// which is what we wanted. +// +// To reduce the complexity of this feature, it is implemented almost entirely +// in its own class, FunctionSplitter. The main inlining logic just calls out to +// the splitter to check if a function is worth splitting, and to get the split +// part if so. +// + +struct FunctionSplitter { + Module* module; + PassOptions& options; + + FunctionSplitter(Module* module, PassOptions& options) + : module(module), options(options) {} + + // Check if an function could be split in order to at least inline part of it, + // in a worthwhile manner. + // + // Even if this returns true, we may not end up inlining the function, as the + // main inlining logic has a few other considerations to take into account + // (like limitations on which functions can be inlined into in each iteration, + // the number of iterations, etc.). Therefore this function will only find out + // if we *can* split, but not actually do any splitting. + bool canSplit(Function* func) { + if (!canHandleParams(func)) { + return false; + } + + return maybeSplit(func); + } + + // Returns the function we should inline, after we split the function into two + // pieces as described above (that is, in the example above, this would return + // foo$inlineable). + // + // This is called when we are definitely inlining the function, and so it will + // perform the splitting (if that has not already been done before). + Function* getInlineableSplitFunction(Function* func) { + Function* inlineable = nullptr; + auto success = maybeSplit(func, &inlineable); + assert(success && inlineable); + return inlineable; + } + + // Clean up. When we are done we no longer need the inlineable functions on + // the module, as they have been inlined into all the places we wanted them + // for. + // + // Returns a list of the names of the functions we split. + std::vector<Name> finish() { + std::vector<Name> ret; + for (auto& kv : splits) { + Name func = kv.first; + auto& split = kv.second; + auto* inlineable = split.inlineable; + if (inlineable) { + module->removeFunction(inlineable->name); + ret.push_back(func); + } + } + return ret; + } + +private: + // Information about splitting a function. + struct Split { + // Whether we can split the function. If this is false, the other two will + // remain nullptr forever; if this is true then we will populate them the + // first time we need them. + bool splittable = false; + + // The inlineable function out of the two that we generate by splitting. + // That is, foo$inlineable from above. + Function* inlineable = nullptr; + + // The outlined function, that is, foo$outlined from above. + Function* outlined = nullptr; + }; + + // All the splitting we have already performed. + // + // Note that this maps from function names, and not Function*, as the main + // inlining code can remove functions as it goes, but we can rely on names + // staying constant. + std::unordered_map<Name, Split> splits; + + // Check if we can split a function. Returns whether we can. If the out param + // is provided, also actually does the split, and returns the inlineable split + // function in that out param. + bool maybeSplit(Function* func, Function** inlineableOut = nullptr) { + // Check if we've processed this input before. + auto iter = splits.find(func->name); + if (iter != splits.end()) { + if (!iter->second.splittable) { + // We've seen before that this cannot be split. + return false; + } + // We can split this function. If we've already done so, return that + // cached result. + if (iter->second.inlineable) { + if (inlineableOut) { + *inlineableOut = iter->second.inlineable; + } + return true; + } + // Otherwise, we are splittable but have not performed the split yet; + // proceed to do so. + } + + // The default value of split.splittable is false, so if we fail we just + // need to return false from this function. If, on the other hand, we can + // split, then we will update this split info accordingly. + auto& split = splits[func->name]; + + auto* body = func->body; + + // If the body is a block, and we have breaks to that block, then we cannot + // outline any code - we can't outline a break without the break's target. + if (auto* block = body->dynCast<Block>()) { + if (BranchUtils::BranchSeeker::has(block, block->name)) { + return false; + } + } + + // All the patterns we look for right now start with an if at the very top + // of the function. + auto* iff = getIf(body); + if (!iff) { + return false; + } + + // If the condition is not very simple, the benefits of this optimization + // are not obvious. + if (!isSimple(iff->condition)) { + return false; + } + + Builder builder(*module); + + // Pattern A: Check if the function begins with + // + // if (simple) return; + // + // TODO: support a return value + if (!iff->ifFalse && func->getResults() == Type::none && + iff->ifTrue->is<Return>()) { + // The body must be a block, because if it were not then the function + // would be easily inlineable (just an if with a simple condition and a + // return), and we would not even attempt to do splitting. + assert(body->is<Block>()); + + split.splittable = true; + // If we were just checking, stop and report success. + if (!inlineableOut) { + return true; + } + + // Note that "A" in the name here identifies this as being a split from + // pattern A. The second pattern B will have B in the name. + split.inlineable = copyFunction(func, "inlineable-A"); + auto* outlined = copyFunction(func, "outlined-A"); + + // The inlineable function should only have the if, which will call the + // outlined function with a flipped condition. + auto* inlineableIf = getIf(split.inlineable->body); + inlineableIf->condition = + builder.makeUnary(EqZInt32, inlineableIf->condition); + inlineableIf->ifTrue = builder.makeCall( + outlined->name, getForwardedArgs(func, builder), Type::none); + split.inlineable->body = inlineableIf; + + // The outlined function no longer needs the initial if. + auto& outlinedList = outlined->body->cast<Block>()->list; + outlinedList.erase(outlinedList.begin()); + + *inlineableOut = split.inlineable; + return true; + } + + // Pattern B: Represents a function whose entire body looks like + // + // if (A_1) { + // ..heavy work.. + // } + // .. + // if (A_k) { + // ..heavy work.. + // } + // B; // an optional final value (which can be a return value) + // + // where there is a small number of such ifs with arguments A1..A_k, and + // A_1..A_k and B (if the final value B exists) are very simple. + // + // Also, each if body must either be unreachable, or it must have type none + // and have no returns. If it is unreachable, for example because it is a + // return, then we will just return a value in the inlineable function: + // + // if (A_i) { + // return outlined(..); + // } + // + // Or, if an if body has type none, then for now we assume that we do not + // need to return a value from there, which makes things simpler, and in + // that case we just do this, which continues onward in the function: + // + // if (A_i) { + // outlined(..); + // } + // + // TODO: handle a possible returned value in this case as well. + // + // Note that the if body type must be unreachable or none, as this is an if + // without an else. + + // Find the number of ifs. + // TODO: Investigate more values here. 4 appears useful on real-world code. + const Index MaxIfs = 4; + Index numIfs = 0; + while (getIf(body, numIfs) && numIfs <= MaxIfs) { + numIfs++; + } + if (numIfs == 0 || numIfs > MaxIfs) { + return false; + } + + // Look for a final item after the ifs. + auto* finalItem = getItem(body, numIfs); + + // The final item must be simple (or not exist, which is simple enough). + if (finalItem && !isSimple(finalItem)) { + return false; + } + + // There must be no other items after the optional final one. + if (finalItem && getItem(body, numIfs + 1)) { + return false; + } + // This has the general shape we seek. Check each if. + for (Index i = 0; i < numIfs; i++) { + auto* iff = getIf(body, i); + // The if must have a simple condition and no else arm. + if (!isSimple(iff->condition) || iff->ifFalse) { + return false; + } + if (iff->ifTrue->type == Type::none) { + // This must have no returns. + if (!FindAll<Return>(iff->ifTrue).list.empty()) { + return false; + } + } else { + // This is an if without an else, and so the type is either none of + // unreachable; + assert(iff->ifTrue->type == Type::unreachable); + } + } + + // Success, this matches the pattern. Exit if we were just checking. + split.splittable = true; + if (!inlineableOut) { + return true; + } + + split.inlineable = copyFunction(func, "inlineable-B"); + + // The inlineable function should only have the ifs, which will call the + // outlined heavy work. + for (Index i = 0; i < numIfs; i++) { + // For each if, create an outlined function with the body of that if, + // and call that from the if. + auto* inlineableIf = getIf(split.inlineable->body, i); + auto* outlined = copyFunction(func, "outlined-B"); + outlined->body = inlineableIf->ifTrue; + + // The outlined function either returns the same results as the original + // one, or nothing, depending on if a value is returned here. + auto valueReturned = + func->getResults() != Type::none && outlined->body->type != Type::none; + outlined->setResults(valueReturned ? func->getResults() : Type::none); + inlineableIf->ifTrue = builder.makeCall(outlined->name, + getForwardedArgs(func, builder), + outlined->getResults()); + if (valueReturned) { + inlineableIf->ifTrue = builder.makeReturn(inlineableIf->ifTrue); + } + } + + // We can just leave the final value at the end, if it exists. + + *inlineableOut = split.inlineable; + return true; + } + + Function* copyFunction(Function* func, std::string prefix) { + // TODO: We copy quite a lot more than we need here, and throw stuff out. + // It is simple to just copy the entire thing to get the params and + // results and all that, but we could be more efficient. + prefix = "byn-split-" + prefix; + return ModuleUtils::copyFunction( + func, + *module, + Names::getValidFunctionName(*module, prefix + '$' + func->name.str)); + } + + // Get the i-th item in a sequence of initial items in an expression. That is, + // if the item is a block, it may have several such items, and otherwise there + // is a single item, that item itself. This basically provides a simpler + // interface than checking if something is a block or not when there is just + // one item. + // + // Returns nullptr if there is no such item. + static Expression* getItem(Expression* curr, Index i = 0) { + if (auto* block = curr->dynCast<Block>()) { + auto& list = block->list; + if (i < list.size()) { + return list[i]; + } + } + if (i == 0) { + return curr; + } + return nullptr; + } + + // Get the i-th if in a sequence of initial ifs in an expression. If no such + // if exists, returns nullptr. + static If* getIf(Expression* curr, Index i = 0) { + auto* item = getItem(curr, i); + if (!item) { + return nullptr; + } + if (auto* iff = item->dynCast<If>()) { + return iff; + } + return nullptr; + } + + // Checks if an expression is very simple - something simple enough that we + // are willing to inline it in this optimization. This should basically take + // almost no cost at all to compute. + bool isSimple(Expression* curr) { + // For now, support local and global gets, and unary operations. + // TODO: Generalize? Use costs.h? + if (curr->type == Type::unreachable) { + return false; + } + if (curr->is<GlobalGet>() || curr->is<LocalGet>()) { + return true; + } + if (auto* unary = curr->dynCast<Unary>()) { + return isSimple(unary->value); + } + if (auto* is = curr->dynCast<RefIs>()) { + return isSimple(is->value); + } + return false; + } + + // Returns a list of local.gets, one for each of the parameters to the + // function. This forwards the arguments passed to the inlineable function to + // the outlined one. + std::vector<Expression*> getForwardedArgs(Function* func, Builder& builder) { + std::vector<Expression*> args; + for (Index i = 0; i < func->getNumParams(); i++) { + args.push_back(builder.makeLocalGet(i, func->getLocalType(i))); + } + return args; + } +}; + struct Inlining : public Pass { // This pass changes locals and parameters. // FIXME DWARF updating does not handle local changes yet. @@ -343,7 +801,15 @@ struct Inlining : public Pass { // the information for each function. recomputed in each iteraction NameInfoMap infos; - void run(PassRunner* runner, Module* module) override { + std::unique_ptr<FunctionSplitter> functionSplitter; + + PassRunner* runner = nullptr; + Module* module = nullptr; + + void run(PassRunner* runner_, Module* module_) override { + runner = runner_; + module = module_; + // No point to do more iterations than the number of functions, as it means // we are infinitely recursing (which should be very rare in practice, but // it is possible that a recursive call can look like it is worth inlining). @@ -361,10 +827,17 @@ struct Inlining : public Pass { // then like loop unrolling it loses its benefit quickly, so set a limit // here. // + // In addition to inlining into a function, we track how many times we do + // other potentially repetitive operations like splitting a function before + // inlining, as any such repetitive operation should be limited in how many + // times we perform it. (An exception is how many times we inlined a + // function, which we do not want to limit - it can be profitable to inline + // a call into a great many callsites, over many iterations.) + // // (Track names here, and not Function pointers, as we can remove functions // while inlining, and it may be confusing during debugging to have a // pointer to something that was removed.) - std::unordered_map<Name, Index> iterationsInlinedInto; + std::unordered_map<Name, Index> iterationCounts; const size_t MaxIterationsForFunc = 5; @@ -373,12 +846,13 @@ struct Inlining : public Pass { std::cout << "inlining loop iter " << iterationNumber << " (numFunctions: " << module->functions.size() << ")\n"; #endif - - calculateInfos(module); - iterationNumber++; + std::unordered_set<Function*> inlinedInto; - iteration(runner, module, inlinedInto); + + prepare(); + iteration(inlinedInto); + if (inlinedInto.empty()) { return; } @@ -388,25 +862,35 @@ struct Inlining : public Pass { #endif for (auto* func : inlinedInto) { - if (++iterationsInlinedInto[func->name] >= MaxIterationsForFunc) { + if (++iterationCounts[func->name] >= MaxIterationsForFunc) { return; } } + + if (functionSplitter) { + auto splitNames = functionSplitter->finish(); + for (auto name : splitNames) { + if (++iterationCounts[name] >= MaxIterationsForFunc) { + return; + } + } + } } } - void calculateInfos(Module* module) { + void prepare() { infos.clear(); // fill in info, as we operate on it in parallel (each function to its own // entry) for (auto& func : module->functions) { infos[func->name]; } - PassRunner runner(module); - FunctionInfoScanner scanner(&infos); - scanner.run(&runner, module); - // fill in global uses - scanner.walkModuleCode(module); + { + PassRunner runner(module); + FunctionInfoScanner scanner(&infos); + scanner.run(&runner, module); + scanner.walkModuleCode(module); + } for (auto& ex : module->exports) { if (ex->kind == ExternalKind::Function) { infos[ex->value].usedGlobally = true; @@ -415,32 +899,42 @@ struct Inlining : public Pass { if (module->start.is()) { infos[module->start].usedGlobally = true; } + + // When optimizing heavily for size, we may potentially split functions in + // order to inline parts of them. + if (runner->options.optimizeLevel >= 3 && !runner->options.shrinkLevel) { + functionSplitter = + std::make_unique<FunctionSplitter>(module, runner->options); + } } - void iteration(PassRunner* runner, - Module* module, - std::unordered_set<Function*>& inlinedInto) { + void iteration(std::unordered_set<Function*>& inlinedInto) { // decide which to inline InliningState state; ModuleUtils::iterDefinedFunctions(*module, [&](Function* func) { - if (infos[func->name].worthInlining(runner->options)) { + if (worthInlining(func->name)) { state.worthInlining.insert(func->name); } }); if (state.worthInlining.size() == 0) { return; } - // fill in actionsForFunction, as we operate on it in parallel (each - // function to its own entry) + // Fill in actionsForFunction, as we operate on it in parallel (each + // function to its own entry). Also generate a vector of the function names + // so that in the later loop we can iterate on it deterministically and + // without iterator invalidation. + std::vector<Name> funcNames; for (auto& func : module->functions) { state.actionsForFunction[func->name]; + funcNames.push_back(func->name); } // find and plan inlinings Planner(&state).run(runner, module); // perform inlinings TODO: parallelize std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined // which functions were inlined into - for (auto& func : module->functions) { + for (auto name : funcNames) { + auto* func = module->getFunction(name); // if we've inlined a function, don't inline into it in this iteration, // avoid risk of races // note that we do not risk stalling progress, as each iteration() will @@ -448,7 +942,7 @@ struct Inlining : public Pass { if (inlinedUses.count(func->name)) { continue; } - for (auto& action : state.actionsForFunction[func->name]) { + for (auto& action : state.actionsForFunction[name]) { auto* inlinedFunction = action.contents; // if we've inlined into a function, don't inline it in this iteration, // avoid risk of races @@ -461,12 +955,24 @@ struct Inlining : public Pass { if (!isUnderSizeLimit(func->name, inlinedName)) { continue; } + + // Success - we can inline. #ifdef INLINING_DEBUG std::cout << "inline " << inlinedName << " into " << func->name << '\n'; #endif - doInlining(module, func.get(), action); + + // Update the action for the actual inlining we are about to perform + // (when splitting, we will actually inline one of the split pieces and + // not the original function itself; note how even if we do that then + // we are still removing a call to the original function here, and so + // we do not need to change anything else lower down - we still want to + // note that we got rid of one use of the original function). + action.contents = getActuallyInlinedFunction(action.contents); + + // Perform the inlining and update counts. + doInlining(module, func, action); inlinedUses[inlinedName]++; - inlinedInto.insert(func.get()); + inlinedInto.insert(func); assert(inlinedUses[inlinedName] <= infos[inlinedName].refs); } } @@ -486,6 +992,41 @@ struct Inlining : public Pass { }); } + bool worthInlining(Name name) { + // Check if the function itself is worth inlining as it is. + if (infos[name].worthInlining(runner->options)) { + return true; + } + + // Otherwise, check if we can at least inline part of it, if we are + // interested in such things. + if (functionSplitter && + functionSplitter->canSplit(module->getFunction(name))) { + return true; + } + + return false; + } + + // Gets the actual function to be inlined. Normally this is the function + // itself, but if it is a function that we must first split (i.e., we only + // want to partially inline it) then it will be the inlineable part of the + // split. + // + // This is called right before actually performing the inlining, that is, we + // are guaranteed to inline after this. + Function* getActuallyInlinedFunction(Function* func) { + // If we want to inline this function itself, do so. + if (infos[func->name].worthInlining(runner->options)) { + return func; + } + + // Otherwise, this is a case where we want to inline part of it, after + // splitting. + assert(functionSplitter); + return functionSplitter->getInlineableSplitFunction(func); + } + // Checks if the combined size of the code after inlining is under the // absolute size limit. We have an absolute limit in order to avoid // extremely-large sizes after inlining, as they may hit limits in VMs and/or @@ -505,21 +1046,20 @@ struct Inlining : public Pass { } }; -Pass* createInliningPass() { return new Inlining(); } - -Pass* createInliningOptimizingPass() { - auto* ret = new Inlining(); - ret->optimize = true; - return ret; -} - -static const char* MAIN = "main"; -static const char* ORIGINAL_MAIN = "__original_main"; +} // anonymous namespace +// +// InlineMain +// // Inline __original_main into main, if they exist. This works around the odd // thing that clang/llvm currently do, where __original_main contains the user's // actual main (this is done as a workaround for main having two different // possible signatures). +// + +static const char* MAIN = "main"; +static const char* ORIGINAL_MAIN = "__original_main"; + struct InlineMainPass : public Pass { void run(PassRunner* runner, Module* module) override { auto* main = module->getFunctionOrNull(MAIN); @@ -547,6 +1087,14 @@ struct InlineMainPass : public Pass { } }; +Pass* createInliningPass() { return new Inlining(); } + +Pass* createInliningOptimizingPass() { + auto* ret = new Inlining(); + ret->optimize = true; + return ret; +} + Pass* createInlineMainPass() { return new InlineMainPass(); } } // namespace wasm |