/*
 * Copyright 2016 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Inlining.
//
// Two versions are provided: inlining and inlining-optimizing. You
// probably want the optimizing version, which will optimize locations
// we inlined into, as inlining by itself creates a block to house the
// inlined code, some temp locals, etc., which can usually be removed
// by optimizations. Note that the two versions use the same heuristics,
// so we don't take into account the overhead if you don't optimize
// afterwards. The non-optimizing version is mainly useful for debugging,
// or if you intend to run a full set of optimizations anyhow on
// everything later.
//

#include <atomic>

#include "ir/branch-utils.h"
#include "ir/debuginfo.h"
#include "ir/drop.h"
#include "ir/eh-utils.h"
#include "ir/element-utils.h"
#include "ir/find_all.h"
#include "ir/literal-utils.h"
#include "ir/localize.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/type-updating.h"
#include "ir/utils.h"
#include "parsing.h"
#include "pass.h"
#include "passes/opt-utils.h"
#include "wasm-builder.h"
#include "wasm.h"

namespace wasm {

namespace {

enum class InliningMode {
  // We do not know yet if this function can be inlined, as that has
  // not been computed yet.
  Unknown,
  // This function cannot be inlined in any way.
  Uninlineable,
  // This function can be inlined fully, that is, normally: the entire function
  // can be inlined. This is in contrast to split/partial inlining, see below.
  Full,
  // This function cannot be inlined normally, but we can use split inlining,
  // using pattern "A" or "B" (see below).
  SplitPatternA,
  SplitPatternB
};

// Useful info on a function, helping us decide if we can inline it.
struct FunctionInfo {
  std::atomic<Index> refs;
  Index size;
  bool hasCalls;
  bool hasLoops;
  bool hasTryDelegate;
  // Something is used globally if there is a reference to it in a table or
  // export etc.
  bool usedGlobally;
  // We consider a function to be a trivial call if the body is just a call
  // with trivial arguments, like this:
  //
  //  (func $forward (param $x) (param $y)
  //    (call $target (local.get $x) (local.get $y))
  //  )
  //
  // Specifically the body must be a call, and the operands to the call must be
  // of size 1 (generally, LocalGet or Const).
  bool isTrivialCall;
  InliningMode inliningMode;

  FunctionInfo() { clear(); }

  void clear() {
    refs = 0;
    size = 0;
    hasCalls = false;
    hasLoops = false;
    hasTryDelegate = false;
    usedGlobally = false;
    isTrivialCall = false;
    inliningMode = InliningMode::Unknown;
  }

  // Provide an explicit = operator as the |refs| field lacks one by default.
  FunctionInfo& operator=(const FunctionInfo& other) {
    refs = other.refs.load();
    size = other.size;
    hasCalls = other.hasCalls;
    hasLoops = other.hasLoops;
    hasTryDelegate = other.hasTryDelegate;
    usedGlobally = other.usedGlobally;
    isTrivialCall = other.isTrivialCall;
    inliningMode = other.inliningMode;
    return *this;
  }

  // See pass.h for how defaults for these options were chosen.
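  //
  // A rough summary of the decision order implemented below (thresholds come
  // from options.inlining; this is only a restatement of the code that
  // follows, not an additional heuristic):
  //
  //   has try-delegate                                   -> do not inline
  //   size <= alwaysInlineMaxSize                        -> inline
  //   one ref, not used globally,
  //     size <= oneCallerInlineMaxSize                   -> inline
  //   size > flexibleInlineMaxSize                       -> do not inline
  //   shrinkLevel > 0 or optimizeLevel < 3               -> do not inline
  //   has calls                                          -> inline only if a
  //                                                         trivial call
  //   otherwise                                          -> inline unless
  //                                                         loops forbid it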
  bool worthFullInlining(PassOptions& options) {
    // Until we have proper support for try-delegate, ignore such functions.
    // FIXME https://github.com/WebAssembly/binaryen/issues/3634
    if (hasTryDelegate) {
      return false;
    }
    // If it's small enough that we always want to inline such things, do so.
    if (size <= options.inlining.alwaysInlineMaxSize) {
      return true;
    }
    // If it has one use, then inlining it would likely reduce code size, at
    // least for reasonable function sizes.
    if (refs == 1 && !usedGlobally &&
        size <= options.inlining.oneCallerInlineMaxSize) {
      return true;
    }
    // If it's so big that we have no flexible options that could allow it,
    // do not inline.
    if (size > options.inlining.flexibleInlineMaxSize) {
      return false;
    }
    // More than one use, so we can't eliminate it after inlining, and inlining
    // it will hurt code size. Stop if we are focused on size or not heavily
    // focused on speed.
    if (options.shrinkLevel > 0 || options.optimizeLevel < 3) {
      return false;
    }
    if (hasCalls) {
      // This has calls. If it is just a trivial call itself then inline, as we
      // will save a call that way - basically we skip a trampoline in the
      // middle - but if it is something more complex, leave it alone, as we
      // may not help much (and with recursion we may end up with a wasteful
      // increase in code size).
      //
      // Note that inlining trivial calls may increase code size, e.g. if they
      // use a parameter more than once (forcing us after inlining to save that
      // value to a local, etc.), but here we are optimizing for speed and not
      // size, so we risk it.
      return isTrivialCall;
    }
    // This doesn't have calls. Inline if loops do not prevent us (normally, a
    // loop suggests a lot of work and so inlining is less useful).
    return !hasLoops || options.inlining.allowFunctionsWithLoops;
  }
};

static bool canHandleParams(Function* func) {
  // We cannot inline a function if we cannot handle placing its params in
  // locals, as all params become locals.
  for (auto param : func->getParams()) {
    if (!TypeUpdating::canHandleAsLocal(param)) {
      return false;
    }
  }
  return true;
}

using NameInfoMap = std::unordered_map<Name, FunctionInfo>;

struct FunctionInfoScanner
  : public WalkerPass<PostWalker<FunctionInfoScanner>> {
  bool isFunctionParallel() override { return true; }

  FunctionInfoScanner(NameInfoMap& infos) : infos(infos) {}

  std::unique_ptr<Pass> create() override {
    return std::make_unique<FunctionInfoScanner>(infos);
  }

  void visitLoop(Loop* curr) {
    // having a loop
    infos[getFunction()->name].hasLoops = true;
  }

  void visitCall(Call* curr) {
    // can't add a new element in parallel
    assert(infos.count(curr->target) > 0);
    infos[curr->target].refs++;
    // having a call
    infos[getFunction()->name].hasCalls = true;
  }

  // N.B.: CallIndirect and CallRef are intentionally omitted here, as we only
  //       note direct calls. Direct calls can lead to infinite recursion
  //       which we need to avoid, while indirect ones may in theory be
  //       optimized to direct calls later, but we take that risk - which is
  //       worthwhile as if we do manage to turn an indirect call into
  //       something else then it can be a big speedup, so we do want to inline
  //       code that has such indirect calls.
  void visitTry(Try* curr) {
    if (curr->isDelegate()) {
      infos[getFunction()->name].hasTryDelegate = true;
    }
  }

  void visitRefFunc(RefFunc* curr) {
    assert(infos.count(curr->func) > 0);
    infos[curr->func].refs++;
  }

  void visitFunction(Function* curr) {
    auto& info = infos[curr->name];

    if (!canHandleParams(curr)) {
      info.inliningMode = InliningMode::Uninlineable;
    }

    info.size = Measurer::measure(curr->body);

    if (auto* call = curr->body->dynCast<Call>()) {
      if (info.size == call->operands.size() + 1) {
        // This function body is a call with some trivial (size 1) operands
        // like LocalGet or Const, so it is a trivial call.
        info.isTrivialCall = true;
      }
    }
  }

private:
  NameInfoMap& infos;
};

struct InliningAction {
  Expression** callSite;
  Function* contents;
  bool insideATry;

  // An optional name hint can be provided, which will then be used in the name
  // of the block we put the inlined code in. Using a unique name hint in each
  // inlining can reduce the risk of name overlaps (which cause fixup work in
  // UniqueNameMapper::uniquify).
  Index nameHint = 0;

  InliningAction(Expression** callSite,
                 Function* contents,
                 bool insideATry,
                 Index nameHint = 0)
    : callSite(callSite), contents(contents), insideATry(insideATry),
      nameHint(nameHint) {}
};

struct InliningState {
  // Maps functions worth inlining to the mode with which we can inline them.
  std::unordered_map<Name, InliningMode> inlinableFunctions;
  // function name => actions that can be performed in it
  std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction;
};

struct Planner : public WalkerPass<TryDepthWalker<Planner>> {
  bool isFunctionParallel() override { return true; }

  Planner(InliningState* state) : state(state) {}

  std::unique_ptr<Pass> create() override {
    return std::make_unique<Planner>(state);
  }

  void visitCall(Call* curr) {
    // plan to inline if we know this is valid to inline, and if the call is
    // actually performed - if it is dead code, it's pointless to inline.
    // we also cannot inline ourselves.
    bool isUnreachable;
    if (curr->isReturn) {
      // Tail calls are only actually unreachable if an argument is
      // unreachable.
      isUnreachable = std::any_of(
        curr->operands.begin(), curr->operands.end(), [](Expression* op) {
          return op->type == Type::unreachable;
        });
    } else {
      isUnreachable = curr->type == Type::unreachable;
    }
    if (state->inlinableFunctions.count(curr->target) && !isUnreachable &&
        curr->target != getFunction()->name) {
      // can't add a new element in parallel
      assert(state->actionsForFunction.count(getFunction()->name) > 0);
      state->actionsForFunction[getFunction()->name].emplace_back(
        getCurrentPointer(),
        getModule()->getFunction(curr->target),
        tryDepth > 0);
    }
  }

private:
  InliningState* state;
};

struct Updater : public TryDepthWalker<Updater> {
  Module* module;
  std::map<Index, Index> localMapping;
  Name returnName;
  Type resultType;
  bool isReturn;
  Builder* builder;
  PassOptions& options;

  struct ReturnCallInfo {
    // The original `return_call` or `return_call_indirect` or
    // `return_call_ref` with its operands replaced with `local.get`s.
    Expression* call;
    // The branch that is serving as the "return" part of the original
    // `return_call`.
    Break* branch;
  };

  // Collect information on return_calls in the inlined body. Each will be
  // turned into branches out of the original inlined body followed by a
  // non-return version of the original `return_call`, followed by a branch out
  // to the caller. The branch labels will be filled in at the end of the walk.
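  //
  // For example (an illustrative sketch), if the inlined body contains
  //
  //  (try .. (return_call $f (..operands..)) ..)
  //
  // then the return_call site itself is replaced by code that saves the
  // operands to locals and branches out of the old inlined body, and a
  // downgraded (call $f ..) placed after that old body runs next, with its
  // result (if any) flowing out to the caller.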
  std::vector<ReturnCallInfo> returnCallInfos;

  Updater(PassOptions& options) : options(options) {}

  void visitReturn(Return* curr) {
    replaceCurrent(builder->makeBreak(returnName, curr->value));
  }

  template<typename T> void handleReturnCall(T* curr, Signature sig) {
    if (isReturn || !curr->isReturn) {
      // If the inlined callsite was already a return_call, then we can keep
      // return_calls in the inlined function rather than downgrading them.
      // That is, if A->B and B->C and both those calls are return_calls
      // then after inlining A->B we want to now have A->C be a
      // return_call.
      return;
    }
    if (tryDepth == 0) {
      // Return calls in inlined functions should only break out of
      // the scope of the inlined code, not the entire function they
      // are being inlined into. To achieve this, make the call a
      // non-return call and add a break. This does not cause
      // unbounded stack growth because inlining and return calling
      // both avoid creating a new stack frame.
      curr->isReturn = false;
      curr->type = sig.results;
      // There might still be unreachable children causing this to be
      // unreachable.
      curr->finalize();
      if (sig.results.isConcrete()) {
        replaceCurrent(builder->makeBreak(returnName, curr));
      } else {
        replaceCurrent(
          builder->blockify(curr, builder->makeBreak(returnName)));
      }
    } else {
      // Set the children to locals as necessary, then add a branch out of the
      // inlined body. The branch label will be set later when we create branch
      // targets for the calls.
      Block* childBlock = ChildLocalizer(curr, getFunction(), *module, options)
                            .getChildrenReplacement();
      Break* branch = builder->makeBreak(Name());
      childBlock->list.push_back(branch);
      childBlock->type = Type::unreachable;
      replaceCurrent(childBlock);

      curr->isReturn = false;
      curr->type = sig.results;
      returnCallInfos.push_back({curr, branch});
    }
  }

  void visitCall(Call* curr) {
    handleReturnCall(curr, module->getFunction(curr->target)->getSig());
  }

  void visitCallIndirect(CallIndirect* curr) {
    handleReturnCall(curr, curr->heapType.getSignature());
  }

  void visitCallRef(CallRef* curr) {
    Type targetType = curr->target->type;
    if (!targetType.isSignature()) {
      // We don't know what type the call should return, but it will also never
      // be reached, so we don't need to do anything here.
      return;
    }
    handleReturnCall(curr, targetType.getHeapType().getSignature());
  }

  void visitLocalGet(LocalGet* curr) {
    curr->index = localMapping[curr->index];
  }

  void visitLocalSet(LocalSet* curr) {
    curr->index = localMapping[curr->index];
  }

  void walk(Expression*& curr) {
    PostWalker<Updater>::walk(curr);
    if (returnCallInfos.empty()) {
      return;
    }

    Block* body = builder->blockify(curr);
    curr = body;
    auto blockNames = BranchUtils::BranchAccumulator::get(body);

    for (Index i = 0; i < returnCallInfos.size(); ++i) {
      auto& info = returnCallInfos[i];

      // Add a block containing the previous body and a branch up to the
      // caller. Give the block a name that will allow this return_call's
      // original callsite to branch out of it then execute the call before
      // returning to the caller.
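      //
      // Concretely, for a concrete result type the body ends up shaped
      // roughly like this (an illustrative sketch; the none/unreachable case
      // below reuses the old body block directly):
      //
      //  (block                       ;; |body|, the blockified inlined code
      //    (block $__return_callN     ;; fresh name chosen below
      //      (br $returnName          ;; exit to the caller with the old
      //        (..previous body..)    ;; body's value
      //      )
      //    )
      //    (..the downgraded call..)  ;; reached only when the original
      //  )                            ;; return_call site branched here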
      auto name = Names::getValidName(
        "__return_call",
        [&](Name test) { return !blockNames.count(test); },
        i);
      blockNames.insert(name);
      info.branch->name = name;

      Block* oldBody = builder->makeBlock(body->list, body->type);
      body->list.clear();

      if (resultType.isConcrete()) {
        body->list.push_back(builder->makeBlock(
          name, {builder->makeBreak(returnName, oldBody)}, Type::none));
      } else {
        oldBody->list.push_back(builder->makeBreak(returnName));
        oldBody->name = name;
        oldBody->type = Type::none;
        body->list.push_back(oldBody);
      }
      body->list.push_back(info.call);
      body->finalize(resultType);
    }
  }
};

// Core inlining logic. Modifies the outside function (adding locals as
// needed) by copying the inlined code into it.
static void doCodeInlining(Module* module,
                           Function* into,
                           const InliningAction& action,
                           PassOptions& options) {
  Function* from = action.contents;
  auto* call = (*action.callSite)->cast<Call>();

  // Works for return_call, too
  Type retType = module->getFunction(call->target)->getResults();

  // Build the block that will contain the inlined contents.
  Builder builder(*module);
  auto* block = builder.makeBlock();
  auto name = std::string("__inlined_func$") + from->name.toString();
  if (action.nameHint) {
    name += '$' + std::to_string(action.nameHint);
  }
  block->name = Name(name);

  // In the unlikely event that the function already has a branch target with
  // this name, fix that up, as otherwise we can get unexpected capture of our
  // branches, that is, we could end up with this:
  //
  //  (block $X       ;; a new block we add as the target of returns
  //    (from's contents
  //      (block $X   ;; a block in from's contents with a colliding name
  //        (br $X    ;; a new br we just added that replaces a return
  //
  // Here the br wants to go to the very outermost block, to represent a
  // return from the inlined function's code, but it ends up captured by an
  // internal block. We also need to be careful of the call's children:
  //
  //  (block $X       ;; a new block we add as the target of returns
  //    (local.set $param
  //      (call's first parameter
  //        (br $X)   ;; nested br in call's first parameter
  //      )
  //    )
  //
  // (In this case we could use a second block and define the named block $X
  // after the call's parameters, but that adds work for an extremely rare
  // situation.) The latter case does not apply if the call is a
  // return_call inside a try, because in that case the call's
  // children do not appear inside the same block as the inlined body.
  bool hoistCall = call->isReturn && action.insideATry;
  if (BranchUtils::hasBranchTarget(from->body, block->name) ||
      (!hoistCall && BranchUtils::BranchSeeker::has(call, block->name))) {
    auto fromNames = BranchUtils::getBranchTargets(from->body);
    auto callNames = hoistCall ? BranchUtils::NameSet{}
                               : BranchUtils::BranchAccumulator::get(call);
    block->name = Names::getValidName(block->name, [&](Name test) {
      return !fromNames.count(test) && !callNames.count(test);
    });
  }

  // Prepare to update the inlined code's locals and other things.
  Updater updater(options);
  updater.setFunction(into);
  updater.module = module;
  updater.resultType = from->getResults();
  updater.returnName = block->name;
  updater.isReturn = call->isReturn;
  updater.builder = &builder;

  // Set up a locals mapping
  for (Index i = 0; i < from->getNumLocals(); i++) {
    updater.localMapping[i] = builder.addVar(into, from->getLocalType(i));
  }

  if (hoistCall) {
    // Wrap the existing function body in a block we can branch out of before
    // entering the inlined function body. This block must have a name that is
    // different from any other block name above the branch.
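    //
    // The overall shape built here is roughly the following (an illustrative
    // sketch; the actual labels are chosen by getValidName, and the
    // return/value handling depends on the result type):
    //
    //  (block                          ;; new body of the caller
    //    (block $__original_body
    //      (return (..old caller body..))
    //    )
    //    (block $__inlined_func$from   ;; the inlined contents go here
    //      ..
    //    )
    //  )
    //
    // and the original return_call site inside the old caller body is
    // replaced by local.sets of the operands (if any) followed by
    // (br $__original_body).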
    auto intoNames = BranchUtils::BranchAccumulator::get(into->body);
    auto bodyName =
      Names::getValidName(Name("__original_body"),
                          [&](Name test) { return !intoNames.count(test); });
    if (retType.isConcrete()) {
      into->body = builder.makeBlock(
        bodyName, {builder.makeReturn(into->body)}, Type::none);
    } else {
      into->body = builder.makeBlock(
        bodyName, {into->body, builder.makeReturn()}, Type::none);
    }

    // Sequence the inlined function body after the original caller body.
    into->body = builder.makeSequence(into->body, block, retType);

    // Replace the original callsite with an expression that assigns the
    // operands into the params and branches out of the original body.
    auto numParams = from->getParams().size();
    if (numParams) {
      auto* branchBlock = builder.makeBlock();
      for (Index i = 0; i < numParams; i++) {
        branchBlock->list.push_back(
          builder.makeLocalSet(updater.localMapping[i], call->operands[i]));
      }
      branchBlock->list.push_back(builder.makeBreak(bodyName));
      branchBlock->finalize(Type::unreachable);
      *action.callSite = branchBlock;
    } else {
      *action.callSite = builder.makeBreak(bodyName);
    }
  } else {
    // Assign the operands into the params
    for (Index i = 0; i < from->getParams().size(); i++) {
      block->list.push_back(
        builder.makeLocalSet(updater.localMapping[i], call->operands[i]));
    }
    // Zero out the vars (as we may be in a loop, and may depend on their
    // zero-init value)
    for (Index i = 0; i < from->vars.size(); i++) {
      auto type = from->vars[i];
      if (!LiteralUtils::canMakeZero(type)) {
        // Non-zeroable locals do not need to be zeroed out. As they have no
        // zero value they by definition should not be used before being
        // written to, so any value we set here would not be observed anyhow.
        continue;
      }
      block->list.push_back(
        builder.makeLocalSet(updater.localMapping[from->getVarIndexBase() + i],
                             LiteralUtils::makeZero(type, *module)));
    }
    if (call->isReturn) {
      assert(!action.insideATry);
      if (retType.isConcrete()) {
        *action.callSite = builder.makeReturn(block);
      } else {
        *action.callSite = builder.makeSequence(block, builder.makeReturn());
      }
    } else {
      *action.callSite = block;
    }
  }

  // Generate and update the inlined contents
  auto* contents = ExpressionManipulator::copy(from->body, *module);
  debuginfo::copyBetweenFunctions(from->body, contents, from, into);
  updater.walk(contents);
  block->list.push_back(contents);
  block->type = retType;

  // The ReFinalize below will handle propagating unreachability if we need to
  // do so, that is, if the call was reachable but now the inlined content we
  // replaced it with was unreachable. The opposite case requires special
  // handling: ReFinalize works under the assumption that code can become
  // unreachable, but it does not go back from that state. But inlining can
  // cause that:
  //
  //  (call $A                                ;; an unreachable call
  //    (unreachable)
  //  )
  // =>
  //  (block $__inlined_A_body (result i32)   ;; reachable code after inlining
  //    (unreachable)
  //  )
  //
  // That is, if the called function wraps the input parameter in a block with
  // a declared type, then the block is not unreachable. And then we might
  // error if the outside expects the code to be unreachable - perhaps it only
  // validates that way. To fix this, if the call was unreachable then we make
  // the inlined code unreachable as well. That also maximizes DCE
  // opportunities by propagating unreachability as much as possible.
  //
  // (Note that we don't need to do this for a return_call, which is always
  // unreachable anyhow.)
  if (call->type == Type::unreachable && !call->isReturn) {
    // Make the replacement code unreachable.
    // Note that we can't just add an unreachable at the end, as the block
    // might have breaks to it (returns are transformed into those).
    Expression* old = block;
    if (old->type.isConcrete()) {
      old = builder.makeDrop(old);
    }
    *action.callSite = builder.makeSequence(old, builder.makeUnreachable());
  }
}

// Updates the outer function after we inline into it. This is a general
// operation that does not depend on what we inlined, it just makes sure that
// we refinalize everything, have no duplicate break labels, etc.
static void updateAfterInlining(Module* module, Function* into) {
  // Anything we inlined into may now have non-unique label names, fix it up.
  // Note that we must do this before refinalization, as otherwise duplicate
  // block labels can lead to errors (the IR must be valid before we
  // refinalize).
  wasm::UniqueNameMapper::uniquify(into->body);
  // Inlining unreachable contents can make things in the function we inlined
  // into unreachable.
  ReFinalize().walkFunctionInModule(into, module);
  // New locals we added may require fixups for nondefaultability.
  // FIXME Is this not done automatically?
  TypeUpdating::handleNonDefaultableLocals(into, *module);
}

static void doInlining(Module* module,
                       Function* into,
                       const InliningAction& action,
                       PassOptions& options) {
  doCodeInlining(module, into, action, options);
  updateAfterInlining(module, into);
}

// A map of function names to the inlining actions we've decided to actually
// perform in them.
using ChosenActions = std::unordered_map<Name, std::vector<InliningAction>>;

// A pass that calls doInlining() on a bunch of actions that were chosen to be
// performed.
struct DoInlining : public Pass {
  bool isFunctionParallel() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<DoInlining>(chosenActions);
  }

  DoInlining(const ChosenActions& chosenActions)
    : chosenActions(chosenActions) {}

  void runOnFunction(Module* module, Function* func) override {
    auto iter = chosenActions.find(func->name);
    // We must be called on a function that we actually want to inline into.
    assert(iter != chosenActions.end());
    const auto& actions = iter->second;
    assert(!actions.empty());

    // Inline all the code first, then update func once at the end (which saves
    // e.g. running ReFinalize after each action, of which there might be
    // many).
    for (auto action : actions) {
      doCodeInlining(module, func, action, getPassOptions());
    }
    updateAfterInlining(module, func);
  }

private:
  const ChosenActions& chosenActions;
};

//
// Function splitting / partial inlining / inlining of conditions.
//
// A function may be too costly to inline, but it may be profitable to
// *partially* inline it. The specific cases optimized here are functions with
// a condition,
//
//  function foo(x) {
//    if (x) return;
//    ..lots and lots of other code..
//  }
//
// If the other code after the if is large enough or costly enough, then we
// will not inline the entire function. But it is useful to inline the
// condition. Consider this caller:
//
//  function caller(x) {
//    foo(0);
//    foo(x);
//  }
//
// If we inline the condition, we end up with this:
//
//  function caller(x) {
//    if (0) foo(0);
//    if (x) foo(x);
//  }
//
// By inlining the condition out of foo() we gain two benefits:
//
//  * In the first call here the condition is zero, which means we can
//    statically optimize out the call entirely.
//  * Even if we can't do that (as in the second call) if at runtime we see the
//    condition is false then we avoid the call. That means we perform what is
//    hopefully a cheap branch instead of a call and then a branch.
//
// The cost to doing this is an increase in code size, and so this is only done
// when optimizing heavily for speed.
//
// To implement partial inlining we split the function to be inlined. Starting
// with
//
//  function foo(x) {
//    if (x) return;
//    ..lots and lots of other code..
//  }
//
// We create the "inlineable" part of it, and the code that is "outlined":
//
//  function foo$inlineable(x) {
//    if (x) return;
//    foo$outlined(x);
//  }
//  function foo$outlined(x) {
//    ..lots and lots of other code..
//  }
//
// (Note how if the second function were inlined into the first, we would end
// up where we started, with the original function.) After splitting the
// function in this way, we simply inline the inlineable part using the normal
// mechanism for that. That ends up replacing foo(x); with
//
//  if (x) foo$outlined(x);
//
// which is what we wanted.
//
// To reduce the complexity of this feature, it is implemented almost entirely
// in its own class, FunctionSplitter. The main inlining logic just calls out
// to the splitter to check if a function is worth splitting, and to get the
// split part if so.
//
struct FunctionSplitter {
  Module* module;
  PassOptions& options;

  FunctionSplitter(Module* module, PassOptions& options)
    : module(module), options(options) {}

  // Check if a function could be split in order to at least inline part of it,
  // in a worthwhile manner.
  //
  // Even if this returns a split inlining mode, we may not end up inlining the
  // function, as the main inlining logic has a few other considerations to
  // take into account (like limitations on which functions can be inlined into
  // in each iteration, the number of iterations, etc.). Therefore this
  // function may only find out if we *can* split, but not actually do any
  // splitting.
  //
  // Note that to avoid wasteful work, this function may return "Full" inlining
  // mode instead of a split inlining. That is, if it detects that a partial
  // inlining will trigger a follow-up full inline of the split function
  // then it will instead return "InliningMode::Full" directly. In more detail,
  // imagine we have
  //
  //  foo(10);
  //
  // which calls
  //
  //  func foo(x) {
  //    if (x) {
  //      CODE
  //    }
  //  }
  //
  // If we partially inline then the call becomes
  //
  //  if (10) {
  //    outlined-CODE()
  //  }
  //
  // That is, we've only inlined the |if| out of foo(), and we call a function
  // that contains the code in CODE. But if CODE is small enough to be inlined
  // then a later iteration of the inliner will do just that. The result of
  // this split inlining and then full inlining of the outlined code is exactly
  // the same as if we normally inlined from the beginning, but it adds more
  // work, so we'd like to avoid it, which we do by seeing when a split would
  // be followed by inlining the remainder, and then we return Full here and
  // just inline it all normally.
  //
  // Note that the above issue shows that we may in some cases inline more than
  // the normal inlining limit. That is, if the inlining limit is 20, and CODE
  // in the example above was 19, then foo()'s size was 21 (when we add the if
  // and get of |x|). That leads to the situation where foo() is too big to
  // normally inline, but the outlined CODE can then be inlined. And this shows
  // that we end up inlining a function of size 21, even though it is larger
  // than our inlining limit.
  // We consider this acceptable because it only occurs on functions slightly
  // larger than the inlining limit, and only if they have the simple forms
  // that split inlining recognizes, that we think are useful to inline.
  // Alternatively, if we wanted to avoid this, it would add complexity because
  // we'd need to recognize the outlined function with CODE in it and *not*
  // inline it even though it is small enough, after splitting, to be inlined.
  // That is, splitting creates smaller functions, so it can lead to more
  // inlining (but, again, that makes sense since we only split on very
  // specific patterns we believe are worth handling in that manner).
  InliningMode getSplitDrivenInliningMode(Function* func, FunctionInfo& info) {
    const Index MaxIfs = options.inlining.partialInliningIfs;
    assert(MaxIfs > 0);

    auto* body = func->body;

    // If the body is a block, and we have breaks to that block, then we cannot
    // outline any code - we can't outline a break without the break's target.
    if (auto* block = body->dynCast<Block>()) {
      if (BranchUtils::BranchSeeker::has(block, block->name)) {
        return InliningMode::Uninlineable;
      }
    }

    // All the patterns we look for right now start with an if at the very top
    // of the function.
    auto* iff = getIf(body);
    if (!iff) {
      return InliningMode::Uninlineable;
    }

    // If the condition is not very simple, the benefits of this optimization
    // are not obvious.
    if (!isSimple(iff->condition)) {
      return InliningMode::Uninlineable;
    }

    // Pattern A: Check if the function begins with
    //
    //  if (simple) return;
    //
    // TODO: support a return value
    if (!iff->ifFalse && func->getResults() == Type::none &&
        iff->ifTrue->is<Return>()) {
      // The body must be a block, because if it were not then the function
      // would be easily inlineable (just an if with a simple condition and a
      // return), and we would not even attempt to do splitting.
      assert(body->is<Block>());

      auto outlinedFunctionSize = info.size - Measurer::measure(iff);
      // If the outlined function will be worth a normal inline, skip the
      // intermediate state and inline fully now. Note that if full inlining is
      // disabled we will not do this, and instead inline partially.
      if (!func->noFullInline &&
          outlinedFunctionWorthInlining(info, outlinedFunctionSize)) {
        return InliningMode::Full;
      }

      return InliningMode::SplitPatternA;
    }

    // Pattern B: Represents a function whose entire body looks like
    //
    //  if (A_1) {
    //    ..heavy work..
    //  }
    //  ..
    //  if (A_k) {
    //    ..heavy work..
    //  }
    //  B; // an optional final value (which can be a return value)
    //
    // where there is a small number of such ifs with arguments A_1..A_k, and
    // A_1..A_k and B (if the final value B exists) are very simple.
    //
    // Also, each if body must either be unreachable, or it must have type none
    // and have no returns. If it is unreachable, for example because it is a
    // return, then we will just return a value in the inlineable function:
    //
    //  if (A_i) {
    //    return outlined(..);
    //  }
    //
    // Or, if an if body has type none, then for now we assume that we do not
    // need to return a value from there, which makes things simpler, and in
    // that case we just do this, which continues onward in the function:
    //
    //  if (A_i) {
    //    outlined(..);
    //  }
    //
    // TODO: handle a possible returned value in this case as well.
    //
    // Note that the if body type must be unreachable or none, as this is an if
    // without an else.

    // Find the number of ifs.
    Index numIfs = 0;
    while (getIf(body, numIfs) && numIfs <= MaxIfs) {
      numIfs++;
    }
    if (numIfs == 0 || numIfs > MaxIfs) {
      return InliningMode::Uninlineable;
    }

    // Look for a final item after the ifs.
    auto* finalItem = getItem(body, numIfs);

    // The final item must be simple (or not exist, which is simple enough).
    if (finalItem && !isSimple(finalItem)) {
      return InliningMode::Uninlineable;
    }

    // There must be no other items after the optional final one.
    if (finalItem && getItem(body, numIfs + 1)) {
      return InliningMode::Uninlineable;
    }

    // This has the general shape we seek. Check each if.
    for (Index i = 0; i < numIfs; i++) {
      auto* iff = getIf(body, i);
      // The if must have a simple condition and no else arm.
      if (!isSimple(iff->condition) || iff->ifFalse) {
        return InliningMode::Uninlineable;
      }
      if (iff->ifTrue->type == Type::none) {
        // This must have no returns.
        if (!FindAll<Return>(iff->ifTrue).list.empty()) {
          return InliningMode::Uninlineable;
        }
      } else {
        // This is an if without an else, and so the type is either none or
        // unreachable, and we ruled out none before.
        assert(iff->ifTrue->type == Type::unreachable);
      }
    }

    // Success, this matches the pattern.

    // If the outlined function will be worth inlining normally, skip the
    // intermediate state and inline fully now. (As above, if full inlining is
    // disabled, we only partially inline.)
    if (numIfs == 1) {
      auto outlinedFunctionSize = Measurer::measure(iff->ifTrue);
      if (!func->noFullInline &&
          outlinedFunctionWorthInlining(info, outlinedFunctionSize)) {
        return InliningMode::Full;
      }
    }

    return InliningMode::SplitPatternB;
  }

  // Returns the function we should inline, after we split the function into
  // two pieces as described above (that is, in the example above, this would
  // return foo$inlineable).
  //
  // This is called when we are definitely inlining the function, and so it
  // will perform the splitting (if that has not already been done before).
  Function* getInlineableSplitFunction(Function* func,
                                       InliningMode inliningMode) {
    assert(inliningMode == InliningMode::SplitPatternA ||
           inliningMode == InliningMode::SplitPatternB);
    auto& split = splits[func->name];
    if (!split.inlineable) {
      // We haven't performed the split, do it now.
      split.inlineable = doSplit(func, inliningMode);
    }
    return split.inlineable;
  }

  // Clean up. When we are done we no longer need the inlineable functions on
  // the module, as they have been inlined into all the places we wanted them
  // for.
  //
  // Returns a list of the names of the functions we split.
  std::vector<Name> finish() {
    std::vector<Name> ret;
    std::unordered_set<Name> inlineableNames;
    for (auto& [func, split] : splits) {
      auto* inlineable = split.inlineable;
      if (inlineable) {
        inlineableNames.insert(inlineable->name);
        ret.push_back(func);
      }
    }
    module->removeFunctions([&](Function* func) {
      return inlineableNames.find(func->name) != inlineableNames.end();
    });
    return ret;
  }

private:
  // Information about splitting a function.
  struct Split {
    // The inlineable function out of the two that we generate by splitting.
    // That is, foo$inlineable from above.
    Function* inlineable = nullptr;
    // The outlined function, that is, foo$outlined from above.
    Function* outlined = nullptr;
  };

  // All the splitting we have already performed.
  //
  // Note that this maps from function names, and not Function*, as the main
  // inlining code can remove functions as it goes, but we can rely on names
  // staying constant.
  std::unordered_map<Name, Split> splits;

  bool outlinedFunctionWorthInlining(FunctionInfo& origin, Index sizeEstimate) {
    FunctionInfo info;
    // Start with a copy of the origin's info, and apply the size estimate.
    // This is not accurate, for example the origin function may have
    // loops or calls even though this section may not have.
    // This is a conservative estimate, that is, it will return true only when
    // it should, but might return false when a more precise analysis would
    // return true. And it is a practical estimation to avoid extra future
    // work.
    info = origin;
    info.size = sizeEstimate;
    return info.worthFullInlining(options);
  }

  Function* doSplit(Function* func, InliningMode inliningMode) {
    Builder builder(*module);

    if (inliningMode == InliningMode::SplitPatternA) {
      // Note that "A" in the name here identifies this as being a split from
      // pattern A. The second pattern B will have B in the name.
      Function* inlineable = copyFunction(func, "inlineable-A");
      auto* outlined = copyFunction(func, "outlined-A");

      // The inlineable function should only have the if, which will call the
      // outlined function with a flipped condition.
      auto* inlineableIf = getIf(inlineable->body);
      inlineableIf->condition =
        builder.makeUnary(EqZInt32, inlineableIf->condition);
      inlineableIf->ifTrue = builder.makeCall(
        outlined->name, getForwardedArgs(func, builder), Type::none);
      inlineable->body = inlineableIf;

      // The outlined function no longer needs the initial if.
      auto& outlinedList = outlined->body->cast<Block>()->list;
      outlinedList.erase(outlinedList.begin());

      return inlineable;
    }

    assert(inliningMode == InliningMode::SplitPatternB);

    Function* inlineable = copyFunction(func, "inlineable-B");

    const Index MaxIfs = options.inlining.partialInliningIfs;
    assert(MaxIfs > 0);

    // The inlineable function should only have the ifs, which will call the
    // outlined heavy work.
    for (Index i = 0; i < MaxIfs; i++) {
      // For each if, create an outlined function with the body of that if,
      // and call that from the if.
      auto* inlineableIf = getIf(inlineable->body, i);
      if (!inlineableIf) {
        break;
      }

      auto* outlined = copyFunction(func, "outlined-B");
      outlined->body = inlineableIf->ifTrue;

      // The outlined function either returns the same results as the original
      // one, or nothing, depending on if a value is returned here.
      auto valueReturned =
        func->getResults() != Type::none && outlined->body->type != Type::none;
      outlined->setResults(valueReturned ? func->getResults() : Type::none);

      inlineableIf->ifTrue = builder.makeCall(outlined->name,
                                              getForwardedArgs(func, builder),
                                              outlined->getResults());
      if (valueReturned) {
        inlineableIf->ifTrue = builder.makeReturn(inlineableIf->ifTrue);
      }
    }

    return inlineable;
  }

  Function* copyFunction(Function* func, std::string prefix) {
    // TODO: We copy quite a lot more than we need here, and throw stuff out.
    //       It is simple to just copy the entire thing to get the params and
    //       results and all that, but we could be more efficient.
    prefix = "byn-split-" + prefix;
    return ModuleUtils::copyFunction(
      func,
      *module,
      Names::getValidFunctionName(*module,
                                  prefix + '$' + func->name.toString()));
  }

  // Get the i-th item in a sequence of initial items in an expression. That
  // is, if the item is a block, it may have several such items, and otherwise
  // there is a single item, that item itself. This basically provides a
  // simpler interface than checking if something is a block or not when there
  // is just one item.
  //
  // Returns nullptr if there is no such item.
  static Expression* getItem(Expression* curr, Index i = 0) {
    if (auto* block = curr->dynCast<Block>()) {
      auto& list = block->list;
      if (i < list.size()) {
        return list[i];
      }
    }
    if (i == 0) {
      return curr;
    }
    return nullptr;
  }

  // Get the i-th if in a sequence of initial ifs in an expression. If no such
  // if exists, returns nullptr.
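  //
  // For example (an illustrative sketch of these two helpers): given a body
  // of the form (block (if ..) (if ..) (i32.const 1)), getIf(body, 0) and
  // getIf(body, 1) return the two ifs, getItem(body, 2) returns the const,
  // and getIf(body, 2) returns nullptr since that item is not an if.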
  static If* getIf(Expression* curr, Index i = 0) {
    auto* item = getItem(curr, i);
    if (!item) {
      return nullptr;
    }
    if (auto* iff = item->dynCast<If>()) {
      return iff;
    }
    return nullptr;
  }

  // Checks if an expression is very simple - something simple enough that we
  // are willing to inline it in this optimization. This should basically take
  // almost no cost at all to compute.
  bool isSimple(Expression* curr) {
    // For now, support local and global gets, and unary operations.
    // TODO: Generalize? Use costs.h?
    if (curr->type == Type::unreachable) {
      return false;
    }
    if (curr->is<LocalGet>() || curr->is<GlobalGet>()) {
      return true;
    }
    if (auto* unary = curr->dynCast<Unary>()) {
      return isSimple(unary->value);
    }
    if (auto* is = curr->dynCast<RefIsNull>()) {
      return isSimple(is->value);
    }
    return false;
  }

  // Returns a list of local.gets, one for each of the parameters to the
  // function. This forwards the arguments passed to the inlineable function to
  // the outlined one.
  std::vector<Expression*> getForwardedArgs(Function* func, Builder& builder) {
    std::vector<Expression*> args;
    for (Index i = 0; i < func->getNumParams(); i++) {
      args.push_back(builder.makeLocalGet(i, func->getLocalType(i)));
    }
    return args;
  }
};

struct Inlining : public Pass {
  // This pass changes locals and parameters.
  // FIXME DWARF updating does not handle local changes yet.
  bool invalidatesDWARF() override { return true; }

  // whether to optimize where we inline
  bool optimize = false;

  // the information for each function. recomputed in each iteration
  NameInfoMap infos;

  std::unique_ptr<FunctionSplitter> functionSplitter;

  Module* module = nullptr;

  void run(Module* module_) override {
    module = module_;

    // No point to do more iterations than the number of functions, as it means
    // we are infinitely recursing (which should be very rare in practice, but
    // it is possible that a recursive call can look like it is worth
    // inlining).
    Index iterationNumber = 0;

    auto numOriginalFunctions = module->functions.size();

    // Track in how many iterations a function was inlined into. We are willing
    // to inline many times into a function within an iteration, as e.g. that
    // helps the case of many calls of a small getter. However, if we only do
    // more inlining in separate iterations then it is likely code that was the
    // result of previous inlinings that is now being inlined into. That is, an
    // old inlining added a call to somewhere, and now we are inlining into
    // that call. This is typically recursion, which to some extent can help,
    // but then like loop unrolling it loses its benefit quickly, so set a
    // limit here.
    //
    // In addition to inlining into a function, we track how many times we do
    // other potentially repetitive operations like splitting a function before
    // inlining, as any such repetitive operation should be limited in how many
    // times we perform it. (An exception is how many times we inlined a
    // function, which we do not want to limit - it can be profitable to inline
    // a call into a great many callsites, over many iterations.)
    //
    // (Track names here, and not Function pointers, as we can remove functions
    // while inlining, and it may be confusing during debugging to have a
    // pointer to something that was removed.)
    std::unordered_map<Name, Index> iterationCounts;
    const size_t MaxIterationsForFunc = 5;

    while (iterationNumber <= numOriginalFunctions) {
#ifdef INLINING_DEBUG
      std::cout << "inlining loop iter " << iterationNumber
                << " (numFunctions: " << module->functions.size() << ")\n";
#endif
      iterationNumber++;

      std::unordered_set<Function*> inlinedInto;

      prepare();
      iteration(inlinedInto);

      if (inlinedInto.empty()) {
        return;
      }

#ifdef INLINING_DEBUG
      std::cout << "  inlined into " << inlinedInto.size() << " funcs.\n";
#endif

      for (auto* func : inlinedInto) {
        EHUtils::handleBlockNestedPops(func, *module);
      }

      for (auto* func : inlinedInto) {
        if (++iterationCounts[func->name] >= MaxIterationsForFunc) {
          return;
        }
      }

      if (functionSplitter) {
        auto splitNames = functionSplitter->finish();
        for (auto name : splitNames) {
          if (++iterationCounts[name] >= MaxIterationsForFunc) {
            return;
          }
        }
      }
    }
  }

  void prepare() {
    infos.clear();
    // fill in info, as we operate on it in parallel (each function to its own
    // entry)
    for (auto& func : module->functions) {
      infos[func->name];
    }
    {
      FunctionInfoScanner scanner(infos);
      scanner.run(getPassRunner(), module);
      scanner.walkModuleCode(module);
    }
    for (auto& ex : module->exports) {
      if (ex->kind == ExternalKind::Function) {
        infos[ex->value].usedGlobally = true;
      }
    }
    if (module->start.is()) {
      infos[module->start].usedGlobally = true;
    }

    // When optimizing heavily for speed, we may potentially split functions in
    // order to inline parts of them, if partialInliningIfs is enabled.
    auto& options = getPassOptions();
    if (options.optimizeLevel >= 3 && !options.shrinkLevel &&
        options.inlining.partialInliningIfs) {
      functionSplitter = std::make_unique<FunctionSplitter>(module, options);
    }
  }

  void iteration(std::unordered_set<Function*>& inlinedInto) {
    // decide which to inline
    InliningState state;
    ModuleUtils::iterDefinedFunctions(*module, [&](Function* func) {
      InliningMode inliningMode = getInliningMode(func->name);
      assert(inliningMode != InliningMode::Unknown);
      if (inliningMode != InliningMode::Uninlineable) {
        state.inlinableFunctions[func->name] = inliningMode;
      }
    });
    if (state.inlinableFunctions.empty()) {
      return;
    }

    // Fill in actionsForFunction, as we operate on it in parallel (each
    // function to its own entry). Also generate a vector of the function names
    // so that in the later loop we can iterate on it deterministically and
    // without iterator invalidation.
    std::vector<Name> funcNames;
    for (auto& func : module->functions) {
      state.actionsForFunction[func->name];
      funcNames.push_back(func->name);
    }

    // Find and plan inlinings in parallel. This discovers inlining
    // opportunities, by themselves, but does not yet take into account
    // interactions between them (e.g. we don't want to both inline into a
    // function and then inline it as well).
    Planner(&state).run(getPassRunner(), module);

    // Choose which inlinings to perform. We do this sequentially so that we
    // can consider interactions between them, and avoid nondeterminism.
    ChosenActions chosenActions;

    // How many uses (calls of the function) we inlined.
    std::unordered_map<Name, Index> inlinedUses;

    for (auto name : funcNames) {
      auto* func = module->getFunction(name);
      // if we've inlined a function, don't inline into it in this iteration,
      // avoid risk of races
      // note that we do not risk stalling progress, as each iteration() will
      // inline at least one call before hitting this
      if (inlinedUses.count(func->name)) {
        continue;
      }
      for (auto& action : state.actionsForFunction[name]) {
        auto* inlinedFunction = action.contents;
        // if we've inlined into a function, don't inline it in this iteration,
        // avoid risk of races
        // note that we do not risk stalling progress, as each iteration() will
        // inline at least one call before hitting this
        if (inlinedInto.count(inlinedFunction)) {
          continue;
        }
        Name inlinedName = inlinedFunction->name;
        if (!isUnderSizeLimit(func->name, inlinedName)) {
          continue;
        }

        // Success - we can inline.
#ifdef INLINING_DEBUG
        std::cout << "inline " << inlinedName << " into " << func->name
                  << '\n';
#endif

        // Update the action for the actual inlining we have chosen to perform
        // (when splitting, we will actually inline one of the split pieces and
        // not the original function itself; note how even if we do that then
        // we are still removing a call to the original function here, and so
        // we do not need to change anything else lower down - we still want to
        // note that we got rid of one use of the original function).
        action.contents = getActuallyInlinedFunction(action.contents);
        action.nameHint = inlinedNameHint++;

        chosenActions[func->name].push_back(action);
        inlinedUses[inlinedName]++;
        inlinedInto.insert(func);
        assert(inlinedUses[inlinedName] <= infos[inlinedName].refs);
      }
    }

    if (chosenActions.empty()) {
      // We found nothing to do.
      return;
    }

    // Perform the inlinings in parallel (sequentially inside each function we
    // inline into, but in parallel between them). If we are optimizing, do so
    // as well.
    {
      PassUtils::FilteredPassRunner runner(
        module, inlinedInto, getPassRunner()->options);
      runner.setIsNested(true);
      runner.add(std::make_unique<DoInlining>(chosenActions));
      if (optimize) {
        OptUtils::addUsefulPassesAfterInlining(runner);
      }
      runner.run();
    }

    // remove functions that we no longer need after inlining
    module->removeFunctions([&](Function* func) {
      auto name = func->name;
      auto& info = infos[name];
      return inlinedUses.count(name) && inlinedUses[name] == info.refs &&
             !info.usedGlobally;
    });
  }

  // See explanation in InliningAction.
  Index inlinedNameHint = 0;

  // Decide for a given function whether to inline, and if so in what mode.
  InliningMode getInliningMode(Name name) {
    auto* func = module->getFunction(name);
    auto& info = infos[name];

    if (info.inliningMode != InliningMode::Unknown) {
      return info.inliningMode;
    }

    // Check if the function itself is worth inlining as it is.
    if (!func->noFullInline && info.worthFullInlining(getPassOptions())) {
      return info.inliningMode = InliningMode::Full;
    }

    // Otherwise, check if we can at least inline part of it, if we are
    // interested in such things.
    if (!func->noPartialInline && functionSplitter) {
      info.inliningMode = functionSplitter->getSplitDrivenInliningMode(
        module->getFunction(name), info);
      return info.inliningMode;
    }

    // Cannot be fully or partially inlined => uninlineable.
    info.inliningMode = InliningMode::Uninlineable;
    return info.inliningMode;
  }

  // Gets the actual function to be inlined. Normally this is the function
  // itself, but if it is a function that we must first split (i.e., we only
  // want to partially inline it) then it will be the inlineable part of the
  // split.
  //
  // This is called right before actually performing the inlining, that is, we
  // are guaranteed to inline after this.
  Function* getActuallyInlinedFunction(Function* func) {
    InliningMode inliningMode = infos[func->name].inliningMode;
    // If we want to inline this function itself, do so.
    if (inliningMode == InliningMode::Full) {
      return func;
    }
    // Otherwise, this is a case where we want to inline part of it, after
    // splitting.
    assert(functionSplitter);
    return functionSplitter->getInlineableSplitFunction(func, inliningMode);
  }

  // Checks if the combined size of the code after inlining is under the
  // absolute size limit. We have an absolute limit in order to avoid
  // extremely-large sizes after inlining, as they may hit limits in VMs and/or
  // slow down startup (measurements there indicate something like ~1 second to
  // optimize a 100K function). See e.g.
  // https://github.com/WebAssembly/binaryen/pull/3730#issuecomment-867939138
  // https://github.com/emscripten-core/emscripten/issues/13899#issuecomment-825073344
  bool isUnderSizeLimit(Name target, Name source) {
    // Estimate the combined binary size from the number of instructions.
    auto combinedSize = infos[target].size + infos[source].size;
    auto estimatedBinarySize = Measurer::BytesPerExpr * combinedSize;
    // The limit is arbitrary, but based on the links above. It is a very high
    // value that should appear very rarely in practice (for example, it does
    // not occur on the Emscripten benchmark suite of real-world codebases).
    const Index MaxCombinedBinarySize = 400 * 1024;
    return estimatedBinarySize < MaxCombinedBinarySize;
  }
};

} // anonymous namespace

//
// InlineMain
//
// Inline __original_main into main, if they exist. This works around the odd
// thing that clang/llvm currently do, where __original_main contains the
// user's actual main (this is done as a workaround for main having two
// different possible signatures).
//

static const char* MAIN = "main";
static const char* ORIGINAL_MAIN = "__original_main";

struct InlineMainPass : public Pass {
  void run(Module* module) override {
    auto* main = module->getFunctionOrNull(MAIN);
    auto* originalMain = module->getFunctionOrNull(ORIGINAL_MAIN);
    if (!main || main->imported() || !originalMain ||
        originalMain->imported()) {
      return;
    }
    FindAllPointers<Call> calls(main->body);
    Expression** callSite = nullptr;
    for (auto* call : calls.list) {
      if ((*call)->cast<Call>()->target == ORIGINAL_MAIN) {
        if (callSite) {
          // More than one call site.
          return;
        }
        callSite = call;
      }
    }
    if (!callSite) {
      // No call at all.
      return;
    }
    doInlining(module,
               main,
               InliningAction(callSite, originalMain, true),
               getPassOptions());
  }
};

Pass* createInliningPass() { return new Inlining(); }

Pass* createInliningOptimizingPass() {
  auto* ret = new Inlining();
  ret->optimize = true;
  return ret;
}

Pass* createInlineMainPass() { return new InlineMainPass(); }

} // namespace wasm