diff options
Diffstat (limited to 'src/passes/Inlining.cpp')
-rw-r--r-- | src/passes/Inlining.cpp | 173 |
1 files changed, 131 insertions, 42 deletions
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index c352a704a..377aa5247 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -18,45 +18,68 @@ // Inlining. // // For now, this does a conservative inlining of all functions that have -// exactly one use. That should not increase code size, and may have -// speed benefits. +// exactly one use, and are fairly small. That should not increase code +// size, and may have speed benefits. // +#include <atomic> + #include <wasm.h> #include <pass.h> #include <wasm-builder.h> +#include <ast_utils.h> #include <parsing.h> namespace wasm { +// A limit on how big a function to inline. +static const int INLINING_SIZE_LIMIT = 15; + +// We only inline a function with a single use. +static const int SINGLE_USE = 1; + +// A number of uses of a function that is too high for us to +// inline it to all those locations. +static const int TOO_MANY_USES_TO_INLINE = SINGLE_USE + 1; + +// Map of function name => number of uses. We build the values in +// parallel, using atomic increments. This is safe because we never +// update the map itself in parallel, we only update the values, +// and so the map never allocates or moves values which could be +// a problem with atomics (in fact it would be a problem in general +// as well, not just with atomics, as we don't use a lock in +// parallel access, we depend on the map itself being constant +// when running multiple threads). +typedef std::map<Name, std::atomic<Index>> NameToAtomicIndexMap; + struct FunctionUseCounter : public WalkerPass<PostWalker<FunctionUseCounter>> { bool isFunctionParallel() override { return true; } - FunctionUseCounter(std::map<Name, Index>* output) : output(output) {} + FunctionUseCounter(NameToAtomicIndexMap* uses) : uses(uses) {} FunctionUseCounter* create() override { - return new FunctionUseCounter(output); + return new FunctionUseCounter(uses); } void visitCall(Call *curr) { - (*output)[curr->target]++; + assert(uses->count(curr->target) > 0); // can't add a new element in parallel + (*uses)[curr->target]++; } private: - std::map<Name, Index>* output; + NameToAtomicIndexMap* uses; }; -struct Action { - Call* call; - Block* block; // the replacement for the call, into which we should inline +struct InliningAction { + Expression** callSite; Function* contents; - Action(Call* call, Block* block, Function* contents) : call(call), block(block), contents(contents) {} + InliningAction(Expression** callSite, Function* contents) : callSite(callSite), contents(contents) {} }; struct InliningState { std::set<Name> canInline; - std::map<Name, std::vector<Action>> actionsForFunction; // function name => actions that can be performed in it + std::map<Name, std::vector<InliningAction>> actionsForFunction; // function name => actions that can be performed in it }; struct Planner : public WalkerPass<PostWalker<Planner>> { @@ -68,12 +91,18 @@ struct Planner : public WalkerPass<PostWalker<Planner>> { return new Planner(state); } - void visitCall(Call *curr) { - if (state->canInline.count(curr->target)) { - auto* block = Builder(*getModule()).makeBlock(); - block->type = curr->type; + void visitCall(Call* curr) { + // plan to inline if we know this is valid to inline, and if the call is + // actually performed - if it is dead code, it's pointless to inline + if (state->canInline.count(curr->target) && + curr->type != unreachable) { + // nest the call in a block. that way the location of the pointer to the call will not + // change even if we inline multiple times into the same function, otherwise + // call1(call2()) might be a problem + auto* block = Builder(*getModule()).makeBlock(curr); replaceCurrent(block); - state->actionsForFunction[getFunction()->name].emplace_back(curr, block, getModule()->getFunction(curr->target)); + assert(state->actionsForFunction.count(getFunction()->name) > 0); // can't add a new element in parallel + state->actionsForFunction[getFunction()->name].emplace_back(&block->list[0], getModule()->getFunction(curr->target)); } } @@ -91,13 +120,13 @@ private: // Core inlining logic. Modifies the outside function (adding locals as // needed), and returns the inlined code. -// Since we only inline once, and do not need the function afterwards, we -// can just reuse all the nodes and even avoid copying. -static Expression* doInlining(Module* module, Function* into, Action& action) { +static Expression* doInlining(Module* module, Function* into, InliningAction& action) { + auto* call = (*action.callSite)->cast<Call>(); Builder builder(*module); - auto* block = action.block; + auto* block = Builder(*module).makeBlock(); + block->type = call->type; block->name = Name(std::string("__inlined_func$") + action.contents->name.str); - block->type = action.contents->result; + *action.callSite = block; // set up a locals mapping struct Updater : public PostWalker<Updater> { std::map<Index, Index> localMapping; @@ -121,49 +150,59 @@ static Expression* doInlining(Module* module, Function* into, Action& action) { } // assign the operands into the params for (Index i = 0; i < action.contents->params.size(); i++) { - block->list.push_back(builder.makeSetLocal(updater.localMapping[i], action.call->operands[i])); + block->list.push_back(builder.makeSetLocal(updater.localMapping[i], call->operands[i])); } - // update the inlined contents - updater.walk(action.contents->body); - block->list.push_back(action.contents->body); - action.contents->body = builder.makeUnreachable(); // not strictly needed, since it's going away + // generate and update the inlined contents + auto* contents = ExpressionManipulator::copy(action.contents->body, *module); + updater.walk(contents); + block->list.push_back(contents); return block; } struct Inlining : public Pass { + // whether to optimize where we inline + bool optimize = false; + + NameToAtomicIndexMap uses; + void run(PassRunner* runner, Module* module) override { // keep going while we inline, to handle nesting. TODO: optimize + calculateUses(module); while (iteration(runner, module)) {} } - bool iteration(PassRunner* runner, Module* module) { - // Count uses - std::map<Name, Index> uses; + void calculateUses(Module* module) { // fill in uses, as we operate on it in parallel (each function to its own entry) for (auto& func : module->functions) { - uses[func->name] = 0; - } - { - PassRunner runner(module); - runner.setIsNested(true); - runner.add<FunctionUseCounter>(&uses); - runner.run(); + uses[func->name].store(0); } + PassRunner runner(module); + runner.setIsNested(true); + runner.add<FunctionUseCounter>(&uses); + runner.run(); + // anything exported or used in a table should not be inlined for (auto& ex : module->exports) { if (ex->kind == ExternalKind::Function) { - uses[ex->value] = 2; // too many, so we ignore it + uses[ex->value].store(TOO_MANY_USES_TO_INLINE); } } for (auto& segment : module->table.segments) { for (auto name : segment.data) { - uses[name]++; + if (module->getFunctionOrNull(name)) { + uses[name].store(TOO_MANY_USES_TO_INLINE); + } } } + } + + bool iteration(PassRunner* runner, Module* module) { // decide which to inline InliningState state; - for (auto iter : uses) { - if (iter.second == 1) { - state.canInline.insert(iter.first); + for (auto& func : module->functions) { + auto name = func->name; + auto numUses = uses[name].load(); + if (canInline(numUses) && worthInlining(module->getFunction(name))) { + state.canInline.insert(name); } } // fill in actionsForFunction, as we operate on it in parallel (each function to its own entry) @@ -182,15 +221,21 @@ struct Inlining : public Pass { std::set<Function*> inlinedInto; for (auto& func : module->functions) { for (auto& action : state.actionsForFunction[func->name]) { + Name inlinedName = action.contents->name; doInlining(module, func.get(), action); - inlined.insert(action.contents->name); + inlined.insert(inlinedName); inlinedInto.insert(func.get()); + uses[inlinedName]--; + assert(uses[inlinedName].load() == 0); } } // anything we inlined into may now have non-unique label names, fix it up for (auto func : inlinedInto) { wasm::UniqueNameMapper::uniquify(func->body); } + if (optimize && inlinedInto.size() > 0) { + doOptimize(inlinedInto, module, runner); + } // remove functions that we managed to inline, their one use is gone auto& funcs = module->functions; funcs.erase(std::remove_if(funcs.begin(), funcs.end(), [&inlined](const std::unique_ptr<Function>& curr) { @@ -199,11 +244,55 @@ struct Inlining : public Pass { // return whether we did any work return inlined.size() > 0; } + + bool canInline(int numUses) { + return numUses == SINGLE_USE; + } + + bool worthInlining(Function* func) { + return Measurer::measure(func->body) <= INLINING_SIZE_LIMIT; + } + + // Run useful optimizations after inlining, things like removing + // unnecessary new blocks, sharing variables, etc. + void doOptimize(std::set<Function*>& funcs, Module* module, PassRunner* parentRunner) { + // save the full list of functions on the side + std::vector<std::unique_ptr<Function>> all; + all.swap(module->functions); + module->updateMaps(); + for (auto& func : funcs) { + module->addFunction(func); + } + PassRunner runner(module, parentRunner->options); + runner.setIsNested(true); + runner.setValidateGlobally(false); // not a full valid module + runner.add("remove-unused-brs"); + runner.add("remove-unused-names"); + runner.add("coalesce-locals"); + runner.add("simplify-locals"); + runner.add("vacuum"); + runner.add("reorder-locals"); + runner.add("remove-unused-brs"); + runner.add("merge-blocks"); + runner.run(); + // restore all the funcs + for (auto& func : module->functions) { + func.release(); + } + all.swap(module->functions); + module->updateMaps(); + } }; Pass *createInliningPass() { return new Inlining(); } +Pass *createInliningOptimizingPass() { + auto* ret = new Inlining(); + ret->optimize = true; + return ret; +} + } // namespace wasm |