diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/pass.h                 |  10
-rw-r--r-- | src/passes/Inlining.cpp    | 173
-rw-r--r-- | src/passes/pass.cpp        |  16
-rw-r--r-- | src/passes/passes.h        |   1
-rw-r--r-- | src/tools/wasm-merge.cpp   |   2
-rw-r--r-- | src/wasm-module-building.h |   4
6 files changed, 154 insertions, 52 deletions
diff --git a/src/pass.h b/src/pass.h index 21836ebf9..2f3bca078 100644 --- a/src/pass.h +++ b/src/pass.h @@ -109,8 +109,14 @@ struct PassRunner { void addDefaultFunctionOptimizationPasses(); // Adds the default optimization passes that work on - // entire modules as a whole. - void addDefaultGlobalOptimizationPasses(); + // entire modules as a whole, and make sense to + // run before function passes. + void addDefaultGlobalOptimizationPrePasses(); + + // Adds the default optimization passes that work on + // entire modules as a whole, and make sense to + // run after function passes. + void addDefaultGlobalOptimizationPostPasses(); // Run the passes on the module void run(); diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index c352a704a..377aa5247 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -18,45 +18,68 @@ // Inlining. // // For now, this does a conservative inlining of all functions that have -// exactly one use. That should not increase code size, and may have -// speed benefits. +// exactly one use, and are fairly small. That should not increase code +// size, and may have speed benefits. // +#include <atomic> + #include <wasm.h> #include <pass.h> #include <wasm-builder.h> +#include <ast_utils.h> #include <parsing.h> namespace wasm { +// A limit on how big a function to inline. +static const int INLINING_SIZE_LIMIT = 15; + +// We only inline a function with a single use. +static const int SINGLE_USE = 1; + +// A number of uses of a function that is too high for us to +// inline it to all those locations. +static const int TOO_MANY_USES_TO_INLINE = SINGLE_USE + 1; + +// Map of function name => number of uses. We build the values in +// parallel, using atomic increments. 
This is safe because we never +// update the map itself in parallel, we only update the values, +// and so the map never allocates or moves values which could be +// a problem with atomics (in fact it would be a problem in general +// as well, not just with atomics, as we don't use a lock in +// parallel access, we depend on the map itself being constant +// when running multiple threads). +typedef std::map<Name, std::atomic<Index>> NameToAtomicIndexMap; + struct FunctionUseCounter : public WalkerPass<PostWalker<FunctionUseCounter>> { bool isFunctionParallel() override { return true; } - FunctionUseCounter(std::map<Name, Index>* output) : output(output) {} + FunctionUseCounter(NameToAtomicIndexMap* uses) : uses(uses) {} FunctionUseCounter* create() override { - return new FunctionUseCounter(output); + return new FunctionUseCounter(uses); } void visitCall(Call *curr) { - (*output)[curr->target]++; + assert(uses->count(curr->target) > 0); // can't add a new element in parallel + (*uses)[curr->target]++; } private: - std::map<Name, Index>* output; + NameToAtomicIndexMap* uses; }; -struct Action { - Call* call; - Block* block; // the replacement for the call, into which we should inline +struct InliningAction { + Expression** callSite; Function* contents; - Action(Call* call, Block* block, Function* contents) : call(call), block(block), contents(contents) {} + InliningAction(Expression** callSite, Function* contents) : callSite(callSite), contents(contents) {} }; struct InliningState { std::set<Name> canInline; - std::map<Name, std::vector<Action>> actionsForFunction; // function name => actions that can be performed in it + std::map<Name, std::vector<InliningAction>> actionsForFunction; // function name => actions that can be performed in it }; struct Planner : public WalkerPass<PostWalker<Planner>> { @@ -68,12 +91,18 @@ struct Planner : public WalkerPass<PostWalker<Planner>> { return new Planner(state); } - void visitCall(Call *curr) { - if 
(state->canInline.count(curr->target)) { - auto* block = Builder(*getModule()).makeBlock(); - block->type = curr->type; + void visitCall(Call* curr) { + // plan to inline if we know this is valid to inline, and if the call is + // actually performed - if it is dead code, it's pointless to inline + if (state->canInline.count(curr->target) && + curr->type != unreachable) { + // nest the call in a block. that way the location of the pointer to the call will not + // change even if we inline multiple times into the same function, otherwise + // call1(call2()) might be a problem + auto* block = Builder(*getModule()).makeBlock(curr); replaceCurrent(block); - state->actionsForFunction[getFunction()->name].emplace_back(curr, block, getModule()->getFunction(curr->target)); + assert(state->actionsForFunction.count(getFunction()->name) > 0); // can't add a new element in parallel + state->actionsForFunction[getFunction()->name].emplace_back(&block->list[0], getModule()->getFunction(curr->target)); } } @@ -91,13 +120,13 @@ private: // Core inlining logic. Modifies the outside function (adding locals as // needed), and returns the inlined code. -// Since we only inline once, and do not need the function afterwards, we -// can just reuse all the nodes and even avoid copying. 
-static Expression* doInlining(Module* module, Function* into, Action& action) { +static Expression* doInlining(Module* module, Function* into, InliningAction& action) { + auto* call = (*action.callSite)->cast<Call>(); Builder builder(*module); - auto* block = action.block; + auto* block = Builder(*module).makeBlock(); + block->type = call->type; block->name = Name(std::string("__inlined_func$") + action.contents->name.str); - block->type = action.contents->result; + *action.callSite = block; // set up a locals mapping struct Updater : public PostWalker<Updater> { std::map<Index, Index> localMapping; @@ -121,49 +150,59 @@ static Expression* doInlining(Module* module, Function* into, Action& action) { } // assign the operands into the params for (Index i = 0; i < action.contents->params.size(); i++) { - block->list.push_back(builder.makeSetLocal(updater.localMapping[i], action.call->operands[i])); + block->list.push_back(builder.makeSetLocal(updater.localMapping[i], call->operands[i])); } - // update the inlined contents - updater.walk(action.contents->body); - block->list.push_back(action.contents->body); - action.contents->body = builder.makeUnreachable(); // not strictly needed, since it's going away + // generate and update the inlined contents + auto* contents = ExpressionManipulator::copy(action.contents->body, *module); + updater.walk(contents); + block->list.push_back(contents); return block; } struct Inlining : public Pass { + // whether to optimize where we inline + bool optimize = false; + + NameToAtomicIndexMap uses; + void run(PassRunner* runner, Module* module) override { // keep going while we inline, to handle nesting. 
TODO: optimize + calculateUses(module); while (iteration(runner, module)) {} } - bool iteration(PassRunner* runner, Module* module) { - // Count uses - std::map<Name, Index> uses; + void calculateUses(Module* module) { // fill in uses, as we operate on it in parallel (each function to its own entry) for (auto& func : module->functions) { - uses[func->name] = 0; - } - { - PassRunner runner(module); - runner.setIsNested(true); - runner.add<FunctionUseCounter>(&uses); - runner.run(); + uses[func->name].store(0); } + PassRunner runner(module); + runner.setIsNested(true); + runner.add<FunctionUseCounter>(&uses); + runner.run(); + // anything exported or used in a table should not be inlined for (auto& ex : module->exports) { if (ex->kind == ExternalKind::Function) { - uses[ex->value] = 2; // too many, so we ignore it + uses[ex->value].store(TOO_MANY_USES_TO_INLINE); } } for (auto& segment : module->table.segments) { for (auto name : segment.data) { - uses[name]++; + if (module->getFunctionOrNull(name)) { + uses[name].store(TOO_MANY_USES_TO_INLINE); + } } } + } + + bool iteration(PassRunner* runner, Module* module) { // decide which to inline InliningState state; - for (auto iter : uses) { - if (iter.second == 1) { - state.canInline.insert(iter.first); + for (auto& func : module->functions) { + auto name = func->name; + auto numUses = uses[name].load(); + if (canInline(numUses) && worthInlining(module->getFunction(name))) { + state.canInline.insert(name); } } // fill in actionsForFunction, as we operate on it in parallel (each function to its own entry) @@ -182,15 +221,21 @@ struct Inlining : public Pass { std::set<Function*> inlinedInto; for (auto& func : module->functions) { for (auto& action : state.actionsForFunction[func->name]) { + Name inlinedName = action.contents->name; doInlining(module, func.get(), action); - inlined.insert(action.contents->name); + inlined.insert(inlinedName); inlinedInto.insert(func.get()); + uses[inlinedName]--; + 
assert(uses[inlinedName].load() == 0); } } // anything we inlined into may now have non-unique label names, fix it up for (auto func : inlinedInto) { wasm::UniqueNameMapper::uniquify(func->body); } + if (optimize && inlinedInto.size() > 0) { + doOptimize(inlinedInto, module, runner); + } // remove functions that we managed to inline, their one use is gone auto& funcs = module->functions; funcs.erase(std::remove_if(funcs.begin(), funcs.end(), [&inlined](const std::unique_ptr<Function>& curr) { @@ -199,11 +244,55 @@ struct Inlining : public Pass { // return whether we did any work return inlined.size() > 0; } + + bool canInline(int numUses) { + return numUses == SINGLE_USE; + } + + bool worthInlining(Function* func) { + return Measurer::measure(func->body) <= INLINING_SIZE_LIMIT; + } + + // Run useful optimizations after inlining, things like removing + // unnecessary new blocks, sharing variables, etc. + void doOptimize(std::set<Function*>& funcs, Module* module, PassRunner* parentRunner) { + // save the full list of functions on the side + std::vector<std::unique_ptr<Function>> all; + all.swap(module->functions); + module->updateMaps(); + for (auto& func : funcs) { + module->addFunction(func); + } + PassRunner runner(module, parentRunner->options); + runner.setIsNested(true); + runner.setValidateGlobally(false); // not a full valid module + runner.add("remove-unused-brs"); + runner.add("remove-unused-names"); + runner.add("coalesce-locals"); + runner.add("simplify-locals"); + runner.add("vacuum"); + runner.add("reorder-locals"); + runner.add("remove-unused-brs"); + runner.add("merge-blocks"); + runner.run(); + // restore all the funcs + for (auto& func : module->functions) { + func.release(); + } + all.swap(module->functions); + module->updateMaps(); + } }; Pass *createInliningPass() { return new Inlining(); } +Pass *createInliningOptimizingPass() { + auto* ret = new Inlining(); + ret->optimize = true; + return ret; +} + } // namespace wasm diff --git 
a/src/passes/pass.cpp b/src/passes/pass.cpp index fc0a583fb..37eb50a0b 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -73,6 +73,7 @@ void PassRegistry::registerPasses() { registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass); registerPass("flatten-control-flow", "flattens out control flow to be only on blocks, not nested as expressions", createFlattenControlFlowPass); registerPass("inlining", "inlines functions (currently only ones with a single use)", createInliningPass); + registerPass("inlining-optimizing", "inlines functions (currently only ones with a single use) and optimizes where we inlined", createInliningOptimizingPass); registerPass("legalize-js-interface", "legalizes i64 types on the import/export boundary", createLegalizeJSInterfacePass); registerPass("local-cse", "common subexpression elimination inside basic blocks", createLocalCSEPass); registerPass("log-execution", "instrument the build with logging of where execution goes", createLogExecutionPass); @@ -110,11 +111,9 @@ void PassRegistry::registerPasses() { } void PassRunner::addDefaultOptimizationPasses() { - add("duplicate-function-elimination"); + addDefaultGlobalOptimizationPrePasses(); addDefaultFunctionOptimizationPasses(); - add("duplicate-function-elimination"); // optimizations show more functions as duplicate - add("remove-unused-module-elements"); - add("memory-packing"); + addDefaultGlobalOptimizationPostPasses(); } void PassRunner::addDefaultFunctionOptimizationPasses() { @@ -154,9 +153,16 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { add("vacuum"); // just to be safe } -void PassRunner::addDefaultGlobalOptimizationPasses() { +void PassRunner::addDefaultGlobalOptimizationPrePasses() { add("duplicate-function-elimination"); +} + +void PassRunner::addDefaultGlobalOptimizationPostPasses() { + add("duplicate-function-elimination"); // optimizations show more functions as duplicate 
add("remove-unused-module-elements"); + if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) { + add("inlining-optimizing"); + } add("memory-packing"); } diff --git a/src/passes/passes.h b/src/passes/passes.h index 7a40b7da4..5e7eba540 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -32,6 +32,7 @@ Pass *createExtractFunctionPass(); Pass *createFlattenControlFlowPass(); Pass *createFullPrinterPass(); Pass *createInliningPass(); +Pass *createInliningOptimizingPass(); Pass *createLegalizeJSInterfacePass(); Pass *createLocalCSEPass(); Pass *createLogExecutionPass(); diff --git a/src/tools/wasm-merge.cpp b/src/tools/wasm-merge.cpp index fa2b195c9..441cdc0f7 100644 --- a/src/tools/wasm-merge.cpp +++ b/src/tools/wasm-merge.cpp @@ -622,7 +622,7 @@ int main(int argc, const char* argv[]) { PassRunner passRunner(&output); passRunner.add("precompute"); passRunner.add("optimize-instructions"); // things now-constant may be further optimized - passRunner.addDefaultGlobalOptimizationPasses(); + passRunner.addDefaultGlobalOptimizationPostPasses(); passRunner.run(); } diff --git a/src/wasm-module-building.h b/src/wasm-module-building.h index b501a1ab6..c52cea9f9 100644 --- a/src/wasm-module-building.h +++ b/src/wasm-module-building.h @@ -165,7 +165,7 @@ public: } addPrePasses(passRunner); passRunner.addDefaultFunctionOptimizationPasses(); - passRunner.addDefaultGlobalOptimizationPasses(); + passRunner.addDefaultGlobalOptimizationPostPasses(); passRunner.run(); return; } @@ -226,7 +226,7 @@ private: void optimizeGlobally() { PassRunner passRunner(wasm, passOptions); - passRunner.addDefaultGlobalOptimizationPasses(); + passRunner.addDefaultGlobalOptimizationPostPasses(); passRunner.run(); } |