Diffstat (limited to 'src/passes/Inlining.cpp')
 src/passes/Inlining.cpp | 173
 1 file changed, 131 insertions(+), 42 deletions(-)
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index c352a704a..377aa5247 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -18,45 +18,68 @@
// Inlining.
//
// For now, this does a conservative inlining of all functions that have
-// exactly one use. That should not increase code size, and may have
-// speed benefits.
+// exactly one use, and are fairly small. That should not increase code
+// size, and may have speed benefits.
//
+#include <atomic>
+
#include <wasm.h>
#include <pass.h>
#include <wasm-builder.h>
+#include <ast_utils.h>
#include <parsing.h>
namespace wasm {
+// A limit on how big a function to inline.
+static const int INLINING_SIZE_LIMIT = 15;
+
+// We only inline a function with a single use.
+static const int SINGLE_USE = 1;
+
+// A number of uses of a function that is too high for us to
+// inline it to all those locations.
+static const int TOO_MANY_USES_TO_INLINE = SINGLE_USE + 1;
+
+// Map of function name => number of uses. We build the values in
+// parallel, using atomic increments. This is safe because we never
+// update the map itself in parallel - the set of keys is fixed before
+// the parallel phase runs - so the map never allocates or moves values,
+// which could be a problem with atomics (and in general: we take no
+// lock for parallel access, and instead depend on the map's structure
+// staying constant while multiple threads run).
+typedef std::map<Name, std::atomic<Index>> NameToAtomicIndexMap;
+
struct FunctionUseCounter : public WalkerPass<PostWalker<FunctionUseCounter>> {
bool isFunctionParallel() override { return true; }
- FunctionUseCounter(std::map<Name, Index>* output) : output(output) {}
+ FunctionUseCounter(NameToAtomicIndexMap* uses) : uses(uses) {}
FunctionUseCounter* create() override {
- return new FunctionUseCounter(output);
+ return new FunctionUseCounter(uses);
}
void visitCall(Call *curr) {
- (*output)[curr->target]++;
+ assert(uses->count(curr->target) > 0); // can't add a new element in parallel
+ (*uses)[curr->target]++;
}
private:
- std::map<Name, Index>* output;
+ NameToAtomicIndexMap* uses;
};
-struct Action {
- Call* call;
- Block* block; // the replacement for the call, into which we should inline
+struct InliningAction {
+ Expression** callSite;
Function* contents;
- Action(Call* call, Block* block, Function* contents) : call(call), block(block), contents(contents) {}
+ InliningAction(Expression** callSite, Function* contents) : callSite(callSite), contents(contents) {}
};
struct InliningState {
std::set<Name> canInline;
- std::map<Name, std::vector<Action>> actionsForFunction; // function name => actions that can be performed in it
+ std::map<Name, std::vector<InliningAction>> actionsForFunction; // function name => actions that can be performed in it
};
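The counting scheme above - fix the key set single-threaded, then let worker
threads bump the values atomically - can be tried in isolation. A minimal
standalone sketch (plain C++ with illustrative names, not Binaryen code):

    #include <atomic>
    #include <iostream>
    #include <map>
    #include <string>
    #include <thread>
    #include <vector>

    int main() {
      // The key set is fixed single-threaded first, as calculateUses()
      // does below, so the map's structure never changes while workers run.
      std::map<std::string, std::atomic<int>> uses;
      for (auto name : {"foo", "bar"}) {
        uses[name].store(0);
      }
      // Workers only increment existing values, as FunctionUseCounter does.
      std::vector<std::thread> workers;
      for (int i = 0; i < 4; i++) {
        workers.emplace_back([&] {
          for (int j = 0; j < 1000; j++) {
            uses["foo"]++; // atomic increment of an existing entry
          }
        });
      }
      for (auto& t : workers) {
        t.join();
      }
      std::cout << uses["foo"].load() << '\n'; // prints 4000
    }

Because no keys are inserted during the parallel phase, the map never
allocates or rebalances while threads write, which is exactly the invariant
the comment above relies on.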
struct Planner : public WalkerPass<PostWalker<Planner>> {
@@ -68,12 +91,18 @@ struct Planner : public WalkerPass<PostWalker<Planner>> {
return new Planner(state);
}
- void visitCall(Call *curr) {
- if (state->canInline.count(curr->target)) {
- auto* block = Builder(*getModule()).makeBlock();
- block->type = curr->type;
+ void visitCall(Call* curr) {
+ // Plan to inline if we know the target is valid to inline, and if the call
+ // is actually reached - if it is dead code, inlining is pointless
+ if (state->canInline.count(curr->target) &&
+ curr->type != unreachable) {
+ // Nest the call in a block. That way the address of the pointer to the call
+ // will not change even if we inline multiple times into the same function;
+ // otherwise, call1(call2()) might be a problem
+ auto* block = Builder(*getModule()).makeBlock(curr);
replaceCurrent(block);
- state->actionsForFunction[getFunction()->name].emplace_back(curr, block, getModule()->getFunction(curr->target));
+ assert(state->actionsForFunction.count(getFunction()->name) > 0); // can't add a new element in parallel
+ state->actionsForFunction[getFunction()->name].emplace_back(&block->list[0], getModule()->getFunction(curr->target));
}
}
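The call1(call2()) hazard mentioned in the comment above is easier to see
with stand-in types. A hedged sketch (hypothetical Expression/Call/Block
structs, not Binaryen's real AST):

    #include <cassert>
    #include <vector>

    struct Expression { virtual ~Expression() = default; };
    struct Call : Expression { std::vector<Expression*> operands; };
    struct Block : Expression { std::vector<Expression*> list; };

    int main() {
      // call1(call2()), and we plan to inline both targets.
      Call* call2 = new Call();
      Call* call1 = new Call();
      call1->operands.push_back(call2);

      // Recording &call1->operands[0] as call2's site would be fragile:
      // once call1 is itself inlined away, that slot is no longer in the
      // tree. Wrapping each call in a single-entry block gives a slot
      // whose address survives, because the wrapper block persists.
      Block* wrapper = new Block();
      wrapper->list.push_back(call2);
      call1->operands[0] = wrapper;
      Expression** site = &wrapper->list[0];

      // Inlining call2 later just writes through the stable slot.
      *site = new Block(); // would hold call2's inlined body
      assert(*site != call2);
    }

Since replaceCurrent() puts the wrapper block where the call was, the
recorded &block->list[0] keeps pointing at a live slot no matter how many
other inlinings rewrite the surrounding tree.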
@@ -91,13 +120,13 @@ private:
// Core inlining logic. Modifies the outside function (adding locals as
// needed), and returns the inlined code.
-// Since we only inline once, and do not need the function afterwards, we
-// can just reuse all the nodes and even avoid copying.
-static Expression* doInlining(Module* module, Function* into, Action& action) {
+static Expression* doInlining(Module* module, Function* into, InliningAction& action) {
+ auto* call = (*action.callSite)->cast<Call>();
Builder builder(*module);
- auto* block = action.block;
+ auto* block = builder.makeBlock();
+ block->type = call->type;
block->name = Name(std::string("__inlined_func$") + action.contents->name.str);
- block->type = action.contents->result;
+ *action.callSite = block;
// set up a locals mapping
struct Updater : public PostWalker<Updater> {
std::map<Index, Index> localMapping;
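The visitor bodies of Updater fall in the context elided between these
hunks; the overall scheme - a fresh outer local for every callee local,
with local accesses rewritten through localMapping - is roughly the
following stand-in sketch (hypothetical names, not the real walker):

    #include <cassert>
    #include <map>

    struct LocalRemapper {
      std::map<int, int> localMapping; // callee local index -> outer index

      // Give every callee local (params included) a fresh local in the
      // function we inline into; the param-assignment loop below relies
      // on the first entries of this mapping.
      void buildMapping(int calleeNumLocals, int& outerNumLocals) {
        for (int i = 0; i < calleeNumLocals; i++) {
          localMapping[i] = outerNumLocals++;
        }
      }

      // What the get/set-local visitors amount to: rewrite each local
      // index through the mapping.
      int remap(int calleeIndex) const { return localMapping.at(calleeIndex); }
    };

    int main() {
      LocalRemapper r;
      int outerNumLocals = 5; // the caller already has locals 0..4
      r.buildMapping(3, outerNumLocals);
      assert(r.remap(0) == 5); // callee param 0 -> fresh outer local 5
      assert(outerNumLocals == 8);
    }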
@@ -121,49 +150,59 @@ static Expression* doInlining(Module* module, Function* into, Action& action) {
}
// assign the operands into the params
for (Index i = 0; i < action.contents->params.size(); i++) {
- block->list.push_back(builder.makeSetLocal(updater.localMapping[i], action.call->operands[i]));
+ block->list.push_back(builder.makeSetLocal(updater.localMapping[i], call->operands[i]));
}
- // update the inlined contents
- updater.walk(action.contents->body);
- block->list.push_back(action.contents->body);
- action.contents->body = builder.makeUnreachable(); // not strictly needed, since it's going away
+ // generate and update the inlined contents
+ auto* contents = ExpressionManipulator::copy(action.contents->body, *module);
+ updater.walk(contents);
+ block->list.push_back(contents);
return block;
}
struct Inlining : public Pass {
+ // whether to run optimizations on the functions we inline into
+ bool optimize = false;
+
+ NameToAtomicIndexMap uses;
+
void run(PassRunner* runner, Module* module) override {
// keep going while we inline, to handle nesting. TODO: optimize
+ calculateUses(module);
while (iteration(runner, module)) {}
}
- bool iteration(PassRunner* runner, Module* module) {
- // Count uses
- std::map<Name, Index> uses;
+ void calculateUses(Module* module) {
// fill in uses, as we operate on it in parallel (each function to its own entry)
for (auto& func : module->functions) {
- uses[func->name] = 0;
- }
- {
- PassRunner runner(module);
- runner.setIsNested(true);
- runner.add<FunctionUseCounter>(&uses);
- runner.run();
+ uses[func->name].store(0);
}
+ PassRunner runner(module);
+ runner.setIsNested(true);
+ runner.add<FunctionUseCounter>(&uses);
+ runner.run();
+ // anything exported or used in a table should not be inlined
for (auto& ex : module->exports) {
if (ex->kind == ExternalKind::Function) {
- uses[ex->value] = 2; // too many, so we ignore it
+ uses[ex->value].store(TOO_MANY_USES_TO_INLINE);
}
}
for (auto& segment : module->table.segments) {
for (auto name : segment.data) {
- uses[name]++;
+ if (module->getFunctionOrNull(name)) {
+ uses[name].store(TOO_MANY_USES_TO_INLINE);
+ }
}
}
+ }
+
+ bool iteration(PassRunner* runner, Module* module) {
// decide which to inline
InliningState state;
- for (auto iter : uses) {
- if (iter.second == 1) {
- state.canInline.insert(iter.first);
+ for (auto& func : module->functions) {
+ auto name = func->name;
+ auto numUses = uses[name].load();
+ if (canInline(numUses) && worthInlining(module->getFunction(name))) {
+ state.canInline.insert(name);
}
}
// fill in actionsForFunction, as we operate on it in parallel (each function to its own entry)
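The while (iteration(runner, module)) {} driver in run() exists because
single-use inlining can leave more single-use inlining to do: in a chain
outer -> mid -> leaf, collapsing one link may expose the next one for a
later round. A toy model of that fixed-point loop (illustrative only, not
the pass's real bookkeeping):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // A call chain in which every function has exactly one use.
      std::vector<std::string> chain = {"outer", "mid", "leaf"};
      bool worked = true;
      while (worked) { // same shape as run()'s loop over iteration()
        worked = chain.size() > 1;
        if (worked) {
          std::printf("inline %s into %s\n", chain.back().c_str(),
                      chain[chain.size() - 2].c_str());
          chain.pop_back(); // one link collapses per round
        }
      }
    }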
@@ -182,15 +221,21 @@ struct Inlining : public Pass {
std::set<Function*> inlinedInto;
for (auto& func : module->functions) {
for (auto& action : state.actionsForFunction[func->name]) {
+ Name inlinedName = action.contents->name;
doInlining(module, func.get(), action);
- inlined.insert(action.contents->name);
+ inlined.insert(inlinedName);
inlinedInto.insert(func.get());
+ uses[inlinedName]--;
+ assert(uses[inlinedName].load() == 0);
}
}
// anything we inlined into may now have non-unique label names, fix it up
for (auto func : inlinedInto) {
wasm::UniqueNameMapper::uniquify(func->body);
}
+ if (optimize && inlinedInto.size() > 0) {
+ doOptimize(inlinedInto, module, runner);
+ }
// remove functions that we managed to inline; their one use is gone
auto& funcs = module->functions;
funcs.erase(std::remove_if(funcs.begin(), funcs.end(), [&inlined](const std::unique_ptr<Function>& curr) {
@@ -199,11 +244,55 @@ struct Inlining : public Pass {
// return whether we did any work
return inlined.size() > 0;
}
+
+ bool canInline(int numUses) {
+ return numUses == SINGLE_USE;
+ }
+
+ bool worthInlining(Function* func) {
+ return Measurer::measure(func->body) <= INLINING_SIZE_LIMIT;
+ }
+
+ // Run useful optimizations after inlining, such as removing
+ // unnecessary new blocks, sharing variables, etc.
+ void doOptimize(std::set<Function*>& funcs, Module* module, PassRunner* parentRunner) {
+ // save the full list of functions on the side
+ std::vector<std::unique_ptr<Function>> all;
+ all.swap(module->functions);
+ module->updateMaps();
+ for (auto& func : funcs) {
+ module->addFunction(func);
+ }
+ PassRunner runner(module, parentRunner->options);
+ runner.setIsNested(true);
+ runner.setValidateGlobally(false); // not a fully valid module
+ runner.add("remove-unused-brs");
+ runner.add("remove-unused-names");
+ runner.add("coalesce-locals");
+ runner.add("simplify-locals");
+ runner.add("vacuum");
+ runner.add("reorder-locals");
+ runner.add("remove-unused-brs");
+ runner.add("merge-blocks");
+ runner.run();
+ // restore all the funcs
+ for (auto& func : module->functions) {
+ func.release();
+ }
+ all.swap(module->functions);
+ module->updateMaps();
+ }
};
Pass *createInliningPass() {
return new Inlining();
}
+Pass *createInliningOptimizingPass() {
+ auto* ret = new Inlining();
+ ret->optimize = true;
+ return ret;
+}
+
} // namespace wasm
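For completeness, a sketch of invoking the pass programmatically. The pass
names here are an assumption (registration happens elsewhere in the
codebase); the PassRunner calls mirror the ones used inside doOptimize():

    #include <pass.h>
    #include <wasm.h>

    // Hedged usage sketch: run inlining, plus the post-inlining cleanup
    // pipeline, on a module. The name "inlining-optimizing" is assumed to
    // match the factory above; adjust if it is registered differently.
    void runInlining(wasm::Module& module) {
      wasm::PassRunner runner(&module);
      runner.add("inlining-optimizing"); // or "inlining" to skip doOptimize()
      runner.run();
    }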