Diffstat (limited to 'src')
-rw-r--r--  src/mixed_arena.h                       |   4
-rw-r--r--  src/passes/CMakeLists.txt               |   1
-rw-r--r--  src/passes/DeadArgumentElimination.cpp  | 367
-rw-r--r--  src/passes/Inlining.cpp                 |  27
-rw-r--r--  src/passes/opt-utils.h                  |  56
-rw-r--r--  src/passes/pass.cpp                     |   5
-rw-r--r--  src/passes/passes.h                     |   2
-rw-r--r--  src/support/sorted_vector.h             |  16
-rw-r--r--  src/wasm-builder.h                      |   8
-rw-r--r--  src/wasm-module-building.h              |   5
10 files changed, 462 insertions, 29 deletions
diff --git a/src/mixed_arena.h b/src/mixed_arena.h
index a1c51b5eb..90203842e 100644
--- a/src/mixed_arena.h
+++ b/src/mixed_arena.h
@@ -223,6 +223,10 @@ public:
     usedElements -= size;
   }
 
+  void erase(Iterator it) {
+    erase(it, it + 1);
+  }
+
   void clear() {
     usedElements = 0;
   }
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 87b7661aa..984b85e63 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -10,6 +10,7 @@ SET(passes_SOURCES
   CodeFolding.cpp
   ConstHoisting.cpp
   DataFlowOpts.cpp
+  DeadArgumentElimination.cpp
   DeadCodeElimination.cpp
   DuplicateFunctionElimination.cpp
   ExtractFunction.cpp
diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp
new file mode 100644
index 000000000..b1a1008f4
--- /dev/null
+++ b/src/passes/DeadArgumentElimination.cpp
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2018 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimizes call arguments in a whole-program manner, removing ones
+// that are not used (dead).
+//
+// Specifically, this does these things:
+//
+//  * Find functions for whom an argument is always passed the same
+//    constant. If so, we can just set that local to that constant
+//    in the function.
+//  * Find functions that don't use the value passed to an argument.
+//    If so, we can avoid even sending and receiving it. (Note how if
+//    the previous point was true for an argument, then the second
+//    must as well.)
+//
+// This pass does not depend on flattening, but it may be more effective,
+// as then call arguments never have side effects (which we need to
+// watch for here).
+//
+
+#include <unordered_map>
+#include <unordered_set>
+
+#include <wasm.h>
+#include <pass.h>
+#include <wasm-builder.h>
+#include <cfg/cfg-traversal.h>
+#include <ir/effects.h>
+#include <passes/opt-utils.h>
+#include <support/sorted_vector.h>
+
+namespace wasm {
+
+// Information for a function
+struct DAEFunctionInfo {
+  // The unused parameters, if any.
+  SortedVector unusedParams;
+  // Maps a function name to the calls going to it.
+  std::unordered_map<Name, std::vector<Call*>> calls;
+  // Whether the function can be called from places that
+  // affect what we can do. For now, any call we don't
+  // see inhibits our optimizations, but TODO: an export
+  // could be worked around by exporting a thunk that
+  // adds the parameter.
+  bool hasUnseenCalls = false;
+};
+
+typedef std::unordered_map<Name, DAEFunctionInfo> DAEFunctionInfoMap;
+
+// Information in a basic block
+struct DAEBlockInfo {
+  // A local may be read, written, or not accessed in this block.
+  // If it is both read and written, we just care about the first
+  // action (if it is read first, that's all the info we are
+  // looking for; if it is written first, it can't be read later).
+  enum LocalUse {
+    Read,
+    Written
+  };
+  std::unordered_map<Index, LocalUse> localUses;
+};
+
+struct DAEScanner : public WalkerPass<CFGWalker<DAEScanner, Visitor<DAEScanner>, DAEBlockInfo>> {
+  bool isFunctionParallel() override { return true; }
+
+  Pass* create() override { return new DAEScanner(infoMap); }
+
+  DAEScanner(DAEFunctionInfoMap* infoMap) : infoMap(infoMap) {}
+
+  DAEFunctionInfoMap* infoMap;
+  DAEFunctionInfo* info;
+
+  Index numParams;
+
+  // cfg traversal work
+
+  void visitGetLocal(GetLocal* curr) {
+    if (currBasicBlock) {
+      auto& localUses = currBasicBlock->contents.localUses;
+      auto index = curr->index;
+      if (localUses.count(index) == 0) {
+        localUses[index] = DAEBlockInfo::Read;
+      }
+    }
+  }
+
+  void visitSetLocal(SetLocal* curr) {
+    if (currBasicBlock) {
+      auto& localUses = currBasicBlock->contents.localUses;
+      auto index = curr->index;
+      if (localUses.count(index) == 0) {
+        localUses[index] = DAEBlockInfo::Written;
+      }
+    }
+  }
+
+  void visitCall(Call* curr) {
+    info->calls[curr->target].push_back(curr);
+  }
+
+  // main entry point
+
+  void doWalkFunction(Function* func) {
+    numParams = func->getNumParams();
+    info = &((*infoMap)[func->name]);
+    CFGWalker<DAEScanner, Visitor<DAEScanner>, DAEBlockInfo>::doWalkFunction(func);
+    // If there are relevant params, check if they are used. (If
+    // we can't optimize the function anyhow, there's no point.)
+    if (numParams > 0 && !info->hasUnseenCalls) {
+      findUnusedParams(func);
+    }
+  }
+
+  void findUnusedParams(Function* func) {
+    // Flow the incoming parameter values, see if they reach a read.
+    // Once we've seen a parameter at a block, we need never consider it there
+    // again.
+    std::unordered_map<BasicBlock*, SortedVector> seenBlockIndexes;
+    // Start with all the incoming parameters.
+    SortedVector initial;
+    for (Index i = 0; i < numParams; i++) {
+      initial.push_back(i);
+    }
+    // The used params, which we now compute.
+    std::unordered_set<Index> usedParams;
+    // An item of work is a block plus the values arriving there.
+    typedef std::pair<BasicBlock*, SortedVector> Item;
+    std::vector<Item> work;
+    work.emplace_back(entry, initial);
+    while (!work.empty()) {
+      auto item = std::move(work.back());
+      work.pop_back();
+      auto* block = item.first;
+      auto& indexes = item.second;
+      // Ignore things we've already seen, or we've already seen to be used.
+      auto& seenIndexes = seenBlockIndexes[block];
+      indexes.filter([&](const Index i) {
+        if (seenIndexes.has(i) || usedParams.count(i)) {
+          return false;
+        } else {
+          seenIndexes.insert(i);
+          return true;
+        }
+      });
+      if (indexes.empty()) {
+        continue; // nothing more to flow
+      }
+      auto& localUses = block->contents.localUses;
+      SortedVector remainingIndexes;
+      for (auto i : indexes) {
+        auto iter = localUses.find(i);
+        if (iter != localUses.end()) {
+          auto use = iter->second;
+          if (use == DAEBlockInfo::Read) {
+            usedParams.insert(i);
+          }
+          // Whether it was a read or a write, we can stop looking at that local here.
+        } else {
+          remainingIndexes.insert(i);
+        }
+      }
+      // If there are remaining indexes, flow them forward.
+      if (!remainingIndexes.empty()) {
+        for (auto* next : block->out) {
+          work.emplace_back(next, remainingIndexes);
+        }
+      }
+    }
+    // We can now compute the unused params.
+    for (Index i = 0; i < numParams; i++) {
+      if (usedParams.count(i) == 0) {
+        info->unusedParams.insert(i);
+      }
+    }
+  }
+};
+
+struct DAE : public Pass {
+  bool optimize = false;
+
+  void run(PassRunner* runner, Module* module) override {
+    DAEFunctionInfoMap infoMap;
+    // Ensure they all exist so the parallel threads don't modify the data structure.
+    for (auto& func : module->functions) {
+      infoMap[func->name];
+    }
+    // Check the influence of the table and exports.
+    for (auto& curr : module->exports) {
+      if (curr->kind == ExternalKind::Function) {
+        infoMap[curr->value].hasUnseenCalls = true;
+      }
+    }
+    for (auto& segment : module->table.segments) {
+      for (auto name : segment.data) {
+        infoMap[name].hasUnseenCalls = true;
+      }
+    }
+    // Scan all the functions.
+    {
+      PassRunner runner(module);
+      runner.setIsNested(true);
+      runner.add<DAEScanner>(&infoMap);
+      runner.run();
+    }
+    // Combine all the info.
+    std::unordered_map<Name, std::vector<Call*>> allCalls;
+    for (auto& pair : infoMap) {
+      auto& info = pair.second;
+      for (auto& pair : info.calls) {
+        auto name = pair.first;
+        auto& calls = pair.second;
+        auto& allCallsToName = allCalls[name];
+        allCallsToName.insert(allCallsToName.end(), calls.begin(), calls.end());
+      }
+    }
+    // We now have a mapping of all call sites for each function. Check which
+    // are always passed the same constant for a particular argument.
+    for (auto& pair : allCalls) {
+      auto name = pair.first;
+      // We can only optimize if we see all the calls and can modify
+      // them.
+      if (infoMap[name].hasUnseenCalls) continue;
+      auto& calls = pair.second;
+      auto* func = module->getFunction(name);
+      auto numParams = func->getNumParams();
+      for (Index i = 0; i < numParams; i++) {
+        Literal value;
+        for (auto* call : calls) {
+          assert(call->target == name);
+          assert(call->operands.size() == numParams);
+          auto* operand = call->operands[i];
+          if (auto* c = operand->dynCast<Const>()) {
+            if (value.type == none) {
+              // This is the first value seen.
+              value = c->value;
+            } else if (value != c->value) {
+              // Not identical, give up
+              value.type = none;
+              break;
+            }
+          } else {
+            // Not a constant, give up
+            value.type = none;
+            break;
+          }
+        }
+        if (value.type != none) {
+          // Success! We can just apply the constant in the function, which makes
+          // the parameter value unused, which lets us remove it later.
+          Builder builder(*module);
+          func->body = builder.makeSequence(
+            builder.makeSetLocal(i, builder.makeConst(value)),
+            func->body
+          );
+          // Mark it as unused, which we know it now is (no point to
+          // re-scan just for that).
+          infoMap[name].unusedParams.insert(i);
+        }
+      }
+    }
+    // Track which functions we changed, and optimize them later if necessary.
+    std::unordered_set<Function*> changed;
+    // We now know which parameters are unused, and can potentially remove them.
+    for (auto& pair : allCalls) {
+      auto name = pair.first;
+      auto& calls = pair.second;
+      auto* func = module->getFunction(name);
+      auto numParams = func->getNumParams();
+      if (numParams == 0) continue;
+      // Iterate downwards, as we may remove more than one.
+      Index i = numParams - 1;
+      while (1) {
+        if (infoMap[name].unusedParams.has(i)) {
+          // Great, it's not used. Check if none of the calls has a param with side
+          // effects, as that would prevent us removing them (flattening should
+          // have been done earlier).
+          bool canRemove = true;
+          for (auto* call : calls) {
+            auto* operand = call->operands[i];
+            if (EffectAnalyzer(runner->options, operand).hasSideEffects()) {
+              canRemove = false;
+              break;
+            }
+          }
+          if (canRemove) {
+            // Wonderful, nothing stands in our way! Do it.
+            // TODO: parallelize this?
+            removeParameter(func, i, calls);
+            changed.insert(func);
+          }
+        }
+        if (i == 0) break;
+        i--;
+      }
+    }
+    if (optimize && changed.size() > 0) {
+      OptUtils::optimizeAfterInlining(changed, module, runner);
+    }
+  }
+
+private:
+  void removeParameter(Function* func, Index i, std::vector<Call*> calls) {
+    // Clear the type, which is no longer accurate.
+    func->type = Name();
+    // It's cumbersome to adjust local names - TODO don't clear them?
+    Builder::clearLocalNames(func);
+    // Remove the parameter from the function. We must add a new local
+    // for uses of the parameter, but cannot make it use the same index
+    // (in general).
+    auto type = func->getLocalType(i);
+    func->params.erase(func->params.begin() + i);
+    Index newIndex = Builder::addVar(func, type);
+    // Update local operations.
+    struct LocalUpdater : public PostWalker<LocalUpdater> {
+      Index removedIndex;
+      Index newIndex;
+      LocalUpdater(Function* func, Index removedIndex, Index newIndex) : removedIndex(removedIndex), newIndex(newIndex) {
+        walk(func->body);
+      }
+      void visitGetLocal(GetLocal* curr) {
+        updateIndex(curr->index);
+      }
+      void visitSetLocal(SetLocal* curr) {
+        updateIndex(curr->index);
+      }
+      void updateIndex(Index& index) {
+        if (index == removedIndex) {
+          index = newIndex;
+        } else if (index > removedIndex) {
+          index--;
+        }
+      }
+    } localUpdater(func, i, newIndex);
+    // Remove the arguments from the calls.
+    for (auto* call : calls) {
+      call->operands.erase(call->operands.begin() + i);
+    }
+  }
+};
+
+Pass *createDAEPass() {
+  return new DAE();
+}
+
+Pass *createDAEOptimizingPass() {
+  auto* ret = new DAE();
+  ret->optimize = true;
+  return ret;
+}
+
+} // namespace wasm
+
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index 5de214338..67613fa47 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -38,6 +38,7 @@
 #include <ir/utils.h>
 #include <ir/literal-utils.h>
 #include <parsing.h>
+#include <passes/opt-utils.h>
 
 namespace wasm {
 
@@ -332,7 +333,7 @@ struct Inlining : public Pass {
       wasm::UniqueNameMapper::uniquify(func->body);
     }
     if (optimize && inlinedInto.size() > 0) {
-      doOptimize(inlinedInto, module, runner);
+      OptUtils::optimizeAfterInlining(inlinedInto, module, runner);
     }
     // remove functions that we no longer need after inlining
    auto& funcs = module->functions;
@@ -348,30 +349,6 @@ struct Inlining : public Pass {
     // return whether we did any work
     return inlinedUses.size() > 0;
   }
-
-  // Run useful optimizations after inlining, things like removing
-  // unnecessary new blocks, sharing variables, etc.
-  void doOptimize(std::unordered_set<Function*>& funcs, Module* module, PassRunner* parentRunner) {
-    // save the full list of functions on the side
-    std::vector<std::unique_ptr<Function>> all;
-    all.swap(module->functions);
-    module->updateMaps();
-    for (auto& func : funcs) {
-      module->addFunction(func);
-    }
-    PassRunner runner(module, parentRunner->options);
-    runner.setIsNested(true);
-    runner.setValidateGlobally(false); // not a full valid module
-    runner.add("precompute-propagate"); // this is especially useful after inlining
-    runner.addDefaultFunctionOptimizationPasses(); // do all the usual stuff
-    runner.run();
-    // restore all the funcs
-    for (auto& func : module->functions) {
-      func.release();
-    }
-    all.swap(module->functions);
-    module->updateMaps();
-  }
 };
 
 Pass *createInliningPass() {
diff --git a/src/passes/opt-utils.h b/src/passes/opt-utils.h
new file mode 100644
index 000000000..a880e2623
--- /dev/null
+++ b/src/passes/opt-utils.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2018 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_passes_opt_utils_h
+#define wasm_passes_opt_utils_h
+
+#include <unordered_set>
+
+#include <wasm.h>
+#include <pass.h>
+
+namespace wasm {
+
+namespace OptUtils {
+
+// Run useful optimizations after inlining new code into a set
+// of functions.
+inline void optimizeAfterInlining(std::unordered_set<Function*>& funcs, Module* module, PassRunner* parentRunner) {
+  // save the full list of functions on the side
+  std::vector<std::unique_ptr<Function>> all;
+  all.swap(module->functions);
+  module->updateMaps();
+  for (auto& func : funcs) {
+    module->addFunction(func);
+  }
+  PassRunner runner(module, parentRunner->options);
+  runner.setIsNested(true);
+  runner.setValidateGlobally(false); // not a full valid module
+  runner.add("precompute-propagate"); // this is especially useful after inlining
+  runner.addDefaultFunctionOptimizationPasses(); // do all the usual stuff
+  runner.run();
+  // restore all the funcs
+  for (auto& func : module->functions) {
+    func.release();
+  }
+  all.swap(module->functions);
+  module->updateMaps();
+}
+
+} // namespace OptUtils
+} // namespace wasm
+
+#endif // wasm_passes_opt_utils_h
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index aa3ed8eb3..9215341e3 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -66,6 +66,8 @@ std::string PassRegistry::getPassDescription(std::string name) {
 // PassRunner
 
 void PassRegistry::registerPasses() {
+  registerPass("dae", "removes arguments to calls in an lto-like manner", createDAEPass);
+  registerPass("dae-optimizing", "removes arguments to calls in an lto-like manner, and optimizes where we removed", createDAEOptimizingPass);
   registerPass("coalesce-locals", "reduce # of locals by coalescing", createCoalesceLocalsPass);
   registerPass("coalesce-locals-learning", "reduce # of locals by coalescing and learning", createCoalesceLocalsWithLearningPass);
   registerPass("code-pushing", "push code forward, potentially making it not always execute", createCodePushingPass);
@@ -200,6 +202,9 @@ void PassRunner::addDefaultGlobalOptimizationPrePasses() {
 }
 
 void PassRunner::addDefaultGlobalOptimizationPostPasses() {
+  if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
+    add("dae-optimizing");
+  }
   // inline when working hard, and when not preserving debug info
   // (inlining+optimizing can remove the annotations)
   if ((options.optimizeLevel >= 2 || options.shrinkLevel >= 2) &&
diff --git a/src/passes/passes.h b/src/passes/passes.h
index be0849075..b4cc56898 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -27,6 +27,8 @@ Pass* createCoalesceLocalsWithLearningPass();
 Pass* createCodeFoldingPass();
 Pass* createCodePushingPass();
 Pass* createConstHoistingPass();
+Pass* createDAEPass();
+Pass* createDAEOptimizingPass();
 Pass* createDataFlowOptsPass();
 Pass* createDeadCodeEliminationPass();
 Pass* createDuplicateFunctionEliminationPass();
diff --git a/src/support/sorted_vector.h b/src/support/sorted_vector.h
index bb157f590..8f33de03a 100644
--- a/src/support/sorted_vector.h
+++ b/src/support/sorted_vector.h
@@ -25,7 +25,7 @@
 
 namespace wasm {
 
-struct SortedVector : std::vector<Index> {
+struct SortedVector : public std::vector<Index> {
   SortedVector() {}
 
   SortedVector merge(const SortedVector& other) const {
@@ -85,6 +85,20 @@ struct SortedVector : std::vector<Index> {
     return it != end() && *it == x;
   }
 
+  template<typename T>
+  SortedVector& filter(T keep) {
+    size_t skip = 0;
+    for (size_t i = 0; i < size(); i++) {
+      if (keep((*this)[i])) {
+        (*this)[i - skip] = (*this)[i];
+      } else {
+        skip++;
+      }
+    }
+    resize(size() - skip);
+    return *this;
+  }
+
   void verify() const {
     for (Index i = 1; i < size(); i++) {
       assert((*this)[i - 1] < (*this)[i]);
diff --git a/src/wasm-builder.h b/src/wasm-builder.h
index b8de8ce25..4032eb10a 100644
--- a/src/wasm-builder.h
+++ b/src/wasm-builder.h
@@ -389,11 +389,15 @@ public:
     return addVar(func, Name(), type);
   }
 
+  static void clearLocalNames(Function* func) {
+    func->localNames.clear();
+    func->localIndices.clear();
+  }
+
   static void clearLocals(Function* func) {
     func->params.clear();
     func->vars.clear();
-    func->localNames.clear();
-    func->localIndices.clear();
+    clearLocalNames(func);
   }
 
   // ensure a node is a block, if it isn't already, and optionally append to the block
diff --git a/src/wasm-module-building.h b/src/wasm-module-building.h
index d03eef7c8..e92436952 100644
--- a/src/wasm-module-building.h
+++ b/src/wasm-module-building.h
@@ -37,7 +37,10 @@ static std::mutex debug;
 // starts optimizing using worker threads *while you are still adding*.
 // It runs function optimization passes at that time. This does not
 // run global optimization after that by default, but you can do that
-// to by calling optimizeGlobally().
+// to by calling optimizeGlobally(), which runs the the global post-passes
+// (we can't run the pre-passes, as they must be run before function
+// passes, and no such time is possible here given that we receive
+// functions one by one and optimize them).
 //
 // This might also be faster than normal module optimization since it
 // runs all passes on each function, then goes on to the next function
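
As a rough illustration (not part of the commit above): once the pass is registered in pass.cpp, it can be driven through Binaryen's PassRunner by its registered name, just like any other pass. The helper function below is hypothetical; only the pass names "dae" / "dae-optimizing" and the PassRunner calls mirror what the diff adds.

#include <wasm.h>
#include <pass.h>

// Hypothetical usage sketch: run dead-argument elimination on a loaded module.
// "dae" only removes constant/unused arguments; "dae-optimizing" additionally
// re-optimizes the functions it changed (see createDAEOptimizingPass above).
void runDeadArgumentElimination(wasm::Module& module, bool alsoOptimize) {
  wasm::PassRunner runner(&module);
  runner.add(alsoOptimize ? "dae-optimizing" : "dae");
  runner.run();
}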