Diffstat (limited to 'src')
-rw-r--r--  src/ir/type-updating.h      |  12 ++++++------
-rw-r--r--  src/passes/CMakeLists.txt   |   1 +
-rw-r--r--  src/passes/Monomorphize.cpp | 245 ++++++++++++++++++++++++++++++++
-rw-r--r--  src/passes/pass.cpp         |   6 ++++++
-rw-r--r--  src/passes/passes.h         |   2 ++
5 files changed, 260 insertions, 6 deletions
diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h
index f6df449ae..50ba9efca 100644
--- a/src/ir/type-updating.h
+++ b/src/ir/type-updating.h
@@ -409,12 +409,12 @@ Expression* fixLocalGet(LocalGet* get, Module& wasm);
 
 // Applies new types of parameters to a function. This does all the necessary
 // changes aside from altering the function type, which the caller is expected
-// to do (the caller might simply change the type, but in other cases the caller
-// might be rewriting the types and need to preserve their identity in terms of
-// nominal typing, so we don't change the type here). The specific things this
-// function does are to update the types of local.get/tee operations,
-// refinalize, etc., basically all operations necessary to ensure validation
-// with the new types.
+// to do after we run (the caller might simply change the type, but in other
+// cases the caller might be rewriting the types and need to preserve their
+// identity in terms of nominal typing, so we don't change the type here). The
+// specific things this function does are to update the types of local.get/tee
+// operations, refinalize, etc., basically all operations necessary to ensure
+// validation with the new types.
 //
 // While doing so, we can either update or not update the types of local.get and
 // local.tee operations. (We do not update them here if we'll be doing an update
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index cb180179c..621a8e68a 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -62,6 +62,7 @@ set(passes_SOURCES
   MergeLocals.cpp
   Metrics.cpp
   MinifyImportsAndExports.cpp
+  Monomorphize.cpp
   MultiMemoryLowering.cpp
   NameList.cpp
   NameTypes.cpp
diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp
new file mode 100644
index 000000000..80e908a83
--- /dev/null
+++ b/src/passes/Monomorphize.cpp
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// When we see a call foo(arg1, arg2) and at least one of the arguments has a
+// more refined type than is declared in the function being called, create a
+// copy of the function with the refined type. That copy can then potentially be
+// optimized in useful ways later.
+//
+// Inlining also monomorphizes in effect. What this pass does is handle the
+// cases where inlining cannot be done.
+//
+// To see when monomorphizing makes sense, this optimizes the target function
+// both with and without the more refined types. If the refined types help then
+// the version with them might remove a cast, for example. Note that while
+// doing so we keep the optimization results of the version without - there is
+// no reason to forget them since we've gone to the trouble anyhow. So this
+// pass may have the side effect of performing minor optimizations on
+// functions. There is also a variant of the pass that always monomorphizes,
+// even when it does not seem helpful, which is useful for testing, and
+// possibly in cases where we need more than just local optimizations to see
+// the benefit - for example, perhaps GUFA ends up more powerful later on.
+//
+// TODO: When we optimize we could run multiple cycles: A calls B calls C might
+//       end up with the refined+optimized B now having refined types in its
+//       call to C, which it did not have before. This is in fact the expected
+//       pattern of incremental monomorphization. Doing it in the pass could be
+//       more efficient as later cycles can focus only on what was just
+//       optimized and changed. Also, operating on functions just modified
+//       would help the case of A calls B and we end up optimizing A after we
+//       consider A->B, and the optimized version sends more refined types to
+//       B, which could unlock more potential.
+// TODO: We could sort the functions so that we start from root functions and
+//       end on leaves. That would make it more likely for a single iteration
+//       to do more work, as if A->B->C then we'd do A->B and optimize B and
+//       only then look at B->C.
+// TODO: Also run the result-refining part of SignatureRefining, as if we
+//       refine the result then callers of the function may benefit, even if
+//       there is no benefit in the function itself.
+// TODO: If this is too slow, we could "group" things, for example we could
+//       compute the LUB of a bunch of calls to a target and then investigate
+//       that one case and use it in all those callers.
+// TODO: Not just direct calls? But updating vtables is complex.
+// TODO: Not just types? We could monomorphize using Literal values. E.g. for
+//       function references, if we monomorphized we'd end up specializing
+//       qsort for the particular functions it is given.
+//
+
+#include "ir/cost.h"
+#include "ir/find_all.h"
+#include "ir/module-utils.h"
+#include "ir/names.h"
+#include "ir/type-updating.h"
+#include "ir/utils.h"
+#include "pass.h"
+#include "wasm-type.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+struct Monomorphize : public Pass {
+  // If set, we run some opts to see if monomorphization helps, and skip it if
+  // not.
+  bool onlyWhenHelpful;
+
+  Monomorphize(bool onlyWhenHelpful) : onlyWhenHelpful(onlyWhenHelpful) {}
+
+  void run(Module* module) override {
+    if (!module->features.hasGC()) {
+      return;
+    }
+
+    // TODO: parallelize, see comments below
+
+    // Note the list of all functions. We'll be adding more, and do not want to
+    // operate on those.
+    std::vector<Name> funcNames;
+    ModuleUtils::iterDefinedFunctions(
+      *module, [&](Function* func) { funcNames.push_back(func->name); });
+
+    // Find the calls in each function and optimize where we can, changing them
+    // to call more refined targets.
+    for (auto name : funcNames) {
+      auto* func = module->getFunction(name);
+      for (auto* call : FindAll<Call>(func->body).list) {
+        if (call->type == Type::unreachable) {
+          // Ignore unreachable code.
+          // TODO: return_call?
+          continue;
+        }
+
+        if (call->target == name) {
+          // Avoid recursion, which adds some complexity (as we'd be modifying
+          // ourselves if we apply optimizations).
+          continue;
+        }
+
+        call->target = getRefinedTarget(call, module);
+      }
+    }
+  }
+
+  // Given a call, make a copy of the function it is calling that has more
+  // refined arguments that fit the arguments being passed perfectly.
+  Name getRefinedTarget(Call* call, Module* module) {
+    auto target = call->target;
+    auto* func = module->getFunction(target);
+    if (func->imported()) {
+      // Nothing to do since this calls outside of the module.
+      return target;
+    }
+    auto params = func->getParams();
+    bool hasRefinedParam = false;
+    for (Index i = 0; i < call->operands.size(); i++) {
+      if (call->operands[i]->type != params[i]) {
+        hasRefinedParam = true;
+        break;
+      }
+    }
+    if (!hasRefinedParam) {
+      // Nothing to do since all params are fully refined already.
+      return target;
+    }
+
+    std::vector<Type> refinedTypes;
+    for (auto* operand : call->operands) {
+      refinedTypes.push_back(operand->type);
+    }
+    auto refinedParams = Type(refinedTypes);
+    auto iter = funcParamMap.find({target, refinedParams});
+    if (iter != funcParamMap.end()) {
+      return iter->second;
+    }
+
+    // This is the first time we see this situation. Let's see if it is worth
+    // monomorphizing.
+
+    // Create a new function with refined parameters as a copy of the original.
+    // (Note we must clear stack IR on the original: atm we do not have the
+    // ability to copy stack IR, so we'd hit an internal error. But as we will
+    // be optimizing the function anyhow, we'd be throwing away stack IR later
+    // so this isn't a problem.)
+    func->stackIR.reset();
+    auto refinedTarget = Names::getValidFunctionName(*module, target);
+    auto* refinedFunc = ModuleUtils::copyFunction(func, *module, refinedTarget);
+    TypeUpdating::updateParamTypes(refinedFunc, refinedTypes, *module);
+    refinedFunc->type = HeapType(Signature(refinedParams, func->getResults()));
+
+    // Assume we'll choose to use the refined target, but if we are being
+    // careful then we might change our mind.
+    auto chosenTarget = refinedTarget;
+    if (onlyWhenHelpful) {
+      // Optimize both functions using minimal opts, hopefully enough to see if
+      // there is a benefit to the refined types (such as the new types allowing
+      // a cast to be removed).
+      // TODO: Atm this can be done many times per function as it is once per
+      //       function and per set of types sent to it. Perhaps have some
+      //       total limit to avoid slow runtimes.
+      // TODO: We can end up optimizing |func| more than once. It may be
+      //       different each time if the previous optimization helped, but
+      //       often it will be identical. We could save the original version
+      //       and use that as the starting point here (and cache the optimized
+      //       version), but then we'd be throwing away optimization results.
+      //       Or we could see if later optimizations do not further decrease
+      //       the cost, and if so, use a cached value for the cost on such
+      //       "already maximally optimized" functions. The former approach is
+      //       more amenable to parallelization, as it avoids path dependence -
+      //       the other approaches are deterministic but they depend on the
+      //       order in which we see things. But it does require saving a copy
+      //       of the function, which uses memory, which is avoided if we just
+      //       keep optimizing from the current contents as we go. It's not
+      //       obvious which approach is best here.
+      doMinimalOpts(func);
+      doMinimalOpts(refinedFunc);
+
+      auto costBefore = CostAnalyzer(func->body).cost;
+      auto costAfter = CostAnalyzer(refinedFunc->body).cost;
+      if (costAfter >= costBefore) {
+        // We failed to improve. Remove the new function and return the old
+        // target.
+        module->removeFunction(refinedTarget);
+        chosenTarget = target;
+      }
+    }
+
+    // Mark the chosen target in the map, so we don't do this work again: every
+    // pair of target and refinedParams is only considered once.
+    funcParamMap[{target, refinedParams}] = chosenTarget;
+
+    return chosenTarget;
+  }
+
+  // Run minimal function-level optimizations on a function. This optimizes at
+  // -O1 which is very fast and runs in linear time basically, and so it should
+  // be reasonable to run as part of this pass: -O1 is several times faster than
+  // a full -O2, in particular, and so if we run this as part of -O2 we should
+  // not be making it much slower overall.
+  // TODO: Perhaps we don't need all of -O1, and can focus on specific things we
+  //       expect to help. That would be faster, but we'd always run the risk of
+  //       missing things, especially as new passes are added later and we don't
+  //       think to add them here.
+  void doMinimalOpts(Function* func) {
+    PassRunner runner(getPassRunner());
+    runner.options.optimizeLevel = 1;
+    // Local subtyping is not run in -O1, but we really do want it here since
+    // the entire point is that parameters now have more refined types, which
+    // can lead to locals reading them being refinable as well.
+    runner.add("local-subtyping");
+    runner.addDefaultFunctionOptimizationPasses();
+    runner.setIsNested(true);
+    runner.runOnFunction(func);
+  }
+
+  // Maps [func name, param types] to the name of a new function whose params
+  // have those types.
+  //
+  // Note that this can contain funcParamMap{A, types} = A, that is, that maps
+  // a function name to itself. That indicates we found no benefit from
+  // refining with those particular types, and saves us from computing it again
+  // later on.
+  std::unordered_map<std::pair<Name, Type>, Name> funcParamMap;
+};
+
+} // anonymous namespace
+
+Pass* createMonomorphizePass() { return new Monomorphize(true); }
+
+Pass* createMonomorphizeAlwaysPass() { return new Monomorphize(false); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 7190201a1..b3bf80f8f 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -272,6 +272,12 @@ void PassRegistry::registerPasses() {
   registerPass("mod-asyncify-never-unwind",
                "apply the assumption that asyncify never unwinds",
                createModAsyncifyNeverUnwindPass);
+  registerPass("monomorphize",
+               "creates specialized versions of functions",
+               createMonomorphizePass);
+  registerPass("monomorphize-always",
+               "creates specialized versions of functions (even if unhelpful)",
+               createMonomorphizeAlwaysPass);
   registerPass("multi-memory-lowering",
                "combines multiple memories into a single memory",
                createMultiMemoryLoweringPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 12e8b77c7..ba60ade6f 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -84,6 +84,8 @@ Pass* createMinifyImportsPass();
 Pass* createMinifyImportsAndExportsPass();
 Pass* createMinifyImportsAndExportsAndModulesPass();
 Pass* createMetricsPass();
+Pass* createMonomorphizePass();
+Pass* createMonomorphizeAlwaysPass();
 Pass* createMultiMemoryLoweringPass();
 Pass* createNameListPass();
 Pass* createNameTypesPass();
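
Editor's note (not part of the commit): since the diff registers the pass under the names "monomorphize" and "monomorphize-always", it can be scheduled like any other registered pass through Binaryen's PassRunner. The sketch below is our own illustration, not code from this commit - the helper name runMonomorphize is hypothetical, and we assume the Module already validates and has the GC feature enabled (without GC the pass returns early, as seen in run() above).

    // Minimal sketch of driving the new pass via the PassRunner API.
    // Assumptions (ours, not the commit's): `module` is a loaded, valid
    // wasm::Module whose features include GC.
    #include "pass.h"
    #include "wasm.h"

    void runMonomorphize(wasm::Module& module) {
      wasm::PassRunner runner(&module);
      // "monomorphize" keeps a specialized copy only when the cost comparison
      // says it helped; "monomorphize-always" would keep every copy, which the
      // header comment notes is mainly useful for testing.
      runner.add("monomorphize");
      // Follow with the default pipeline so later passes can exploit the
      // refined parameter types in the specialized functions.
      runner.addDefaultOptimizationPasses();
      runner.run();
    }

Since registered passes also become wasm-opt command-line flags, the same should be reachable as --monomorphize (or --monomorphize-always) together with the GC-related feature flags.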