diff options
Diffstat (limited to 'src/passes/GUFA.cpp')
-rw-r--r-- | src/passes/GUFA.cpp | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp new file mode 100644 index 000000000..1ac8ed820 --- /dev/null +++ b/src/passes/GUFA.cpp @@ -0,0 +1,264 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Grand Unified Flow Analysis (GUFA) +// +// Optimize based on information about what content can appear in each location +// in the program. This does a whole-program analysis to find that out and +// hopefully learn more than the type system does - for example, a type might be +// $A, which means $A or any subtype can appear there, but perhaps the analysis +// can find that only $A', a particular subtype, can appear there in practice, +// and not $A or any subtypes of $A', etc. Or, we may find that no type is +// actually possible at a particular location, say if we can prove that the +// casts on the way to that location allow nothing through. We can also find +// that only a particular value is possible of that type. +// +// GUFA will infer constants and unreachability, and add those to the code. This +// can increase code size if further optimizations are not run later like dead +// code elimination and vacuum. The "optimizing" variant of this pass will run +// such followup opts automatically in functions where we make changes, and so +// it is useful if GUFA is run near the end of the optimization pipeline. +// +// TODO: GUFA + polymorphic devirtualization + traps-never-happen. If we see +// that the possible call targets are {A, B, C}, and GUFA info lets us +// prove that A, C will trap if called - say, if they cast the first +// parameter to something GUFA proved it cannot be - then we can ignore +// them, and devirtualize to a call to B. +// + +#include "ir/drop.h" +#include "ir/eh-utils.h" +#include "ir/possible-contents.h" +#include "ir/properties.h" +#include "ir/utils.h" +#include "pass.h" +#include "wasm.h" + +namespace wasm { + +namespace { + +struct GUFAOptimizer + : public WalkerPass< + PostWalker<GUFAOptimizer, UnifiedExpressionVisitor<GUFAOptimizer>>> { + bool isFunctionParallel() override { return true; } + + ContentOracle& oracle; + bool optimizing; + + GUFAOptimizer(ContentOracle& oracle, bool optimizing) + : oracle(oracle), optimizing(optimizing) {} + + GUFAOptimizer* create() override { + return new GUFAOptimizer(oracle, optimizing); + } + + bool optimized = false; + + void visitExpression(Expression* curr) { + // Skip things we can't improve in any way. + auto type = curr->type; + if (type == Type::unreachable || type == Type::none || + Properties::isConstantExpression(curr)) { + return; + } + + if (type.isTuple()) { + // TODO: tuple types. + return; + } + + if (type.isRef() && (getTypeSystem() != TypeSystem::Nominal && + getTypeSystem() != TypeSystem::Isorecursive)) { + // Without type info we can't analyze subtypes, so we cannot infer + // anything about refs. + return; + } + + // Ok, this is an interesting location that we might optimize. See what the + // oracle says is possible there. + auto contents = oracle.getContents(ExpressionLocation{curr, 0}); + + auto& options = getPassOptions(); + auto& wasm = *getModule(); + Builder builder(wasm); + + if (contents.getType() == Type::unreachable) { + // This cannot contain any possible value at all. It must be unreachable + // code. + replaceCurrent(getDroppedUnconditionalChildrenAndAppend( + curr, wasm, options, builder.makeUnreachable())); + optimized = true; + return; + } + + // This is reachable. Check if we can emit something optimized for it. + // TODO: can we handle more general things here too? + if (!contents.canMakeExpression()) { + return; + } + + if (contents.isNull() && curr->type.isNullable()) { + // Null values are all identical, so just fix up the type here if we need + // to (the null's type might not fit in this expression, if it passed + // through casts). + if (!Type::isSubType(contents.getType(), curr->type)) { + contents = PossibleContents::literal( + Literal::makeNull(curr->type.getHeapType())); + } + + // Note that if curr's type is *not* nullable, then the code will trap at + // runtime (the null must arrive through a cast that will trap). We handle + // that below, so we don't need to think about it here. + + // TODO: would emitting a more specific null be useful when valid? + } + + auto* c = contents.makeExpression(wasm); + + // We can only place the constant value here if it has the right type. For + // example, a block may return (ref any), that is, not allow a null, but in + // practice only a null may flow there if it goes through casts that will + // trap at runtime. + // TODO: GUFA should eventually do this, but it will require it properly + // filtering content not just on ref.cast as it does now, but also + // ref.as etc. Once it does those we could assert on the type being + // valid here. + if (Type::isSubType(c->type, curr->type)) { + replaceCurrent( + getDroppedUnconditionalChildrenAndAppend(curr, wasm, options, c)); + optimized = true; + } else { + // The type is not compatible: we cannot place |c| in this location, even + // though we have proven it is the only value possible here. + if (Properties::isConstantExpression(c)) { + // The type is not compatible and this is a simple constant expression + // like a ref.func. That means this code must be unreachable. (See below + // for the case of a non-constant.) + replaceCurrent(getDroppedUnconditionalChildrenAndAppend( + curr, wasm, options, builder.makeUnreachable())); + optimized = true; + } else { + // This is not a constant expression, but we are certain it is the right + // value. Atm the only such case we handle is a global.get of an + // immutable global. We don't know what the value will be, nor its + // specific type, but we do know that a global.get will get that value + // properly. However, in this case it does not have the right type for + // this location. That can happen since the global.get does not have + // exactly the proper type for the contents: the global.get might be + // nullable, for example, even though the contents are not actually a + // null. For example, consider what happens here: + // + // (global $foo (ref null any) (struct.new $Foo)) + // .. + // (ref.as_non_null + // (global.get $foo)) + // + // We create a $Foo in the global $foo, so its value is not a null. But + // the global's type is nullable, so the global.get's type will be as + // well. When we get to the ref.as_non_null, we then want to replace it + // with a global.get - in fact that's what its child already is, showing + // it is the right content for it - but that global.get would not have a + // non-nullable type like a ref.as_non_null must have, so we cannot + // simply replace it. + // + // For now, do nothing here, but in some cases we could probably + // optimize (e.g. by adding a ref.as_non_null in the example) TODO + assert(c->is<GlobalGet>()); + } + } + } + + // TODO: If an instruction would trap on null, like struct.get, we could + // remove it here if it has no possible contents and if we are in + // traps-never-happen mode (that is, we'd have proven it can only trap, + // but we assume no traps happen, so it must be dead code). That info + // is present in OptimizeInstructions where it removes redundant + // ref.as_non_null (it removes them because it knows that the parent + // will trap on null anyhow), so maybe there is a way to share that + // information about parents. + + void visitFunction(Function* func) { + if (!optimized) { + return; + } + + // Optimization may introduce more unreachables, which we need to + // propagate. + ReFinalize().walkFunctionInModule(func, getModule()); + + // We may add blocks around pops, which we must fix up. + EHUtils::handleBlockNestedPops(func, *getModule()); + + // If we are in "optimizing" mode, we'll also run some more passes on this + // function that we just optimized. If not, leave now. + if (!optimizing) { + return; + } + + PassRunner runner(getModule(), getPassOptions()); + runner.setIsNested(true); + // New unreachables we added have created dead code we can remove. If we do + // not do this, then running GUFA repeatedly can actually increase code size + // (by adding multiple unneeded unreachables). + runner.add("dce"); + // New drops we added allow us to remove more unused code and values. As + // with unreachables, without a vacuum we may increase code size as in + // nested expressions we may apply the same value multiple times: + // + // (block $out + // (block $in + // (i32.const 10))) + // + // In each of the blocks we'll infer the value must be 10, so we'll end up + // with this repeating code: + // + // (block ;; a new block just to drop the old outer block + // (drop + // (block $out + // (drop + // (block $in + // (i32.const 10) + // ) + // ) + // (i32.const 10) + // ) + // ) + // (i32.const 10) + // ) + runner.add("vacuum"); + runner.runOnFunction(func); + } +}; + +struct GUFAPass : public Pass { + bool optimizing; + + GUFAPass(bool optimizing) : optimizing(optimizing) {} + + void run(PassRunner* runner, Module* module) override { + ContentOracle oracle(*module); + GUFAOptimizer(oracle, optimizing).run(runner, module); + } +}; + +} // anonymous namespace + +Pass* createGUFAPass() { return new GUFAPass(false); } +Pass* createGUFAOptimizingPass() { return new GUFAPass(true); } + +} // namespace wasm |