diff options
author | Alon Zakai <azakai@google.com> | 2022-07-22 08:21:42 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-22 08:21:42 -0700 |
commit | ed704448a6883e9ee0b2f6284f6b5a7b5e7b4aa9 (patch) | |
tree | 2db1f5d38f4c86264549366a499c4db6588c585f /src/passes/GUFA.cpp | |
parent | af7c69795668cad3e25a8f80e2e7a9741df97f29 (diff) | |
download | binaryen-ed704448a6883e9ee0b2f6284f6b5a7b5e7b4aa9.tar.gz binaryen-ed704448a6883e9ee0b2f6284f6b5a7b5e7b4aa9.tar.bz2 binaryen-ed704448a6883e9ee0b2f6284f6b5a7b5e7b4aa9.zip |
Grand Unified Flow Analysis (GUFA) (#4598)
This tracks the possible contents in the entire program all at once using a single IR.
That is in contrast to say DeadArgumentElimination of LocalRefining etc., all of whom
look at one particular aspect of the program (function params and returns in DAE,
locals in LocalRefining). The cost is to build up an entire new IR, which takes a lot
of new code (mostly in the already-landed PossibleContents). Another cost
is this new IR is very big and requires a lot of time and memory to process.
The benefit is that this can find opportunities that are only obvious when looking
at the entire program, and also it can track information that is more specialized
than the normal type system in the IR - in particular, this can track an ExactType,
which is the case where we know the value is of a particular type exactly and not
a subtype.
Diffstat (limited to 'src/passes/GUFA.cpp')
-rw-r--r-- | src/passes/GUFA.cpp | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp new file mode 100644 index 000000000..1ac8ed820 --- /dev/null +++ b/src/passes/GUFA.cpp @@ -0,0 +1,264 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Grand Unified Flow Analysis (GUFA) +// +// Optimize based on information about what content can appear in each location +// in the program. This does a whole-program analysis to find that out and +// hopefully learn more than the type system does - for example, a type might be +// $A, which means $A or any subtype can appear there, but perhaps the analysis +// can find that only $A', a particular subtype, can appear there in practice, +// and not $A or any subtypes of $A', etc. Or, we may find that no type is +// actually possible at a particular location, say if we can prove that the +// casts on the way to that location allow nothing through. We can also find +// that only a particular value is possible of that type. +// +// GUFA will infer constants and unreachability, and add those to the code. This +// can increase code size if further optimizations are not run later like dead +// code elimination and vacuum. The "optimizing" variant of this pass will run +// such followup opts automatically in functions where we make changes, and so +// it is useful if GUFA is run near the end of the optimization pipeline. +// +// TODO: GUFA + polymorphic devirtualization + traps-never-happen. If we see +// that the possible call targets are {A, B, C}, and GUFA info lets us +// prove that A, C will trap if called - say, if they cast the first +// parameter to something GUFA proved it cannot be - then we can ignore +// them, and devirtualize to a call to B. +// + +#include "ir/drop.h" +#include "ir/eh-utils.h" +#include "ir/possible-contents.h" +#include "ir/properties.h" +#include "ir/utils.h" +#include "pass.h" +#include "wasm.h" + +namespace wasm { + +namespace { + +struct GUFAOptimizer + : public WalkerPass< + PostWalker<GUFAOptimizer, UnifiedExpressionVisitor<GUFAOptimizer>>> { + bool isFunctionParallel() override { return true; } + + ContentOracle& oracle; + bool optimizing; + + GUFAOptimizer(ContentOracle& oracle, bool optimizing) + : oracle(oracle), optimizing(optimizing) {} + + GUFAOptimizer* create() override { + return new GUFAOptimizer(oracle, optimizing); + } + + bool optimized = false; + + void visitExpression(Expression* curr) { + // Skip things we can't improve in any way. + auto type = curr->type; + if (type == Type::unreachable || type == Type::none || + Properties::isConstantExpression(curr)) { + return; + } + + if (type.isTuple()) { + // TODO: tuple types. + return; + } + + if (type.isRef() && (getTypeSystem() != TypeSystem::Nominal && + getTypeSystem() != TypeSystem::Isorecursive)) { + // Without type info we can't analyze subtypes, so we cannot infer + // anything about refs. + return; + } + + // Ok, this is an interesting location that we might optimize. See what the + // oracle says is possible there. + auto contents = oracle.getContents(ExpressionLocation{curr, 0}); + + auto& options = getPassOptions(); + auto& wasm = *getModule(); + Builder builder(wasm); + + if (contents.getType() == Type::unreachable) { + // This cannot contain any possible value at all. It must be unreachable + // code. + replaceCurrent(getDroppedUnconditionalChildrenAndAppend( + curr, wasm, options, builder.makeUnreachable())); + optimized = true; + return; + } + + // This is reachable. Check if we can emit something optimized for it. + // TODO: can we handle more general things here too? + if (!contents.canMakeExpression()) { + return; + } + + if (contents.isNull() && curr->type.isNullable()) { + // Null values are all identical, so just fix up the type here if we need + // to (the null's type might not fit in this expression, if it passed + // through casts). + if (!Type::isSubType(contents.getType(), curr->type)) { + contents = PossibleContents::literal( + Literal::makeNull(curr->type.getHeapType())); + } + + // Note that if curr's type is *not* nullable, then the code will trap at + // runtime (the null must arrive through a cast that will trap). We handle + // that below, so we don't need to think about it here. + + // TODO: would emitting a more specific null be useful when valid? + } + + auto* c = contents.makeExpression(wasm); + + // We can only place the constant value here if it has the right type. For + // example, a block may return (ref any), that is, not allow a null, but in + // practice only a null may flow there if it goes through casts that will + // trap at runtime. + // TODO: GUFA should eventually do this, but it will require it properly + // filtering content not just on ref.cast as it does now, but also + // ref.as etc. Once it does those we could assert on the type being + // valid here. + if (Type::isSubType(c->type, curr->type)) { + replaceCurrent( + getDroppedUnconditionalChildrenAndAppend(curr, wasm, options, c)); + optimized = true; + } else { + // The type is not compatible: we cannot place |c| in this location, even + // though we have proven it is the only value possible here. + if (Properties::isConstantExpression(c)) { + // The type is not compatible and this is a simple constant expression + // like a ref.func. That means this code must be unreachable. (See below + // for the case of a non-constant.) + replaceCurrent(getDroppedUnconditionalChildrenAndAppend( + curr, wasm, options, builder.makeUnreachable())); + optimized = true; + } else { + // This is not a constant expression, but we are certain it is the right + // value. Atm the only such case we handle is a global.get of an + // immutable global. We don't know what the value will be, nor its + // specific type, but we do know that a global.get will get that value + // properly. However, in this case it does not have the right type for + // this location. That can happen since the global.get does not have + // exactly the proper type for the contents: the global.get might be + // nullable, for example, even though the contents are not actually a + // null. For example, consider what happens here: + // + // (global $foo (ref null any) (struct.new $Foo)) + // .. + // (ref.as_non_null + // (global.get $foo)) + // + // We create a $Foo in the global $foo, so its value is not a null. But + // the global's type is nullable, so the global.get's type will be as + // well. When we get to the ref.as_non_null, we then want to replace it + // with a global.get - in fact that's what its child already is, showing + // it is the right content for it - but that global.get would not have a + // non-nullable type like a ref.as_non_null must have, so we cannot + // simply replace it. + // + // For now, do nothing here, but in some cases we could probably + // optimize (e.g. by adding a ref.as_non_null in the example) TODO + assert(c->is<GlobalGet>()); + } + } + } + + // TODO: If an instruction would trap on null, like struct.get, we could + // remove it here if it has no possible contents and if we are in + // traps-never-happen mode (that is, we'd have proven it can only trap, + // but we assume no traps happen, so it must be dead code). That info + // is present in OptimizeInstructions where it removes redundant + // ref.as_non_null (it removes them because it knows that the parent + // will trap on null anyhow), so maybe there is a way to share that + // information about parents. + + void visitFunction(Function* func) { + if (!optimized) { + return; + } + + // Optimization may introduce more unreachables, which we need to + // propagate. + ReFinalize().walkFunctionInModule(func, getModule()); + + // We may add blocks around pops, which we must fix up. + EHUtils::handleBlockNestedPops(func, *getModule()); + + // If we are in "optimizing" mode, we'll also run some more passes on this + // function that we just optimized. If not, leave now. + if (!optimizing) { + return; + } + + PassRunner runner(getModule(), getPassOptions()); + runner.setIsNested(true); + // New unreachables we added have created dead code we can remove. If we do + // not do this, then running GUFA repeatedly can actually increase code size + // (by adding multiple unneeded unreachables). + runner.add("dce"); + // New drops we added allow us to remove more unused code and values. As + // with unreachables, without a vacuum we may increase code size as in + // nested expressions we may apply the same value multiple times: + // + // (block $out + // (block $in + // (i32.const 10))) + // + // In each of the blocks we'll infer the value must be 10, so we'll end up + // with this repeating code: + // + // (block ;; a new block just to drop the old outer block + // (drop + // (block $out + // (drop + // (block $in + // (i32.const 10) + // ) + // ) + // (i32.const 10) + // ) + // ) + // (i32.const 10) + // ) + runner.add("vacuum"); + runner.runOnFunction(func); + } +}; + +struct GUFAPass : public Pass { + bool optimizing; + + GUFAPass(bool optimizing) : optimizing(optimizing) {} + + void run(PassRunner* runner, Module* module) override { + ContentOracle oracle(*module); + GUFAOptimizer(oracle, optimizing).run(runner, module); + } +}; + +} // anonymous namespace + +Pass* createGUFAPass() { return new GUFAPass(false); } +Pass* createGUFAOptimizingPass() { return new GUFAPass(true); } + +} // namespace wasm |