Diffstat (limited to 'src/passes/Heap2Local.cpp')
-rw-r--r--  src/passes/Heap2Local.cpp  741
1 file changed, 389 insertions, 352 deletions
diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index fcf0d8886..534ff2f3d 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -166,337 +166,46 @@ namespace wasm {
namespace {
-struct Heap2LocalOptimizer {
- Function* func;
- Module* module;
- const PassOptions& passOptions;
-
- // To find allocations that do not escape, we must track locals so that we
- // can see how allocations flow from sets to gets and so forth.
- // TODO: only scan reference types
- LocalGraph localGraph;
+// Core analysis that provides an escapes() method to check if an allocation
+// escapes in a way that prevents optimizing it away as described above. It also
+// stashes information about the relevant expressions as it goes, which helps
+// optimization later (|seen| and |reached|).
+struct EscapeAnalyzer {
+ // All the expressions that have already been seen by the optimizer; see the
+ // comment above on exclusivity. Once we have seen something while analyzing
+ // one allocation, reaching it again proves we lost exclusivity, so we can
+ // exit early. We must track this across multiple instances of EscapeAnalyzer,
+ // as each instance handles a particular allocation.
+ std::unordered_set<Expression*>& seen;
// To find what escapes, we need to follow where values flow, both up to
- // parents, and via branches.
- Parents parents;
- BranchUtils::BranchTargets branchTargets;
-
- Heap2LocalOptimizer(Function* func,
- Module* module,
- const PassOptions& passOptions)
- : func(func), module(module), passOptions(passOptions),
- localGraph(func, module), parents(func->body), branchTargets(func->body) {
- // We need to track what each set influences, to see where its value can
- // flow to.
- localGraph.computeSetInfluences();
-
- // All the allocations in the function.
- // TODO: Arrays (of constant size) as well, if all element accesses use
- // constant indexes. One option might be to first convert such
- // nonescaping arrays into structs.
- FindAll<StructNew> allocations(func->body);
-
- for (auto* allocation : allocations.list) {
- // The point of this optimization is to replace heap allocations with
- // locals, so we must be able to place the data in locals.
- if (!canHandleAsLocals(allocation->type)) {
- continue;
- }
-
- convertToLocals(allocation);
- }
- }
-
- bool canHandleAsLocals(Type type) {
- if (type == Type::unreachable) {
- return false;
- }
- auto& fields = type.getHeapType().getStruct().fields;
- for (auto field : fields) {
- if (!TypeUpdating::canHandleAsLocal(field.type)) {
- return false;
- }
- if (field.isPacked()) {
- // TODO: support packed fields by adding coercions/truncations.
- return false;
- }
- }
- return true;
- }
-
- // Handles the rewriting that we do to perform the optimization. We store the
- // data that rewriting will need here, while we analyze, and then if we can do
- // the optimization, we tell it to run.
- //
- // TODO: Doing a single rewrite walk at the end would be more efficient, but
- // it would need to be more complex.
- struct Rewriter : PostWalker<Rewriter> {
- StructNew* allocation;
- Function* func;
- Module* module;
- Builder builder;
- const FieldList& fields;
-
- Rewriter(StructNew* allocation, Function* func, Module* module)
- : allocation(allocation), func(func), module(module), builder(*module),
- fields(allocation->type.getHeapType().getStruct().fields) {}
-
- // We must track all the local.sets that write the allocation, to verify
- // exclusivity.
- std::unordered_set<LocalSet*> sets;
-
- // All the expressions we reached during the flow analysis. That is exactly
- // all the places where our allocation is used. We track these so that we
- // can fix them up at the end, if the optimization ends up possible.
- std::unordered_set<Expression*> reached;
-
- // Maps indexes in the struct to the local index that will replace them.
- std::vector<Index> localIndexes;
-
- // In rare cases we may need to refinalize, see below.
- bool refinalize = false;
-
- void applyOptimization() {
- // Allocate locals to store the allocation's fields in.
- for (auto field : fields) {
- localIndexes.push_back(builder.addVar(func, field.type));
- }
-
- // Replace the things we need to using the visit* methods.
- walk(func->body);
-
- if (refinalize) {
- ReFinalize().walkFunctionInModule(func, module);
- }
- }
-
- // Rewrite the code in visit* methods. The general approach taken is to
- // replace the allocation with a null reference (which may require changing
- // types in some places, like making a block return value nullable), and to
- // remove all uses of it as much as possible, using the information we have
- // (for example, when our allocation reaches a RefAsNonNull we can simply
- // remove that operation as we know it would not throw). Some things are
- // left to other passes, like getting rid of dropped code without side
- // effects.
-
- // Adjust the type that flows through an expression, updating that type as
- // necessary.
- void adjustTypeFlowingThrough(Expression* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // Our allocation passes through this expr. We must turn its type into a
- // nullable one, because we will remove things like RefAsNonNull of it,
- // which means we may no longer have a non-nullable value as our input,
- // and we could fail to validate. It is safe to make this change in terms
- // of our parent, since we know very specifically that only safe things
- // will end up using our value, like a StructGet or a Drop, which do not
- // care about non-nullability.
- assert(curr->type.isRef());
- curr->type = Type(curr->type.getHeapType(), Nullable);
- }
-
- void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); }
-
- void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); }
-
- void visitLocalSet(LocalSet* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // We don't need any sets of the reference to any of the locals it
- // originally was written to.
- if (curr->isTee()) {
- replaceCurrent(curr->value);
- } else {
- replaceCurrent(builder.makeDrop(curr->value));
- }
- }
-
- void visitLocalGet(LocalGet* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // Uses of this get will drop it, so the value does not matter. Replace it
- // with something else, which avoids issues with non-nullability (when
- // non-nullable locals are enabled), which could happen like this:
- //
- // (local $x (ref $foo))
- // (local.set $x ..)
- // (.. (local.get $x))
- //
- // If we remove the set but not the get then the get would appear to read
- // the default value of a non-nullable local, which is not allowed.
- //
- // For simplicity, replace the get with a null. We anyhow have null types
- // in the places where our allocation was earlier, see notes on
- // visitBlock, and so using a null here adds no extra complexity.
- replaceCurrent(builder.makeRefNull(curr->type.getHeapType()));
- }
-
- void visitBreak(Break* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // Breaks that our allocation flows through may change type, as we now
- // have a nullable type there.
- curr->finalize();
- }
-
- void visitStructNew(StructNew* curr) {
- if (curr != allocation) {
- return;
- }
+ // parents, and via branches, and through locals.
+ // TODO: for efficiency, only scan reference types in LocalGraph
+ const LocalGraph& localGraph;
+ const Parents& parents;
+ const BranchUtils::BranchTargets& branchTargets;
- // First, assign the initial values to the new locals.
- std::vector<Expression*> contents;
-
- if (!allocation->isWithDefault()) {
- // We must assign the initial values to temp indexes, then copy them
- // over all at once. If instead we did set them as we go, then we might
- // hit a problem like this:
- //
- // (local.set X (new_X))
- // (local.set Y (block (result ..)
- // (.. (local.get X) ..) ;; returns new_X, wrongly
- // (new_Y)
- // )
- //
- // Note how we assign to the local X and use it during the assignment to
- // the local Y - but we should still see the old value of X, not new_X.
- // Temp locals X', Y' can ensure that:
- //
- // (local.set X' (new_X))
- // (local.set Y' (block (result ..)
- // (.. (local.get X) ..) ;; returns the proper, old X
- // (new_Y)
- // )
- // ..
- // (local.set X (local.get X'))
- // (local.set Y (local.get Y'))
- std::vector<Index> tempIndexes;
-
- for (auto field : fields) {
- tempIndexes.push_back(builder.addVar(func, field.type));
- }
-
- // Store the initial values into the temp locals.
- for (Index i = 0; i < tempIndexes.size(); i++) {
- contents.push_back(
- builder.makeLocalSet(tempIndexes[i], allocation->operands[i]));
- }
-
- // Copy them to the normal ones.
- for (Index i = 0; i < tempIndexes.size(); i++) {
- contents.push_back(builder.makeLocalSet(
- localIndexes[i],
- builder.makeLocalGet(tempIndexes[i], fields[i].type)));
- }
-
- // TODO Check if the nondefault case does not increase code size in some
- // cases. A heap allocation that implicitly sets the default values
- // is smaller than multiple explicit settings of locals to
- // defaults.
- } else {
- // Set the default values.
- // Note that we must assign the defaults because we might be in a loop,
- // that is, there might be a previous value.
- for (Index i = 0; i < localIndexes.size(); i++) {
- contents.push_back(builder.makeLocalSet(
- localIndexes[i],
- builder.makeConstantExpression(Literal::makeZero(fields[i].type))));
- }
- }
-
- // Replace the allocation with a null reference. This changes the type
- // from non-nullable to nullable, but as we optimize away the code that
- // the allocation reaches, we will handle that.
- contents.push_back(builder.makeRefNull(allocation->type.getHeapType()));
- replaceCurrent(builder.makeBlock(contents));
- }
-
- void visitRefAs(RefAs* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // It is safe to optimize out this RefAsNonNull, since we proved it
- // contains our allocation, and so cannot trap.
- assert(curr->op == RefAsNonNull);
- replaceCurrent(curr->value);
- }
-
- void visitRefCast(RefCast* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // It is safe to optimize out this RefCast, since we proved it
- // contains our allocation and we have checked that the type of
- // the allocation is a subtype of the type of the cast, and so
- // cannot trap.
- replaceCurrent(curr->ref);
-
- // We need to refinalize after this, as while we know the cast is not
- // logically needed - the value flowing through will not be used - we do
- // need validation to succeed even before other optimizations remove the
- // code. For example:
- //
- // (block (result $B)
- // (ref.cast $B
- // (block (result $A)
- //
- // Without the cast this does not validate, so we need to refinalize
- // (which will fix this, as we replace the unused value with a null, so
- // that type will propagate out).
- refinalize = true;
- }
-
- void visitStructSet(StructSet* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- // Drop the ref (leaving it to other opts to remove, when possible), and
- // write the data to the local instead of the heap allocation.
- replaceCurrent(builder.makeSequence(
- builder.makeDrop(curr->ref),
- builder.makeLocalSet(localIndexes[curr->index], curr->value)));
- }
-
- void visitStructGet(StructGet* curr) {
- if (!reached.count(curr)) {
- return;
- }
-
- auto type = fields[curr->index].type;
- if (type != curr->type) {
- // Normally we are just replacing a struct.get with a local.get of a
- // local that was created to have the same type as the struct's field,
- // but in some cases we may refine, if the struct.get's reference type
- // is less refined than the reference that actually arrives, like here:
- //
- // (struct.get $parent 0
- // (block (ref $parent)
- // (struct.new $child)))
- //
- // We allocated locals for the field of the child, and are replacing a
- // get of the parent field with a local of the same type as the child's,
- // which may be more refined.
- refinalize = true;
- }
- replaceCurrent(builder.makeSequence(
- builder.makeDrop(curr->ref),
- builder.makeLocalGet(localIndexes[curr->index], type)));
- }
- };
-
- // All the expressions we have already looked at.
- std::unordered_set<Expression*> seen;
+ const PassOptions& passOptions;
+ Module& wasm;
+
+ EscapeAnalyzer(std::unordered_set<Expression*>& seen,
+ const LocalGraph& localGraph,
+ const Parents& parents,
+ const BranchUtils::BranchTargets& branchTargets,
+ const PassOptions& passOptions,
+ Module& wasm)
+ : seen(seen), localGraph(localGraph), parents(parents),
+ branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {}
+
+ // We must track all the local.sets that write the allocation, to verify
+ // exclusivity.
+ std::unordered_set<LocalSet*> sets;
+
+ // All the expressions we reached during the flow analysis. That is exactly
+ // all the places where our allocation is used. We track these so that we
+ // can fix them up at the end, if the optimization ends up possible.
+ std::unordered_set<Expression*> reached;
enum class ParentChildInteraction {
// The parent lets the child escape. E.g. the parent is a call.
@@ -518,11 +227,8 @@ struct Heap2LocalOptimizer {
Mixes,
};
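// As a rough illustration of these interactions (the names $f, $T, $x here
// are hypothetical):
//
//   (call $f (struct.new $T))                    ;; escapes: sent to a call
//   (drop (struct.new $T))                       ;; fully consumed: dropped
//   (block (result (ref $T)) .. (struct.new $T)) ;; flows onward out of the block
//   (select (struct.new $T) (local.get $x) (..)) ;; mixes: may be another value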
- // Analyze an allocation to see if we can convert it from a heap allocation to
- // locals.
- void convertToLocals(StructNew* allocation) {
- Rewriter rewriter(allocation, func, module);
-
+ // Analyze an allocation to see if it escapes or not.
+ bool escapes(StructNew* allocation) {
// A queue of flows from children to parents. When something is in the queue
// here then it is assumed that it is ok for the allocation to be at the child
// (that is, we have already checked the child before placing it in the
@@ -546,7 +252,7 @@ struct Heap2LocalOptimizer {
interaction == ParentChildInteraction::Mixes) {
// If the parent may let us escape, or the parent mixes other values
// up with us, give up.
- return;
+ return true;
}
// The parent either fully consumes us, or flows us onwards; either way,
@@ -575,7 +281,7 @@ struct Heap2LocalOptimizer {
// added the parent to |seen| for both children, the reference would get
// blocked from being optimized.
if (!seen.emplace(parent).second) {
- return;
+ return true;
}
// We can proceed, as the parent interacts with us properly, and we are
@@ -592,7 +298,7 @@ struct Heap2LocalOptimizer {
// exclusive use of our allocation by all the gets that read the value.
// Note the set, and we will check the gets at the end once we know all
// of our sets.
- rewriter.sets.insert(set);
+ sets.insert(set);
// We must also look at how the value flows from those gets.
if (auto* getsReached = getGetsReached(set)) {
@@ -611,17 +317,17 @@ struct Heap2LocalOptimizer {
// If we got to here, then we can continue to hope that we can optimize
// this allocation. Mark the parent and child as reached by it, and
// continue.
- rewriter.reached.insert(parent);
- rewriter.reached.insert(child);
+ reached.insert(parent);
+ reached.insert(child);
}
// We finished the loop over the flows. Do the final checks.
- if (!getsAreExclusiveToSets(rewriter.sets)) {
- return;
+ if (!getsAreExclusiveToSets()) {
+ return true;
}
- // We can do it, hurray!
- rewriter.applyOptimization();
+ // Nothing escapes, hurray!
+ return false;
}
ParentChildInteraction getParentChildInteraction(StructNew* allocation,
@@ -729,7 +435,7 @@ struct Heap2LocalOptimizer {
// Finally, check for mixing. If the child is the immediate fallthrough
// of the parent then no other values can be mixed in.
- if (Properties::getImmediateFallthrough(parent, passOptions, *module) ==
+ if (Properties::getImmediateFallthrough(parent, passOptions, wasm) ==
child) {
return ParentChildInteraction::Flows;
}
@@ -755,7 +461,7 @@ struct Heap2LocalOptimizer {
return ParentChildInteraction::Mixes;
}
- LocalGraph::SetInfluences* getGetsReached(LocalSet* set) {
+ const LocalGraph::SetInfluences* getGetsReached(LocalSet* set) {
auto iter = localGraph.setInfluences.find(set);
if (iter != localGraph.setInfluences.end()) {
return &iter->second;
@@ -763,8 +469,8 @@ struct Heap2LocalOptimizer {
return nullptr;
}
- BranchUtils::NameSet branchesSentByParent(Expression* child,
- Expression* parent) {
+ const BranchUtils::NameSet branchesSentByParent(Expression* child,
+ Expression* parent) {
BranchUtils::NameSet names;
BranchUtils::operateOnScopeNameUsesAndSentValues(
parent, [&](Name name, Expression* value) {
@@ -780,7 +486,7 @@ struct Heap2LocalOptimizer {
// else), we need to check whether all the gets that read that value cannot
// read anything else (which would be the case if another set writes to that
// local, in the right live range).
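// For example (a sketch; the locals and types are hypothetical), the get
// below may read either our allocation or whatever the second set wrote, so
// we cannot optimize:
//
//   (local.set $x (struct.new $T))           ;; the set we tracked
//   (if (..)
//     (local.set $x (local.get $y)))         ;; another set of the same local
//   (drop (struct.get $T 0 (local.get $x)))  ;; may read either set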
- bool getsAreExclusiveToSets(const std::unordered_set<LocalSet*>& sets) {
+ bool getsAreExclusiveToSets() {
// Find all the relevant gets (which may overlap between the sets).
std::unordered_set<LocalGet*> gets;
for (auto* set : sets) {
@@ -793,7 +499,9 @@ struct Heap2LocalOptimizer {
// Check that the gets can only read from the specific known sets.
for (auto* get : gets) {
- for (auto* set : localGraph.getSetses[get]) {
+ auto iter = localGraph.getSetses.find(get);
+ assert(iter != localGraph.getSetses.end());
+ for (auto* set : iter->second) {
if (sets.count(set) == 0) {
return false;
}
@@ -804,11 +512,340 @@ struct Heap2LocalOptimizer {
}
};
-struct Heap2Local : public WalkerPass<PostWalker<Heap2Local>> {
+// An optimizer that handles the rewriting to turn a struct allocation into
+// locals. We run this after proving that the allocation does not escape.
+//
+// TODO: Doing a single rewrite walk at the end (for all structs) would be more
+// efficient, but it would need to be more complex.
+struct Struct2Local : PostWalker<Struct2Local> {
+ StructNew* allocation;
+ const EscapeAnalyzer& analyzer;
+ Function* func;
+ Module& wasm;
+ Builder builder;
+ const FieldList& fields;
+
+ Struct2Local(StructNew* allocation,
+ const EscapeAnalyzer& analyzer,
+ Function* func,
+ Module& wasm)
+ : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm),
+ builder(wasm), fields(allocation->type.getHeapType().getStruct().fields) {
+
+ // Allocate locals to store the allocation's fields in.
+ for (auto field : fields) {
+ localIndexes.push_back(builder.addVar(func, field.type));
+ }
+
+ // Replace the things we need to using the visit* methods.
+ walk(func->body);
+
+ if (refinalize) {
+ ReFinalize().walkFunctionInModule(func, &wasm);
+ }
+ }
+
+ // Maps indexes in the struct to the local index that will replace them.
+ std::vector<Index> localIndexes;
+
+ // In rare cases we may need to refinalize, see below.
+ bool refinalize = false;
+
+ // Rewrite the code in visit* methods. The general approach taken is to
+ // replace the allocation with a null reference (which may require changing
+ // types in some places, like making a block return value nullable), and to
+ // remove all uses of it as much as possible, using the information we have
+ // (for example, when our allocation reaches a RefAsNonNull we can simply
+ // remove that operation as we know it would not throw). Some things are
+ // left to other passes, like getting rid of dropped code without side
+ // effects.
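+ //
+ // As a rough end-to-end sketch (the local names are hypothetical), code like
+ //
+ //   (local.set $ref (struct.new $T (i32.const 42)))
+ //   (drop (struct.get $T 0 (local.get $ref)))
+ //
+ // where the allocation provably does not escape, becomes something like
+ //
+ //   (drop
+ //     (block (result (ref null $T))
+ //       (local.set $temp (i32.const 42))
+ //       (local.set $field0 (local.get $temp))
+ //       (ref.null $T)))          ;; the allocation is now just a null
+ //   (drop
+ //     (block (result i32)
+ //       (drop (ref.null $T))     ;; was (local.get $ref)
+ //       (local.get $field0)))    ;; was the struct.get
+ //
+ // Later passes then clean up the dropped nulls and blocks.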
+
+ // Adjust the type that flows through an expression, updating that type as
+ // necessary.
+ void adjustTypeFlowingThrough(Expression* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // Our allocation passes through this expr. We must turn its type into a
+ // nullable one, because we will remove things like RefAsNonNull of it,
+ // which means we may no longer have a non-nullable value as our input,
+ // and we could fail to validate. It is safe to make this change in terms
+ // of our parent, since we know very specifically that only safe things
+ // will end up using our value, like a StructGet or a Drop, which do not
+ // care about non-nullability.
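+ //
+ // For example (sketch), if the allocation previously flowed out of a block
+ // and through a ref.as_non_null,
+ //
+ //   (ref.as_non_null
+ //     (block (result (ref $T))
+ //       ..
+ //       (struct.new $T)))
+ //
+ // then once we replace the allocation with a null and remove the
+ // ref.as_non_null, the block must carry (ref null $T) instead.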
+ assert(curr->type.isRef());
+ curr->type = Type(curr->type.getHeapType(), Nullable);
+ }
+
+ void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); }
+
+ void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); }
+
+ void visitLocalSet(LocalSet* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // We don't need any sets of the reference to any of the locals it
+ // originally was written to.
+ if (curr->isTee()) {
+ replaceCurrent(curr->value);
+ } else {
+ replaceCurrent(builder.makeDrop(curr->value));
+ }
+ }
+
+ void visitLocalGet(LocalGet* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // Uses of this get will drop it, so the value does not matter. Replace it
+ // with something else, which avoids issues with non-nullability (when
+ // non-nullable locals are enabled), which could happen like this:
+ //
+ // (local $x (ref $foo))
+ // (local.set $x ..)
+ // (.. (local.get $x))
+ //
+ // If we remove the set but not the get then the get would appear to read
+ // the default value of a non-nullable local, which is not allowed.
+ //
+ // For simplicity, replace the get with a null. We anyhow have null types
+ // in the places where our allocation was earlier, see notes on
+ // visitBlock, and so using a null here adds no extra complexity.
+ replaceCurrent(builder.makeRefNull(curr->type.getHeapType()));
+ }
+
+ void visitBreak(Break* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // Breaks that our allocation flows through may change type, as we now
+ // have a nullable type there.
+ curr->finalize();
+ }
+
+ void visitStructNew(StructNew* curr) {
+ if (curr != allocation) {
+ return;
+ }
+
+ // First, assign the initial values to the new locals.
+ std::vector<Expression*> contents;
+
+ if (!allocation->isWithDefault()) {
+ // We must assign the initial values to temp indexes, then copy them
+ // over all at once. If instead we did set them as we go, then we might
+ // hit a problem like this:
+ //
+ // (local.set X (new_X))
+ // (local.set Y (block (result ..)
+ // (.. (local.get X) ..) ;; returns new_X, wrongly
+ // (new_Y)
+ // )
+ //
+ // Note how we assign to the local X and use it during the assignment to
+ // the local Y - but we should still see the old value of X, not new_X.
+ // Temp locals X', Y' can ensure that:
+ //
+ // (local.set X' (new_X))
+ // (local.set Y' (block (result ..)
+ // (.. (local.get X) ..) ;; returns the proper, old X
+ // (new_Y)
+ // )
+ // ..
+ // (local.set X (local.get X'))
+ // (local.set Y (local.get Y'))
+ std::vector<Index> tempIndexes;
+
+ for (auto field : fields) {
+ tempIndexes.push_back(builder.addVar(func, field.type));
+ }
+
+ // Store the initial values into the temp locals.
+ for (Index i = 0; i < tempIndexes.size(); i++) {
+ contents.push_back(
+ builder.makeLocalSet(tempIndexes[i], allocation->operands[i]));
+ }
+
+ // Copy them to the normal ones.
+ for (Index i = 0; i < tempIndexes.size(); i++) {
+ contents.push_back(builder.makeLocalSet(
+ localIndexes[i],
+ builder.makeLocalGet(tempIndexes[i], fields[i].type)));
+ }
+
+ // TODO: Check whether the nondefault case increases code size in some
+ // cases. A heap allocation that implicitly sets the default values
+ // is smaller than multiple explicit settings of locals to
+ // defaults.
+ } else {
+ // Set the default values.
+ // Note that we must assign the defaults because we might be in a loop,
+ // that is, there might be a previous value.
+ for (Index i = 0; i < localIndexes.size(); i++) {
+ contents.push_back(builder.makeLocalSet(
+ localIndexes[i],
+ builder.makeConstantExpression(Literal::makeZero(fields[i].type))));
+ }
+ }
+
+ // Replace the allocation with a null reference. This changes the type
+ // from non-nullable to nullable, but as we optimize away the code that
+ // the allocation reaches, we will handle that.
+ contents.push_back(builder.makeRefNull(allocation->type.getHeapType()));
+ replaceCurrent(builder.makeBlock(contents));
+ }
+
+ void visitRefAs(RefAs* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // It is safe to optimize out this RefAsNonNull, since we proved it
+ // contains our allocation, and so cannot trap.
+ assert(curr->op == RefAsNonNull);
+ replaceCurrent(curr->value);
+ }
+
+ void visitRefCast(RefCast* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // It is safe to optimize out this RefCast, since we proved it
+ // contains our allocation and we have checked that the type of
+ // the allocation is a subtype of the type of the cast, and so
+ // cannot trap.
+ replaceCurrent(curr->ref);
+
+ // We need to refinalize after this, as while we know the cast is not
+ // logically needed - the value flowing through will not be used - we do
+ // need validation to succeed even before other optimizations remove the
+ // code. For example:
+ //
+ // (block (result $B)
+ // (ref.cast $B
+ // (block (result $A)
+ //
+ // Without the cast this does not validate, so we need to refinalize
+ // (which will fix this, as we replace the unused value with a null, so
+ // that type will propagate out).
+ refinalize = true;
+ }
+
+ void visitStructSet(StructSet* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ // Drop the ref (leaving it to other opts to remove, when possible), and
+ // write the data to the local instead of the heap allocation.
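+ //
+ // For example (sketch), roughly:
+ //
+ //   (struct.set $T 0 (REF) (VALUE))
+ // =>
+ //   (drop (REF))
+ //   (local.set $field0 (VALUE))   ;; $field0 is the local for field 0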
+ replaceCurrent(builder.makeSequence(
+ builder.makeDrop(curr->ref),
+ builder.makeLocalSet(localIndexes[curr->index], curr->value)));
+ }
+
+ void visitStructGet(StructGet* curr) {
+ if (!analyzer.reached.count(curr)) {
+ return;
+ }
+
+ auto type = fields[curr->index].type;
+ if (type != curr->type) {
+ // Normally we are just replacing a struct.get with a local.get of a
+ // local that was created to have the same type as the struct's field,
+ // but in some cases we may refine, if the struct.get's reference type
+ // is less refined than the reference that actually arrives, like here:
+ //
+ // (struct.get $parent 0
+ // (block (ref $parent)
+ // (struct.new $child)))
+ //
+ // We allocated locals for the field of the child, and are replacing a
+ // get of the parent field with a local of the same type as the child's,
+ // which may be more refined.
+ refinalize = true;
+ }
+ replaceCurrent(builder.makeSequence(
+ builder.makeDrop(curr->ref),
+ builder.makeLocalGet(localIndexes[curr->index], type)));
+ }
+};
+
+// Core Heap2Local optimization that operates on a function: Builds up the data
+// structures we need (LocalGraph, etc.) that we will use across multiple
+// analyses of allocations, and then runs those analyses and optimizes where
+// possible.
+struct Heap2Local {
+ Function* func;
+ Module& wasm;
+ const PassOptions& passOptions;
+
+ LocalGraph localGraph;
+ Parents parents;
+ BranchUtils::BranchTargets branchTargets;
+
+ Heap2Local(Function* func, Module& wasm, const PassOptions& passOptions)
+ : func(func), wasm(wasm), passOptions(passOptions), localGraph(func, &wasm),
+ parents(func->body), branchTargets(func->body) {
+ // We need to track what each set influences, to see where its value can
+ // flow to.
+ localGraph.computeSetInfluences();
+
+ // All the expressions we have already looked at. We use this to avoid
+ // repeated work, see above.
+ std::unordered_set<Expression*> seen;
+
+ // All the allocations in the function.
+ // TODO: Arrays (of constant size) as well, if all element accesses use
+ // constant indexes. One option might be to first convert such
+ // nonescaping arrays into structs.
+ FindAll<StructNew> allocations(func->body);
+
+ for (auto* allocation : allocations.list) {
+ // The point of this optimization is to replace heap allocations with
+ // locals, so we must be able to place the data in locals.
+ if (!canHandleAsLocals(allocation->type)) {
+ continue;
+ }
+
+ // Check for escaping, noting relevant information as we go. If this does
+ // not escape, optimize it.
+ EscapeAnalyzer analyzer(
+ seen, localGraph, parents, branchTargets, passOptions, wasm);
+ if (!analyzer.escapes(allocation)) {
+ Struct2Local optimizer(allocation, analyzer, func, wasm);
+ }
+ }
+ }
+
+ bool canHandleAsLocals(Type type) {
+ if (type == Type::unreachable) {
+ return false;
+ }
+ auto& fields = type.getHeapType().getStruct().fields;
+ for (auto field : fields) {
+ if (!TypeUpdating::canHandleAsLocal(field.type)) {
+ return false;
+ }
+ if (field.isPacked()) {
+ // TODO: support packed fields by adding coercions/truncations.
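+ // (For example, an i8 field would need an i32 local plus explicit
+ // truncation/extension to emulate the packed stores and loads.)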
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
+struct Heap2LocalPass : public WalkerPass<PostWalker<Heap2LocalPass>> {
bool isFunctionParallel() override { return true; }
std::unique_ptr<Pass> create() override {
- return std::make_unique<Heap2Local>();
+ return std::make_unique<Heap2LocalPass>();
}
void doWalkFunction(Function* func) {
@@ -821,12 +858,12 @@ struct Heap2Local : public WalkerPass<PostWalker<Heap2Local>> {
// vacuum, in particular, to optimize such nested allocations.
// TODO Consider running multiple iterations here, and running vacuum in
// between them.
- Heap2LocalOptimizer(func, getModule(), getPassOptions());
+ Heap2Local(func, *getModule(), getPassOptions());
}
};
} // anonymous namespace
-Pass* createHeap2LocalPass() { return new Heap2Local(); }
+Pass* createHeap2LocalPass() { return new Heap2LocalPass(); }
} // namespace wasm