-rw-r--r--  src/ir/branch-utils.h            43
-rw-r--r--  src/passes/Heap2Local.cpp        35
-rw-r--r--  test/lit/passes/heap2local.wast  228
3 files changed, 283 insertions(+), 23 deletions(-)
diff --git a/src/ir/branch-utils.h b/src/ir/branch-utils.h
index cf21facbc..662f70725 100644
--- a/src/ir/branch-utils.h
+++ b/src/ir/branch-utils.h
@@ -236,6 +236,22 @@ inline NameSet getBranchTargets(Expression* ast) {
return scanner.targets;
}
+// Get the name of the branch target that is defined in the expression, or an
+// empty name if there is none.
+inline Name getDefinedName(Expression* curr) {
+ Name ret;
+ operateOnScopeNameDefs(curr, [&](Name& name) { ret = name; });
+ return ret;
+}
+
+// Return the value sent by a branch instruction, or nullptr if there is none.
+inline Expression* getSentValue(Expression* curr) {
+ Expression* ret = nullptr;
+ operateOnScopeNameUsesAndSentValues(
+ curr, [&](Name name, Expression* value) { ret = value; });
+ return ret;
+}
+
// Finds if there are branches targeting a name. Note that since names are
// unique in our IR, we just need to look for the name, and do not need
// to analyze scoping.
@@ -352,24 +368,41 @@ public:
}
};
-// Provides a map of branch target names to the expressions that branch to
-// them.
+// Stores information about branch targets, specifically, finding them by their
+// name, and finding the branches to them.
struct BranchTargets {
BranchTargets(Expression* expr) { inner.walk(expr); }
- Expression* getTarget(Name name) { return inner.map[name]; }
+ // Gets the expression that defines this branch target, i.e., where we branch
+ // to if we branch to that name.
+ Expression* getTarget(Name name) { return inner.targets[name]; }
+
+ // Gets the expressions branching to a target.
+ std::unordered_set<Expression*> getBranches(Name name) {
+ auto iter = inner.branches.find(name);
+ if (iter != inner.branches.end()) {
+ return iter->second;
+ }
+ return {};
+ }
private:
struct Inner : public PostWalker<Inner, UnifiedExpressionVisitor<Inner>> {
void visitExpression(Expression* curr) {
operateOnScopeNameDefs(curr, [&](Name name) {
if (name.is()) {
- map[name] = curr;
+ targets[name] = curr;
+ }
+ });
+ operateOnScopeNameUses(curr, [&](Name& name) {
+ if (name.is()) {
+ branches[name].insert(curr);
}
});
}
- std::map<Name, Expression*> map;
+ std::map<Name, Expression*> targets;
+ std::map<Name, std::unordered_set<Expression*>> branches;
} inner;
};
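A minimal usage sketch of the extended BranchTargets API above (not part of the patch; the helper names countBranchesToTarget and getSoleSentValue are made up for illustration, and the BranchTargets instance is assumed to have been built over the relevant expression tree, as in the pass):

#include "ir/branch-utils.h"

using namespace wasm;

// How many branches target the label defined by |expr| (e.g. a Block's name)?
// Returns 0 if |expr| defines no label or nothing branches to it.
static size_t countBranchesToTarget(Expression* expr,
                                    BranchUtils::BranchTargets& targets) {
  Name name = BranchUtils::getDefinedName(expr);
  if (!name.is()) {
    return 0;
  }
  return targets.getBranches(name).size();
}

// The value sent by the single branch to |expr|, or nullptr if there is no
// such lone branch, or it sends no value.
static Expression* getSoleSentValue(Expression* expr,
                                    BranchUtils::BranchTargets& targets) {
  auto branches = targets.getBranches(BranchUtils::getDefinedName(expr));
  if (branches.size() != 1) {
    return nullptr;
  }
  return BranchUtils::getSentValue(*branches.begin());
}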
diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index 7e8f12990..afdcab06f 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -394,6 +394,9 @@ struct Heap2LocalOptimizer {
}
};
+ // All the expressions we have already looked at.
+ std::unordered_set<Expression*> seen;
+
enum class ParentChildInteraction {
// The parent lets the child escape. E.g. the parent is a call.
Escapes,
@@ -444,10 +447,10 @@ struct Heap2LocalOptimizer {
// look at something that another allocation reached, which would be in a
// different call to this function and use a different queue (any overlap
// between calls would prove non-exclusivity).
- if (seen.count(child)) {
+ if (seen.count(parent)) {
return false;
}
- seen.insert(child);
+ seen.insert(parent);
switch (getParentChildInteraction(parent, child)) {
case ParentChildInteraction::Escapes: {
@@ -490,7 +493,7 @@ struct Heap2LocalOptimizer {
// If the parent may send us on a branch, we will need to look at the flow
// to the branch target(s).
for (auto name : branchesSentByParent(child, parent)) {
- flows.push({parent, branchTargets.getTarget(name)});
+ flows.push({child, branchTargets.getTarget(name)});
}
// If we got to here, then we can continue to hope that we can optimize
@@ -509,9 +512,6 @@ struct Heap2LocalOptimizer {
return true;
}
- // All the expressions we have already looked at.
- std::unordered_set<Expression*> seen;
-
ParentChildInteraction getParentChildInteraction(Expression* parent,
Expression* child) {
// If there is no parent then we are the body of the function, and that
@@ -611,16 +611,29 @@ struct Heap2LocalOptimizer {
// Finally, check for mixing. If the child is the immediate fallthrough
// of the parent then no other values can be mixed in.
- //
- // TODO: Also check if we are sent via a branch (and that branch sends the
- // single value exiting the parent).
- // TODO: Also check for safe merges where our allocation is in all places,
- // like two if or select arms, or branches.
if (Properties::getImmediateFallthrough(
parent, passOptions, module->features) == child) {
return ParentChildInteraction::Flows;
}
+ // Likewise, if the child branches to the parent, and it is the sole branch,
+ // with no other value exiting the block (in particular, no final value at
+ // the end that flows out), then there is no mixing.
+ auto branches =
+ branchTargets.getBranches(BranchUtils::getDefinedName(parent));
+ if (branches.size() == 1 &&
+ BranchUtils::getSentValue(*branches.begin()) == child) {
+ // TODO: support more types of branch targets.
+ if (auto* parentAsBlock = parent->dynCast<Block>()) {
+ if (parentAsBlock->list.back()->type == Type::unreachable) {
+ return ParentChildInteraction::Flows;
+ }
+ }
+ }
+
+ // TODO: Also check for safe merges where our allocation is in all places,
+ // like two if or select arms, or branches.
+
return ParentChildInteraction::Mixes;
}
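Stated in one place, the new Heap2Local check above amounts to: the child may flow to the parent via a branch only when it is the sole branch to the parent's label and the parent is a block whose final element is unreachable, so no other value can exit the block and mix in. A self-contained sketch of that predicate (not the patch itself; the helper name childFlowsViaSoleBranch is hypothetical, and parent, child, and branchTargets are assumed to be as in the pass):

// Returns true if |child| reaches |parent| as the sole value sent by a branch:
// the only branch to the parent's label sends |child|, and the parent is a
// block whose last element is unreachable, so nothing falls out of the end of
// the block to mix with the branched value.
static bool childFlowsViaSoleBranch(Expression* parent,
                                    Expression* child,
                                    BranchUtils::BranchTargets& branchTargets) {
  auto branches =
    branchTargets.getBranches(BranchUtils::getDefinedName(parent));
  if (branches.size() != 1 ||
      BranchUtils::getSentValue(*branches.begin()) != child) {
    return false;
  }
  // TODO (as in the patch): support more types of branch targets than blocks.
  auto* block = parent->dynCast<Block>();
  return block && block->list.back()->type == Type::unreachable;
}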
diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast
index 29c25a8df..3736e4990 100644
--- a/test/lit/passes/heap2local.wast
+++ b/test/lit/passes/heap2local.wast
@@ -239,11 +239,13 @@
;; CHECK: (func $ignore-unreachable
;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (struct.new_with_rtt $struct.A
- ;; CHECK-NEXT: (i32.const 2)
- ;; CHECK-NEXT: (unreachable)
- ;; CHECK-NEXT: (rtt.canon $struct.A)
+ ;; CHECK-NEXT: (block ;; (replaces something unreachable we can't emit)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new_with_rtt $struct.A
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: (rtt.canon $struct.A)
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -729,7 +731,7 @@
)
)
- ;; CHECK: (func $branch-value (result f64)
+ ;; CHECK: (func $block-value (result f64)
;; CHECK-NEXT: (local $ref (ref null $struct.A))
;; CHECK-NEXT: (local $1 i32)
;; CHECK-NEXT: (local $2 f64)
@@ -758,7 +760,7 @@
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- (func $branch-value (result f64)
+ (func $block-value (result f64)
(local $ref (ref null $struct.A))
(local.set $ref
(struct.new_default_with_rtt $struct.A
@@ -1530,6 +1532,218 @@
)
)
+ ;; CHECK: (func $branch-to-block-no-fallthrough (result f64)
+ ;; CHECK-NEXT: (local $0 (ref null $struct.A))
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (local $2 f64)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result (ref $struct.A))
+ ;; CHECK-NEXT: (local.set $1
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $2
+ ;; CHECK-NEXT: (f64.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (struct.new_default_with_rtt $struct.A
+ ;; CHECK-NEXT: (rtt.canon $struct.A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block $block (result (ref null $struct.A))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (br_if $block
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (return
+ ;; CHECK-NEXT: (f64.const 2.1828)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $branch-to-block-no-fallthrough (result f64)
+ (local $0 (ref null $struct.A))
+ (local.set $0
+ (struct.new_default_with_rtt $struct.A
+ (rtt.canon $struct.A)
+ )
+ )
+ (struct.get $struct.A 1
+ (block $block (result (ref null $struct.A))
+ (drop
+ ;; A branch to the block of our allocation. In this case there is no
+ ;; other value reaching the block, and so our branch is the sole value,
+ ;; which means there is no mixing, and we can optimize this.
+ (br_if $block
+ (local.get $0)
+ (i32.const 0)
+ )
+ )
+ (return (f64.const 2.1828))
+ )
+ )
+ )
+
+ ;; CHECK: (func $two-branches (result f64)
+ ;; CHECK-NEXT: (local $0 (ref null $struct.A))
+ ;; CHECK-NEXT: (local.set $0
+ ;; CHECK-NEXT: (struct.new_default_with_rtt $struct.A
+ ;; CHECK-NEXT: (rtt.canon $struct.A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (struct.get $struct.A 1
+ ;; CHECK-NEXT: (block $block (result (ref null $struct.A))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (br_if $block
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (br_if $block
+ ;; CHECK-NEXT: (ref.null $struct.A)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (return
+ ;; CHECK-NEXT: (f64.const 2.1828)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $two-branches (result f64)
+ (local $0 (ref null $struct.A))
+ (local.set $0
+ (struct.new_default_with_rtt $struct.A
+ (rtt.canon $struct.A)
+ )
+ )
+ (struct.get $struct.A 1
+ (block $block (result (ref null $struct.A))
+ (drop
+ ;; A branch to the block of our allocation.
+ (br_if $block
+ (local.get $0)
+ (i32.const 0)
+ )
+ )
+ (drop
+ ;; Another branch, causing mixing that prevents optimizations.
+ (br_if $block
+ (ref.null $struct.A)
+ (i32.const 0)
+ )
+ )
+ (return (f64.const 2.1828))
+ )
+ )
+ )
+
+ ;; CHECK: (func $two-branches-b (result f64)
+ ;; CHECK-NEXT: (local $0 (ref null $struct.A))
+ ;; CHECK-NEXT: (local.set $0
+ ;; CHECK-NEXT: (struct.new_default_with_rtt $struct.A
+ ;; CHECK-NEXT: (rtt.canon $struct.A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (struct.get $struct.A 1
+ ;; CHECK-NEXT: (block $block (result (ref null $struct.A))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (br_if $block
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (br_if $block
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (return
+ ;; CHECK-NEXT: (f64.const 2.1828)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $two-branches-b (result f64)
+ (local $0 (ref null $struct.A))
+ (local.set $0
+ (struct.new_default_with_rtt $struct.A
+ (rtt.canon $struct.A)
+ )
+ )
+ (struct.get $struct.A 1
+ (block $block (result (ref null $struct.A))
+ (drop
+ (br_if $block
+ (local.get $0)
+ (i32.const 0)
+ )
+ )
+ (drop
+ ;; As in $two-branches, but the value here is our allocation, the same
+ ;; as in the first branch above us. We do not yet optimize such merges
+ ;; of our allocation, but we could in the future.
+ (br_if $block
+ (local.get $0)
+ (i32.const 0)
+ )
+ )
+ (return (f64.const 2.1828))
+ )
+ )
+ )
+
+ ;; CHECK: (func $br_if_flow (result f64)
+ ;; CHECK-NEXT: (local $0 (ref null $struct.A))
+ ;; CHECK-NEXT: (local.set $0
+ ;; CHECK-NEXT: (struct.new_default_with_rtt $struct.A
+ ;; CHECK-NEXT: (rtt.canon $struct.A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (struct.get $struct.A 1
+ ;; CHECK-NEXT: (block $block (result (ref null $struct.A))
+ ;; CHECK-NEXT: (call $send-ref
+ ;; CHECK-NEXT: (br_if $block
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (return
+ ;; CHECK-NEXT: (f64.const 2.1828)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $br_if_flow (result f64)
+ (local $0 (ref null $struct.A))
+ (local.set $0
+ (struct.new_default_with_rtt $struct.A
+ (rtt.canon $struct.A)
+ )
+ )
+ (struct.get $struct.A 1
+ (block $block (result (ref null $struct.A))
+ ;; If it were not for the call here then we would be able to optimize
+ ;; the allocation in this function. (The branch with the allocation is
+ ;; ok, but the br_if also flows the value into a call, which escapes it.)
+ (call $send-ref
+ (br_if $block
+ (local.get $0)
+ (i32.const 0)
+ )
+ )
+ (return (f64.const 2.1828))
+ )
+ )
+ )
+
;; CHECK: (func $ref-as-non-null
;; CHECK-NEXT: (local $ref (ref null $struct.A))
;; CHECK-NEXT: (local $1 i32)