author    Thomas Lively <tlively@google.com>    2024-12-18 13:40:28 -0800
committer GitHub <noreply@github.com>    2024-12-18 13:40:28 -0800
commit    009078979a26b7f4ec99fdfe1929b26176575805 (patch)
tree      c959452030e090fd559090787dd47195f75190f5
parent    c744bd18ce73b82cf23e64728a8167c61ae4e24a (diff)
Handle atomic accesses in ConstantFieldPropagation (#7159)
Sequentially consistent gets that are optimized out need to have seqcst fences inserted in their place to preserve their effect on the global ordering of sequentially consistent operations. In principle, acquire gets could be similarly optimized by putting an acquire fence in their place, but acquire fences synchronize more strongly than acquire gets, so this could have a negative performance impact. For now, inhibit the optimization of acquire gets.
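As a rough before/after sketch of the intended rewrite (hypothetical module: a shared struct type $t whose i32 field is only ever written with i32.const 1; following the test below, an unannotated struct.atomic.get is sequentially consistent):

    ;; before
    (struct.atomic.get $t 0          ;; seqcst get of a field known to be 1
      (local.get $r))

    ;; after --cfp (sketch)
    (block (result i32)
      (drop (ref.as_non_null (local.get $r)))  ;; keep ref side effects and the null trap
      (atomic.fence)                           ;; preserve the seqcst ordering effect
      (i32.const 1))

An acqrel get of the same field is left untouched, since the only available replacement fence would synchronize more strongly than the original acquire get.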
-rw-r--r--    src/passes/ConstantFieldPropagation.cpp     35
-rw-r--r--    test/lit/passes/cfp.wast                   123
2 files changed, 150 insertions(+), 8 deletions(-)
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index a3dd6aa6f..36aebf3a4 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -139,18 +139,28 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
if (!info.hasNoted()) {
// This field is never written at all. That means that we do not even
// construct any data of this type, and so it is a logic error to reach
- // this location in the code. (Unless we are in an open-world
- // situation, which we assume we are not in.) Replace this get with a
- // trap. Note that we do not need to care about the nullability of the
- // reference, as if it should have trapped, we are replacing it with
- // another trap, which we allow to reorder (but we do need to care about
- // side effects in the reference, so keep it around).
+ // this location in the code. (Unless we are in an open-world situation,
+ // which we assume we are not in.) Replace this get with a trap. Note that
+ // we do not need to care about the nullability of the reference, as if it
+ // should have trapped, we are replacing it with another trap, which we
+ // allow to reorder (but we do need to care about side effects in the
+ // reference, so keep it around). We also do not need to care about
+ // synchronization since trapping accesses do not synchronize with other
+ // accesses.
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeUnreachable()));
changed = true;
return;
}
+ if (curr->order == MemoryOrder::AcqRel) {
+ // Removing an acquire get and preserving its synchronization properties
+ // would require inserting an acquire fence, but the fence would have
+ // stronger synchronization properties so might be more expensive.
+ // Instead, just skip the optimization.
+ return;
+ }
+
// If the value is not a constant, then it is unknown and we must give up
// on simply applying a constant. However, we can try to use a ref.test, if
// that is allowed.
@@ -166,8 +176,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
// constant value. (Leave it to further optimizations to get rid of the
// ref.)
auto* value = makeExpression(info, heapType, curr);
- replaceCurrent(builder.makeSequence(
- builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
+ auto* replacement = builder.blockify(
+ builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
+ // If this get is sequentially consistent, then it synchronizes with other
+ // threads at least by participating in the global order of sequentially
+ // consistent operations. Preserve that effect by replacing the access with
+ // a fence.
+ assert(curr->order != MemoryOrder::AcqRel);
+ if (curr->order == MemoryOrder::SeqCst) {
+ replacement = builder.blockify(replacement, builder.makeAtomicFence());
+ }
+ replaceCurrent(builder.blockify(replacement, value));
changed = true;
}
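For the never-written case handled in the first hunk above, the replacement is just the dropped reference followed by unreachable, with no fence even for atomic gets, since a trapping access does not synchronize with other accesses. A minimal sketch (hypothetical type $unwritten that is never allocated):

    ;; before
    (struct.atomic.get $unwritten 0
      (local.get $r))

    ;; after --cfp (sketch)
    (block
      (drop (local.get $r))  ;; keep side effects of the reference operand
      (unreachable))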
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index e70cfc639..e674fdc4b 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -2830,3 +2830,126 @@
)
)
)
+
+;; Atomic accesses require special handling
+(module
+ (rec
+ ;; CHECK: (rec
+ ;; CHECK-NEXT: (type $shared (shared (struct (field (mut i32)))))
+ (type $shared (shared (struct (mut i32))))
+ ;; CHECK: (type $unwritten (shared (struct (field (mut i32)))))
+ (type $unwritten (shared (struct (mut i32))))
+ )
+
+ ;; CHECK: (type $2 (func))
+
+ ;; CHECK: (type $3 (func (param (ref $shared))))
+
+ ;; CHECK: (type $4 (func (param (ref $unwritten))))
+
+ ;; CHECK: (func $init (type $2)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new_default $shared)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $init
+ (drop
+ (struct.new_default $shared)
+ )
+ )
+
+ ;; CHECK: (func $gets (type $3) (param $0 (ref $shared))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get acqrel $shared 0
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (atomic.fence)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $gets (param (ref $shared))
+ (drop
+ (struct.get $shared 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This is not optimized because we wouldn't want to replace it with a
+ ;; stronger acquire fence.
+ (struct.atomic.get acqrel $shared 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This can be optimized, but requires a seqcst fence.
+ (struct.atomic.get $shared 0
+ (local.get 0)
+ )
+ )
+ )
+
+ ;; CHECK: (func $traps (type $4) (param $0 (ref $unwritten))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $traps (param (ref $unwritten))
+ ;; These are all optimizable because they are known to trap. No fences are
+ ;; necessary.
+ (drop
+ (struct.get $unwritten 0
+ (local.get 0)
+ )
+ )
+ (drop
+ (struct.atomic.get acqrel $unwritten 0
+ (local.get 0)
+ )
+ )
+ (drop
+ (struct.atomic.get $unwritten 0
+ (local.get 0)
+ )
+ )
+ )
+)
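The test file's RUN line is outside this hunk, but the new cases should be reproducible with an invocation roughly like the following (hypothetical input file name; -all enables the shared struct types the test uses):

    wasm-opt --cfp -all cfp-atomics.wast -S -o -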