summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/ConstantFieldPropagation.cpp35
-rw-r--r--test/lit/passes/cfp.wast123
2 files changed, 150 insertions, 8 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index a3dd6aa6f..36aebf3a4 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -139,18 +139,28 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
if (!info.hasNoted()) {
// This field is never written at all. That means that we do not even
// construct any data of this type, and so it is a logic error to reach
- // this location in the code. (Unless we are in an open-world
- // situation, which we assume we are not in.) Replace this get with a
- // trap. Note that we do not need to care about the nullability of the
- // reference, as if it should have trapped, we are replacing it with
- // another trap, which we allow to reorder (but we do need to care about
- // side effects in the reference, so keep it around).
+ // this location in the code. (Unless we are in an open-world situation,
+ // which we assume we are not in.) Replace this get with a trap. Note that
+ // we do not need to care about the nullability of the reference, as if it
+ // should have trapped, we are replacing it with another trap, which we
+ // allow to reorder (but we do need to care about side effects in the
+ // reference, so keep it around). We also do not need to care about
+ // synchronization since trapping accesses do not synchronize with other
+ // accesses.
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeUnreachable()));
changed = true;
return;
}
+ if (curr->order == MemoryOrder::AcqRel) {
+ // Removing an acquire get and preserving its synchronization properties
+ // would require inserting an acquire fence, but the fence would have
+ // stronger synchronization properties so might be more expensive.
+ // Instead, just skip the optimization.
+ return;
+ }
+
// If the value is not a constant, then it is unknown and we must give up
// on simply applying a constant. However, we can try to use a ref.test, if
// that is allowed.
@@ -166,8 +176,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
// constant value. (Leave it to further optimizations to get rid of the
// ref.)
auto* value = makeExpression(info, heapType, curr);
- replaceCurrent(builder.makeSequence(
- builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
+ auto* replacement = builder.blockify(
+ builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
+ // If this get is sequentially consistent, then it synchronizes with other
+ // threads at least by participating in the global order of sequentially
+ // consistent operations. Preserve that effect by replacing the access with
+ // a fence.
+ assert(curr->order != MemoryOrder::AcqRel);
+ if (curr->order == MemoryOrder::SeqCst) {
+ replacement = builder.blockify(replacement, builder.makeAtomicFence());
+ }
+ replaceCurrent(builder.blockify(replacement, value));
changed = true;
}
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index e70cfc639..e674fdc4b 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -2830,3 +2830,126 @@
)
)
)
+
+;; Atomic accesses require special handling
+(module
+ (rec
+ ;; CHECK: (rec
+ ;; CHECK-NEXT: (type $shared (shared (struct (field (mut i32)))))
+ (type $shared (shared (struct (mut i32))))
+ ;; CHECK: (type $unwritten (shared (struct (field (mut i32)))))
+ (type $unwritten (shared (struct (mut i32))))
+ )
+
+ ;; CHECK: (type $2 (func))
+
+ ;; CHECK: (type $3 (func (param (ref $shared))))
+
+ ;; CHECK: (type $4 (func (param (ref $unwritten))))
+
+ ;; CHECK: (func $init (type $2)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new_default $shared)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $init
+ (drop
+ (struct.new_default $shared)
+ )
+ )
+
+ ;; CHECK: (func $gets (type $3) (param $0 (ref $shared))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get acqrel $shared 0
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (atomic.fence)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $gets (param (ref $shared))
+ (drop
+ (struct.get $shared 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This is not optimized because we wouldn't want to replace it with a
+ ;; stronger acquire fence.
+ (struct.atomic.get acqrel $shared 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This can be optimized, but requires a seqcst fence.
+ (struct.atomic.get $shared 0
+ (local.get 0)
+ )
+ )
+ )
+
+ ;; CHECK: (func $traps (type $4) (param $0 (ref $unwritten))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $traps (param (ref $unwritten))
+ ;; These are all optimizable because they are known to trap. No fences are
+ ;; necessary.
+ (drop
+ (struct.get $unwritten 0
+ (local.get 0)
+ )
+ )
+ (drop
+ (struct.atomic.get acqrel $unwritten 0
+ (local.get 0)
+ )
+ )
+ (drop
+ (struct.atomic.get $unwritten 0
+ (local.get 0)
+ )
+ )
+ )
+)