-rw-r--r--   src/passes/ConstantFieldPropagation.cpp |  35
-rw-r--r--   test/lit/passes/cfp.wast                 | 123
2 files changed, 150 insertions, 8 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index a3dd6aa6f..36aebf3a4 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -139,18 +139,28 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     if (!info.hasNoted()) {
       // This field is never written at all. That means that we do not even
       // construct any data of this type, and so it is a logic error to reach
-      // this location in the code. (Unless we are in an open-world
-      // situation, which we assume we are not in.) Replace this get with a
-      // trap. Note that we do not need to care about the nullability of the
-      // reference, as if it should have trapped, we are replacing it with
-      // another trap, which we allow to reorder (but we do need to care about
-      // side effects in the reference, so keep it around).
+      // this location in the code. (Unless we are in an open-world situation,
+      // which we assume we are not in.) Replace this get with a trap. Note that
+      // we do not need to care about the nullability of the reference, as if it
+      // should have trapped, we are replacing it with another trap, which we
+      // allow to reorder (but we do need to care about side effects in the
+      // reference, so keep it around). We also do not need to care about
+      // synchronization since trapping accesses do not synchronize with other
+      // accesses.
       replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
                                           builder.makeUnreachable()));
       changed = true;
       return;
     }
 
+    if (curr->order == MemoryOrder::AcqRel) {
+      // Removing an acquire get and preserving its synchronization properties
+      // would require inserting an acquire fence, but the fence would have
+      // stronger synchronization properties so might be more expensive.
+      // Instead, just skip the optimization.
+      return;
+    }
+
     // If the value is not a constant, then it is unknown and we must give up
     // on simply applying a constant. However, we can try to use a ref.test, if
     // that is allowed.
@@ -166,8 +176,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     // constant value. (Leave it to further optimizations to get rid of the
     // ref.)
     auto* value = makeExpression(info, heapType, curr);
-    replaceCurrent(builder.makeSequence(
-      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
+    auto* replacement = builder.blockify(
+      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
+    // If this get is sequentially consistent, then it synchronizes with other
+    // threads at least by participating in the global order of sequentially
+    // consistent operations. Preserve that effect by replacing the access with
+    // a fence.
+    assert(curr->order != MemoryOrder::AcqRel);
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(builder.blockify(replacement, value));
     changed = true;
   }
 
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index e70cfc639..e674fdc4b 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -2830,3 +2830,126 @@
     )
   )
 )
+
+;; Atomic accesses require special handling
+(module
+  (rec
+    ;; CHECK:      (rec
+    ;; CHECK-NEXT:  (type $shared (shared (struct (field (mut i32)))))
+    (type $shared (shared (struct (mut i32))))
+    ;; CHECK:       (type $unwritten (shared (struct (field (mut i32)))))
+    (type $unwritten (shared (struct (mut i32))))
+  )
+
+  ;; CHECK:      (type $2 (func))
+
+  ;; CHECK:      (type $3 (func (param (ref $shared))))
+
+  ;; CHECK:      (type $4 (func (param (ref $unwritten))))
+
+  ;; CHECK:      (func $init (type $2)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_default $shared)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $init
+    (drop
+      (struct.new_default $shared)
+    )
+  )
+
+  ;; CHECK:      (func $gets (type $3) (param $0 (ref $shared))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get acqrel $shared 0
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gets (param (ref $shared))
+    (drop
+      (struct.get $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This is not optimized because we wouldn't want to replace it with a
+      ;; stronger acquire fence.
+      (struct.atomic.get acqrel $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This can be optimized, but requires a seqcst fence.
+      (struct.atomic.get $shared 0
+        (local.get 0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $traps (type $4) (param $0 (ref $unwritten))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $traps (param (ref $unwritten))
+    ;; These are all optimizable because they are known to trap. No fences are
+    ;; necessary.
+    (drop
+      (struct.get $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get acqrel $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get $unwritten 0
+        (local.get 0)
+      )
+    )
+  )
+)
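
The effect of the change on the sequentially consistent case, condensed from the new test's CHECK output (a sketch, not part of the commit; the local name $ref is illustrative): a seqcst struct.atomic.get of a field whose only possible value is the default 0 (the struct is only created with struct.new_default) is replaced by a dropped null check on the reference, a seqcst fence that preserves the access's place in the global order of sequentially consistent operations, and the constant itself.

  ;; before ConstantFieldPropagation: a seqcst get of a field known to be 0
  (struct.atomic.get $shared 0
    (local.get $ref)
  )

  ;; after: keep the trap on null and the seqcst synchronization
  (block (result i32)
    (drop
      (ref.as_non_null
        (local.get $ref)
      )
    )
    (atomic.fence)
    (i32.const 0)
  )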