author    | Thomas Lively <tlively@google.com> | 2024-12-18 13:40:28 -0800
committer | GitHub <noreply@github.com>        | 2024-12-18 13:40:28 -0800
commit    | 009078979a26b7f4ec99fdfe1929b26176575805
tree      | c959452030e090fd559090787dd47195f75190f5
parent    | c744bd18ce73b82cf23e64728a8167c61ae4e24a
Handle atomic accesses in ConstantFieldPropagation (#7159)
Sequentially consistent gets that are optimized out need to have seqcst
fences inserted in their place to preserve their effect on the global
ordering of sequentially consistent operations. In principle, acquire
gets could be similarly optimized with an acquire fence in their place,
but acquire fences synchronize more strongly than acquire gets, so this
may have a negative performance impact. For now, inhibit optimization of
acquire gets.
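
To illustrate the effect, here is a minimal before/after sketch based on the
test expectations added below (not text from the commit itself); $ref is a
placeholder for any non-null reference to the $shared type from the test, whose
field 0 is only ever written by struct.new_default and so is known to be 0:

  ;; before: a sequentially consistent get of the constant field
  (struct.atomic.get $shared 0
    (local.get $ref)
  )

  ;; after: the get is gone, but a seqcst fence preserves its place in the
  ;; global order of sequentially consistent operations
  (block (result i32)
    (drop
      (ref.as_non_null
        (local.get $ref)
      )
    )
    (atomic.fence)
    (i32.const 0)
  )

  ;; an acqrel get is left unmodified, since the acquire fence that would be
  ;; needed in its place synchronizes more strongly than the get itself
  (struct.atomic.get acqrel $shared 0
    (local.get $ref)
  )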
-rw-r--r-- | src/passes/ConstantFieldPropagation.cpp |  35
-rw-r--r-- | test/lit/passes/cfp.wast                | 123
2 files changed, 150 insertions, 8 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index a3dd6aa6f..36aebf3a4 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -139,18 +139,28 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     if (!info.hasNoted()) {
       // This field is never written at all. That means that we do not even
       // construct any data of this type, and so it is a logic error to reach
-      // this location in the code. (Unless we are in an open-world
-      // situation, which we assume we are not in.) Replace this get with a
-      // trap. Note that we do not need to care about the nullability of the
-      // reference, as if it should have trapped, we are replacing it with
-      // another trap, which we allow to reorder (but we do need to care about
-      // side effects in the reference, so keep it around).
+      // this location in the code. (Unless we are in an open-world situation,
+      // which we assume we are not in.) Replace this get with a trap. Note that
+      // we do not need to care about the nullability of the reference, as if it
+      // should have trapped, we are replacing it with another trap, which we
+      // allow to reorder (but we do need to care about side effects in the
+      // reference, so keep it around). We also do not need to care about
+      // synchronization since trapping accesses do not synchronize with other
+      // accesses.
       replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
                                           builder.makeUnreachable()));
       changed = true;
       return;
     }
 
+    if (curr->order == MemoryOrder::AcqRel) {
+      // Removing an acquire get and preserving its synchronization properties
+      // would require inserting an acquire fence, but the fence would have
+      // stronger synchronization properties so might be more expensive.
+      // Instead, just skip the optimization.
+      return;
+    }
+
     // If the value is not a constant, then it is unknown and we must give up
     // on simply applying a constant. However, we can try to use a ref.test, if
     // that is allowed.
@@ -166,8 +176,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     // constant value. (Leave it to further optimizations to get rid of the
     // ref.)
     auto* value = makeExpression(info, heapType, curr);
-    replaceCurrent(builder.makeSequence(
-      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
+    auto* replacement = builder.blockify(
+      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
+    // If this get is sequentially consistent, then it synchronizes with other
+    // threads at least by participating in the global order of sequentially
+    // consistent operations. Preserve that effect by replacing the access with
+    // a fence.
+    assert(curr->order != MemoryOrder::AcqRel);
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(builder.blockify(replacement, value));
     changed = true;
   }
 
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index e70cfc639..e674fdc4b 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -2830,3 +2830,126 @@
     )
   )
 )
+
+;; Atomic accesses require special handling
+(module
+  (rec
+    ;; CHECK:      (rec
+    ;; CHECK-NEXT:  (type $shared (shared (struct (field (mut i32)))))
+    (type $shared (shared (struct (mut i32))))
+    ;; CHECK:       (type $unwritten (shared (struct (field (mut i32)))))
+    (type $unwritten (shared (struct (mut i32))))
+  )
+
+  ;; CHECK:      (type $2 (func))
+
+  ;; CHECK:      (type $3 (func (param (ref $shared))))
+
+  ;; CHECK:      (type $4 (func (param (ref $unwritten))))
+
+  ;; CHECK:      (func $init (type $2)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_default $shared)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $init
+    (drop
+      (struct.new_default $shared)
+    )
+  )
+
+  ;; CHECK:      (func $gets (type $3) (param $0 (ref $shared))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get acqrel $shared 0
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gets (param (ref $shared))
+    (drop
+      (struct.get $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This is not optimized because we wouldn't want to replace it with a
+      ;; stronger acquire fence.
+      (struct.atomic.get acqrel $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This can be optimized, but requires a seqcst fence.
+      (struct.atomic.get $shared 0
+        (local.get 0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $traps (type $4) (param $0 (ref $unwritten))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $traps (param (ref $unwritten))
+    ;; These are all optimizable because they are known to trap. No fences are
+    ;; necessary.
+    (drop
+      (struct.get $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get acqrel $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get $unwritten 0
+        (local.get 0)
+      )
+    )
+  )
+)
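
A note on the test format: the ;; CHECK lines in cfp.wast are generated by
Binaryen's scripts/update_lit_checks.py rather than written by hand. To inspect
the pass output for a module like the one above manually, an invocation along
these lines should work (the exact flags are illustrative, not taken from this
commit):

  wasm-opt -all --cfp module.wast -S -o -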