author    | Thomas Lively <tlively@google.com> | 2024-12-18 13:40:28 -0800
committer | GitHub <noreply@github.com>        | 2024-12-18 13:40:28 -0800
commit    | 009078979a26b7f4ec99fdfe1929b26176575805
tree      | c959452030e090fd559090787dd47195f75190f5
parent    | c744bd18ce73b82cf23e64728a8167c61ae4e24a
Handle atomic accesses in ConstantFieldPropagation (#7159)
Sequentially consistent gets that are optimized out need to have seqcst
fences inserted in their place to preserve their effect on the global
ordering of sequentially consistent operations. In principle, acquire
gets could be similarly optimized with an acquire fence in their place,
but acquire fences synchronize more strongly than acquire gets, so this
may have a negative performance impact. For now, inhibit optimization of
acquire gets.
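
To illustrate the effect, here is a minimal before/after sketch based on the
test expectations added below (not text from the commit itself); $ref is a
placeholder for any non-null reference to the $shared type from the test, whose
field 0 is only ever written by struct.new_default and so is known to be 0:

  ;; before: a sequentially consistent get of the constant field
  (struct.atomic.get $shared 0
    (local.get $ref)
  )

  ;; after: the get is gone, but a seqcst fence preserves its place in the
  ;; global order of sequentially consistent operations
  (block (result i32)
    (drop
      (ref.as_non_null
        (local.get $ref)
      )
    )
    (atomic.fence)
    (i32.const 0)
  )

  ;; an acqrel get is left unmodified, since the acquire fence that would be
  ;; needed in its place synchronizes more strongly than the get itself
  (struct.atomic.get acqrel $shared 0
    (local.get $ref)
  )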
-rw-r--r-- | src/passes/ConstantFieldPropagation.cpp |  35
-rw-r--r-- | test/lit/passes/cfp.wast                | 123
2 files changed, 150 insertions, 8 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index a3dd6aa6f..36aebf3a4 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -139,18 +139,28 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     if (!info.hasNoted()) {
       // This field is never written at all. That means that we do not even
       // construct any data of this type, and so it is a logic error to reach
-      // this location in the code. (Unless we are in an open-world
-      // situation, which we assume we are not in.) Replace this get with a
-      // trap. Note that we do not need to care about the nullability of the
-      // reference, as if it should have trapped, we are replacing it with
-      // another trap, which we allow to reorder (but we do need to care about
-      // side effects in the reference, so keep it around).
+      // this location in the code. (Unless we are in an open-world situation,
+      // which we assume we are not in.) Replace this get with a trap. Note that
+      // we do not need to care about the nullability of the reference, as if it
+      // should have trapped, we are replacing it with another trap, which we
+      // allow to reorder (but we do need to care about side effects in the
+      // reference, so keep it around). We also do not need to care about
+      // synchronization since trapping accesses do not synchronize with other
+      // accesses.
       replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
                                           builder.makeUnreachable()));
       changed = true;
       return;
     }
 
+    if (curr->order == MemoryOrder::AcqRel) {
+      // Removing an acquire get and preserving its synchronization properties
+      // would require inserting an acquire fence, but the fence would have
+      // stronger synchronization properties so might be more expensive.
+      // Instead, just skip the optimization.
+      return;
+    }
+
     // If the value is not a constant, then it is unknown and we must give up
     // on simply applying a constant. However, we can try to use a ref.test, if
     // that is allowed.
@@ -166,8 +176,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     // constant value. (Leave it to further optimizations to get rid of the
     // ref.)
     auto* value = makeExpression(info, heapType, curr);
-    replaceCurrent(builder.makeSequence(
-      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
+    auto* replacement = builder.blockify(
+      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
+    // If this get is sequentially consistent, then it synchronizes with other
+    // threads at least by participating in the global order of sequentially
+    // consistent operations. Preserve that effect by replacing the access with
+    // a fence.
+    assert(curr->order != MemoryOrder::AcqRel);
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(builder.blockify(replacement, value));
     changed = true;
   }
 
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index e70cfc639..e674fdc4b 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -2830,3 +2830,126 @@
     )
   )
 )
+
+;; Atomic accesses require special handling
+(module
+  (rec
+    ;; CHECK:      (rec
+    ;; CHECK-NEXT:  (type $shared (shared (struct (field (mut i32)))))
+    (type $shared (shared (struct (mut i32))))
+    ;; CHECK:       (type $unwritten (shared (struct (field (mut i32)))))
+    (type $unwritten (shared (struct (mut i32))))
+  )
+
+  ;; CHECK:      (type $2 (func))
+
+  ;; CHECK:      (type $3 (func (param (ref $shared))))
+
+  ;; CHECK:      (type $4 (func (param (ref $unwritten))))
+
+  ;; CHECK:      (func $init (type $2)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_default $shared)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $init
+    (drop
+      (struct.new_default $shared)
+    )
+  )
+
+  ;; CHECK:      (func $gets (type $3) (param $0 (ref $shared))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get acqrel $shared 0
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gets (param (ref $shared))
+    (drop
+      (struct.get $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This is not optimized because we wouldn't want to replace it with a
+      ;; stronger acquire fence.
+      (struct.atomic.get acqrel $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This can be optimized, but requires a seqcst fence.
+      (struct.atomic.get $shared 0
+        (local.get 0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $traps (type $4) (param $0 (ref $unwritten))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $traps (param (ref $unwritten))
+    ;; These are all optimizable because they are known to trap. No fences are
+    ;; necessary.
+    (drop
+      (struct.get $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get acqrel $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get $unwritten 0
+        (local.get 0)
+      )
+    )
+  )
+)
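
A note on the test format: the ;; CHECK lines in cfp.wast are generated by
Binaryen's scripts/update_lit_checks.py rather than written by hand. To inspect
the pass output for a module like the one above manually, an invocation along
these lines should work (the exact flags are illustrative, not taken from this
commit):

  wasm-opt -all --cfp module.wast -S -o -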