author    Thomas Lively <tlively@google.com>  2024-12-17 23:06:03 -0800
committer GitHub <noreply@github.com>         2024-12-18 07:06:03 +0000
commit    d444abdc9fcee98715813f03e28aa7070879c414
tree      79e074285c57490d15cf34d4f1c377cd89da88eb
parent    7c8cd2f4a51213964f6f1ec11e54d2b6e721cdaf
Handle atomic accesses in Heap2Local (#7158)
Heap2Local replaces gets and sets of non-escaping heap allocations with gets and sets of locals. Since the accessed data does not escape, it cannot be used to directly synchronize with other threads, so this optimization is generally safe even in the presence of shared structs and atomic struct accesses. The only caveat is that sequentially consistent accesses additionally participate in the global ordering of sequentially consistent operations, and that effect on the global ordering cannot be removed. Insert seqcst fences to maintain this global synchronization when removing sequentially consistent gets and sets.
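
As an illustrative sketch (not part of the commit; the names $s, $ref, and $field are hypothetical), the seqcst case transforms roughly as follows for a non-escaping shared struct with one mutable i32 field:

    ;; before: a sequentially consistent store to a non-escaping allocation
    (struct.atomic.set $s 0      ;; seqcst is the default memory order
      (local.get $ref)           ;; $ref holds a local struct.new result
      (i32.const 1))

    ;; after (simplified): the store becomes a local.set, and a fence keeps
    ;; the operation's place in the global seqcst ordering
    (local.set $field (i32.const 1))
    (atomic.fence)

An acqrel access, by contrast, is rewritten to the plain local operation with no fence, since it imposes no global ordering.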
-rw-r--r--  src/passes/Heap2Local.cpp         22
-rw-r--r--  test/lit/passes/heap2local.wast  103
2 files changed, 122 insertions, 3 deletions
diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index c31bc8436..a6223d4fc 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -839,9 +839,19 @@ struct Struct2Local : PostWalker<Struct2Local> {
     // Drop the ref (leaving it to other opts to remove, when possible), and
     // write the data to the local instead of the heap allocation.
-    replaceCurrent(builder.makeSequence(
+    auto* replacement = builder.makeSequence(
       builder.makeDrop(curr->ref),
-      builder.makeLocalSet(localIndexes[curr->index], curr->value)));
+      builder.makeLocalSet(localIndexes[curr->index], curr->value));
+
+    // This struct.set cannot possibly synchronize with other threads via the
+    // written value, since the struct never escapes this function. But if the
+    // set is sequentially consistent, it still participates in the global
+    // order of sequentially consistent operations. Preserve this effect on
+    // the global ordering by inserting a fence.
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(replacement);
   }

   void visitStructGet(StructGet* curr) {
@@ -873,7 +883,13 @@ struct Struct2Local : PostWalker<Struct2Local> {
     // general. However, signed gets make that more complicated, so leave this
     // for other opts to handle.
     value = Bits::makePackedFieldGet(value, field, curr->signed_, wasm);
-    replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), value));
+    auto* replacement = builder.blockify(builder.makeDrop(curr->ref));
+    // See the note on seqcst struct.set. It is ok to insert the fence before
+    // the value here since we know the value is just a local.get.
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(builder.blockify(replacement, value));
   }
 };
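
For reference (a simplified shape, with $field hypothetical), the replacement built here for a seqcst struct.atomic.get mirrors the set case:

    (block (result i32)
      (drop (ref.null (shared none)))  ;; the dropped reference operand
      (atomic.fence)                   ;; keeps the seqcst global ordering
      (local.get $field)               ;; the field's local replaces the load
    )

Emitting the fence before the value is safe because the value is just a local.get, which has no side effects, so its position relative to the fence does not matter.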
diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast
index bad4a33bf..0af7e7bb4 100644
--- a/test/lit/passes/heap2local.wast
+++ b/test/lit/passes/heap2local.wast
@@ -4608,3 +4608,106 @@
   )
  )
 )
+
+;; Atomic accesses need special handling.
+(module
+  ;; CHECK:      (type $0 (func))

+  ;; CHECK:      (type $struct (shared (struct (field (mut i32)))))
+  (type $struct (shared (struct (field (mut i32)))))

+  ;; CHECK:      (func $acqrel (type $0)
+  ;; CHECK-NEXT:  (local $0 (ref null $struct))
+  ;; CHECK-NEXT:  (local $1 i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result (ref null (shared none)))
+  ;; CHECK-NEXT:    (local.set $1
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.null (shared none))
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (local.get $1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (block
+  ;; CHECK-NEXT:   (drop
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (local.set $1
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $acqrel
+    (local (ref null $struct))
+    (local.set 0
+      (struct.new_default $struct)
+    )
+    ;; acqrel accesses to non-escaping structs cannot synchronize with other
+    ;; threads, so we can optimize normally.
+    (drop
+      (struct.atomic.get acqrel $struct 0
+        (local.get 0)
+      )
+    )
+    (struct.atomic.set acqrel $struct 0
+      (local.get 0)
+      (i32.const 0)
+    )
+  )

+  ;; CHECK:      (func $seqcst (type $0)
+  ;; CHECK-NEXT:  (local $0 (ref null $struct))
+  ;; CHECK-NEXT:  (local $1 i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result (ref null (shared none)))
+  ;; CHECK-NEXT:    (local.set $1
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.null (shared none))
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (local.get $1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (block
+  ;; CHECK-NEXT:   (drop
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (local.set $1
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (atomic.fence)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $seqcst
+    (local (ref null $struct))
+    (local.set 0
+      (struct.new_default $struct)
+    )
+    ;; seqcst accesses participate in the global ordering of seqcst
+    ;; operations, so when we optimize them away, we insert a seqcst fence to
+    ;; maintain that ordering.
+    (drop
+      (struct.atomic.get $struct 0
+        (local.get 0)
+      )
+    )
+    (struct.atomic.set $struct 0
+      (local.get 0)
+      (i32.const 0)
+    )
+  )
+)
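
The pass can be run on modules like these with wasm-opt (file names here are hypothetical); -all enables the shared-everything and threads features the module requires, and -S emits text output:

    wasm-opt --heap2local -all -S input.wast -o output.wast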