author    Thomas Lively <tlively@google.com>    2024-12-17 23:06:03 -0800
committer GitHub <noreply@github.com>           2024-12-18 07:06:03 +0000
commit    d444abdc9fcee98715813f03e28aa7070879c414 (patch)
tree      79e074285c57490d15cf34d4f1c377cd89da88eb
parent    7c8cd2f4a51213964f6f1ec11e54d2b6e721cdaf (diff)
Handle atomic accesses in Heap2Local (#7158)
Heap2Local replaces gets and sets of non-escaping heap allocations with
gets and sets of locals. Since the accessed data does not escape, it
cannot be used to directly synchronize with other threads, so this
optimization is generally safe even in the presence of shared structs
and atomic struct accesses. The only caveat is that sequentially
consistent accesses additionally participate in the global ordering of
sequentially consistent operations, and that effect on the global
ordering cannot be removed. Insert seqcst fences to maintain this global
synchronization when removing sequentially consistent gets and sets.
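
To illustrate the transformation in wat, here is a minimal sketch (a hypothetical module for illustration, not taken from the tests in this commit; in the text format used below, seqcst is the implied memory order when none is written):

  ;; Before: the struct never escapes the function, but the set is seqcst.
  (func $example
    (local $s (ref null $struct))
    (local.set $s
      (struct.new_default $struct)
    )
    (struct.atomic.set $struct 0
      (local.get $s)
      (i32.const 1)
    )
  )

  ;; After Heap2Local (simplified): the field lives in a local, and a seqcst
  ;; fence preserves the operation's place in the global seqcst ordering.
  (func $example
    (local $field i32)
    (local.set $field
      (i32.const 1)
    )
    (atomic.fence)
  )

An acqrel access, by contrast, needs no fence: it can only synchronize through the accessed data itself, which never escapes.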
-rw-r--r--  src/passes/Heap2Local.cpp       |  22
-rw-r--r--  test/lit/passes/heap2local.wast | 103
2 files changed, 122 insertions, 3 deletions
diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index c31bc8436..a6223d4fc 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -839,9 +839,19 @@ struct Struct2Local : PostWalker<Struct2Local> {

     // Drop the ref (leaving it to other opts to remove, when possible), and
     // write the data to the local instead of the heap allocation.
-    replaceCurrent(builder.makeSequence(
+    auto* replacement = builder.makeSequence(
       builder.makeDrop(curr->ref),
-      builder.makeLocalSet(localIndexes[curr->index], curr->value)));
+      builder.makeLocalSet(localIndexes[curr->index], curr->value));
+
+    // This struct.set cannot possibly synchronize with other threads via the
+    // read value, since the struct never escapes this function. But if the set
+    // is sequentially consistent, it still participates in the global order of
+    // sequentially consistent operations. Preserve this effect on the global
+    // ordering by inserting a fence.
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(replacement);
   }

   void visitStructGet(StructGet* curr) {
@@ -873,7 +883,13 @@ struct Struct2Local : PostWalker<Struct2Local> {
     // general. However, signed gets make that more complicated, so leave this
     // for other opts to handle.
     value = Bits::makePackedFieldGet(value, field, curr->signed_, wasm);
-    replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), value));
+    auto* replacement = builder.blockify(builder.makeDrop(curr->ref));
+    // See the note on seqcst struct.set. It is ok to insert the fence before
+    // the value here since we know the value is just a local.get.
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(builder.blockify(replacement, value));
   }
 };

diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast
index bad4a33bf..0af7e7bb4 100644
--- a/test/lit/passes/heap2local.wast
+++ b/test/lit/passes/heap2local.wast
@@ -4608,3 +4608,106 @@
     )
   )
 )
+
+;; Atomic accesses need special handling
+(module
+  ;; CHECK:      (type $0 (func))
+
+  ;; CHECK:      (type $struct (shared (struct (field (mut i32)))))
+  (type $struct (shared (struct (field (mut i32)))))
+
+  ;; CHECK:      (func $acqrel (type $0)
+  ;; CHECK-NEXT:  (local $0 (ref null $struct))
+  ;; CHECK-NEXT:  (local $1 i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result (ref null (shared none)))
+  ;; CHECK-NEXT:    (local.set $1
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.null (shared none))
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (local.get $1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (block
+  ;; CHECK-NEXT:   (drop
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (local.set $1
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $acqrel
+    (local (ref null $struct))
+    (local.set 0
+      (struct.new_default $struct)
+    )
+    ;; acqrel accesses to non-escaping structs cannot synchronize with other
+    ;; threads, so we can optimize normally.
+    (drop
+      (struct.atomic.get acqrel $struct 0
+        (local.get 0)
+      )
+    )
+    (struct.atomic.set acqrel $struct 0
+      (local.get 0)
+      (i32.const 0)
+    )
+  )
+
+  ;; CHECK:      (func $seqcst (type $0)
+  ;; CHECK-NEXT:  (local $0 (ref null $struct))
+  ;; CHECK-NEXT:  (local $1 i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result (ref null (shared none)))
+  ;; CHECK-NEXT:    (local.set $1
+  ;; CHECK-NEXT:     (i32.const 0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.null (shared none))
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (local.get $1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (block
+  ;; CHECK-NEXT:   (drop
+  ;; CHECK-NEXT:    (ref.null (shared none))
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (local.set $1
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (atomic.fence)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $seqcst
+    (local (ref null $struct))
+    (local.set 0
+      (struct.new_default $struct)
+    )
+    ;; seqcst accesses participate in the global ordering of seqcst operations,
+    ;; so they need to be replaced with a seqcst fence to maintain that
+    ;; ordering.
+    (drop
+      (struct.atomic.get $struct 0
+        (local.get 0)
+      )
+    )
+    (struct.atomic.set $struct 0
+      (local.get 0)
+      (i32.const 0)
+    )
+  )
+)
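
For context, Binaryen lit tests like heap2local.wast are driven by a RUN line at the top of the file, which this hunk does not touch and the diff therefore does not show. A sketch of a typical invocation for this pass (the exact flags in the file are an assumption here):

  ;; RUN: wasm-opt %s -all --heap2local -S -o - | filecheck %s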