diff options
-rw-r--r-- | src/passes/Heap2Local.cpp | 22 | ||||
-rw-r--r-- | test/lit/passes/heap2local.wast | 103 |
2 files changed, 122 insertions, 3 deletions
diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index c31bc8436..a6223d4fc 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -839,9 +839,19 @@ struct Struct2Local : PostWalker<Struct2Local> { // Drop the ref (leaving it to other opts to remove, when possible), and // write the data to the local instead of the heap allocation. - replaceCurrent(builder.makeSequence( + auto* replacement = builder.makeSequence( builder.makeDrop(curr->ref), - builder.makeLocalSet(localIndexes[curr->index], curr->value))); + builder.makeLocalSet(localIndexes[curr->index], curr->value)); + + // This struct.set cannot possibly synchronize with other threads via the + // read value, since the struct never escapes this function. But if the set + // is sequentially consistent, it still participates in the global order of + // sequentially consistent operations. Preserve this effect on the global + // ordering by inserting a fence. + if (curr->order == MemoryOrder::SeqCst) { + replacement = builder.blockify(replacement, builder.makeAtomicFence()); + } + replaceCurrent(replacement); } void visitStructGet(StructGet* curr) { @@ -873,7 +883,13 @@ struct Struct2Local : PostWalker<Struct2Local> { // general. However, signed gets make that more complicated, so leave this // for other opts to handle. value = Bits::makePackedFieldGet(value, field, curr->signed_, wasm); - replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), value)); + auto* replacement = builder.blockify(builder.makeDrop(curr->ref)); + // See the note on seqcst struct.set. It is ok to insert the fence before + // the value here since we know the value is just a local.get. + if (curr->order == MemoryOrder::SeqCst) { + replacement = builder.blockify(replacement, builder.makeAtomicFence()); + } + replaceCurrent(builder.blockify(replacement, value)); } }; diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index bad4a33bf..0af7e7bb4 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -4608,3 +4608,106 @@ ) ) ) + +;; Atomic accesses need special handling +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $struct (shared (struct (field (mut i32))))) + (type $struct (shared (struct (field (mut i32))))) + + ;; CHECK: (func $acqrel (type $0) + ;; CHECK-NEXT: (local $0 (ref null $struct)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null (shared none))) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acqrel + (local (ref null $struct)) + (local.set 0 + (struct.new_default $struct) + ) + ;; acqrel accesses to non-escaping structs cannot synchronize with other + ;; threads, so we can optimize normally. + (drop + (struct.atomic.get acqrel $struct 0 + (local.get 0) + ) + ) + (struct.atomic.set acqrel $struct 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $seqcst (type $0) + ;; CHECK-NEXT: (local $0 (ref null $struct)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null (shared none))) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $seqcst + (local (ref null $struct)) + (local.set 0 + (struct.new_default $struct) + ) + ;; seqcst accesses participate in the global ordering of seqcst operations, + ;; so they need to be replaced with a seqcst fence to maintain that + ;; ordering. + (drop + (struct.atomic.get $struct 0 + (local.get 0) + ) + ) + (struct.atomic.set $struct 0 + (local.get 0) + (i32.const 0) + ) + ) +) |