diff options
author | Thomas Lively <tlively@google.com> | 2024-12-20 12:08:35 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-20 12:08:35 -0800 |
commit | 793e07369902026b727c5c4bc16a72ad3e4950e4 (patch) | |
tree | 5787dd3a26803c0d41d24bb1e2fa4420a2fe1a18 | |
parent | 7c42700738fa9f9aff29dd930e9fd7f6cf71fc29 (diff) | |
download | binaryen-793e07369902026b727c5c4bc16a72ad3e4950e4.tar.gz binaryen-793e07369902026b727c5c4bc16a72ad3e4950e4.tar.bz2 binaryen-793e07369902026b727c5c4bc16a72ad3e4950e4.zip |
Update GlobalStructInference to handle atomics (#7168)
GlobalStructInference optimizes gets of immutable fields of structs that
are only ever instantiated to initialize immutable globals. Due to all
the immutability, it's not possible for the optimized reads to
synchronize with any writes via the accessed memory, so we just need to
be careful to replace removed seqcst gets with seqcst fences.
As a drive-by, fix some stale comments in gsi.wast.
-rw-r--r-- | src/passes/GlobalStructInference.cpp | 35 | ||||
-rw-r--r-- | test/lit/passes/gsi.wast | 228 |
2 files changed, 251 insertions, 12 deletions
diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index 4158db051..f27df3a3c 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -359,6 +359,11 @@ struct GlobalStructInference : public Pass { // refined, which could change the struct.get's type. refinalize = true; } + // No need to worry about atomic gets here. We will still read from + // the same memory location as before and preserve all side effects + // (including synchronization) that were previously present. The + // memory location is immutable anyway, so there cannot be any writes + // to synchronize with in the first place. curr->ref = builder.makeSequence( builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), builder.makeGlobalGet(global, globalType)); @@ -457,10 +462,18 @@ struct GlobalStructInference : public Pass { // the early return above) so that only leaves 1 and 2. if (values.size() == 1) { // The case of 1 value is simple: trap if the ref is null, and - // otherwise return the value. - replaceCurrent(builder.makeSequence( - builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), - getReadValue(values[0]))); + // otherwise return the value. We must also fence if the get was + // seqcst. No additional work is necessary for an acquire get because + // there cannot have been any writes to this immutable field that it + // would synchronize with. + Expression* replacement = + builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)); + if (curr->order == MemoryOrder::SeqCst) { + replacement = + builder.blockify(replacement, builder.makeAtomicFence()); + } + replaceCurrent( + builder.blockify(replacement, getReadValue(values[0]))); return; } assert(values.size() == 2); @@ -486,11 +499,19 @@ struct GlobalStructInference : public Pass { // of their execution matters (they may note globals for un-nesting). auto* left = getReadValue(values[0]); auto* right = getReadValue(values[1]); - // Note that we must trap on null, so add a ref.as_non_null here. + // Note that we must trap on null, so add a ref.as_non_null here. We + // must also add a fence if this get is seqcst. As before, no extra work + // is necessary for an acquire get because there cannot be a write it + // synchronizes with. + Expression* getGlobal = + builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type); + if (curr->order == MemoryOrder::SeqCst) { + getGlobal = + builder.makeSequence(builder.makeAtomicFence(), getGlobal); + } replaceCurrent(builder.makeSelect( builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref), - builder.makeGlobalGet( - checkGlobal, wasm.getGlobal(checkGlobal)->type)), + getGlobal), left, right)); } diff --git a/test/lit/passes/gsi.wast b/test/lit/passes/gsi.wast index 299b19e00..7bc83efd4 100644 --- a/test/lit/passes/gsi.wast +++ b/test/lit/passes/gsi.wast @@ -144,16 +144,16 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test1 (param $struct1 (ref null $struct1)) (param $struct2 (ref null $struct2)) - ;; We can infer that this get must reference $global1 and make the reference - ;; point to that. Note that we do not infer the value of 42 here, but leave - ;; it for other passes to do. + ;; Even though the value here is not known at compile time - it reads an + ;; imported global - we can still infer that we are reading from $global1. (drop (struct.get $struct1 0 (local.get $struct1) ) ) - ;; Even though the value here is not known at compile time - it reads an - ;; imported global - we can still infer that we are reading from $global2. + ;; We can infer that this get must reference $global2 and make the reference + ;; point to that. Note that we do not infer the value of 42 here, but leave + ;; it for other passes to do. (drop (struct.get $struct2 0 (local.get $struct2) @@ -1944,3 +1944,221 @@ ) ) ) + +;; Test atomic gets. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $one (shared (struct (field i32)))) + (type $one (shared (struct (field i32)))) + ;; CHECK: (type $two (shared (struct (field i32)))) + (type $two (shared (struct (field i32)))) + ;; CHECK: (type $two-same (shared (struct (field i32)))) + (type $two-same (shared (struct (field i32)))) + ) + + ;; CHECK: (type $3 (func (param (ref $one)))) + + ;; CHECK: (type $4 (func (param (ref $two)))) + + ;; CHECK: (type $5 (func (param (ref $two-same)))) + + ;; CHECK: (global $one (ref $one) (struct.new $one + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: )) + (global $one (ref $one) (struct.new $one (i32.const 42))) + + ;; CHECK: (global $two-a (ref $two) (struct.new $two + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: )) + (global $two-a (ref $two) (struct.new $two (i32.const 42))) + + ;; CHECK: (global $two-b (ref $two) (struct.new $two + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: )) + (global $two-b (ref $two) (struct.new $two (i32.const 1337))) + + ;; CHECK: (global $two-same-a (ref $two-same) (struct.new $two-same + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: )) + (global $two-same-a (ref $two-same) (struct.new $two-same (i32.const 42))) + + ;; CHECK: (global $two-same-b (ref $two-same) (struct.new $two-same + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: )) + (global $two-same-b (ref $two-same) (struct.new $two-same (i32.const 42))) + + ;; CHECK: (func $one (type $3) (param $0 (ref $one)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $one 0 + ;; CHECK-NEXT: (block (result (ref $one)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $one) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.atomic.get acqrel $one 0 + ;; CHECK-NEXT: (block (result (ref $one)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $one) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.atomic.get $one 0 + ;; CHECK-NEXT: (block (result (ref $one)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $one) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $one (param (ref $one)) + (drop + (struct.get $one 0 + (local.get 0) + ) + ) + (drop + (struct.atomic.get acqrel $one 0 + (local.get 0) + ) + ) + (drop + (struct.atomic.get $one 0 + (local.get 0) + ) + ) + ) + + ;; CHECK: (func $two (type $4) (param $0 (ref $two)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $two-a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $two-a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result (ref $two)) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (global.get $two-a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $two (param (ref $two)) + (drop + (struct.get $two 0 + (local.get 0) + ) + ) + (drop + ;; This is optimized normally because there cannot be any writes it + ;; synchronizes with. + (struct.atomic.get acqrel $two 0 + (local.get 0) + ) + ) + (drop + ;; This requires a fence to maintain its effect on the global order of + ;; seqcst operations. + (struct.atomic.get $two 0 + (local.get 0) + ) + ) + ) + + ;; CHECK: (func $two-same (type $5) (param $0 (ref $two-same)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $two-same (param (ref $two-same)) + (drop + (struct.get $two-same 0 + (local.get 0) + ) + ) + (drop + ;; This is optimized normally because there cannot be any writes it + ;; synchronizes with. + (struct.atomic.get acqrel $two-same 0 + (local.get 0) + ) + ) + (drop + ;; This requires a fence to maintain its effect on the global order of + ;; seqcst operations. + (struct.atomic.get $two-same 0 + (local.get 0) + ) + ) + ) +) |