summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2024-12-20 12:08:35 -0800
committerGitHub <noreply@github.com>2024-12-20 12:08:35 -0800
commit793e07369902026b727c5c4bc16a72ad3e4950e4 (patch)
tree5787dd3a26803c0d41d24bb1e2fa4420a2fe1a18
parent7c42700738fa9f9aff29dd930e9fd7f6cf71fc29 (diff)
downloadbinaryen-793e07369902026b727c5c4bc16a72ad3e4950e4.tar.gz
binaryen-793e07369902026b727c5c4bc16a72ad3e4950e4.tar.bz2
binaryen-793e07369902026b727c5c4bc16a72ad3e4950e4.zip
Update GlobalStructInference to handle atomics (#7168)
GlobalStructInference optimizes gets of immutable fields of structs that are only ever instantiated to initialize immutable globals. Due to all the immutability, it's not possible for the optimized reads to synchronize with any writes via the accessed memory, so we just need to be careful to replace removed seqcst gets with seqcst fences. As a drive-by, fix some stale comments in gsi.wast.
-rw-r--r--src/passes/GlobalStructInference.cpp35
-rw-r--r--test/lit/passes/gsi.wast228
2 files changed, 251 insertions, 12 deletions
diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp
index 4158db051..f27df3a3c 100644
--- a/src/passes/GlobalStructInference.cpp
+++ b/src/passes/GlobalStructInference.cpp
@@ -359,6 +359,11 @@ struct GlobalStructInference : public Pass {
// refined, which could change the struct.get's type.
refinalize = true;
}
+ // No need to worry about atomic gets here. We will still read from
+ // the same memory location as before and preserve all side effects
+ // (including synchronization) that were previously present. The
+ // memory location is immutable anyway, so there cannot be any writes
+ // to synchronize with in the first place.
curr->ref = builder.makeSequence(
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
builder.makeGlobalGet(global, globalType));
@@ -457,10 +462,18 @@ struct GlobalStructInference : public Pass {
// the early return above) so that only leaves 1 and 2.
if (values.size() == 1) {
// The case of 1 value is simple: trap if the ref is null, and
- // otherwise return the value.
- replaceCurrent(builder.makeSequence(
- builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
- getReadValue(values[0])));
+ // otherwise return the value. We must also fence if the get was
+ // seqcst. No additional work is necessary for an acquire get because
+ // there cannot have been any writes to this immutable field that it
+ // would synchronize with.
+ Expression* replacement =
+ builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref));
+ if (curr->order == MemoryOrder::SeqCst) {
+ replacement =
+ builder.blockify(replacement, builder.makeAtomicFence());
+ }
+ replaceCurrent(
+ builder.blockify(replacement, getReadValue(values[0])));
return;
}
assert(values.size() == 2);
@@ -486,11 +499,19 @@ struct GlobalStructInference : public Pass {
// of their execution matters (they may note globals for un-nesting).
auto* left = getReadValue(values[0]);
auto* right = getReadValue(values[1]);
- // Note that we must trap on null, so add a ref.as_non_null here.
+ // Note that we must trap on null, so add a ref.as_non_null here. We
+ // must also add a fence if this get is seqcst. As before, no extra work
+ // is necessary for an acquire get because there cannot be a write it
+ // synchronizes with.
+ Expression* getGlobal =
+ builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type);
+ if (curr->order == MemoryOrder::SeqCst) {
+ getGlobal =
+ builder.makeSequence(builder.makeAtomicFence(), getGlobal);
+ }
replaceCurrent(builder.makeSelect(
builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
- builder.makeGlobalGet(
- checkGlobal, wasm.getGlobal(checkGlobal)->type)),
+ getGlobal),
left,
right));
}
diff --git a/test/lit/passes/gsi.wast b/test/lit/passes/gsi.wast
index 299b19e00..7bc83efd4 100644
--- a/test/lit/passes/gsi.wast
+++ b/test/lit/passes/gsi.wast
@@ -144,16 +144,16 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $test1 (param $struct1 (ref null $struct1)) (param $struct2 (ref null $struct2))
- ;; We can infer that this get must reference $global1 and make the reference
- ;; point to that. Note that we do not infer the value of 42 here, but leave
- ;; it for other passes to do.
+ ;; Even though the value here is not known at compile time - it reads an
+ ;; imported global - we can still infer that we are reading from $global1.
(drop
(struct.get $struct1 0
(local.get $struct1)
)
)
- ;; Even though the value here is not known at compile time - it reads an
- ;; imported global - we can still infer that we are reading from $global2.
+ ;; We can infer that this get must reference $global2 and make the reference
+ ;; point to that. Note that we do not infer the value of 42 here, but leave
+ ;; it for other passes to do.
(drop
(struct.get $struct2 0
(local.get $struct2)
@@ -1944,3 +1944,221 @@
)
)
)
+
+;; Test atomic gets.
+(module
+ (rec
+ ;; CHECK: (rec
+ ;; CHECK-NEXT: (type $one (shared (struct (field i32))))
+ (type $one (shared (struct (field i32))))
+ ;; CHECK: (type $two (shared (struct (field i32))))
+ (type $two (shared (struct (field i32))))
+ ;; CHECK: (type $two-same (shared (struct (field i32))))
+ (type $two-same (shared (struct (field i32))))
+ )
+
+ ;; CHECK: (type $3 (func (param (ref $one))))
+
+ ;; CHECK: (type $4 (func (param (ref $two))))
+
+ ;; CHECK: (type $5 (func (param (ref $two-same))))
+
+ ;; CHECK: (global $one (ref $one) (struct.new $one
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $one (ref $one) (struct.new $one (i32.const 42)))
+
+ ;; CHECK: (global $two-a (ref $two) (struct.new $two
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $two-a (ref $two) (struct.new $two (i32.const 42)))
+
+ ;; CHECK: (global $two-b (ref $two) (struct.new $two
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $two-b (ref $two) (struct.new $two (i32.const 1337)))
+
+ ;; CHECK: (global $two-same-a (ref $two-same) (struct.new $two-same
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $two-same-a (ref $two-same) (struct.new $two-same (i32.const 42)))
+
+ ;; CHECK: (global $two-same-b (ref $two-same) (struct.new $two-same
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $two-same-b (ref $two-same) (struct.new $two-same (i32.const 42)))
+
+ ;; CHECK: (func $one (type $3) (param $0 (ref $one))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $one 0
+ ;; CHECK-NEXT: (block (result (ref $one))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $one)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get acqrel $one 0
+ ;; CHECK-NEXT: (block (result (ref $one))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $one)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get $one 0
+ ;; CHECK-NEXT: (block (result (ref $one))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $one)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $one (param (ref $one))
+ (drop
+ (struct.get $one 0
+ (local.get 0)
+ )
+ )
+ (drop
+ (struct.atomic.get acqrel $one 0
+ (local.get 0)
+ )
+ )
+ (drop
+ (struct.atomic.get $one 0
+ (local.get 0)
+ )
+ )
+ )
+
+ ;; CHECK: (func $two (type $4) (param $0 (ref $two))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $two-a)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $two-a)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result (ref $two))
+ ;; CHECK-NEXT: (atomic.fence)
+ ;; CHECK-NEXT: (global.get $two-a)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $two (param (ref $two))
+ (drop
+ (struct.get $two 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This is optimized normally because there cannot be any writes it
+ ;; synchronizes with.
+ (struct.atomic.get acqrel $two 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This requires a fence to maintain its effect on the global order of
+ ;; seqcst operations.
+ (struct.atomic.get $two 0
+ (local.get 0)
+ )
+ )
+ )
+
+ ;; CHECK: (func $two-same (type $5) (param $0 (ref $two-same))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (atomic.fence)
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $two-same (param (ref $two-same))
+ (drop
+ (struct.get $two-same 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This is optimized normally because there cannot be any writes it
+ ;; synchronizes with.
+ (struct.atomic.get acqrel $two-same 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; This requires a fence to maintain its effect on the global order of
+ ;; seqcst operations.
+ (struct.atomic.get $two-same 0
+ (local.get 0)
+ )
+ )
+ )
+)