author    Thomas Lively <tlively@google.com>  2024-12-19 17:45:05 -0800
committer GitHub <noreply@github.com>  2024-12-19 17:45:05 -0800
commit    7c42700738fa9f9aff29dd930e9fd7f6cf71fc29 (patch)
tree      75b8df675e6da753d1209f7b02c9781c8db6cc4e
parent    74b2b064e59beee84e88afaa952a8c51cf9309a4 (diff)
Do not optimize atomic gets in GUFA (#7161)
Conservatively avoid introducing synchronization bugs by not optimizing atomic struct.gets at all in GUFA. It is possible that we could be more precise in the future. Also remove obsolete logic dealing with the types of null values as a drive-by. All null values now have bottom types, so the type mismatch this code checked for is impossible.
-rw-r--r--  src/ir/properties.h             24
-rw-r--r--  src/passes/GUFA.cpp             22
-rw-r--r--  test/lit/passes/gufa-refs.wast  53
3 files changed, 85 insertions, 14 deletions
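
To see why replacing a synchronizing read is unsafe even when its value is known, consider the classic release/acquire publication pattern. The sketch below is a C++ analogy using std::atomic, not Binaryen code: substituting the acquire load's known value would leave the result unchanged but erase the happens-before edge to the release store, turning the guarded non-atomic read into a data race. The same hazard applies to atomic struct.gets on shared structs, which is why GUFA now skips them.

#include <atomic>

int data = 0;                  // published non-atomically
std::atomic<int> flag{0};      // guards the publication

void writer() {
  data = 42;
  flag.store(1, std::memory_order_release); // release: makes `data` visible
}

int reader() {
  // Even if an analysis proved this loop can only ever exit after observing 1,
  // substituting the constant 1 for the acquire load would remove the
  // synchronization that makes the read of `data` below well-defined.
  while (flag.load(std::memory_order_acquire) != 1) {
  }
  return data; // 42, but only because of the acquire/release pairing
}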
diff --git a/src/ir/properties.h b/src/ir/properties.h
index 09486ee34..70f18c276 100644
--- a/src/ir/properties.h
+++ b/src/ir/properties.h
@@ -486,6 +486,30 @@ inline bool canEmitSelectWithArms(Expression* ifTrue, Expression* ifFalse) {
return ifTrue->type.isSingle() && ifFalse->type.isSingle();
}
+// If this instruction accesses memory or the heap, or otherwise participates in
+// shared memory synchronization, return the memory order corresponding to the
+// kind of synchronization it does. Return MemoryOrder::Unordered if there is no
+// synchronization. Does not look at children.
+inline MemoryOrder getMemoryOrder(Expression* curr) {
+ if (auto* get = curr->dynCast<StructGet>()) {
+ return get->order;
+ }
+ if (auto* set = curr->dynCast<StructSet>()) {
+ return set->order;
+ }
+ if (auto* load = curr->dynCast<Load>()) {
+ return load->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
+ }
+ if (auto* store = curr->dynCast<Store>()) {
+ return store->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
+ }
+ if (curr->is<AtomicRMW>() || curr->is<AtomicWait>() ||
+ curr->is<AtomicNotify>() || curr->is<AtomicFence>()) {
+ return MemoryOrder::SeqCst;
+ }
+ return MemoryOrder::Unordered;
+}
+
// A "generative" expression is one that can generate different results for the
// same inputs, and that difference is *not* explained by other expressions that
// interact with this one. This is an intrinsic/internal property of the
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp
index 513b2cf3d..2ca1ac6ef 100644
--- a/src/passes/GUFA.cpp
+++ b/src/passes/GUFA.cpp
@@ -150,20 +150,14 @@ struct GUFAOptimizer
return;
}
- if (contents.isNull() && curr->type.isNullable()) {
- // Null values are all identical, so just fix up the type here if we need
- // to (the null's type might not fit in this expression, if it passed
- // through casts).
- if (!Type::isSubType(contents.getType(), curr->type)) {
- contents = PossibleContents::literal(
- Literal::makeNull(curr->type.getHeapType()));
- }
-
- // Note that if curr's type is *not* nullable, then the code will trap at
- // runtime (the null must arrive through a cast that will trap). We handle
- // that below, so we don't need to think about it here.
-
- // TODO: would emitting a more specific null be useful when valid?
+ if (Properties::getMemoryOrder(curr) != MemoryOrder::Unordered) {
+ // This load might synchronize with some store, and if we replaced the
+ // load with a constant or with a load from a global, it would not
+ // synchronize with that store anymore. Since we know what value the store
+ // must write, and we know it is the same as every other store to the same
+ // location, it's possible that optimizing here would be allowable, but
+ // for now be conservative and do not optimize.
+ return;
}
auto* c = contents.makeExpression(wasm);
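
The hunk above also deletes the old null fix-up. Since null values now always carry a bottom heap type, and a bottom type is a subtype of every nullable reference type in its hierarchy, the subtype mismatch the deleted code guarded against can no longer occur. A standalone illustration with Binaryen's type API (a sketch, not code from this commit):

#include <cassert>
#include "wasm-type.h"

int main() {
  using namespace wasm;
  // The type of a bottom null in the "any" hierarchy: (ref null none).
  Type bottomNull(HeapType::none, Nullable);
  // A bottom type fits any nullable reference type in the same hierarchy,
  // e.g. (ref null struct), so nulls never need a type adjustment.
  assert(Type::isSubType(bottomNull, Type(HeapType::struct_, Nullable)));
  return 0;
}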
diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast
index 80fd32386..ec67cf40c 100644
--- a/test/lit/passes/gufa-refs.wast
+++ b/test/lit/passes/gufa-refs.wast
@@ -6137,3 +6137,56 @@
)
)
)
+
+;; Atomic accesses require special handling
+(module
+ ;; CHECK: (type $A (shared (struct (field i32))))
+ (type $A (shared (struct (field i32))))
+
+ ;; CHECK: (type $1 (func))
+
+ ;; CHECK: (func $gets (type $1)
+ ;; CHECK-NEXT: (local $0 (ref $A))
+ ;; CHECK-NEXT: (local.set $0
+ ;; CHECK-NEXT: (struct.new_default $A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get acqrel $A 0
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get $A 0
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $gets
+ (local (ref $A))
+ (local.set 0
+ (struct.new_default $A)
+ )
+ (drop
+ ;; This is optimizable. It reads from shared memory, but there is only one
+ ;; possible value that can be read.
+ (struct.get $A 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; We do not (yet) optimize atomic gets.
+ (struct.atomic.get acqrel $A 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; We do not (yet) optimize atomic gets.
+ (struct.atomic.get $A 0
+ (local.get 0)
+ )
+ )
+ )
+)