summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ir/properties.h24
-rw-r--r--src/passes/GUFA.cpp22
-rw-r--r--test/lit/passes/gufa-refs.wast53
3 files changed, 85 insertions, 14 deletions
diff --git a/src/ir/properties.h b/src/ir/properties.h
index 09486ee34..70f18c276 100644
--- a/src/ir/properties.h
+++ b/src/ir/properties.h
@@ -486,6 +486,30 @@ inline bool canEmitSelectWithArms(Expression* ifTrue, Expression* ifFalse) {
return ifTrue->type.isSingle() && ifFalse->type.isSingle();
}
+// If this instruction accesses memory or the heap, or otherwise participates in
+// shared memory synchronization, return the memory order corresponding to the
+// kind of synchronization it does. Return MemoryOrder::Unordered if there is no
+// synchronization. Does not look at children.
+inline MemoryOrder getMemoryOrder(Expression* curr) {
+ if (auto* get = curr->dynCast<StructGet>()) {
+ return get->order;
+ }
+ if (auto* set = curr->dynCast<StructSet>()) {
+ return set->order;
+ }
+ if (auto* load = curr->dynCast<Load>()) {
+ return load->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
+ }
+ if (auto* store = curr->dynCast<Store>()) {
+ return store->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
+ }
+ if (curr->is<AtomicRMW>() || curr->is<AtomicWait>() ||
+ curr->is<AtomicNotify>() || curr->is<AtomicFence>()) {
+ return MemoryOrder::SeqCst;
+ }
+ return MemoryOrder::Unordered;
+}
+
// A "generative" expression is one that can generate different results for the
// same inputs, and that difference is *not* explained by other expressions that
// interact with this one. This is an intrinsic/internal property of the
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp
index 513b2cf3d..2ca1ac6ef 100644
--- a/src/passes/GUFA.cpp
+++ b/src/passes/GUFA.cpp
@@ -150,20 +150,14 @@ struct GUFAOptimizer
return;
}
- if (contents.isNull() && curr->type.isNullable()) {
- // Null values are all identical, so just fix up the type here if we need
- // to (the null's type might not fit in this expression, if it passed
- // through casts).
- if (!Type::isSubType(contents.getType(), curr->type)) {
- contents = PossibleContents::literal(
- Literal::makeNull(curr->type.getHeapType()));
- }
-
- // Note that if curr's type is *not* nullable, then the code will trap at
- // runtime (the null must arrive through a cast that will trap). We handle
- // that below, so we don't need to think about it here.
-
- // TODO: would emitting a more specific null be useful when valid?
+ if (Properties::getMemoryOrder(curr) != MemoryOrder::Unordered) {
+ // This load might synchronize with some store, and if we replaced the
+ // load with a constant or with a load from a global, it would not
+ // synchronize with that store anymore. Since we know what value the store
+ // must write, and we know it is the same as every other store to the same
+ // location, it's possible that optimizing here would be allowable, but
+ // for now be conservative and do not optimize.
+ return;
}
auto* c = contents.makeExpression(wasm);
diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast
index 80fd32386..ec67cf40c 100644
--- a/test/lit/passes/gufa-refs.wast
+++ b/test/lit/passes/gufa-refs.wast
@@ -6137,3 +6137,56 @@
)
)
)
+
+;; Atomic accesses require special handling
+(module
+ ;; CHECK: (type $A (shared (struct (field i32))))
+ (type $A (shared (struct (field i32))))
+
+ ;; CHECK: (type $1 (func))
+
+ ;; CHECK: (func $gets (type $1)
+ ;; CHECK-NEXT: (local $0 (ref $A))
+ ;; CHECK-NEXT: (local.set $0
+ ;; CHECK-NEXT: (struct.new_default $A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get acqrel $A 0
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.atomic.get $A 0
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $gets
+ (local (ref $A))
+ (local.set 0
+ (struct.new_default $A)
+ )
+ (drop
+ ;; This is optimizable. It reads from shared memory, but there is only one
+ ;; possible value that can be read.
+ (struct.get $A 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; We do not (yet) optimize atomic gets.
+ (struct.atomic.get acqrel $A 0
+ (local.get 0)
+ )
+ )
+ (drop
+ ;; We do not (yet) optimize atomic gets.
+ (struct.atomic.get $A 0
+ (local.get 0)
+ )
+ )
+ )
+)