diff options
-rw-r--r-- | src/ir/properties.h | 24 | ||||
-rw-r--r-- | src/passes/GUFA.cpp | 22 | ||||
-rw-r--r-- | test/lit/passes/gufa-refs.wast | 53 |
3 files changed, 85 insertions, 14 deletions
diff --git a/src/ir/properties.h b/src/ir/properties.h index 09486ee34..70f18c276 100644 --- a/src/ir/properties.h +++ b/src/ir/properties.h @@ -486,6 +486,30 @@ inline bool canEmitSelectWithArms(Expression* ifTrue, Expression* ifFalse) { return ifTrue->type.isSingle() && ifFalse->type.isSingle(); } +// If this instruction accesses memory or the heap, or otherwise participates in +// shared memory synchronization, return the memory order corresponding to the +// kind of synchronization it does. Return MemoryOrder::Unordered if there is no +// synchronization. Does not look at children. +inline MemoryOrder getMemoryOrder(Expression* curr) { + if (auto* get = curr->dynCast<StructGet>()) { + return get->order; + } + if (auto* set = curr->dynCast<StructSet>()) { + return set->order; + } + if (auto* load = curr->dynCast<Load>()) { + return load->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered; + } + if (auto* store = curr->dynCast<Store>()) { + return store->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered; + } + if (curr->is<AtomicRMW>() || curr->is<AtomicWait>() || + curr->is<AtomicNotify>() || curr->is<AtomicFence>()) { + return MemoryOrder::SeqCst; + } + return MemoryOrder::Unordered; +} + // A "generative" expression is one that can generate different results for the // same inputs, and that difference is *not* explained by other expressions that // interact with this one. This is an intrinsic/internal property of the diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp index 513b2cf3d..2ca1ac6ef 100644 --- a/src/passes/GUFA.cpp +++ b/src/passes/GUFA.cpp @@ -150,20 +150,14 @@ struct GUFAOptimizer return; } - if (contents.isNull() && curr->type.isNullable()) { - // Null values are all identical, so just fix up the type here if we need - // to (the null's type might not fit in this expression, if it passed - // through casts). - if (!Type::isSubType(contents.getType(), curr->type)) { - contents = PossibleContents::literal( - Literal::makeNull(curr->type.getHeapType())); - } - - // Note that if curr's type is *not* nullable, then the code will trap at - // runtime (the null must arrive through a cast that will trap). We handle - // that below, so we don't need to think about it here. - - // TODO: would emitting a more specific null be useful when valid? + if (Properties::getMemoryOrder(curr) != MemoryOrder::Unordered) { + // This load might synchronize with some store, and if we replaced the + // load with a constant or with a load from a global, it would not + // synchronize with that store anymore. Since we know what value the store + // must write, and we know it is the same as every other store to the same + // location, it's possible that optimizing here would be allowable, but + // for now be conservative and do not optimize. + return; } auto* c = contents.makeExpression(wasm); diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast index 80fd32386..ec67cf40c 100644 --- a/test/lit/passes/gufa-refs.wast +++ b/test/lit/passes/gufa-refs.wast @@ -6137,3 +6137,56 @@ ) ) ) + +;; Atomic accesses require special handling +(module + ;; CHECK: (type $A (shared (struct (field i32)))) + (type $A (shared (struct (field i32)))) + + ;; CHECK: (type $1 (func)) + + ;; CHECK: (func $gets (type $1) + ;; CHECK-NEXT: (local $0 (ref $A)) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (struct.new_default $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.atomic.get acqrel $A 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.atomic.get $A 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $gets + (local (ref $A)) + (local.set 0 + (struct.new_default $A) + ) + (drop + ;; This is optimizable. It reads from shared memory, but there is only one + ;; possible value that can be read. + (struct.get $A 0 + (local.get 0) + ) + ) + (drop + ;; We do not (yet) optimize atomic gets. + (struct.atomic.get acqrel $A 0 + (local.get 0) + ) + ) + (drop + ;; We do not (yet) optimize atomic gets. + (struct.atomic.get $A 0 + (local.get 0) + ) + ) + ) +) |