-rw-r--r--   src/passes/ConstantFieldPropagation.cpp |  35
-rw-r--r--   test/lit/passes/cfp.wast                 | 123
2 files changed, 150 insertions, 8 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index a3dd6aa6f..36aebf3a4 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -139,18 +139,28 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     if (!info.hasNoted()) {
       // This field is never written at all. That means that we do not even
       // construct any data of this type, and so it is a logic error to reach
-      // this location in the code. (Unless we are in an open-world
-      // situation, which we assume we are not in.) Replace this get with a
-      // trap. Note that we do not need to care about the nullability of the
-      // reference, as if it should have trapped, we are replacing it with
-      // another trap, which we allow to reorder (but we do need to care about
-      // side effects in the reference, so keep it around).
+      // this location in the code. (Unless we are in an open-world situation,
+      // which we assume we are not in.) Replace this get with a trap. Note that
+      // we do not need to care about the nullability of the reference, as if it
+      // should have trapped, we are replacing it with another trap, which we
+      // allow to reorder (but we do need to care about side effects in the
+      // reference, so keep it around). We also do not need to care about
+      // synchronization since trapping accesses do not synchronize with other
+      // accesses.
       replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
                                           builder.makeUnreachable()));
       changed = true;
       return;
     }
 
+    if (curr->order == MemoryOrder::AcqRel) {
+      // Removing an acquire get and preserving its synchronization properties
+      // would require inserting an acquire fence, but the fence would have
+      // stronger synchronization properties so might be more expensive.
+      // Instead, just skip the optimization.
+      return;
+    }
+
     // If the value is not a constant, then it is unknown and we must give up
     // on simply applying a constant. However, we can try to use a ref.test, if
     // that is allowed.
@@ -166,8 +176,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     // constant value. (Leave it to further optimizations to get rid of the
     // ref.)
     auto* value = makeExpression(info, heapType, curr);
-    replaceCurrent(builder.makeSequence(
-      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
+    auto* replacement = builder.blockify(
+      builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
+    // If this get is sequentially consistent, then it synchronizes with other
+    // threads at least by participating in the global order of sequentially
+    // consistent operations. Preserve that effect by replacing the access with
+    // a fence.
+    assert(curr->order != MemoryOrder::AcqRel);
+    if (curr->order == MemoryOrder::SeqCst) {
+      replacement = builder.blockify(replacement, builder.makeAtomicFence());
+    }
+    replaceCurrent(builder.blockify(replacement, value));
     changed = true;
   }
 
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index e70cfc639..e674fdc4b 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -2830,3 +2830,126 @@
     )
   )
 )
+
+;; Atomic accesses require special handling
+(module
+  (rec
+    ;; CHECK:      (rec
+    ;; CHECK-NEXT:  (type $shared (shared (struct (field (mut i32)))))
+    (type $shared (shared (struct (mut i32))))
+    ;; CHECK:       (type $unwritten (shared (struct (field (mut i32)))))
+    (type $unwritten (shared (struct (mut i32))))
+  )
+
+  ;; CHECK:      (type $2 (func))
+
+  ;; CHECK:      (type $3 (func (param (ref $shared))))
+
+  ;; CHECK:      (type $4 (func (param (ref $unwritten))))
+
+  ;; CHECK:      (func $init (type $2)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_default $shared)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $init
+    (drop
+      (struct.new_default $shared)
+    )
+  )
+
+  ;; CHECK:      (func $gets (type $3) (param $0 (ref $shared))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get acqrel $shared 0
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gets (param (ref $shared))
+    (drop
+      (struct.get $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This is not optimized because we wouldn't want to replace it with a
+      ;; stronger acquire fence.
+      (struct.atomic.get acqrel $shared 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This can be optimized, but requires a seqcst fence.
+      (struct.atomic.get $shared 0
+        (local.get 0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $traps (type $4) (param $0 (ref $unwritten))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (local.get $0)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $traps (param (ref $unwritten))
+    ;; These are all optimizable because they are known to trap. No fences are
+    ;; necessary.
+    (drop
+      (struct.get $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get acqrel $unwritten 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get $unwritten 0
+        (local.get 0)
+      )
+    )
+  )
+)
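
The effect of the change on the sequentially consistent case, condensed from the new test's CHECK output (a sketch, not part of the commit; the local name $ref is illustrative): a seqcst struct.atomic.get of a field whose only possible value is the default 0 (the struct is only created with struct.new_default) is replaced by a dropped null check on the reference, a seqcst fence that preserves the access's place in the global order of sequentially consistent operations, and the constant itself.

  ;; before ConstantFieldPropagation: a seqcst get of a field known to be 0
  (struct.atomic.get $shared 0
    (local.get $ref)
  )

  ;; after: keep the trap on null and the seqcst synchronization
  (block (result i32)
    (drop
      (ref.as_non_null
        (local.get $ref)
      )
    )
    (atomic.fence)
    (i32.const 0)
  )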