 src/passes/GlobalStructInference.cpp |  35
 test/lit/passes/gsi.wast             | 228
 2 files changed, 251 insertions(+), 12 deletions(-)
diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp
index 4158db051..f27df3a3c 100644
--- a/src/passes/GlobalStructInference.cpp
+++ b/src/passes/GlobalStructInference.cpp
@@ -359,6 +359,11 @@ struct GlobalStructInference : public Pass {
         // refined, which could change the struct.get's type.
         refinalize = true;
       }
+      // No need to worry about atomic gets here. We will still read from
+      // the same memory location as before and preserve all side effects
+      // (including synchronization) that were previously present. The
+      // memory location is immutable anyway, so there cannot be any writes
+      // to synchronize with in the first place.
       curr->ref = builder.makeSequence(
         builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
         builder.makeGlobalGet(global, globalType));
@@ -457,10 +462,18 @@ struct GlobalStructInference : public Pass {
       // the early return above) so that only leaves 1 and 2.
       if (values.size() == 1) {
         // The case of 1 value is simple: trap if the ref is null, and
-        // otherwise return the value.
-        replaceCurrent(builder.makeSequence(
-          builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
-          getReadValue(values[0])));
+        // otherwise return the value. We must also fence if the get was
+        // seqcst. No additional work is necessary for an acquire get because
+        // there cannot have been any writes to this immutable field that it
+        // would synchronize with.
+        Expression* replacement =
+          builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref));
+        if (curr->order == MemoryOrder::SeqCst) {
+          replacement =
+            builder.blockify(replacement, builder.makeAtomicFence());
+        }
+        replaceCurrent(
+          builder.blockify(replacement, getReadValue(values[0])));
         return;
       }
       assert(values.size() == 2);
@@ -486,11 +499,19 @@ struct GlobalStructInference : public Pass {
       // of their execution matters (they may note globals for un-nesting).
       auto* left = getReadValue(values[0]);
       auto* right = getReadValue(values[1]);
-      // Note that we must trap on null, so add a ref.as_non_null here.
+      // Note that we must trap on null, so add a ref.as_non_null here. We
+      // must also add a fence if this get is seqcst. As before, no extra work
+      // is necessary for an acquire get because there cannot be a write it
+      // synchronizes with.
+      Expression* getGlobal =
+        builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type);
+      if (curr->order == MemoryOrder::SeqCst) {
+        getGlobal =
+          builder.makeSequence(builder.makeAtomicFence(), getGlobal);
+      }
       replaceCurrent(builder.makeSelect(
         builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
-                          builder.makeGlobalGet(
-                            checkGlobal, wasm.getGlobal(checkGlobal)->type)),
+                          getGlobal),
         left,
         right));
     }
diff --git a/test/lit/passes/gsi.wast b/test/lit/passes/gsi.wast
index 299b19e00..7bc83efd4 100644
--- a/test/lit/passes/gsi.wast
+++ b/test/lit/passes/gsi.wast
@@ -144,16 +144,16 @@
   ;; CHECK-NEXT: )
   ;; CHECK-NEXT: )
   (func $test1 (param $struct1 (ref null $struct1)) (param $struct2 (ref null $struct2))
-    ;; We can infer that this get must reference $global1 and make the reference
-    ;; point to that. Note that we do not infer the value of 42 here, but leave
-    ;; it for other passes to do.
+    ;; Even though the value here is not known at compile time - it reads an
+    ;; imported global - we can still infer that we are reading from $global1.
     (drop
       (struct.get $struct1 0
         (local.get $struct1)
       )
     )
-    ;; Even though the value here is not known at compile time - it reads an
-    ;; imported global - we can still infer that we are reading from $global2.
+    ;; We can infer that this get must reference $global2 and make the reference
+    ;; point to that. Note that we do not infer the value of 42 here, but leave
+    ;; it for other passes to do.
     (drop
       (struct.get $struct2 0
         (local.get $struct2)
@@ -1944,3 +1944,221 @@
     )
   )
 )
+
+;; Test atomic gets.
+(module
+  (rec
+    ;; CHECK:      (rec
+    ;; CHECK-NEXT:  (type $one (shared (struct (field i32))))
+    (type $one (shared (struct (field i32))))
+    ;; CHECK:      (type $two (shared (struct (field i32))))
+    (type $two (shared (struct (field i32))))
+    ;; CHECK:      (type $two-same (shared (struct (field i32))))
+    (type $two-same (shared (struct (field i32))))
+  )
+
+  ;; CHECK:      (type $3 (func (param (ref $one))))
+
+  ;; CHECK:      (type $4 (func (param (ref $two))))
+
+  ;; CHECK:      (type $5 (func (param (ref $two-same))))
+
+  ;; CHECK:      (global $one (ref $one) (struct.new $one
+  ;; CHECK-NEXT:  (i32.const 42)
+  ;; CHECK-NEXT: ))
+  (global $one (ref $one) (struct.new $one (i32.const 42)))
+
+  ;; CHECK:      (global $two-a (ref $two) (struct.new $two
+  ;; CHECK-NEXT:  (i32.const 42)
+  ;; CHECK-NEXT: ))
+  (global $two-a (ref $two) (struct.new $two (i32.const 42)))
+
+  ;; CHECK:      (global $two-b (ref $two) (struct.new $two
+  ;; CHECK-NEXT:  (i32.const 1337)
+  ;; CHECK-NEXT: ))
+  (global $two-b (ref $two) (struct.new $two (i32.const 1337)))
+
+  ;; CHECK:      (global $two-same-a (ref $two-same) (struct.new $two-same
+  ;; CHECK-NEXT:  (i32.const 42)
+  ;; CHECK-NEXT: ))
+  (global $two-same-a (ref $two-same) (struct.new $two-same (i32.const 42)))
+
+  ;; CHECK:      (global $two-same-b (ref $two-same) (struct.new $two-same
+  ;; CHECK-NEXT:  (i32.const 42)
+  ;; CHECK-NEXT: ))
+  (global $two-same-b (ref $two-same) (struct.new $two-same (i32.const 42)))
+
+  ;; CHECK:      (func $one (type $3) (param $0 (ref $one))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $one 0
+  ;; CHECK-NEXT:    (block (result (ref $one))
+  ;; CHECK-NEXT:     (drop
+  ;; CHECK-NEXT:      (ref.as_non_null
+  ;; CHECK-NEXT:       (local.get $0)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (global.get $one)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get acqrel $one 0
+  ;; CHECK-NEXT:    (block (result (ref $one))
+  ;; CHECK-NEXT:     (drop
+  ;; CHECK-NEXT:      (ref.as_non_null
+  ;; CHECK-NEXT:       (local.get $0)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (global.get $one)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get $one 0
+  ;; CHECK-NEXT:    (block (result (ref $one))
+  ;; CHECK-NEXT:     (drop
+  ;; CHECK-NEXT:      (ref.as_non_null
+  ;; CHECK-NEXT:       (local.get $0)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (global.get $one)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $one (param (ref $one))
+    (drop
+      (struct.get $one 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get acqrel $one 0
+        (local.get 0)
+      )
+    )
+    (drop
+      (struct.atomic.get $one 0
+        (local.get 0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $two (type $4) (param $0 (ref $two))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (select
+  ;; CHECK-NEXT:    (i32.const 42)
+  ;; CHECK-NEXT:    (i32.const 1337)
+  ;; CHECK-NEXT:    (ref.eq
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (global.get $two-a)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (select
+  ;; CHECK-NEXT:    (i32.const 42)
+  ;; CHECK-NEXT:    (i32.const 1337)
+  ;; CHECK-NEXT:    (ref.eq
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (global.get $two-a)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (select
+  ;; CHECK-NEXT:    (i32.const 42)
+  ;; CHECK-NEXT:    (i32.const 1337)
+  ;; CHECK-NEXT:    (ref.eq
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:     (block (result (ref $two))
+  ;; CHECK-NEXT:      (atomic.fence)
+  ;; CHECK-NEXT:      (global.get $two-a)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $two (param (ref $two))
+    (drop
+      (struct.get $two 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This is optimized normally because there cannot be any writes it
+      ;; synchronizes with.
+      (struct.atomic.get acqrel $two 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This requires a fence to maintain its effect on the global order of
+      ;; seqcst operations.
+      (struct.atomic.get $two 0
+        (local.get 0)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $two-same (type $5) (param $0 (ref $two-same))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 42)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 42)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (local.get $0)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (atomic.fence)
+  ;; CHECK-NEXT:    (i32.const 42)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $two-same (param (ref $two-same))
+    (drop
+      (struct.get $two-same 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This is optimized normally because there cannot be any writes it
+      ;; synchronizes with.
+      (struct.atomic.get acqrel $two-same 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; This requires a fence to maintain its effect on the global order of
+      ;; seqcst operations.
+      (struct.atomic.get $two-same 0
+        (local.get 0)
+      )
+    )
+  )
+)
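
For reference, below is a minimal standalone sketch of the seqcst case, condensed from the $two test above. It is illustrative only: the module layout and the $get-seqcst function name are not part of the change, and it assumes shared (GC) types and threads support are enabled. A seqcst struct.atomic.get of an immutable field whose type has exactly two global instances becomes a select over the two known values; the atomic.fence keeps the access in the global order of seqcst operations, while an acqrel get needs no fence because the immutable field has no writes to synchronize with.

(module
  (type $two (shared (struct (field i32))))

  (global $two-a (ref $two) (struct.new $two (i32.const 42)))
  (global $two-b (ref $two) (struct.new $two (i32.const 1337)))

  ;; Sketch of the pass's output for what was originally
  ;;   (struct.atomic.get $two 0 (local.get $ref))   ;; a seqcst get
  (func $get-seqcst (param $ref (ref $two)) (result i32)
    (select
      (i32.const 42)            ;; value held by $two-a
      (i32.const 1337)          ;; value held by $two-b
      (ref.eq
        (ref.as_non_null        ;; the original trap on null is preserved
          (local.get $ref)
        )
        (block (result (ref $two))
          (atomic.fence)        ;; preserve participation in the seqcst order
          (global.get $two-a)
        )
      )
    )
  )
)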