diff options
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 40 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions-atomics.wast | 44 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions.wast | 83 |
3 files changed, 167 insertions, 0 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index c6f1b0f3b..7c450ac06 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -907,6 +907,46 @@ struct OptimizeInstructions } } + if (curr->op == ExtendUInt32 || curr->op == ExtendSInt32) { + if (auto* load = curr->value->dynCast<Load>()) { + // i64.extend_i32_s(i32.load(_8|_16)(_u|_s)(x)) => + // i64.load(_8|_16|_32)(_u|_s)(x) + // + // i64.extend_i32_u(i32.load(_8|_16)(_u|_s)(x)) => + // i64.load(_8|_16|_32)(_u|_s)(x) + // + // but we can't do this in the following cases: + // + // i64.extend_i32_u(i32.load8_s(x)) + // i64.extend_i32_u(i32.load16_s(x)) + // + // these mixed sign/zero extensions can't be represented by a single + // signed or unsigned 64-bit load operation. For example, if + // `load8_s(x)` returns i8(-1) (0xFF), then the sign-extended result is + // i32(-1) (0xFFFFFFFF), and zero-extending that to i64 finally gives + // 0x00000000FFFFFFFF. However, `i64.load8_s` in this situation + // gives `i64(-1)` (all ones), and `i64.load8_u` gives + // 0x00000000000000FF. + // + // Another limitation is atomics, which only have unsigned loads. + // So we also avoid this one case: + // + // i64.extend_i32_s(i32.atomic.load(x)) + + // Special case for i32.load. In this case signedness depends on + // the extend operation. 
+ bool willBeSigned = curr->op == ExtendSInt32 && load->bytes == 4; + if (!(curr->op == ExtendUInt32 && load->bytes <= 2 && load->signed_) && + !(willBeSigned && load->isAtomic)) { + if (willBeSigned) { + load->signed_ = true; + } + load->type = Type::i64; + return replaceCurrent(load); + } + } + } + if (Abstract::hasAnyReinterpret(curr->op)) { // i32.reinterpret_f32(f32.reinterpret_i32(x)) => x // i64.reinterpret_f64(f64.reinterpret_i64(x)) => x diff --git a/test/lit/passes/optimize-instructions-atomics.wast b/test/lit/passes/optimize-instructions-atomics.wast index 519c8399d..979afde68 100644 --- a/test/lit/passes/optimize-instructions-atomics.wast +++ b/test/lit/passes/optimize-instructions-atomics.wast @@ -51,4 +51,48 @@ (drop (f64.reinterpret_i64 (i64.atomic.load (local.get $x)))) ;; skip (i32.atomic.store (i32.const 8) (i32.reinterpret_f32 (local.get $y))) ;; skip ) + + ;; CHECK: (func $combine_atomic_load_and_extends (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.atomic.load8_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.atomic.load16_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.atomic.load32_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.atomic.load8_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.atomic.load16_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.extend_i32_s + ;; CHECK-NEXT: (i32.atomic.load + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $combine_atomic_load_and_extends (param $x i32) + (drop (i64.extend_i32_u (i32.atomic.load8_u (local.get $x)))) + (drop (i64.extend_i32_u (i32.atomic.load16_u (local.get $x)))) + (drop 
(i64.extend_i32_u (i32.atomic.load (local.get $x)))) + (drop (i64.extend_i32_s (i32.atomic.load8_u (local.get $x)))) + (drop (i64.extend_i32_s (i32.atomic.load16_u (local.get $x)))) + ;; skips + (drop (i64.extend_i32_s (i32.atomic.load (local.get $x)))) + ) ) diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast index 638811586..26ef699d1 100644 --- a/test/lit/passes/optimize-instructions.wast +++ b/test/lit/passes/optimize-instructions.wast @@ -13404,4 +13404,87 @@ (drop (f32.reinterpret_i32 (i32.reinterpret_f32 (local.get $z)))) (drop (f64.reinterpret_i64 (i64.reinterpret_f64 (local.get $w)))) ) + + ;; u64(i32.load(_8|_16)(_u|_s)(x)) => i64.load(_8|_16|_32)(_u|_s)(x) + ;; except: + ;; i64.extend_i32_u(i32.load8_s(x)) and + ;; i64.extend_i32_u(i32.load16_s(x)) + + ;; CHECK: (func $combine_load_and_extend_u (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load8_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load16_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load32_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.extend_i32_u + ;; CHECK-NEXT: (i32.load8_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.extend_i32_u + ;; CHECK-NEXT: (i32.load16_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $combine_load_and_extend_u (param $x i32) + (drop (i64.extend_i32_u (i32.load8_u (local.get $x)))) + (drop (i64.extend_i32_u (i32.load16_u (local.get $x)))) + (drop (i64.extend_i32_u (i32.load (local.get $x)))) + + ;; skips + (drop (i64.extend_i32_u (i32.load8_s (local.get $x)))) + (drop (i64.extend_i32_u (i32.load16_s (local.get $x)))) + ) + + ;; 
i64(i32.load(_8|_16)(_u|_s)(x)) => i64.load(_8|_16|_32)(_u|_s)(x) + + ;; CHECK: (func $combine_load_and_extend_s (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load8_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load16_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load8_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load16_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.load32_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $combine_load_and_extend_s (param $x i32) + (drop (i64.extend_i32_s (i32.load8_u (local.get $x)))) + (drop (i64.extend_i32_s (i32.load16_u (local.get $x)))) + (drop (i64.extend_i32_s (i32.load8_s (local.get $x)))) + (drop (i64.extend_i32_s (i32.load16_s (local.get $x)))) + (drop (i64.extend_i32_s (i32.load (local.get $x)))) + ) ) |