summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/OptimizeInstructions.cpp40
-rw-r--r--test/lit/passes/optimize-instructions-atomics.wast44
-rw-r--r--test/lit/passes/optimize-instructions.wast83
3 files changed, 167 insertions, 0 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index c6f1b0f3b..7c450ac06 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -907,6 +907,46 @@ struct OptimizeInstructions
}
}
+ if (curr->op == ExtendUInt32 || curr->op == ExtendSInt32) {
+ if (auto* load = curr->value->dynCast<Load>()) {
+ // Fold an i32 -> i64 extension of a load into a single i64 load:
+ //
+ // i64.extend_i32_s(i32.load(_8|_16)(_u|_s)(x)) =>
+ // i64.load(_8|_16|_32)(_u|_s)(x)
+ //
+ // i64.extend_i32_u(i32.load(_8|_16)(_u|_s)(x)) =>
+ // i64.load(_8|_16|_32)(_u|_s)(x)
+ //
+ // However, we cannot do this in the following cases:
+ //
+ // i64.extend_i32_u(i32.load8_s(x))
+ // i64.extend_i32_u(i32.load16_s(x))
+ //
+ // These mix a sign extension (to 32 bits) with a zero extension (to
+ // 64 bits), which cannot be represented by a single signed or unsigned
+ // 64-bit load. For example, if `load8_s(x)` reads the byte 0xFF
+ // (i8(-1)), then the sign-extended result is i32(-1) (0xFFFFFFFF), and
+ // zero-extending that to i64 finally gives 0x00000000FFFFFFFF. With
+ // `i64.load8_s` we would instead get `i64(-1)` (all ones), and with
+ // `i64.load8_u` we would get 0x00000000000000FF.
+ //
+ // Another limitation is atomics, which only have unsigned loads.
+ // So this is the one additional case we must also avoid:
+ //
+ // i64.extend_i32_s(i32.atomic.load(x))
+
+ // Special case for a full-width i32.load (4 bytes): the signedness of
+ // the resulting i64.load32 is determined by the extend operation, not
+ // by the load itself. For narrower loads the load's own signedness is
+ // kept as-is.
+ bool willBeSigned = curr->op == ExtendSInt32 && load->bytes == 4;
+ // Proceed unless (a) an unsigned extend wraps a signed narrow load, or
+ // (b) the result would have to be a signed atomic load.
+ if (!(curr->op == ExtendUInt32 && load->bytes <= 2 && load->signed_) &&
+ !(willBeSigned && load->isAtomic)) {
+ if (willBeSigned) {
+ load->signed_ = true;
+ }
+ // Reuse the existing load node: retype it to i64 and let it replace
+ // the extend expression.
+ load->type = Type::i64;
+ return replaceCurrent(load);
+ }
+ }
+ }
+
if (Abstract::hasAnyReinterpret(curr->op)) {
// i32.reinterpret_f32(f32.reinterpret_i32(x)) => x
// i64.reinterpret_f64(f64.reinterpret_i64(x)) => x
diff --git a/test/lit/passes/optimize-instructions-atomics.wast b/test/lit/passes/optimize-instructions-atomics.wast
index 519c8399d..979afde68 100644
--- a/test/lit/passes/optimize-instructions-atomics.wast
+++ b/test/lit/passes/optimize-instructions-atomics.wast
@@ -51,4 +51,48 @@
(drop (f64.reinterpret_i64 (i64.atomic.load (local.get $x)))) ;; skip
(i32.atomic.store (i32.const 8) (i32.reinterpret_f32 (local.get $y))) ;; skip
)
+
+ ;; CHECK: (func $combine_atomic_load_and_extends (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.atomic.load8_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.atomic.load16_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.atomic.load32_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.atomic.load8_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.atomic.load16_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.extend_i32_s
+ ;; CHECK-NEXT: (i32.atomic.load
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $combine_atomic_load_and_extends (param $x i32)
+ ;; unsigned extends of atomic loads: expected to fold into the
+ ;; corresponding i64.atomic.load*_u
+ (drop (i64.extend_i32_u (i32.atomic.load8_u (local.get $x))))
+ (drop (i64.extend_i32_u (i32.atomic.load16_u (local.get $x))))
+ (drop (i64.extend_i32_u (i32.atomic.load (local.get $x))))
+ ;; signed extends of narrow unsigned atomic loads: expected to fold into
+ ;; the unsigned i64 atomic load of the same width
+ (drop (i64.extend_i32_s (i32.atomic.load8_u (local.get $x))))
+ (drop (i64.extend_i32_s (i32.atomic.load16_u (local.get $x))))
+ ;; skipped: expected to stay unchanged, as folding would need a signed
+ ;; 32-bit atomic load
+ (drop (i64.extend_i32_s (i32.atomic.load (local.get $x))))
+ )
)
diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast
index 638811586..26ef699d1 100644
--- a/test/lit/passes/optimize-instructions.wast
+++ b/test/lit/passes/optimize-instructions.wast
@@ -13404,4 +13404,87 @@
(drop (f32.reinterpret_i32 (i32.reinterpret_f32 (local.get $z))))
(drop (f64.reinterpret_i64 (i64.reinterpret_f64 (local.get $w))))
)
+
+ ;; i64.extend_i32_u(i32.load(_8|_16)_u(x)) => i64.load(_8|_16)_u(x)
+ ;; i64.extend_i32_u(i32.load(x)) => i64.load32_u(x)
+ ;; except the following, which are left unchanged:
+ ;; i64.extend_i32_u(i32.load8_s(x)) and
+ ;; i64.extend_i32_u(i32.load16_s(x))
+
+ ;; CHECK: (func $combine_load_and_extend_u (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load8_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load16_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load32_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.extend_i32_u
+ ;; CHECK-NEXT: (i32.load8_s
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.extend_i32_u
+ ;; CHECK-NEXT: (i32.load16_s
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $combine_load_and_extend_u (param $x i32)
+ ;; unsigned extends of unsigned or full-width loads: expected to fold
+ ;; into i64.load8_u / i64.load16_u / i64.load32_u
+ (drop (i64.extend_i32_u (i32.load8_u (local.get $x))))
+ (drop (i64.extend_i32_u (i32.load16_u (local.get $x))))
+ (drop (i64.extend_i32_u (i32.load (local.get $x))))
+
+ ;; skipped: an unsigned extend of a signed narrow load is expected to
+ ;; stay unchanged
+ (drop (i64.extend_i32_u (i32.load8_s (local.get $x))))
+ (drop (i64.extend_i32_u (i32.load16_s (local.get $x))))
+ )
+
+ ;; i64.extend_i32_s(i32.load(_8|_16)(_u|_s)(x)) => i64.load(_8|_16)(_u|_s)(x)
+ ;; i64.extend_i32_s(i32.load(x)) => i64.load32_s(x)
+
+ ;; CHECK: (func $combine_load_and_extend_s (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load8_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load16_u
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load8_s
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load16_s
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i64.load32_s
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $combine_load_and_extend_s (param $x i32)
+ ;; signed extends of narrow loads: expected to fold into an i64 load of
+ ;; the same width and the same signedness as the original load
+ (drop (i64.extend_i32_s (i32.load8_u (local.get $x))))
+ (drop (i64.extend_i32_s (i32.load16_u (local.get $x))))
+ (drop (i64.extend_i32_s (i32.load8_s (local.get $x))))
+ (drop (i64.extend_i32_s (i32.load16_s (local.get $x))))
+ ;; signed extend of a full-width load: expected to become i64.load32_s
+ (drop (i64.extend_i32_s (i32.load (local.get $x))))
+ )
)