diff options
author | Alon Zakai <azakai@google.com> | 2024-11-13 16:34:37 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-13 16:34:37 -0800 |
commit | 74a910bc298c95856e5bc09fcd2424d08c5df12f (patch) | |
tree | 71c01a9f330b923a74e84881cddca222738f37bf | |
parent | 9002cc6f87570afdcb000760f54abdad6861f1bd (diff) | |
download | binaryen-74a910bc298c95856e5bc09fcd2424d08c5df12f.tar.gz binaryen-74a910bc298c95856e5bc09fcd2424d08c5df12f.tar.bz2 binaryen-74a910bc298c95856e5bc09fcd2424d08c5df12f.zip |
[SignExt] OptimizeInstructions: Remove signexts of already-extended values (#7072)
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 52 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions-sign_ext.wast | 367 |
2 files changed, 404 insertions, 15 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 3a2841c1e..c9fe5f652 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -80,8 +80,8 @@ static bool isSignedOp(BinaryOp op) { struct LocalInfo { static const Index kUnknown = Index(-1); - Index maxBits; - Index signExtedBits; + Index maxBits = -1; + Index signExtBits = 0; }; struct LocalScanner : PostWalker<LocalScanner> { @@ -99,9 +99,9 @@ struct LocalScanner : PostWalker<LocalScanner> { auto& info = localInfo[i]; if (func->isParam(i)) { info.maxBits = getBitsForType(func->getLocalType(i)); // worst-case - info.signExtedBits = LocalInfo::kUnknown; // we will never know anything + info.signExtBits = LocalInfo::kUnknown; // we will never know anything } else { - info.maxBits = info.signExtedBits = 0; // we are open to learning + info.maxBits = info.signExtBits = 0; // we are open to learning } } // walk @@ -109,8 +109,8 @@ struct LocalScanner : PostWalker<LocalScanner> { // finalize for (Index i = 0; i < func->getNumLocals(); i++) { auto& info = localInfo[i]; - if (info.signExtedBits == LocalInfo::kUnknown) { - info.signExtedBits = 0; + if (info.signExtBits == LocalInfo::kUnknown) { + info.signExtBits = 0; } } } @@ -137,11 +137,11 @@ struct LocalScanner : PostWalker<LocalScanner> { signExtBits = load->bytes * 8; } } - if (info.signExtedBits == 0) { - info.signExtedBits = signExtBits; // first info we see - } else if (info.signExtedBits != signExtBits) { + if (info.signExtBits == 0) { + info.signExtBits = signExtBits; // first info we see + } else if (info.signExtBits != signExtBits) { // contradictory information, give up - info.signExtedBits = LocalInfo::kUnknown; + info.signExtBits = LocalInfo::kUnknown; } } @@ -1006,6 +1006,22 @@ struct OptimizeInstructions } } + // Simple sign extends can be removed if the value is already sign-extended. + auto signExtBits = getSignExtBits(curr->value); + if (signExtBits > 0) { + // Note that we can handle the case of |curr| having a larger sign-extend: + // if we have an 8-bit value in 32-bit, then there are 24 sign bits, and + // doing a sign-extend to 16 will only affect 16 of those 24, and the + // effect is to leave them as they are. + if ((curr->op == ExtendS8Int32 && signExtBits <= 8) || + (curr->op == ExtendS16Int32 && signExtBits <= 16) || + (curr->op == ExtendS8Int64 && signExtBits <= 8) || + (curr->op == ExtendS16Int64 && signExtBits <= 16) || + (curr->op == ExtendS32Int64 && signExtBits <= 32)) { + return replaceCurrent(curr->value); + } + } + if (Abstract::hasAnyReinterpret(curr->op)) { // i32.reinterpret_f32(f32.reinterpret_i32(x)) => x // i64.reinterpret_f64(f64.reinterpret_i64(x)) => x @@ -3611,16 +3627,22 @@ private: return inner; } - // check if an expression is already sign-extended + // Check if an expression is already sign-extended to an exact number of bits. bool isSignExted(Expression* curr, Index bits) { + return getSignExtBits(curr) == bits; + } + + // Returns the number of bits an expression is sign-extended (or 0 if it is + // not). + Index getSignExtBits(Expression* curr) { if (Properties::getSignExtValue(curr)) { - return Properties::getSignExtBits(curr) == bits; + return Properties::getSignExtBits(curr); } if (auto* get = curr->dynCast<LocalGet>()) { - // check what we know about the local - return localInfo[get->index].signExtedBits == bits; + // Check what we know about the local. + return localInfo[get->index].signExtBits; } - return false; + return 0; } // optimize trivial math operations, given that the right side of a binary diff --git a/test/lit/passes/optimize-instructions-sign_ext.wast b/test/lit/passes/optimize-instructions-sign_ext.wast new file mode 100644 index 000000000..19e6c45c8 --- /dev/null +++ b/test/lit/passes/optimize-instructions-sign_ext.wast @@ -0,0 +1,367 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s -all --optimize-instructions -S -o - | filecheck %s + +(module + ;; CHECK: (memory $0 16 17) + (memory $0 16 17) + + ;; CHECK: (func $i32-direct (type $1) (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.extend8_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i32-direct (param $x i32) + ;; We do not need to sign-extend twice, and can emit just one extend8. + (drop + (i32.extend8_s + (i32.shr_s + (i32.shl + (local.get $x) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + ) + + ;; CHECK: (func $i32-local (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.load8_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i32-local + ;; The local is sign-extended, so the i32.extend can be removed. + (local $temp i32) + (local.set $temp + (i32.load8_s + (i32.const 22) + ) + ) + (drop + (i32.extend8_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i32-local-i16 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.load16_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i32-local-i16 + ;; As above with i16. + (local $temp i32) + (local.set $temp + (i32.load16_s + (i32.const 22) + ) + ) + (drop + (i32.extend16_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i32-local-i16-mismatch-bad (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.load16_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.extend8_s + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i32-local-i16-mismatch-bad + ;; As above with in i8/i16 mismatch. We do not optimize. + (local $temp i32) + (local.set $temp + (i32.load16_s + (i32.const 22) + ) + ) + (drop + (i32.extend8_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i32-local-i16-mismatch-good (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.load8_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i32-local-i16-mismatch-good + ;; As above with in i8/i16 mismatch, but in the direction we can handle. + (local $temp i32) + (local.set $temp + (i32.load8_s + (i32.const 22) + ) + ) + (drop + (i32.extend16_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load8_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64 + ;; As above, but with i64. + (local $temp i64) + (local.set $temp + (i64.load8_s + (i32.const 22) + ) + ) + (drop + (i64.extend8_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-i16 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load16_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-i16 + (local $temp i64) + (local.set $temp + (i64.load16_s + (i32.const 22) + ) + ) + (drop + (i64.extend16_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-i32 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load32_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-i32 + (local $temp i64) + (local.set $temp + (i64.load32_s + (i32.const 22) + ) + ) + (drop + (i64.extend32_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-mismatch-good (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load8_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-mismatch-good + (local $temp i64) + (local.set $temp + (i64.load8_s + (i32.const 22) + ) + ) + (drop + (i64.extend16_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-mismatch-good2 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load8_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-mismatch-good2 + (local $temp i64) + (local.set $temp + (i64.load8_s + (i32.const 22) + ) + ) + (drop + (i64.extend32_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-mismatch-good3 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load16_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-mismatch-good3 + (local $temp i64) + (local.set $temp + (i64.load16_s + (i32.const 22) + ) + ) + (drop + (i64.extend32_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-mismatch-bad (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load16_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.extend8_s + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-mismatch-bad + (local $temp i64) + (local.set $temp + (i64.load16_s + (i32.const 22) + ) + ) + (drop + (i64.extend8_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-mismatch-bad2 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load32_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.extend8_s + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-mismatch-bad2 + (local $temp i64) + (local.set $temp + (i64.load32_s + (i32.const 22) + ) + ) + (drop + (i64.extend8_s + (local.get $temp) + ) + ) + ) + + ;; CHECK: (func $i64-mismatch-bad3 (type $0) + ;; CHECK-NEXT: (local $temp i64) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i64.load32_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.extend16_s + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $i64-mismatch-bad3 + (local $temp i64) + (local.set $temp + (i64.load32_s + (i32.const 22) + ) + ) + (drop + (i64.extend16_s + (local.get $temp) + ) + ) + ) +) |