diff options
-rw-r--r-- | src/passes/PickLoadSigns.cpp | 39 | ||||
-rw-r--r-- | test/lit/passes/pick-load-signs_sign-ext.wast | 80 |
2 files changed, 102 insertions, 17 deletions
diff --git a/src/passes/PickLoadSigns.cpp b/src/passes/PickLoadSigns.cpp index 587314209..693fd4a68 100644 --- a/src/passes/PickLoadSigns.cpp +++ b/src/passes/PickLoadSigns.cpp @@ -20,10 +20,11 @@ namespace wasm { +// // Adjust load signedness based on usage. If a load only has uses that sign or // unsign it anyhow, then it could be either, and picking the popular one can -// help remove the most sign/unsign operations -// unsigned, then it could be either +// help remove the most sign/unsign operations. +// struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> { bool isFunctionParallel() override { return true; } @@ -59,13 +60,20 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> { } void visitLocalGet(LocalGet* curr) { - // this is a use. check from the context what it is, signed or unsigned, - // etc. + // This is a use. check from the context what it is, signed or unsigned, + // etc. Sign- and zero-extends may have two levels of nesting (x << K >> K) + // or one (x & K), so check both. auto& usage = usages[curr->index]; usage.totalUsages++; - if (expressionStack.size() >= 2) { - auto* parent = expressionStack[expressionStack.size() - 2]; - if (Properties::getZeroExtValue(parent)) { + for (Index i = 2; i <= 3; i++) { + if (expressionStack.size() < i) { + // We do not have that many expressions above us. (And if we don't have + // 2 then we definitely don't have 3.) + break; + } + + auto* parent = expressionStack[expressionStack.size() - i]; + if (Properties::getZeroExtValue(parent) == curr) { auto bits = Properties::getZeroExtBits(parent); if (usage.unsignedUsages == 0) { usage.unsignedBits = bits; @@ -73,17 +81,14 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> { usage.unsignedBits = 0; } usage.unsignedUsages++; - } else if (expressionStack.size() >= 3) { - auto* grandparent = expressionStack[expressionStack.size() - 3]; - if (Properties::getSignExtValue(grandparent)) { - auto bits = Properties::getSignExtBits(grandparent); - if (usage.signedUsages == 0) { - usage.signedBits = bits; - } else if (usage.signedBits != bits) { - usage.signedBits = 0; - } - usage.signedUsages++; + } else if (Properties::getSignExtValue(parent) == curr) { + auto bits = Properties::getSignExtBits(parent); + if (usage.signedUsages == 0) { + usage.signedBits = bits; + } else if (usage.signedBits != bits) { + usage.signedBits = 0; } + usage.signedUsages++; } } } diff --git a/test/lit/passes/pick-load-signs_sign-ext.wast b/test/lit/passes/pick-load-signs_sign-ext.wast new file mode 100644 index 000000000..c50af32c5 --- /dev/null +++ b/test/lit/passes/pick-load-signs_sign-ext.wast @@ -0,0 +1,80 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --pick-load-signs -all -S -o - | filecheck %s + +(module + ;; CHECK: (memory $0 16 17 shared) + (memory $0 16 17 shared) + + ;; CHECK: (func $load-other-use (type $0) (result i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $label (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.load8_u + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.extend8_s + ;; CHECK-NEXT: (br_if $label + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $load-other-use (result i32) + (local $temp i32) + ;; The load here is unsigned, while the value in the local is used in two + ;; ways: it is sign-extended, and it is sent on a branch by a br_if. We must + ;; not ignore the branch, as if we did then we'd think the only use is signed, + ;; and we'd optimize load8_u => load8_s. + (block $label (result i32) + (local.set $temp + (i32.load8_u + (i32.const 22) + ) + ) + (drop + (i32.extend8_s + (br_if $label + (local.get $temp) + (i32.const 1) + ) + ) + ) + (unreachable) + ) + ) + + ;; CHECK: (func $load-valid (type $1) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.load8_s + ;; CHECK-NEXT: (i32.const 22) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.extend8_s + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $load-valid + (local $temp i32) + ;; As above, but remove the br_if in the middle. Now this is a valid case to + ;; optimize, load8_u => load8_s. + (local.set $temp + (i32.load8_u + (i32.const 22) + ) + ) + (drop + (i32.extend8_s + (local.get $temp) + ) + ) + ) +) + |