summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai (kripken) <alonzakai@gmail.com>2017-02-13 15:17:57 -0800
committerAlon Zakai (kripken) <alonzakai@gmail.com>2017-02-16 22:45:39 -0800
commiteec567640d154147fb88754fd13aada738a63eef (patch)
tree38c4479faf66e20a953f833d298855315d1b8627
parent04d38f6f881b16a014c47e897a4590d76ec548ff (diff)
downloadbinaryen-eec567640d154147fb88754fd13aada738a63eef.tar.gz
binaryen-eec567640d154147fb88754fd13aada738a63eef.tar.bz2
binaryen-eec567640d154147fb88754fd13aada738a63eef.zip
use local info about maxBits and sign-extendedness in OptimizeInstructions
fix the maxBits of a signed load, which this uncovered - all the bits may be used in such a case
-rw-r--r--src/passes/OptimizeInstructions.cpp131
-rw-r--r--test/emcc_hello_world.fromasm5
-rw-r--r--test/emcc_hello_world.fromasm.imprecise5
-rw-r--r--test/passes/optimize-instructions.txt259
-rw-r--r--test/passes/optimize-instructions.wast289
5 files changed, 668 insertions, 21 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 844ed06ac..8e5ff6c12 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -166,7 +166,10 @@ struct Match {
// returns the maximum amount of bits used in an integer expression
// not extremely precise (doesn't look into add operands, etc.)
-static Index getMaxBits(Expression* curr) {
+// LocalInfoProvider is an optional class that can provide answers about
+// get_local.
+template<typename LocalInfoProvider>
+Index getMaxBits(Expression* curr, LocalInfoProvider* localInfoProvider) {
if (auto* const_ = curr->dynCast<Const>()) {
switch (curr->type) {
case i32: return 32 - const_->value.countLeadingZeroes().geti32();
@@ -179,17 +182,17 @@ static Index getMaxBits(Expression* curr) {
case AddInt32: case SubInt32: case MulInt32:
case DivSInt32: case DivUInt32: case RemSInt32:
case RemUInt32: case RotLInt32: case RotRInt32: return 32;
- case AndInt32: return std::min(getMaxBits(binary->left), getMaxBits(binary->right));
- case OrInt32: case XorInt32: return std::max(getMaxBits(binary->left), getMaxBits(binary->right));
+ case AndInt32: return std::min(getMaxBits(binary->left, localInfoProvider), getMaxBits(binary->right, localInfoProvider));
+ case OrInt32: case XorInt32: return std::max(getMaxBits(binary->left, localInfoProvider), getMaxBits(binary->right, localInfoProvider));
case ShlInt32: {
if (auto* shifts = binary->right->dynCast<Const>()) {
- return std::min(Index(32), getMaxBits(binary->left) + shifts->value.geti32());
+ return std::min(Index(32), getMaxBits(binary->left, localInfoProvider) + shifts->value.geti32());
}
return 32;
}
case ShrUInt32: {
if (auto* shift = binary->right->dynCast<Const>()) {
- auto maxBits = getMaxBits(binary->left);
+ auto maxBits = getMaxBits(binary->left, localInfoProvider);
auto shifts = std::min(Index(shift->value.geti32()), maxBits); // can ignore more shifts than zero us out
return std::max(Index(0), maxBits - shifts);
}
@@ -197,7 +200,7 @@ static Index getMaxBits(Expression* curr) {
}
case ShrSInt32: {
if (auto* shift = binary->right->dynCast<Const>()) {
- auto maxBits = getMaxBits(binary->left);
+ auto maxBits = getMaxBits(binary->left, localInfoProvider);
if (maxBits == 32) return 32;
auto shifts = std::min(Index(shift->value.geti32()), maxBits); // can ignore more shifts than zero us out
return std::max(Index(0), maxBits - shifts);
@@ -225,14 +228,19 @@ static Index getMaxBits(Expression* curr) {
case ClzInt32: case CtzInt32: case PopcntInt32: return 5;
case ClzInt64: case CtzInt64: case PopcntInt64: return 6;
case EqZInt32: case EqZInt64: return 1;
- case WrapInt64: return std::min(Index(32), getMaxBits(unary->value));
+ case WrapInt64: return std::min(Index(32), getMaxBits(unary->value, localInfoProvider));
default: {}
}
} else if (auto* set = curr->dynCast<SetLocal>()) {
// a tee passes through the value
- return getMaxBits(set->value);
+ return getMaxBits(set->value, localInfoProvider);
+ } else if (auto* get = curr->dynCast<GetLocal>()) {
+ return localInfoProvider->getMaxBitsForLocal(get);
} else if (auto* load = curr->dynCast<Load>()) {
- return 8 * load->bytes;
+ // if signed, then the sign-extension might fill all the bits
+ if (!load->signed_) {
+ return 8 * load->bytes;
+ }
}
switch (curr->type) {
case i32: return 32;
@@ -334,6 +342,76 @@ T* getFallthroughDynCast(Expression* curr) {
return nullptr;
}
+// Useful information about locals
+struct LocalInfo {
+ static const Index kUnknown = Index(-1);
+
+ Index maxBits;
+ Index signExtedBits;
+};
+
+struct LocalScanner : PostWalker<LocalScanner, Visitor<LocalScanner>> {
+ std::vector<LocalInfo>& localInfo;
+
+ LocalScanner(std::vector<LocalInfo>& localInfo) : localInfo(localInfo) {}
+
+ void doWalkFunction(Function* func) {
+ // prepare
+ localInfo.resize(func->getNumLocals());
+ for (Index i = 0; i < func->getNumLocals(); i++) {
+ auto& info = localInfo[i];
+ if (func->isParam(i)) {
+ info.maxBits = getBitsForType(func->getLocalType(i)); // worst-case
+ info.signExtedBits = LocalInfo::kUnknown; // we will never know anything
+ } else {
+ info.maxBits = info.signExtedBits = 0; // we are open to learning
+ }
+ }
+ // walk
+ PostWalker<LocalScanner, Visitor<LocalScanner>>::doWalkFunction(func);
+ // finalize
+ for (Index i = 0; i < func->getNumLocals(); i++) {
+ auto& info = localInfo[i];
+ if (info.signExtedBits == LocalInfo::kUnknown) {
+ info.signExtedBits = 0;
+ }
+ }
+ }
+
+ void visitSetLocal(SetLocal* curr) {
+ auto* func = getFunction();
+ if (func->isParam(curr->index)) return;
+ auto type = getFunction()->getLocalType(curr->index);
+ if (type != i32 && type != i64) return;
+ // an integer var, worth processing
+ auto& info = localInfo[curr->index];
+ info.maxBits = std::max(info.maxBits, getMaxBits(curr->value, this));
+ if (getSignExt(curr->value)) {
+ auto bits = getSignExtBits(curr->value);
+ if (info.signExtedBits == 0) {
+ info.signExtedBits = bits; // first info we see
+ } else if (info.signExtedBits != bits) {
+ info.signExtedBits = LocalInfo::kUnknown; // contradictory information, give up
+ }
+ } else {
+ info.signExtedBits = LocalInfo::kUnknown; // an input which isn't even a sign ext, give up
+ }
+ }
+
+ // define this for the templated getMaxBits method. we know nothing here yet about locals, so return the maxes
+ Index getMaxBitsForLocal(GetLocal* get) {
+ return getBitsForType(get->type);
+ }
+
+ Index getBitsForType(WasmType type) {
+ switch (type) {
+ case i32: return 32;
+ case i64: return 64;
+ default: return -1;
+ }
+ }
+};
+
// Main pass class
struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions, UnifiedExpressionVisitor<OptimizeInstructions>>> {
bool isFunctionParallel() override { return true; }
@@ -346,6 +424,16 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
#endif
}
+ void doWalkFunction(Function* func) {
+ // first, scan locals
+ {
+ LocalScanner scanner(localInfo);
+ scanner.walkFunction(func);
+ }
+ // main walk
+ WalkerPass<PostWalker<OptimizeInstructions, UnifiedExpressionVisitor<OptimizeInstructions>>>::doWalkFunction(func);
+ }
+
void visitExpression(Expression* curr) {
// we may be able to apply multiple patterns, one may open opportunities that look deeper NB: patterns must not have cycles
while (1) {
@@ -399,7 +487,8 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
}
}
// if the sign-extend input cannot have a sign bit, we don't need it
- if (getMaxBits(ext) + extraShifts < bits) {
+ // we also don't need it if it already has an identical-sized sign extend
+ if (getMaxBits(ext, this) + extraShifts < bits || isSignExted(ext, bits)) {
return removeAlmostSignExt(binary);
}
} else if (binary->op == EqInt32 || binary->op == NeInt32) {
@@ -448,7 +537,7 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
}
}
} else if (auto maskedBits = getMaskedBits(mask)) {
- if (getMaxBits(binary->left) <= maskedBits) {
+ if (getMaxBits(binary->left, this) <= maskedBits) {
// a mask of lower bits is not needed if we are already smaller
return binary->left;
}
@@ -586,7 +675,15 @@ struct OptimizeInstructions : public WalkerPass<PostWalker<OptimizeInstructions,
return nullptr;
}
+ Index getMaxBitsForLocal(GetLocal* get) {
+ // check what we know about the local
+ return localInfo[get->index].maxBits;
+ }
+
private:
+ // Information about our locals
+ std::vector<LocalInfo> localInfo;
+
// Optimize given that the expression is flowing into a boolean context
Expression* optimizeBoolean(Expression* boolean) {
if (auto* unary = boolean->dynCast<Unary>()) {
@@ -816,6 +913,18 @@ private:
innerConst->value = innerConst->value.sub(outerConst->value);
return inner;
}
+
+ // check if an expression is already sign-extended
+ bool isSignExted(Expression* curr, Index bits) {
+ if (getSignExt(curr)) {
+ return getSignExtBits(curr) == bits;
+ }
+ if (auto* get = curr->dynCast<GetLocal>()) {
+ // check what we know about the local
+ return localInfo[get->index].signExtedBits == bits;
+ }
+ return false;
+ }
};
Pass *createOptimizeInstructionsPass() {
diff --git a/test/emcc_hello_world.fromasm b/test/emcc_hello_world.fromasm
index 88db1c1ce..f01ba2f5d 100644
--- a/test/emcc_hello_world.fromasm
+++ b/test/emcc_hello_world.fromasm
@@ -5507,10 +5507,7 @@
(tee_local $5
(i32.add
(i32.xor
- (i32.and
- (get_local $32)
- (i32.const 1)
- )
+ (get_local $32)
(i32.const 1)
)
(get_local $17)
diff --git a/test/emcc_hello_world.fromasm.imprecise b/test/emcc_hello_world.fromasm.imprecise
index 5ed229e6f..22878e91f 100644
--- a/test/emcc_hello_world.fromasm.imprecise
+++ b/test/emcc_hello_world.fromasm.imprecise
@@ -5444,10 +5444,7 @@
(tee_local $5
(i32.add
(i32.xor
- (i32.and
- (get_local $32)
- (i32.const 1)
- )
+ (get_local $32)
(i32.const 1)
)
(get_local $17)
diff --git a/test/passes/optimize-instructions.txt b/test/passes/optimize-instructions.txt
index 31dceaf60..80d264e94 100644
--- a/test/passes/optimize-instructions.txt
+++ b/test/passes/optimize-instructions.txt
@@ -1240,8 +1240,22 @@
)
)
(drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.load8_s
+ (i32.const 256)
+ )
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
(i32.shr_u
- (i32.load8_s
+ (i32.load8_u
(i32.const 256)
)
(i32.const 1)
@@ -1344,4 +1358,247 @@
)
)
)
+ (func $local-info-zero-ext (type $4) (param $0 i32) (param $1 i32)
+ (local $x i32)
+ (local $y i32)
+ (local $z i32)
+ (local $w i32)
+ (set_local $x
+ (i32.const 212)
+ )
+ (drop
+ (get_local $x)
+ )
+ (set_local $y
+ (i32.const 500)
+ )
+ (drop
+ (i32.and
+ (get_local $y)
+ (i32.const 255)
+ )
+ )
+ (set_local $0
+ (i32.const 212)
+ )
+ (drop
+ (i32.and
+ (get_local $0)
+ (i32.const 255)
+ )
+ )
+ (set_local $z
+ (i32.const 212)
+ )
+ (set_local $z
+ (i32.const 220)
+ )
+ (drop
+ (get_local $z)
+ )
+ (set_local $w
+ (i32.const 212)
+ )
+ (set_local $w
+ (i32.const 1000)
+ )
+ (drop
+ (i32.and
+ (get_local $w)
+ (i32.const 255)
+ )
+ )
+ )
+ (func $local-info-sign-ext-bitsize (type $4) (param $0 i32) (param $1 i32)
+ (local $x i32)
+ (local $y i32)
+ (local $z i32)
+ (local $w i32)
+ (set_local $x
+ (i32.const 127)
+ )
+ (drop
+ (get_local $x)
+ )
+ (set_local $y
+ (i32.const 128)
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $y)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $0
+ (i32.const 127)
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $z
+ (i32.const 127)
+ )
+ (set_local $z
+ (i32.const 100)
+ )
+ (drop
+ (get_local $z)
+ )
+ (set_local $w
+ (i32.const 127)
+ )
+ (set_local $w
+ (i32.const 150)
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $w)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ )
+ (func $local-info-sign-ext-already-exted (type $4) (param $0 i32) (param $1 i32)
+ (local $x i32)
+ (local $y i32)
+ (local $z i32)
+ (local $w i32)
+ (set_local $x
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (get_local $x)
+ )
+ (set_local $y
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $y)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $0
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $z
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $z
+ (i32.shr_s
+ (i32.shl
+ (get_local $1)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (get_local $z)
+ )
+ (set_local $w
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $w
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 23)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $w)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 23)
+ )
+ )
+ )
+ (func $signed-loads-fill-the-bits (type $3) (param $$e i32) (result i32)
+ (local $$0 i32)
+ (local $$conv i32)
+ (set_local $$0
+ (i32.load8_s
+ (i32.const 1024)
+ )
+ )
+ (set_local $$conv
+ (i32.and
+ (get_local $$0)
+ (i32.const 255)
+ )
+ )
+ (return
+ (i32.eq
+ (get_local $$conv)
+ (get_local $$e)
+ )
+ )
+ )
)
diff --git a/test/passes/optimize-instructions.wast b/test/passes/optimize-instructions.wast
index c305ccbbb..0169bb054 100644
--- a/test/passes/optimize-instructions.wast
+++ b/test/passes/optimize-instructions.wast
@@ -1556,7 +1556,21 @@
(i32.shr_s
(i32.shl
(i32.shr_u
- (i32.load8_s ;; one byte, but reduced to 7
+ (i32.load8_s ;; one byte, but sexted to 32
+ (i32.const 256)
+ )
+ (i32.const 1)
+ )
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (i32.shr_u
+ (i32.load8_u ;; one byte, but reduced to 7
(i32.const 256)
)
(i32.const 1)
@@ -1691,4 +1705,277 @@
)
)
)
+ (func $local-info-zero-ext (param $0 i32) (param $1 i32)
+ (local $x i32)
+ (local $y i32)
+ (local $z i32)
+ (local $w i32)
+ (set_local $x
+ (i32.const 212) ;; mask is unneeded, we are small
+ )
+ (drop
+ (i32.and
+ (get_local $x)
+ (i32.const 255)
+ )
+ )
+ (set_local $y
+ (i32.const 500) ;; mask is needed, we are too big
+ )
+ (drop
+ (i32.and
+ (get_local $y)
+ (i32.const 255)
+ )
+ )
+ (set_local $0
+ (i32.const 212) ;; mask is unneeded, but we are a param, not a var, so no
+ )
+ (drop
+ (i32.and
+ (get_local $0)
+ (i32.const 255)
+ )
+ )
+ (set_local $z
+ (i32.const 212) ;; mask is unneeded, we are small
+ )
+ (set_local $z
+ (i32.const 220) ;; mask is still unneeded even with 2 uses
+ )
+ (drop
+ (i32.and
+ (get_local $z)
+ (i32.const 255)
+ )
+ )
+ (set_local $w
+ (i32.const 212) ;; mask is unneeded, we are small
+ )
+ (set_local $w
+ (i32.const 1000) ;; mask is needed, one use is too big
+ )
+ (drop
+ (i32.and
+ (get_local $w)
+ (i32.const 255)
+ )
+ )
+ )
+ (func $local-info-sign-ext-bitsize (param $0 i32) (param $1 i32)
+ (local $x i32)
+ (local $y i32)
+ (local $z i32)
+ (local $w i32)
+ (set_local $x
+ (i32.const 127) ;; mask is unneeded, we are small
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $x)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $y
+ (i32.const 128) ;; mask is needed, we are too big
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $y)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $0
+ (i32.const 127) ;; mask is unneeded, but we are a param, not a var, so no
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $z
+ (i32.const 127) ;; mask is unneeded, we are small
+ )
+ (set_local $z
+ (i32.const 100) ;; mask is still unneeded even with 2 uses
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $z)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $w
+ (i32.const 127) ;; mask is unneeded, we are small
+ )
+ (set_local $w
+ (i32.const 150) ;; mask is needed, one use is too big
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $w)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ )
+ (func $local-info-sign-ext-already-exted (param $0 i32) (param $1 i32)
+ (local $x i32)
+ (local $y i32)
+ (local $z i32)
+ (local $w i32)
+ (set_local $x
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; already sign-exted here, so no need later
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $x)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $y
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; already sign-exted here, but wrong bit size
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $y)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $0
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; already sign-exted here, so no need later, but we are a param
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $0)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $z
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; already sign-exted here, so no need later
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $z
+ (i32.shr_s
+ (i32.shl
+ (get_local $1) ;; already sign-exted here, so no need later
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $z)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $w
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; already sign-exted here, so no need later
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (set_local $w
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; not quite a sign-ext
+ (i32.const 23)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop
+ (i32.shr_s
+ (i32.shl
+ (get_local $w)
+ (i32.const 24)
+ )
+ (i32.const 24)
+ )
+ )
+ (drop ;; odd corner case
+ (i32.shr_s
+ (i32.shl
+ (get_local $0) ;; param, so we should know nothing
+ (i32.const 24)
+ )
+ (i32.const 23) ;; different shift, smaller
+ )
+ )
+ )
+ (func $signed-loads-fill-the-bits (param $$e i32) (result i32)
+ (local $$0 i32)
+ (local $$conv i32)
+ (set_local $$0
+ (i32.load8_s ;; one byte, but 32 bits due to sign-extend
+ (i32.const 1024)
+ )
+ )
+ (set_local $$conv
+ (i32.and
+ (get_local $$0)
+ (i32.const 255) ;; so we need this zexting!
+ )
+ )
+ (return
+ (i32.eq
+ (get_local $$conv)
+ (get_local $$e)
+ )
+ )
+ )
)