diff options
author | Alon Zakai <alonzakai@gmail.com> | 2016-07-17 17:17:15 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-17 17:17:15 -0700 |
commit | 9f0c103be14144488c9bc64823d63e2c398eaa6e (patch) | |
tree | d1554fa16e241dcf171989c31da65a827a2d43b4 /src | |
parent | cd61f6a1ae959cc3ee22f4e72f0f4ba73c2abbd1 (diff) | |
parent | 4a6e828989ab6f627e6b35c66bd8c6130c9d65e2 (diff) | |
download | binaryen-9f0c103be14144488c9bc64823d63e2c398eaa6e.tar.gz binaryen-9f0c103be14144488c9bc64823d63e2c398eaa6e.tar.bz2 binaryen-9f0c103be14144488c9bc64823d63e2c398eaa6e.zip |
Merge pull request #645 from WebAssembly/coalesce-copies
Optimize to remove as many copies as possible in coalesce-locals, and other opts
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 90 |
1 files changed, 69 insertions, 21 deletions
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index f5d51a4c0..2c707b614 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -79,12 +79,12 @@ struct LocalSet : std::vector<Index> { // TODO: binary search in all the following void insert(Index x) { - for (size_t i = 0; i < size(); i++) { + for (Index i = 0; i < size(); i++) { auto seen = (*this)[i]; if (seen == x) return; if (seen > x) { resize(size() + 1); - for (size_t j = size() - 1; j > i; j--) { + for (Index j = size() - 1; j > i; j--) { (*this)[j] = (*this)[j - 1]; } (*this)[i] = x; @@ -96,9 +96,9 @@ struct LocalSet : std::vector<Index> { } bool erase(Index x) { - for (size_t i = 0; i < size(); i++) { + for (Index i = 0; i < size(); i++) { if ((*this)[i] == x) { - for (size_t j = i + 1; j < size(); j++) { + for (Index j = i + 1; j < size(); j++) { (*this)[j - 1] = (*this)[j]; } resize(size() - 1); @@ -109,14 +109,14 @@ struct LocalSet : std::vector<Index> { } bool has(Index x) { - for (size_t i = 0; i < size(); i++) { + for (Index i = 0; i < size(); i++) { if ((*this)[i] == x) return true; } return false; } void verify() const { - for (size_t i = 1; i < size(); i++) { + for (Index i = 1; i < size(); i++) { assert((*this)[i - 1] < (*this)[i]); } } @@ -175,6 +175,10 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal static void doVisitSetLocal(CoalesceLocals* self, Expression** currp) { auto* curr = (*currp)->cast<SetLocal>(); self->currBasicBlock->contents.actions.emplace_back(Action::Set, curr->index, currp); + + // if this is a copy, note it + auto* get = curr->value->dynCast<GetLocal>(); + if (get) self->addCopy(curr->index, get->index); } // main entry point @@ -217,10 +221,25 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal bool interferes(Index i, Index j) { return interferences[std::min(i, j) * numLocals + std::max(i, j)]; } + + // copying state + + std::vector<uint8_t> copies; // canonicalized - accesses should check (low, high) + + void addCopy(Index i, Index j) { + auto k = std::min(i, j) * numLocals + std::max(i, j); + copies[k] = std::min(copies[k], uint8_t(254)) + 1; + } + + bool getCopies(Index i, Index j) { + return copies[std::min(i, j) * numLocals + std::max(i, j)]; + } }; void CoalesceLocals::doWalkFunction(Function* func) { numLocals = func->getNumLocals(); + copies.resize(numLocals * numLocals); + std::fill(copies.begin(), copies.end(), 0); // collect initial liveness info WalkerPass<CFGWalker<CoalesceLocals, Visitor<CoalesceLocals>, Liveness>>::doWalkFunction(func); // ignore links to dead blocks, so they don't confuse us and we can see their stores are all ineffective @@ -259,7 +278,7 @@ void CoalesceLocals::flowLiveness() { // do the first scan through the block, starting with nothing live at the end, and updating the liveness at the start scanLivenessThroughActions(curr->contents.actions, curr->contents.start); } - // at every point in time, we assume we already noted interferences between things already known alive at the end, and scanned back throught the block using that + // at every point in time, we assume we already noted interferences between things already known alive at the end, and scanned back through the block using that while (queue.size() > 0) { auto iter = queue.begin(); auto* curr = *iter; @@ -287,9 +306,9 @@ void CoalesceLocals::flowLiveness() { #ifdef CFG_DEBUG std::hash<std::vector<bool>> hasher; std::cout << getFunction()->name << ": interference hash: " << hasher(*(std::vector<bool>*)&interferences) << "\n"; - for (size_t i = 0; i < numLocals; i++) { + for (Index i = 0; i < numLocals; i++) { std::cout << "int for " << getFunction()->getLocalName(i) << " [" << i << "]: "; - for (size_t j = 0; j < numLocals; j++) { + for (Index j = 0; j < numLocals; j++) { if (interferes(i, j)) std::cout << getFunction()->getLocalName(j) << " "; } std::cout << "\n"; @@ -304,7 +323,7 @@ bool CoalesceLocals::mergeStartsAndCheckChange(std::vector<BasicBlock*>& blocks, ret = blocks[0]->contents.start; if (blocks.size() > 1) { // more than one, so we must merge - for (size_t i = 1; i < blocks.size(); i++) { + for (Index i = 1; i < blocks.size(); i++) { ret = ret.merge(blocks[i]->contents.start); } } @@ -358,9 +377,9 @@ void CoalesceLocals::calculateInterferences() { } void CoalesceLocals::calculateInterferences(const LocalSet& locals) { - size_t size = locals.size(); - for (size_t i = 0; i < size; i++) { - for (size_t j = i + 1; j < size; j++) { + Index size = locals.size(); + for (Index i = 0; i < size; i++) { + for (Index j = i + 1; j < size; j++) { interfereLowHigh(locals[i], locals[j]); } } @@ -374,10 +393,13 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector // TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip) std::vector<WasmType> types; std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it + std::vector<uint8_t> newCopies; // new index * numLocals => list of all copies of locals merged to it indices.resize(numLocals); types.resize(numLocals); newInterferences.resize(numLocals * numLocals); + newCopies.resize(numLocals * numLocals); std::fill(newInterferences.begin(), newInterferences.end(), 0); + std::fill(newCopies.begin(), newCopies.end(), 0); Index nextFree = 0; // we can't reorder parameters, they are fixed in order, and cannot coalesce auto numParams = getFunction()->getNumParams(); @@ -386,18 +408,24 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector assert(order[i] == i); // order must leave the params in place indices[i] = i; types[i] = getFunction()->getLocalType(i); - for (size_t j = 0; j < numLocals; j++) { + for (Index j = numParams; j < numLocals; j++) { newInterferences[numLocals * i + j] = interferes(i, j); + newCopies[numLocals * i + j] = getCopies(i, j); } nextFree++; } for (; i < numLocals; i++) { Index actual = order[i]; Index found = -1; + uint8_t foundCopies = -1; for (Index j = 0; j < nextFree; j++) { if (!newInterferences[j * numLocals + actual] && getFunction()->getLocalType(actual) == types[j]) { - indices[actual] = found = j; - break; + // this does not interfere, so it might be what we want. but pick the one eliminating the most copies + auto currCopies = newCopies[j * numLocals + actual]; + if (found == Index(-1) || currCopies > foundCopies) { + indices[actual] = found = j; + foundCopies = currCopies; + } } } if (found == Index(-1)) { @@ -405,22 +433,42 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector types[found] = getFunction()->getLocalType(actual); nextFree++; } - // merge new interferences for the new index - for (size_t j = 0; j < numLocals; j++) { + // merge new interferences and copies for the new index + for (Index k = i + 1; k < numLocals; k++) { + auto j = order[k]; // go in the order, we only need to update for those we will see later newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(actual, j); + newCopies[found * numLocals + j] += getCopies(actual, j); } } } void CoalesceLocals::pickIndices(std::vector<Index>& indices) { - // use the natural order. this is less arbitrary than it seems, as the program + if (numLocals == 0) return; + if (numLocals == 1) { + indices.push_back(0); + return; + } + // first try the natural order. this is less arbitrary than it seems, as the program // may have a natural order of locals inherent in it. std::vector<Index> order; order.resize(numLocals); - for (size_t i = 0; i < numLocals; i++) { + for (Index i = 0; i < numLocals; i++) { order[i] = i; } pickIndicesFromOrder(order, indices); + auto maxIndex = *std::max_element(indices.begin(), indices.end()); + // next try the reverse order. this both gives us anothe chance at something good, + // and also the very naturalness of the simple order may be quite suboptimal + auto numParams = getFunction()->getNumParams(); + for (Index i = numParams; i < numLocals; i++) { + order[i] = numParams + numLocals - 1 - i; + } + std::vector<Index> reverseIndices; + pickIndicesFromOrder(order, reverseIndices); + auto reverseMaxIndex = *std::max_element(reverseIndices.begin(), reverseIndices.end()); + if (reverseMaxIndex < maxIndex) { + indices.swap(reverseIndices); + } } void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) { @@ -452,7 +500,7 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) } auto oldVars = getFunction()->vars; getFunction()->vars.resize(newNumLocals - numParams); - for (size_t index = numParams; index < numLocals; index++) { + for (Index index = numParams; index < numLocals; index++) { Index newIndex = indices[index]; if (newIndex >= numParams) { getFunction()->vars[newIndex - numParams] = oldVars[index - numParams]; |