diff options
Diffstat (limited to 'src/passes/CoalesceLocals.cpp')
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 88 |
1 files changed, 77 insertions, 11 deletions
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index 693a13131..2b6c63f2a 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -226,10 +226,13 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal // copying state std::vector<uint8_t> copies; // canonicalized - accesses should check (low, high) + std::vector<Index> totalCopies; // total # of copies for each local, with all others void addCopy(Index i, Index j) { auto k = std::min(i, j) * numLocals + std::max(i, j); copies[k] = std::min(copies[k], uint8_t(254)) + 1; + totalCopies[i]++; + totalCopies[j]++; } bool getCopies(Index i, Index j) { @@ -241,6 +244,8 @@ void CoalesceLocals::doWalkFunction(Function* func) { numLocals = func->getNumLocals(); copies.resize(numLocals * numLocals); std::fill(copies.begin(), copies.end(), 0); + totalCopies.resize(numLocals); + std::fill(totalCopies.begin(), totalCopies.end(), 0); // collect initial liveness info WalkerPass<CFGWalker<CoalesceLocals, Visitor<CoalesceLocals>, Liveness>>::doWalkFunction(func); // ignore links to dead blocks, so they don't confuse us and we can see their stores are all ineffective @@ -394,9 +399,10 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector } void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices, Index& removedCopies) { - // simple greedy coloring + // mostly-simple greedy coloring #if CFG_DEBUG - std::cerr << "pickIndicesFromOrder on " << getFunction()->name << '\n'; + std::cerr << "\npickIndicesFromOrder on " << getFunction()->name << '\n'; + std::cerr << getFunction()->body << '\n'; std::cerr << "order:\n"; for (auto i : order) std::cerr << i << ' '; std::cerr << '\n'; @@ -408,7 +414,7 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector for (Index j = i + 1; j < numLocals; j++) { std::cerr << int(interferes(i, j)) << ' '; } - std::cerr << '\n'; + std::cerr << " : $" << i << '\n'; } std::cerr << "copies:\n"; for (Index i = 0; i < numLocals; i++) { @@ -418,7 +424,11 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector for (Index j = i + 1; j < numLocals; j++) { std::cerr << int(getCopies(i, j)) << ' '; } - std::cerr << '\n'; + std::cerr << " : $" << i << '\n'; + } + std::cerr << "total copies:\n"; + for (Index i = 0; i < numLocals; i++) { + std::cerr << " $" << i << ": " << totalCopies[i] << '\n'; } #endif // TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip) @@ -453,6 +463,7 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector for (Index j = 0; j < nextFree; j++) { if (!newInterferences[j * numLocals + actual] && getFunction()->getLocalType(actual) == types[j]) { // this does not interfere, so it might be what we want. but pick the one eliminating the most copies + // TODO: stop looking forward when there are no more items that have copies anyhow auto currCopies = newCopies[j * numLocals + actual]; if (found == Index(-1) || currCopies > foundCopies) { indices[actual] = found = j; @@ -468,6 +479,9 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector } else { removedCopies += foundCopies; } +#if CFG_DEBUG + std::cerr << "set local $" << actual << " to $" << found << '\n'; +#endif // merge new interferences and copies for the new index for (Index k = i + 1; k < numLocals; k++) { auto j = order[k]; // go in the order, we only need to update for those we will see later @@ -477,28 +491,78 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector } } +// Utilities for operating on permutation vectors + +static std::vector<Index> makeIdentity(Index num) { + std::vector<Index> ret; + ret.resize(num); + for (Index i = 0; i < num; i++) { + ret[i] = i; + } + return ret; +} + +static void setIdentity(std::vector<Index>& ret) { + auto num = ret.size(); + assert(num > 0); // must already be of the right size + for (Index i = 0; i < num; i++) { + ret[i] = i; + } +} + +static std::vector<Index> makeReversed(std::vector<Index>& original) { + std::vector<Index> ret; + auto num = original.size(); + ret.resize(num); + for (Index i = 0; i < num; i++) { + ret[original[i]] = i; + } + return ret; +} + +// given a baseline order, adjust it based on an important order of priorities (higher values +// are higher priority). The priorities take precedence, unless they are equal and then +// the original order should be kept. +std::vector<Index> adjustOrderByPriorities(std::vector<Index>& baseline, std::vector<Index>& priorities) { + std::vector<Index> ret = baseline; + std::vector<Index> reversed = makeReversed(baseline); + std::sort(ret.begin(), ret.end(), [&priorities, &reversed](Index x, Index y) { + return priorities[x] > priorities[y] || (priorities[x] == priorities[y] && reversed[x] < reversed[y]); + }); + return ret; +}; + void CoalesceLocals::pickIndices(std::vector<Index>& indices) { if (numLocals == 0) return; if (numLocals == 1) { indices.push_back(0); return; } + if (getFunction()->getNumVars() <= 1) { + // nothing to think about here, since we can't reorder params + indices = makeIdentity(numLocals); + return; + } + // take into account total copies. but we must keep params in place, so give them max priority + auto adjustedTotalCopies = totalCopies; + auto numParams = getFunction()->getNumParams(); + for (Index i = 0; i < numParams; i++) { + adjustedTotalCopies[i] = std::numeric_limits<Index>::max(); + } // first try the natural order. this is less arbitrary than it seems, as the program // may have a natural order of locals inherent in it. - std::vector<Index> order; - order.resize(numLocals); - for (Index i = 0; i < numLocals; i++) { - order[i] = i; - } + auto order = makeIdentity(numLocals); + order = adjustOrderByPriorities(order, adjustedTotalCopies); Index removedCopies; pickIndicesFromOrder(order, indices, removedCopies); auto maxIndex = *std::max_element(indices.begin(), indices.end()); - // next try the reverse order. this both gives us anothe chance at something good, + // next try the reverse order. this both gives us another chance at something good, // and also the very naturalness of the simple order may be quite suboptimal - auto numParams = getFunction()->getNumParams(); + setIdentity(order); for (Index i = numParams; i < numLocals; i++) { order[i] = numParams + numLocals - 1 - i; } + order = adjustOrderByPriorities(order, adjustedTotalCopies); std::vector<Index> reverseIndices; Index reverseRemovedCopies; pickIndicesFromOrder(order, reverseIndices, reverseRemovedCopies); @@ -618,6 +682,8 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) { // first, there may be an inherent order in the input (frequent indices are lower, // etc.). second, by ensuring we start with the natural order, we ensure we are at // least as good as the non-learning variant. + // TODO: use ::pickIndices from the parent, so we literally get the simpler approach + // as our first option first = false; } else { // leave params alone, shuffle the rest |