summaryrefslogtreecommitdiff
path: root/src/passes/CoalesceLocals.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/passes/CoalesceLocals.cpp')
-rw-r--r--src/passes/CoalesceLocals.cpp186
1 files changed, 117 insertions, 69 deletions
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp
index 621383ca4..a085b61fb 100644
--- a/src/passes/CoalesceLocals.cpp
+++ b/src/passes/CoalesceLocals.cpp
@@ -14,32 +14,31 @@
* limitations under the License.
*/
-
//
// Coalesce locals, in order to reduce the total number of locals. This
// is similar to register allocation, however, there is never any
// spilling, and there isn't a fixed number of locals.
//
-
#include <algorithm>
#include <memory>
#include <unordered_set>
-#include "wasm.h"
-#include "pass.h"
-#include "ir/utils.h"
#include "cfg/liveness-traversal.h"
-#include "wasm-builder.h"
+#include "ir/utils.h"
+#include "pass.h"
#include "support/learning.h"
#include "support/permutations.h"
+#include "wasm-builder.h"
+#include "wasm.h"
#ifdef CFG_PROFILE
#include "support/timing.h"
#endif
namespace wasm {
-struct CoalesceLocals : public WalkerPass<LivenessWalker<CoalesceLocals, Visitor<CoalesceLocals>>> {
+struct CoalesceLocals
+ : public WalkerPass<LivenessWalker<CoalesceLocals, Visitor<CoalesceLocals>>> {
bool isFunctionParallel() override { return true; }
Pass* create() override { return new CoalesceLocals; }
@@ -54,23 +53,30 @@ struct CoalesceLocals : public WalkerPass<LivenessWalker<CoalesceLocals, Visitor
void calculateInterferences(const LocalSet& locals);
- void pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices);
- void pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices, Index& removedCopies);
+ void pickIndicesFromOrder(std::vector<Index>& order,
+ std::vector<Index>& indices);
+ void pickIndicesFromOrder(std::vector<Index>& order,
+ std::vector<Index>& indices,
+ Index& removedCopies);
- virtual void pickIndices(std::vector<Index>& indices); // returns a vector of oldIndex => newIndex
+ // returns a vector of oldIndex => newIndex
+ virtual void pickIndices(std::vector<Index>& indices);
void applyIndices(std::vector<Index>& indices, Expression* root);
// interference state
- std::vector<bool> interferences; // canonicalized - accesses should check (low, high)
+ // canonicalized - accesses should check (low, high)
+ std::vector<bool> interferences;
void interfere(Index i, Index j) {
- if (i == j) return;
+ if (i == j)
+ return;
interferences[std::min(i, j) * numLocals + std::max(i, j)] = 1;
}
- void interfereLowHigh(Index low, Index high) { // optimized version where you know that low < high
+ // optimized version where you know that low < high
+ void interfereLowHigh(Index low, Index high) {
assert(low < high);
interferences[low * numLocals + high] = 1;
}
@@ -97,20 +103,25 @@ void CoalesceLocals::doWalkFunction(Function* func) {
applyIndices(indices, func->body);
}
-// A copy on a backedge can be especially costly, forcing us to branch just to do that copy.
-// Add weight to such copies, so we prioritize getting rid of them.
+// A copy on a backedge can be especially costly, forcing us to branch just to
+// do that copy. Add weight to such copies, so we prioritize getting rid of
+// them.
void CoalesceLocals::increaseBackEdgePriorities() {
for (auto* loopTop : loopTops) {
// ignore the first edge, it is the initial entry, we just want backedges
auto& in = loopTop->in;
for (Index i = 1; i < in.size(); i++) {
auto* arrivingBlock = in[i];
- if (arrivingBlock->out.size() > 1) continue; // we just want unconditional branches to the loop top, true phi fragments
+ if (arrivingBlock->out.size() > 1)
+ // we just want unconditional branches to the loop top, true phi
+ // fragments
+ continue;
for (auto& action : arrivingBlock->contents.actions) {
if (action.isSet()) {
auto* set = (*action.origin)->cast<SetLocal>();
if (auto* get = getCopy(set)) {
- // this is indeed a copy, add to the cost (default cost is 2, so this adds 50%, and can mostly break ties)
+ // this is indeed a copy, add to the cost (default cost is 2, so
+ // this adds 50%, and can mostly break ties)
addCopy(set->index, get->index);
}
}
@@ -123,8 +134,10 @@ void CoalesceLocals::calculateInterferences() {
interferences.resize(numLocals * numLocals);
std::fill(interferences.begin(), interferences.end(), false);
for (auto& curr : basicBlocks) {
- if (liveBlocks.count(curr.get()) == 0) continue; // ignore dead blocks
- // everything coming in might interfere, as it might come from a different block
+ if (liveBlocks.count(curr.get()) == 0)
+ continue; // ignore dead blocks
+ // everything coming in might interfere, as it might come from a different
+ // block
auto live = curr->contents.end;
calculateInterferences(live);
// scan through the block itself
@@ -166,18 +179,22 @@ void CoalesceLocals::calculateInterferences(const LocalSet& locals) {
// Indices decision making
-void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices) {
+void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
+ std::vector<Index>& indices) {
Index removedCopies;
pickIndicesFromOrder(order, indices, removedCopies);
}
-void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices, Index& removedCopies) {
- // mostly-simple greedy coloring
+void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
+ std::vector<Index>& indices,
+ Index& removedCopies) {
+// mostly-simple greedy coloring
#if CFG_DEBUG
std::cerr << "\npickIndicesFromOrder on " << getFunction()->name << '\n';
std::cerr << getFunction()->body << '\n';
std::cerr << "order:\n";
- for (auto i : order) std::cerr << i << ' ';
+ for (auto i : order)
+ std::cerr << i << ' ';
std::cerr << '\n';
std::cerr << "interferences:\n";
for (Index i = 0; i < numLocals; i++) {
@@ -204,16 +221,20 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector
std::cerr << " $" << i << ": " << totalCopies[i] << '\n';
}
#endif
- // TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip)
+ // TODO: take into account distribution (99-1 is better than 50-50 with two
+ // registers, for gzip)
std::vector<Type> types;
- std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it
- std::vector<uint8_t> newCopies; // new index * numLocals => list of all copies of locals merged to it
+ // new index * numLocals => list of all interferences of locals merged to it
+ std::vector<bool> newInterferences;
+ // new index * numLocals => list of all copies of locals merged to it
+ std::vector<uint8_t> newCopies;
indices.resize(numLocals);
types.resize(numLocals);
newInterferences.resize(numLocals * numLocals);
std::fill(newInterferences.begin(), newInterferences.end(), false);
auto numParams = getFunction()->getNumParams();
- newCopies.resize(numParams * numLocals); // start with enough room for the params
+ // start with enough room for the params
+ newCopies.resize(numParams * numLocals);
std::fill(newCopies.begin(), newCopies.end(), 0);
Index nextFree = 0;
removedCopies = 0;
@@ -234,9 +255,12 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector
Index found = -1;
uint8_t foundCopies = -1;
for (Index j = 0; j < nextFree; j++) {
- if (!newInterferences[j * numLocals + actual] && getFunction()->getLocalType(actual) == types[j]) {
- // this does not interfere, so it might be what we want. but pick the one eliminating the most copies
- // (we could stop looking forward when there are no more items that have copies anyhow, but it doesn't seem to help)
+ if (!newInterferences[j * numLocals + actual] &&
+ getFunction()->getLocalType(actual) == types[j]) {
+ // this does not interfere, so it might be what we want. but pick the
+ // one eliminating the most copies (we could stop looking forward when
+ // there are no more items that have copies anyhow, but it doesn't seem
+ // to help)
auto currCopies = newCopies[j * numLocals + actual];
if (found == Index(-1) || currCopies > foundCopies) {
indices[actual] = found = j;
@@ -258,46 +282,53 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector
#endif
// merge new interferences and copies for the new index
for (Index k = i + 1; k < numLocals; k++) {
- auto j = order[k]; // go in the order, we only need to update for those we will see later
- newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(actual, j);
+ // go in the order, we only need to update for those we will see later
+ auto j = order[k];
+ newInterferences[found * numLocals + j] =
+ newInterferences[found * numLocals + j] | interferes(actual, j);
newCopies[found * numLocals + j] += getCopies(actual, j);
}
}
}
-// given a baseline order, adjust it based on an important order of priorities (higher values
-// are higher priority). The priorities take precedence, unless they are equal and then
-// the original order should be kept.
-std::vector<Index> adjustOrderByPriorities(std::vector<Index>& baseline, std::vector<Index>& priorities) {
+// given a baseline order, adjust it based on an important order of priorities
+// (higher values are higher priority). The priorities take precedence, unless
+// they are equal and then the original order should be kept.
+std::vector<Index> adjustOrderByPriorities(std::vector<Index>& baseline,
+ std::vector<Index>& priorities) {
std::vector<Index> ret = baseline;
std::vector<Index> reversed = makeReversed(baseline);
std::sort(ret.begin(), ret.end(), [&priorities, &reversed](Index x, Index y) {
- return priorities[x] > priorities[y] || (priorities[x] == priorities[y] && reversed[x] < reversed[y]);
+ return priorities[x] > priorities[y] ||
+ (priorities[x] == priorities[y] && reversed[x] < reversed[y]);
});
return ret;
}
void CoalesceLocals::pickIndices(std::vector<Index>& indices) {
- if (numLocals == 0) return;
+ if (numLocals == 0)
+ return;
if (numLocals == 1) {
indices.push_back(0);
return;
}
- // take into account total copies. but we must keep params in place, so give them max priority
+ // take into account total copies. but we must keep params in place, so give
+ // them max priority
auto adjustedTotalCopies = totalCopies;
auto numParams = getFunction()->getNumParams();
for (Index i = 0; i < numParams; i++) {
adjustedTotalCopies[i] = std::numeric_limits<Index>::max();
}
- // first try the natural order. this is less arbitrary than it seems, as the program
- // may have a natural order of locals inherent in it.
+ // first try the natural order. this is less arbitrary than it seems, as the
+ // program may have a natural order of locals inherent in it.
auto order = makeIdentity(numLocals);
order = adjustOrderByPriorities(order, adjustedTotalCopies);
Index removedCopies;
pickIndicesFromOrder(order, indices, removedCopies);
auto maxIndex = *std::max_element(indices.begin(), indices.end());
- // next try the reverse order. this both gives us another chance at something good,
- // and also the very naturalness of the simple order may be quite suboptimal
+ // next try the reverse order. this both gives us another chance at something
+ // good, and also the very naturalness of the simple order may be quite
+ // suboptimal
setIdentity(order);
for (Index i = numParams; i < numLocals; i++) {
order[i] = numParams + numLocals - 1 - i;
@@ -306,15 +337,18 @@ void CoalesceLocals::pickIndices(std::vector<Index>& indices) {
std::vector<Index> reverseIndices;
Index reverseRemovedCopies;
pickIndicesFromOrder(order, reverseIndices, reverseRemovedCopies);
- auto reverseMaxIndex = *std::max_element(reverseIndices.begin(), reverseIndices.end());
- // prefer to remove copies foremost, as it matters more for code size (minus gzip), and
- // improves throughput.
- if (reverseRemovedCopies > removedCopies || (reverseRemovedCopies == removedCopies && reverseMaxIndex < maxIndex)) {
+ auto reverseMaxIndex =
+ *std::max_element(reverseIndices.begin(), reverseIndices.end());
+ // prefer to remove copies foremost, as it matters more for code size (minus
+ // gzip), and improves throughput.
+ if (reverseRemovedCopies > removedCopies ||
+ (reverseRemovedCopies == removedCopies && reverseMaxIndex < maxIndex)) {
indices.swap(reverseIndices);
}
}
-void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) {
+void CoalesceLocals::applyIndices(std::vector<Index>& indices,
+ Expression* root) {
assert(indices.size() == numLocals);
for (auto& curr : basicBlocks) {
auto& actions = curr->contents.actions;
@@ -325,15 +359,19 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root)
} else if (action.isSet()) {
auto* set = (*action.origin)->cast<SetLocal>();
set->index = indices[set->index];
- // in addition, we can optimize out redundant copies and ineffective sets
+ // in addition, we can optimize out redundant copies and ineffective
+ // sets
GetLocal* get;
- if ((get = set->value->dynCast<GetLocal>()) && get->index == set->index) {
+ if ((get = set->value->dynCast<GetLocal>()) &&
+ get->index == set->index) {
action.removeCopy();
continue;
}
// remove ineffective actions
if (!action.effective) {
- *action.origin = set->value; // value may have no side effects, further optimizations can eliminate it
+ // value may have no side effects, further optimizations can eliminate
+ // it
+ *action.origin = set->value;
if (!set->isTee()) {
// we need to drop it
Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(set);
@@ -382,10 +420,12 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
double getFitness() { return fitness; }
void dump(std::string text) {
std::cout << text + ": ( ";
- for (Index i = 0; i < size(); i++) std::cout << (*this)[i] << " ";
+ for (Index i = 0; i < size(); i++)
+ std::cout << (*this)[i] << " ";
std::cout << ")\n";
std::cout << "of quality: " << getFitness() << "\n";
}
+
private:
double fitness;
};
@@ -405,9 +445,11 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
// secondarily, it is nice to not reorder locals unnecessarily
double fragment = 1.0 / (2.0 * parent->numLocals);
for (Index i = 0; i < parent->numLocals; i++) {
- if ((*order)[i] == i) fitness += fragment; // boost for each that wasn't moved
+ if ((*order)[i] == i)
+ fitness += fragment; // boost for each that wasn't moved
}
- fitness = (100 * fitness) + removedCopies; // removing copies is a secondary concern
+ // removing copies is a secondary concern
+ fitness = (100 * fitness) + removedCopies;
order->setFitness(fitness);
}
@@ -418,16 +460,19 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
(*ret)[i] = i;
}
if (first) {
- // as the first guess, use the natural order. this is not arbitrary for two reasons.
- // first, there may be an inherent order in the input (frequent indices are lower,
- // etc.). second, by ensuring we start with the natural order, we ensure we are at
- // least as good as the non-learning variant.
- // TODO: use ::pickIndices from the parent, so we literally get the simpler approach
- // as our first option
+ // as the first guess, use the natural order. this is not arbitrary for
+ // two reasons. first, there may be an inherent order in the input
+ // (frequent indices are lower, etc.). second, by ensuring we start with
+ // the natural order, we ensure we are at least as good as the
+ // non-learning variant.
+ // TODO: use ::pickIndices from the parent, so we literally get the
+ // simpler approach as our first option
first = false;
} else {
// leave params alone, shuffle the rest
- std::shuffle(ret->begin() + parent->getFunction()->getNumParams(), ret->end(), noise);
+ std::shuffle(ret->begin() + parent->getFunction()->getNumParams(),
+ ret->end(),
+ noise);
}
calculateFitness(ret);
#ifdef CFG_LEARN_DEBUG
@@ -455,7 +500,9 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
// if (i, i + 1) is in reverse order in right, flip them
if (reverseRight[(*ret)[i]] > reverseRight[(*ret)[i + 1]]) {
std::swap((*ret)[i], (*ret)[i + 1]);
- i++; // if we don't skip, we might end up pushing an element all the way to the end, which is not very perturbation-y
+ // if we don't skip, we might end up pushing an element all the way to
+ // the end, which is not very perturbation-y
+ i++;
}
}
calculateFitness(ret);
@@ -475,7 +522,8 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
std::cout << "[learning for " << getFunction()->name << "]\n";
#endif
auto numVars = this->getFunction()->getNumVars();
- const int GENERATION_SIZE = std::min(Index(numVars * (numVars - 1)), Index(20));
+ const int GENERATION_SIZE =
+ std::min(Index(numVars * (numVars - 1)), Index(20));
Generator generator(this);
GeneticLearner<Order, double, Generator> learner(generator, GENERATION_SIZE);
#ifdef CFG_LEARN_DEBUG
@@ -486,7 +534,8 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
while (1) {
learner.runGeneration();
auto newBest = learner.getBest()->getFitness();
- if (newBest == oldBest) break; // unlikely we can improve
+ if (newBest == oldBest)
+ break; // unlikely we can improve
oldBest = newBest;
#ifdef CFG_LEARN_DEBUG
learner.getBest()->dump("current best");
@@ -495,16 +544,15 @@ void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
#ifdef CFG_LEARN_DEBUG
learner.getBest()->dump("the best");
#endif
- this->pickIndicesFromOrder(*learner.getBest(), indices); // TODO: cache indices in Orders, at the cost of more memory?
+ // TODO: cache indices in Orders, at the cost of more memory?
+ this->pickIndicesFromOrder(*learner.getBest(), indices);
}
// declare passes
-Pass *createCoalesceLocalsPass() {
- return new CoalesceLocals();
-}
+Pass* createCoalesceLocalsPass() { return new CoalesceLocals(); }
-Pass *createCoalesceLocalsWithLearningPass() {
+Pass* createCoalesceLocalsWithLearningPass() {
return new CoalesceLocalsWithLearning();
}