summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/passes/CoalesceLocals.cpp120
-rw-r--r--src/support/learning.h113
-rw-r--r--src/wasm-traversal.h15
3 files changed, 233 insertions, 15 deletions
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp
index 05756556b..bb9b97073 100644
--- a/src/passes/CoalesceLocals.cpp
+++ b/src/passes/CoalesceLocals.cpp
@@ -22,6 +22,7 @@
//
+#include <algorithm>
#include <memory>
#include <unordered_set>
@@ -29,6 +30,7 @@
#include "pass.h"
#include "ast_utils.h"
#include "cfg/cfg-traversal.h"
+#include "support/learning.h"
#ifdef CFG_PROFILE
#include "support/timing.h"
#endif
@@ -185,7 +187,9 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal
void scanLivenessThroughActions(std::vector<Action>& actions, LocalSet& live);
- std::vector<Index> pickIndices(); // returns a vector of oldIndex => newIndex
+ void pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices);
+
+  virtual void pickIndices(std::vector<Index>& indices); // fills indices with the oldIndex => newIndex mapping
void applyIndices(std::vector<Index>& indices, Expression* root);
@@ -230,7 +234,8 @@ void CoalesceLocals::walk(Expression*& root) {
timer.dump();
#endif
// pick new indices
- auto indices = pickIndices();
+ std::vector<Index> indices;
+ pickIndices(indices);
// apply indices
applyIndices(indices, root);
}
@@ -346,12 +351,10 @@ void CoalesceLocals::scanLivenessThroughActions(std::vector<Action>& actions, Lo
// Indices decision making
-std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldIndex => newIndex
+void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices) {
// simple greedy coloring
- // TODO: multiple orderings
// TODO: take into account eliminated copies
// TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip)
- std::vector<Index> indices; // old => new
std::vector<WasmType> types;
std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it
indices.resize(numLocals);
@@ -363,6 +366,7 @@ std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldInd
auto numParams = getFunction()->getNumParams();
Index i = 0;
for (; i < numParams; i++) {
+ assert(order[i] == i); // order must leave the params in place
indices[i] = i;
types[i] = getFunction()->getLocalType(i);
for (size_t j = 0; j < numLocals; j++) {
@@ -371,24 +375,35 @@ std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldInd
nextFree++;
}
for (; i < numLocals; i++) {
+ Index actual = order[i];
Index found = -1;
for (Index j = 0; j < nextFree; j++) {
- if (!newInterferences[j * numLocals + i] && getFunction()->getLocalType(i) == types[j]) {
- indices[i] = found = j;
+ if (!newInterferences[j * numLocals + actual] && getFunction()->getLocalType(actual) == types[j]) {
+ indices[actual] = found = j;
break;
}
}
if (found == Index(-1)) {
- indices[i] = found = nextFree;
- types[found] = getFunction()->getLocalType(i);
+ indices[actual] = found = nextFree;
+ types[found] = getFunction()->getLocalType(actual);
nextFree++;
}
// merge new interferences for the new index
for (size_t j = 0; j < numLocals; j++) {
- newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(i, j);
+ newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(actual, j);
}
}
- return indices;
+}
+
+void CoalesceLocals::pickIndices(std::vector<Index>& indices) {
+ // use the natural order. this is less arbitrary than it seems, as the program
+ // may have a natural order of locals inherent in it.
+ std::vector<Index> order;
+ order.resize(numLocals);
+ for (size_t i = 0; i < numLocals; i++) {
+ order[i] = i;
+ }
+ pickIndicesFromOrder(order, indices);
}
void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) {
@@ -431,6 +446,87 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root)
getFunction()->localIndices.clear();
}
-static RegisterPass<CoalesceLocals> registerPass("coalesce-locals", "reduce # of locals by coalescing");
+struct CoalesceLocalsWithLearning : public CoalesceLocals {
+ virtual CoalesceLocals* create() override { return new CoalesceLocalsWithLearning; }
+
+ virtual void pickIndices(std::vector<Index>& indices) override;
+};
+
+void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
+ if (getFunction()->getNumVars() <= 1) {
+ // nothing to think about here
+ CoalesceLocals::pickIndices(indices);
+ return;
+ }
+
+ struct Order : public std::vector<Index> {
+ void setFitness(Index f) { fitness = f; }
+ Index getFitness() { return fitness; }
+ void dump(std::string text) {
+ std::cout << text + ": ( ";
+ for (Index i = 0; i < size(); i++) std::cout << (*this)[i] << " ";
+ std::cout << ")\n";
+ std::cout << "of quality: " << getFitness() << "\n";
+ }
+ private:
+ Index fitness;
+ };
+
+ struct Generator {
+ Generator(CoalesceLocalsWithLearning* parent) : parent(parent), noise(42) {}
+
+ Order* makeRandom() {
+ auto* ret = new Order;
+ ret->resize(parent->numLocals);
+ for (Index i = 0; i < parent->numLocals; i++) {
+ (*ret)[i] = i;
+ }
+ if (first) {
+ // as the first guess, use the natural order. this is not arbitrary for two reasons.
+ // first, there may be an inherent order in the input (frequent indices are lower,
+ // etc.). second, by ensuring we start with the natural order, we ensure we are at
+ // least as good as the non-learning variant.
+ first = false;
+ } else {
+ // leave params alone, shuffle the rest
+ std::random_shuffle(ret->begin() + parent->getFunction()->getNumParams(), ret->end(), [this](int i) {
+ return noise() % i;
+ });
+ }
+ // apply the order
+ std::vector<Index> indices; // the phenotype
+ parent->pickIndicesFromOrder(*ret, indices);
+ auto maxIndex = *std::max_element(indices.begin(), indices.end());
+ assert(maxIndex <= parent->numLocals);
+ auto fitness = parent->numLocals - maxIndex; // higher fitness is better
+ ret->setFitness(fitness);
+#ifdef CFG_LEARN_DEBUG
+ ret->dump("new one");
+#endif
+ // all done
+ return ret;
+ }
+
+ private:
+ CoalesceLocalsWithLearning* parent;
+ std::mt19937 noise;
+ bool first = true;
+ };
+
+ Generator generator(this);
+ GeneticLearner<Order, Index, Generator> learner(generator, 10);
+ // TODO: run some generations
+ auto* best = learner.getBest();
+#ifdef CFG_LEARN_DEBUG
+ best->dump("new one");
+#endif
+ pickIndicesFromOrder(*best, indices); // TODO: cache indices in Orders, at the cost of more memory?
+}
+
+// declare passes
+
+static RegisterPass<CoalesceLocals> registerPass1("coalesce-locals", "reduce # of locals by coalescing");
+
+static RegisterPass<CoalesceLocalsWithLearning> registerPass2("coalesce-locals-learning", "reduce # of locals by coalescing and learning");
} // namespace wasm
diff --git a/src/support/learning.h b/src/support/learning.h
new file mode 100644
index 000000000..2c251d87b
--- /dev/null
+++ b/src/support/learning.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2016 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_learning_h
+#define wasm_learning_h
+
+#include <algorithm>
+#include <random>
+
+namespace wasm {
+
+//
+// Machine learning using a genetic algorithm.
+//
+// The Genome class is the element on which to learn. It must
+// implement the following:
+//
+// * Fitness getFitness(); - calculate how good this item is.
+//
+// The Generator must implement the following:
+//
+// * Genome* makeRandom(); - make a random element
+// * Genome* makeMixture(Genome* one, Genome* two); - make a new element by mixing two
+//
+// Fitness is the type of the fitness values, e.g. uint32_t. More is better.
+//
+// Typical usage of this class is to call runGeneration(), check the best
+// quality using getBest()->getFitness(), and do that repeatedly until the
+// fitness is good enough. Then acquireBest() to get ownership of the best,
+// and the learner can be discarded (with all the rest of the population
+// cleaned up).
+
+template<typename Genome, typename Fitness, typename Generator>
+class GeneticLearner {
+ Generator& generator;
+
+ typedef std::unique_ptr<Genome> unique_ptr;
+ std::vector<unique_ptr> population;
+
+ void sort() {
+ std::sort(population.begin(), population.end(), [this](const unique_ptr& left, const unique_ptr& right) {
+ return left->getFitness() > right->getFitness();
+ });
+ }
+
+ std::mt19937 noise;
+
+ size_t randomIndex() {
+    // simple random index that favors low indexes. TODO: tweak
+ return std::min(noise() % population.size(), noise() % population.size());
+ }
+
+public:
+ GeneticLearner(Generator& generator, size_t size) : generator(generator), noise(1337) {
+ population.resize(size);
+ for (size_t i = 0; i < size; i++) {
+ population[i] = unique_ptr(generator.makeRandom());
+ }
+ sort();
+ }
+
+ Genome* getBest() {
+ return population[0].get();
+ }
+
+  unique_ptr acquireBest() {
+    return unique_ptr(population[0].release()); // transfer ownership out; unique_ptr is move-only and cannot be copied
+  }
+
+ void runGeneration() {
+ size_t size = population.size();
+
+ // we have a mix of promoted from the last generation, mixed from the last generation, and random
+ const size_t promoted = (25 * size) / 100;
+ const size_t mixed = (50 * size) / 100;
+
+ // promoted just stay in place
+ // mixtures are computed, then added back in (as we still need them as we work)
+ std::vector<unique_ptr> mixtures;
+ mixtures.resize(mixed);
+ for (size_t i = 0; i < mixed; i++) {
+ mixtures[i] = unique_ptr(generator.makeMixture(population[randomIndex()].get(), population[randomIndex()].get()));
+ }
+ for (size_t i = 0; i < mixed; i++) {
+ population[promoted + i].swap(mixtures[i]);
+ }
+ // TODO: de-duplicate at this point
+    // randoms fill in the rest
+ for (size_t i = promoted + mixed; i < size; i++) {
+ population[i] = unique_ptr(generator.makeRandom());
+ }
+
+ sort();
+ }
+};
+
+} // namespace wasm
+
+#endif // wasm_learning_h
+
diff --git a/src/wasm-traversal.h b/src/wasm-traversal.h
index afa20ecb5..d7fc83f59 100644
--- a/src/wasm-traversal.h
+++ b/src/wasm-traversal.h
@@ -154,6 +154,14 @@ struct Walker : public VisitorType {
// function either (which could be very inefficient).
bool isFunctionParallel() { return false; }
+ // This method is used to create instances per function for a function-parallel
+ // pass. You may need to override this if you subclass a Walker, as otherwise
+ // this will create the parent class.
+ // Note that this returns nullptr, and we check if the result is nullptr and
+ // do new SubType later. This is important since non-function parallel
+ // passes may not be constructable via new SubType.
+ virtual SubType* create() { return nullptr; }
+
// Useful methods for visitor implementions
// Replace the current node. You can call this in your visit*() methods.
@@ -191,13 +199,14 @@ struct Walker : public VisitorType {
self->visitExport(curr.get());
}
- auto processFunction = [](Module* module, SubType* instance, Function* func) {
+ auto processFunction = [this](Module* module, SubType* instance, Function* func) {
std::unique_ptr<SubType> allocated;
if (!instance) {
- instance = new SubType;
- allocated = std::unique_ptr<SubType>(instance);
+ instance = create();
+ if (!instance) instance = new SubType;
assert(module);
instance->setModule(module);
+ allocated = std::unique_ptr<SubType>(instance);
}
instance->setFunction(func);
instance->walk(func->body);