diff options
author | Alon Zakai <alonzakai@gmail.com> | 2016-05-17 13:42:26 -0700 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2016-05-17 13:42:26 -0700 |
commit | 92cbfdb2fb58986724bb796489b8138d217ca5f1 (patch) | |
tree | e539c33bfff5d3d677a1731e4185887edb0fba63 /src | |
parent | 90d4fa0164ab0fad4ca7b60489c43eebd41ed9e3 (diff) | |
parent | 5d76c53d350cc4565c8c4f4298c8fd346ca8fcb5 (diff) | |
download | binaryen-92cbfdb2fb58986724bb796489b8138d217ca5f1.tar.gz binaryen-92cbfdb2fb58986724bb796489b8138d217ca5f1.tar.bz2 binaryen-92cbfdb2fb58986724bb796489b8138d217ca5f1.zip |
Merge pull request #515 from WebAssembly/learn-coloring
Smarter local coalescing
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 120 | ||||
-rw-r--r-- | src/support/learning.h | 113 | ||||
-rw-r--r-- | src/wasm-traversal.h | 15 |
3 files changed, 233 insertions, 15 deletions
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index 05756556b..bb9b97073 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -22,6 +22,7 @@ // +#include <algorithm> #include <memory> #include <unordered_set> @@ -29,6 +30,7 @@ #include "pass.h" #include "ast_utils.h" #include "cfg/cfg-traversal.h" +#include "support/learning.h" #ifdef CFG_PROFILE #include "support/timing.h" #endif @@ -185,7 +187,9 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal void scanLivenessThroughActions(std::vector<Action>& actions, LocalSet& live); - std::vector<Index> pickIndices(); // returns a vector of oldIndex => newIndex + void pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices); + + virtual void pickIndices(std::vector<Index>& indices); // returns a vector of oldIndex => newIndex void applyIndices(std::vector<Index>& indices, Expression* root); @@ -230,7 +234,8 @@ void CoalesceLocals::walk(Expression*& root) { timer.dump(); #endif // pick new indices - auto indices = pickIndices(); + std::vector<Index> indices; + pickIndices(indices); // apply indices applyIndices(indices, root); } @@ -346,12 +351,10 @@ void CoalesceLocals::scanLivenessThroughActions(std::vector<Action>& actions, Lo // Indices decision making -std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldIndex => newIndex +void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices) { // simple greedy coloring - // TODO: multiple orderings // TODO: take into account eliminated copies // TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip) - std::vector<Index> indices; // old => new std::vector<WasmType> types; std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it indices.resize(numLocals); @@ -363,6 +366,7 @@ std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldInd auto numParams = getFunction()->getNumParams(); Index i = 0; for (; i < numParams; i++) { + assert(order[i] == i); // order must leave the params in place indices[i] = i; types[i] = getFunction()->getLocalType(i); for (size_t j = 0; j < numLocals; j++) { @@ -371,24 +375,35 @@ std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldInd nextFree++; } for (; i < numLocals; i++) { + Index actual = order[i]; Index found = -1; for (Index j = 0; j < nextFree; j++) { - if (!newInterferences[j * numLocals + i] && getFunction()->getLocalType(i) == types[j]) { - indices[i] = found = j; + if (!newInterferences[j * numLocals + actual] && getFunction()->getLocalType(actual) == types[j]) { + indices[actual] = found = j; break; } } if (found == Index(-1)) { - indices[i] = found = nextFree; - types[found] = getFunction()->getLocalType(i); + indices[actual] = found = nextFree; + types[found] = getFunction()->getLocalType(actual); nextFree++; } // merge new interferences for the new index for (size_t j = 0; j < numLocals; j++) { - newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(i, j); + newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(actual, j); } } - return indices; +} + +void CoalesceLocals::pickIndices(std::vector<Index>& indices) { + // use the natural order. this is less arbitrary than it seems, as the program + // may have a natural order of locals inherent in it. + std::vector<Index> order; + order.resize(numLocals); + for (size_t i = 0; i < numLocals; i++) { + order[i] = i; + } + pickIndicesFromOrder(order, indices); } void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) { @@ -431,6 +446,87 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) getFunction()->localIndices.clear(); } -static RegisterPass<CoalesceLocals> registerPass("coalesce-locals", "reduce # of locals by coalescing"); +struct CoalesceLocalsWithLearning : public CoalesceLocals { + virtual CoalesceLocals* create() override { return new CoalesceLocalsWithLearning; } + + virtual void pickIndices(std::vector<Index>& indices) override; +}; + +void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) { + if (getFunction()->getNumVars() <= 1) { + // nothing to think about here + CoalesceLocals::pickIndices(indices); + return; + } + + struct Order : public std::vector<Index> { + void setFitness(Index f) { fitness = f; } + Index getFitness() { return fitness; } + void dump(std::string text) { + std::cout << text + ": ( "; + for (Index i = 0; i < size(); i++) std::cout << (*this)[i] << " "; + std::cout << ")\n"; + std::cout << "of quality: " << getFitness() << "\n"; + } + private: + Index fitness; + }; + + struct Generator { + Generator(CoalesceLocalsWithLearning* parent) : parent(parent), noise(42) {} + + Order* makeRandom() { + auto* ret = new Order; + ret->resize(parent->numLocals); + for (Index i = 0; i < parent->numLocals; i++) { + (*ret)[i] = i; + } + if (first) { + // as the first guess, use the natural order. this is not arbitrary for two reasons. + // first, there may be an inherent order in the input (frequent indices are lower, + // etc.). second, by ensuring we start with the natural order, we ensure we are at + // least as good as the non-learning variant. + first = false; + } else { + // leave params alone, shuffle the rest + std::random_shuffle(ret->begin() + parent->getFunction()->getNumParams(), ret->end(), [this](int i) { + return noise() % i; + }); + } + // apply the order + std::vector<Index> indices; // the phenotype + parent->pickIndicesFromOrder(*ret, indices); + auto maxIndex = *std::max_element(indices.begin(), indices.end()); + assert(maxIndex <= parent->numLocals); + auto fitness = parent->numLocals - maxIndex; // higher fitness is better + ret->setFitness(fitness); +#ifdef CFG_LEARN_DEBUG + ret->dump("new one"); +#endif + // all done + return ret; + } + + private: + CoalesceLocalsWithLearning* parent; + std::mt19937 noise; + bool first = true; + }; + + Generator generator(this); + GeneticLearner<Order, Index, Generator> learner(generator, 10); + // TODO: run some generations + auto* best = learner.getBest(); +#ifdef CFG_LEARN_DEBUG + best->dump("new one"); +#endif + pickIndicesFromOrder(*best, indices); // TODO: cache indices in Orders, at the cost of more memory? +} + +// declare passes + +static RegisterPass<CoalesceLocals> registerPass1("coalesce-locals", "reduce # of locals by coalescing"); + +static RegisterPass<CoalesceLocalsWithLearning> registerPass2("coalesce-locals-learning", "reduce # of locals by coalescing and learning"); } // namespace wasm diff --git a/src/support/learning.h b/src/support/learning.h new file mode 100644 index 000000000..2c251d87b --- /dev/null +++ b/src/support/learning.h @@ -0,0 +1,113 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_learning_h +#define wasm_learning_h + +#include <algorithm> +#include <random> + +namespace wasm { + +// +// Machine learning using a genetic algorithm. +// +// The Genome class is the element on which to learn. It must +// implement the following: +// +// * Fitness getFitness(); - calculate how good this item is. +// +// The Generator must implement the following: +// +// * Genome* makeRandom(); - make a random element +// * Genome* makeMixture(Genome* one, Genome* two); - make a new element by mixing two +// +// Fitness is the type of the fitness values, e.g. uint32_t. More is better. +// +// Typical usage of this class is to run call runGeneration(), check the best +// quality using getBest()->getFitness(), and do that repeatedly until the +// fitness is good enough. Then acquireBest() to get ownership of the best, +// and the learner can be discarded (with all the rest of the population +// cleaned up). + +template<typename Genome, typename Fitness, typename Generator> +class GeneticLearner { + Generator& generator; + + typedef std::unique_ptr<Genome> unique_ptr; + std::vector<unique_ptr> population; + + void sort() { + std::sort(population.begin(), population.end(), [this](const unique_ptr& left, const unique_ptr& right) { + return left->getFitness() > right->getFitness(); + }); + } + + std::mt19937 noise; + + size_t randomIndex() { + // simple random index that favorizes low indexes TODO tweak + return std::min(noise() % population.size(), noise() % population.size()); + } + +public: + GeneticLearner(Generator& generator, size_t size) : generator(generator), noise(1337) { + population.resize(size); + for (size_t i = 0; i < size; i++) { + population[i] = unique_ptr(generator.makeRandom()); + } + sort(); + } + + Genome* getBest() { + return population[0].get(); + } + + unique_ptr acquireBest() { + return population[0]; + } + + void runGeneration() { + size_t size = population.size(); + + // we have a mix of promoted from the last generation, mixed from the last generation, and random + const size_t promoted = (25 * size) / 100; + const size_t mixed = (50 * size) / 100; + + // promoted just stay in place + // mixtures are computed, then added back in (as we still need them as we work) + std::vector<unique_ptr> mixtures; + mixtures.resize(mixed); + for (size_t i = 0; i < mixed; i++) { + mixtures[i] = unique_ptr(generator.makeMixture(population[randomIndex()].get(), population[randomIndex()].get())); + } + for (size_t i = 0; i < mixed; i++) { + population[promoted + i].swap(mixtures[i]); + } + // TODO: de-duplicate at this point + // randoms fill in the test + for (size_t i = promoted + mixed; i < size; i++) { + population[i] = unique_ptr(generator.makeRandom()); + } + + sort(); + } +}; + +} // namespace wasm + +#endif // wasm_learning_h + diff --git a/src/wasm-traversal.h b/src/wasm-traversal.h index afa20ecb5..d7fc83f59 100644 --- a/src/wasm-traversal.h +++ b/src/wasm-traversal.h @@ -154,6 +154,14 @@ struct Walker : public VisitorType { // function either (which could be very inefficient). bool isFunctionParallel() { return false; } + // This method is used to create instances per function for a function-parallel + // pass. You may need to override this if you subclass a Walker, as otherwise + // this will create the parent class. + // Note that this returns nullptr, and we check if the result is nullptr and + // do new SubType later. This is important since non-function parallel + // passes may not be constructable via new SubType. + virtual SubType* create() { return nullptr; } + // Useful methods for visitor implementions // Replace the current node. You can call this in your visit*() methods. @@ -191,13 +199,14 @@ struct Walker : public VisitorType { self->visitExport(curr.get()); } - auto processFunction = [](Module* module, SubType* instance, Function* func) { + auto processFunction = [this](Module* module, SubType* instance, Function* func) { std::unique_ptr<SubType> allocated; if (!instance) { - instance = new SubType; - allocated = std::unique_ptr<SubType>(instance); + instance = create(); + if (!instance) instance = new SubType; assert(module); instance->setModule(module); + allocated = std::unique_ptr<SubType>(instance); } instance->setFunction(func); instance->walk(func->body); |