summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/passes/CoalesceLocals.cpp120
-rw-r--r--src/support/learning.h113
-rw-r--r--src/wasm-traversal.h15
3 files changed, 233 insertions, 15 deletions
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp
index 05756556b..bb9b97073 100644
--- a/src/passes/CoalesceLocals.cpp
+++ b/src/passes/CoalesceLocals.cpp
@@ -22,6 +22,7 @@
//
+#include <algorithm>
#include <memory>
#include <unordered_set>
@@ -29,6 +30,7 @@
#include "pass.h"
#include "ast_utils.h"
#include "cfg/cfg-traversal.h"
+#include "support/learning.h"
#ifdef CFG_PROFILE
#include "support/timing.h"
#endif
@@ -185,7 +187,9 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal
void scanLivenessThroughActions(std::vector<Action>& actions, LocalSet& live);
- std::vector<Index> pickIndices(); // returns a vector of oldIndex => newIndex
+ void pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices);
+
+  virtual void pickIndices(std::vector<Index>& indices); // fills indices with the oldIndex => newIndex mapping
void applyIndices(std::vector<Index>& indices, Expression* root);
@@ -230,7 +234,8 @@ void CoalesceLocals::walk(Expression*& root) {
timer.dump();
#endif
// pick new indices
- auto indices = pickIndices();
+ std::vector<Index> indices;
+ pickIndices(indices);
// apply indices
applyIndices(indices, root);
}
@@ -346,12 +351,10 @@ void CoalesceLocals::scanLivenessThroughActions(std::vector<Action>& actions, Lo
// Indices decision making
-std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldIndex => newIndex
+void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, std::vector<Index>& indices) {
// simple greedy coloring
- // TODO: multiple orderings
// TODO: take into account eliminated copies
// TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip)
- std::vector<Index> indices; // old => new
std::vector<WasmType> types;
std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it
indices.resize(numLocals);
@@ -363,6 +366,7 @@ std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldInd
auto numParams = getFunction()->getNumParams();
Index i = 0;
for (; i < numParams; i++) {
+ assert(order[i] == i); // order must leave the params in place
indices[i] = i;
types[i] = getFunction()->getLocalType(i);
for (size_t j = 0; j < numLocals; j++) {
@@ -371,24 +375,35 @@ std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldInd
nextFree++;
}
for (; i < numLocals; i++) {
+ Index actual = order[i];
Index found = -1;
for (Index j = 0; j < nextFree; j++) {
- if (!newInterferences[j * numLocals + i] && getFunction()->getLocalType(i) == types[j]) {
- indices[i] = found = j;
+ if (!newInterferences[j * numLocals + actual] && getFunction()->getLocalType(actual) == types[j]) {
+ indices[actual] = found = j;
break;
}
}
if (found == Index(-1)) {
- indices[i] = found = nextFree;
- types[found] = getFunction()->getLocalType(i);
+ indices[actual] = found = nextFree;
+ types[found] = getFunction()->getLocalType(actual);
nextFree++;
}
// merge new interferences for the new index
for (size_t j = 0; j < numLocals; j++) {
- newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(i, j);
+ newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(actual, j);
}
}
- return indices;
+}
+
+void CoalesceLocals::pickIndices(std::vector<Index>& indices) {
+ // use the natural order. this is less arbitrary than it seems, as the program
+ // may have a natural order of locals inherent in it.
+ std::vector<Index> order;
+ order.resize(numLocals);
+ for (size_t i = 0; i < numLocals; i++) {
+ order[i] = i;
+ }
+ pickIndicesFromOrder(order, indices);
}
void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) {
@@ -431,6 +446,87 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root)
getFunction()->localIndices.clear();
}
-static RegisterPass<CoalesceLocals> registerPass("coalesce-locals", "reduce # of locals by coalescing");
+struct CoalesceLocalsWithLearning : public CoalesceLocals {
+ virtual CoalesceLocals* create() override { return new CoalesceLocalsWithLearning; }
+
+ virtual void pickIndices(std::vector<Index>& indices) override;
+};
+
+void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
+ if (getFunction()->getNumVars() <= 1) {
+ // nothing to think about here
+ CoalesceLocals::pickIndices(indices);
+ return;
+ }
+
+ struct Order : public std::vector<Index> {
+ void setFitness(Index f) { fitness = f; }
+ Index getFitness() { return fitness; }
+ void dump(std::string text) {
+ std::cout << text + ": ( ";
+ for (Index i = 0; i < size(); i++) std::cout << (*this)[i] << " ";
+ std::cout << ")\n";
+ std::cout << "of quality: " << getFitness() << "\n";
+ }
+ private:
+ Index fitness;
+ };
+
+ struct Generator {
+ Generator(CoalesceLocalsWithLearning* parent) : parent(parent), noise(42) {}
+
+ Order* makeRandom() {
+ auto* ret = new Order;
+ ret->resize(parent->numLocals);
+ for (Index i = 0; i < parent->numLocals; i++) {
+ (*ret)[i] = i;
+ }
+ if (first) {
+ // as the first guess, use the natural order. this is not arbitrary for two reasons.
+ // first, there may be an inherent order in the input (frequent indices are lower,
+ // etc.). second, by ensuring we start with the natural order, we ensure we are at
+ // least as good as the non-learning variant.
+ first = false;
+ } else {
+ // leave params alone, shuffle the rest
+ std::random_shuffle(ret->begin() + parent->getFunction()->getNumParams(), ret->end(), [this](int i) {
+ return noise() % i;
+ });
+ }
+ // apply the order
+ std::vector<Index> indices; // the phenotype
+ parent->pickIndicesFromOrder(*ret, indices);
+ auto maxIndex = *std::max_element(indices.begin(), indices.end());
+ assert(maxIndex <= parent->numLocals);
+ auto fitness = parent->numLocals - maxIndex; // higher fitness is better
+ ret->setFitness(fitness);
+#ifdef CFG_LEARN_DEBUG
+ ret->dump("new one");
+#endif
+ // all done
+ return ret;
+ }
+
+ private:
+ CoalesceLocalsWithLearning* parent;
+ std::mt19937 noise;
+ bool first = true;
+ };
+
+ Generator generator(this);
+ GeneticLearner<Order, Index, Generator> learner(generator, 10);
+ // TODO: run some generations
+ auto* best = learner.getBest();
+#ifdef CFG_LEARN_DEBUG
+ best->dump("new one");
+#endif
+ pickIndicesFromOrder(*best, indices); // TODO: cache indices in Orders, at the cost of more memory?
+}
+
+// declare passes
+
+static RegisterPass<CoalesceLocals> registerPass1("coalesce-locals", "reduce # of locals by coalescing");
+
+static RegisterPass<CoalesceLocalsWithLearning> registerPass2("coalesce-locals-learning", "reduce # of locals by coalescing and learning");
} // namespace wasm
diff --git a/src/support/learning.h b/src/support/learning.h
new file mode 100644
index 000000000..2c251d87b
--- /dev/null
+++ b/src/support/learning.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2016 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_learning_h
+#define wasm_learning_h
+
+#include <algorithm>
+#include <random>
+
+namespace wasm {
+
+//
+// Machine learning using a genetic algorithm.
+//
+// The Genome class is the element on which to learn. It must
+// implement the following:
+//
+// * Fitness getFitness(); - calculate how good this item is.
+//
+// The Generator must implement the following:
+//
+// * Genome* makeRandom(); - make a random element
+// * Genome* makeMixture(Genome* one, Genome* two); - make a new element by mixing two
+//
+// Fitness is the type of the fitness values, e.g. uint32_t. More is better.
+//
+// Typical usage of this class is to call runGeneration(), check the best
+// quality using getBest()->getFitness(), and do that repeatedly until the
+// fitness is good enough. Then acquireBest() to get ownership of the best,
+// and the learner can be discarded (with all the rest of the population
+// cleaned up).
+
+template<typename Genome, typename Fitness, typename Generator>
+class GeneticLearner {
+ Generator& generator;
+
+ typedef std::unique_ptr<Genome> unique_ptr;
+ std::vector<unique_ptr> population;
+
+ void sort() {
+ std::sort(population.begin(), population.end(), [this](const unique_ptr& left, const unique_ptr& right) {
+ return left->getFitness() > right->getFitness();
+ });
+ }
+
+ std::mt19937 noise;
+
+ size_t randomIndex() {
+    // simple random index that favors low indexes. TODO: tweak
+ return std::min(noise() % population.size(), noise() % population.size());
+ }
+
+public:
+ GeneticLearner(Generator& generator, size_t size) : generator(generator), noise(1337) {
+ population.resize(size);
+ for (size_t i = 0; i < size; i++) {
+ population[i] = unique_ptr(generator.makeRandom());
+ }
+ sort();
+ }
+
+ Genome* getBest() {
+ return population[0].get();
+ }
+
+  unique_ptr acquireBest() {
+    return unique_ptr(population[0].release()); // transfer ownership out; unique_ptr is move-only and cannot be copied
+  }
+
+ void runGeneration() {
+ size_t size = population.size();
+
+ // we have a mix of promoted from the last generation, mixed from the last generation, and random
+ const size_t promoted = (25 * size) / 100;
+ const size_t mixed = (50 * size) / 100;
+
+ // promoted just stay in place
+ // mixtures are computed, then added back in (as we still need them as we work)
+ std::vector<unique_ptr> mixtures;
+ mixtures.resize(mixed);
+ for (size_t i = 0; i < mixed; i++) {
+ mixtures[i] = unique_ptr(generator.makeMixture(population[randomIndex()].get(), population[randomIndex()].get()));
+ }
+ for (size_t i = 0; i < mixed; i++) {
+ population[promoted + i].swap(mixtures[i]);
+ }
+ // TODO: de-duplicate at this point
+    // randoms fill in the rest
+ for (size_t i = promoted + mixed; i < size; i++) {
+ population[i] = unique_ptr(generator.makeRandom());
+ }
+
+ sort();
+ }
+};
+
+} // namespace wasm
+
+#endif // wasm_learning_h
+
diff --git a/src/wasm-traversal.h b/src/wasm-traversal.h
index afa20ecb5..d7fc83f59 100644
--- a/src/wasm-traversal.h
+++ b/src/wasm-traversal.h
@@ -154,6 +154,14 @@ struct Walker : public VisitorType {
// function either (which could be very inefficient).
bool isFunctionParallel() { return false; }
+ // This method is used to create instances per function for a function-parallel
+ // pass. You may need to override this if you subclass a Walker, as otherwise
+ // this will create the parent class.
+ // Note that this returns nullptr, and we check if the result is nullptr and
+ // do new SubType later. This is important since non-function parallel
+ // passes may not be constructable via new SubType.
+ virtual SubType* create() { return nullptr; }
+
// Useful methods for visitor implementions
// Replace the current node. You can call this in your visit*() methods.
@@ -191,13 +199,14 @@ struct Walker : public VisitorType {
self->visitExport(curr.get());
}
- auto processFunction = [](Module* module, SubType* instance, Function* func) {
+ auto processFunction = [this](Module* module, SubType* instance, Function* func) {
std::unique_ptr<SubType> allocated;
if (!instance) {
- instance = new SubType;
- allocated = std::unique_ptr<SubType>(instance);
+ instance = create();
+ if (!instance) instance = new SubType;
assert(module);
instance->setModule(module);
+ allocated = std::unique_ptr<SubType>(instance);
}
instance->setFunction(func);
instance->walk(func->body);