/*
 * Copyright 2016 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Coalesce locals, in order to reduce the total number of locals. This
// is similar to register allocation, however, there is never any
// spilling, and there isn't a fixed number of locals.
//
// NB: This pass is nonlinear in the number of locals. It is best to run it
//     after the number of locals has been somewhat reduced by other passes,
//     for example by simplify-locals (to remove unneeded uses of locals) and
//     reorder-locals (to sort them by # of uses and remove all unneeded ones).
//

#include <algorithm>
#include <memory>
#include <unordered_set>

#include "cfg/liveness-traversal.h"
#include "ir/numbering.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/learning.h"
#include "support/permutations.h"
#include "support/sparse_square_matrix.h"
#include "wasm.h"
#ifdef CFG_PROFILE
#include "support/timing.h"
#endif

namespace wasm {

struct CoalesceLocals
  : public WalkerPass<LivenessWalker<CoalesceLocals, Visitor<CoalesceLocals>>> {
  bool isFunctionParallel() override { return true; }

  // This pass merges locals, mapping the originals to new ones.
  // FIXME DWARF updating does not handle local changes yet.
  bool invalidatesDWARF() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<CoalesceLocals>();
  }

  // Branches outside of the function can be ignored, as we only look at locals
  // which vanish when we leave.
  bool ignoreBranchesOutsideOfFunc = true;

  // main entry point

  void doWalkFunction(Function* func);

  void increaseBackEdgePriorities();

  // Calculate interferences between locals. This will will fill
  // the data structure |interferences|.
  void calculateInterferences();

  void pickIndicesFromOrder(std::vector<Index>& order,
                            std::vector<Index>& indices);
  void pickIndicesFromOrder(std::vector<Index>& order,
                            std::vector<Index>& indices,
                            Index& removedCopies);

  // returns a vector of oldIndex => newIndex
  virtual void pickIndices(std::vector<Index>& indices);

  void applyIndices(std::vector<Index>& indices, Expression* root);

  // interference state

  // canonicalized - accesses should check (low, high)
  sparse_square_matrix<bool> interferences;

  void interfere(Index i, Index j) {
    if (i == j) {
      return;
    }
    interferences.set(std::min(i, j), std::max(i, j), true);
  }

  // optimized version where you know that low < high
  void interfereLowHigh(Index low, Index high) {
    assert(low < high);
    interferences.set(low, high, true);
  }

  void unInterfere(Index i, Index j) {
    interferences.set(std::min(i, j), std::max(i, j), false);
  }

  bool interferes(Index i, Index j) {
    return interferences.get(std::min(i, j), std::max(i, j));
  }

private:
  // In some cases we need to refinalize at the end.
  bool refinalize = false;
};

void CoalesceLocals::doWalkFunction(Function* func) {
  Super::doWalkFunction(func);
  // prioritize back edges
  increaseBackEdgePriorities();
  // use liveness to find interference
  calculateInterferences();
  // pick new indices
  std::vector<Index> indices;
  pickIndices(indices);
  // apply indices
  applyIndices(indices, func->body);

  if (refinalize) {
    ReFinalize().walkFunctionInModule(func, getModule());
  }
}

// A copy on a backedge can be especially costly, forcing us to branch just to
// do that copy. Add weight to such copies, so we prioritize getting rid of
// them.
void CoalesceLocals::increaseBackEdgePriorities() {
  for (auto* loopTop : loopTops) {
    // ignore the first edge, it is the initial entry, we just want backedges
    auto& in = loopTop->in;
    for (Index i = 1; i < in.size(); i++) {
      auto* arrivingBlock = in[i];
      if (arrivingBlock->out.size() > 1) {
        // we just want unconditional branches to the loop top, true phi
        // fragments
        continue;
      }
      for (auto& action : arrivingBlock->contents.actions) {
        if (action.isSet()) {
          auto* set = (*action.origin)->cast<LocalSet>();
          if (auto* get = getCopy(set)) {
            // this is indeed a copy, add to the cost (default cost is 2, so
            // this adds 50%, and can mostly break ties)
            addCopy(set->index, get->index);
          }
        }
      }
    }
  }
}

void CoalesceLocals::calculateInterferences() {
  interferences.recreate(numLocals);

  // We will track the values in each local, using a numbering where each index
  // represents a unique different value. This array maps a local index to the
  // value index it contains.
  //
  // To avoid reallocating this array all the time, allocate it once outside the
  // loop.
  std::vector<Index> values(numLocals);

  ValueNumbering valueNumbering;

  auto* func = getFunction();

  for (auto& curr : basicBlocks) {
    if (liveBlocks.count(curr.get()) == 0) {
      continue; // ignore dead blocks
    }

    // First, find which gets end a live range. While doing so, also calculate
    // the effectiveness of sets.
    auto& actions = curr->contents.actions;
    std::vector<bool> endsLiveRange(actions.size(), false);
    auto live = curr->contents.end;
    for (int i = int(actions.size()) - 1; i >= 0; i--) {
      auto& action = actions[i];
      auto index = action.index;
      if (action.isGet()) {
        if (!live.has(index)) {
          // The local is not live after us, so its liveness ends here.
          endsLiveRange[i] = true;
          live.insert(index);
        }
      } else {
        // This is a set. Check if the local is alive after it; if it is then
        // the set if effective as there is some get that can read the value.
        if (live.erase(index)) {
          action.effective = true;
        }
      }
    }

    // We have processed from the end of the block to the start, updating |live|
    // as we go, and now it must be equal to the state at the start of the
    // block. We will also use |live| in the next loop, and assume it begins
    // in that state.
    assert(live == curr->contents.start);

    // Now that we know live ranges, check if locals interfere in this block.
    // Locals interfere if they might contain different values on areas where
    // their live ranges overlap. To evaluate that, we do an analysis inside
    // the block that gives each set a unique value number, and as those flow
    // around through copies between sets we can see when sets are guaranteed to
    // be equal.

    if (curr.get() == entry) {
      // Each parameter is assumed to have a different value on entry.
      for (Index i = 0; i < func->getNumParams(); i++) {
        values[i] = valueNumbering.getUniqueValue();
      }

      for (Index i = func->getNumParams(); i < func->getNumLocals(); i++) {
        auto type = func->getLocalType(i);
        if (!LiteralUtils::canMakeZero(type)) {
          // The default value for a type for which we can't make a zero cannot
          // be used anyhow, but we must give it some value in this analysis. A
          // unique one seems least likely to result in surprise during
          // debugging.
          values[i] = valueNumbering.getUniqueValue();
        } else {
          values[i] = valueNumbering.getValue(Literal::makeZeros(type));
        }
      }
    } else {
      // In any block but the entry, assume that each live local might have a
      // different value at the start.
      // TODO: Propagating value IDs across blocks could identify more copies,
      //       however, it would also be nonlinear.
      for (auto index : curr->contents.start) {
        values[index] = valueNumbering.getUniqueValue();
      }
    }

    // Traverse through the block from start to finish. We keep track of both
    // liveness (in |live|) and the value IDs in each local (in |values|)
    // while doing so.
    for (Index i = 0; i < actions.size(); i++) {
      auto& action = actions[i];
      auto index = action.index;
      if (action.isGet()) {
        if (endsLiveRange[i]) {
          [[maybe_unused]] bool erased = live.erase(action.index);
          assert(erased);
        }
        continue;
      }

      // This is a set. Find the value being assigned to the local.
      auto* set = (*action.origin)->cast<LocalSet>();
      Index newValue;
      if (set->value->is<LocalGet>() || set->value->is<LocalSet>()) {
        // This is a copy: Either it is a get or a tee, that occurs right
        // before us. Set our new value to theirs.
        assert(i > 0 && set->value == *actions[i - 1].origin);
        newValue = values[actions[i - 1].index];
      } else {
        // This is not a copy.
        newValue = valueNumbering.getValue(set->value);
      }
      values[index] = newValue;

      // If this set has no gets that read from it, then it does not start a
      // live range, and it cannot cause interference.
      if (!action.effective) {
        continue;
      }

      // Update interferences: This will interfere with any other local that
      // is currently live and contains a different value.
      for (auto other : live) {
        // This index cannot have been live before this set (as we would be
        // trampling some other set before us, if so; and then that set would
        // have been ineffective). We will mark this index as live right after
        // this loop).
        assert(other != index);
        if (values[other] != newValue) {
          interfere(other, index);
        }
      }
      live.insert(action.index);
    }

    // Note that we do not need to do anything for merges: while in general an
    // interference can happen either in a block or when control flow merges,
    // in wasm we have default values for all locals. As a result, if a local is
    // live at the beginning of a block, it will be live at the ends of *all*
    // the blocks reaching it: there is no possibility of an "unset local." That
    // is, imagine we have this merge with a conflict:
    //
    //  [a is set to some value] ->-
    //                              |
    //                              |->- [merge block where a and b are used]
    //                              |
    //  [b is set to some value] ->-
    //
    // It is true that a conflict happens in the merge block, and if we had
    // unset locals then the top block would have b unset, and the bottom block
    // would have a unset, and so there would be no conflict there and the
    // problem would only appear in the merge. But in wasm, that a and b are
    // used in the merge block means that they are live at the end of both the
    // top and bottom block, and that liveness will extend all the way back to
    // *some* set of those values, possibly only the zero-initialization at the
    // function start. Therefore a conflict will be noticed in both the top and
    // bottom blocks, and that merge block does not need to reason about merging
    // its inputs. In other words, a conflict will appear in the middle of a
    // block, somewhere, and therefore we leave it to that block to identify,
    // and so blocks only need to reason about their own contents and not what
    // arrives to them.
    //
    // The one exception here is the entry to the function, see below.
  }

  // We must not try to coalesce parameters as they are fixed. Mark them as
  // "interfering" so that we do not need to special-case them later.
  auto numParams = getFunction()->getNumParams();
  for (Index i = 0; i < numParams; i++) {
    for (Index j = i + 1; j < numParams; j++) {
      interfereLowHigh(i, j);
    }
  }

  // We must handle interference between uses of the zero-init value and
  // parameters manually. A zero initialization represents a set (to a default
  // value), and that set would be what alerts us to a conflict, but there is no
  // actual set in the IR since the zero-init value is applied implicitly.
  for (auto i : entry->contents.start) {
    if (i >= numParams) {
      for (Index j = 0; j < numParams; j++) {
        interfereLowHigh(j, i);
      }
    }
  }
}

// Indices decision making

void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
                                          std::vector<Index>& indices) {
  Index removedCopies;
  pickIndicesFromOrder(order, indices, removedCopies);
}

void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
                                          std::vector<Index>& indices,
                                          Index& removedCopies) {
// mostly-simple greedy coloring
#if CFG_DEBUG
  std::cerr << "\npickIndicesFromOrder on " << getFunction()->name << '\n';
  std::cerr << getFunction()->body << '\n';
  std::cerr << "order:\n";
  for (auto i : order)
    std::cerr << i << ' ';
  std::cerr << '\n';
  std::cerr << "interferences:\n";
  for (Index i = 0; i < numLocals; i++) {
    for (Index j = 0; j < i + 1; j++) {
      std::cerr << "  ";
    }
    for (Index j = i + 1; j < numLocals; j++) {
      std::cerr << int(interferes(i, j)) << ' ';
    }
    std::cerr << " : $" << i << '\n';
  }
  std::cerr << "copies:\n";
  for (Index i = 0; i < numLocals; i++) {
    for (Index j = 0; j < i + 1; j++) {
      std::cerr << "  ";
    }
    for (Index j = i + 1; j < numLocals; j++) {
      std::cerr << int(getCopies(i, j)) << ' ';
    }
    std::cerr << " : $" << i << '\n';
  }
  std::cerr << "total copies:\n";
  for (Index i = 0; i < numLocals; i++) {
    std::cerr << " $" << i << ": " << totalCopies[i] << '\n';
  }
#endif
  // TODO: take into account distribution (99-1 is better than 50-50 with two
  // registers, for gzip)
  std::vector<Type> types;
  // new index * numLocals => list of all interferences of locals merged to it
  sparse_square_matrix<bool> newInterferences;

  // new index * numLocals => list of all copies of locals merged to it
  sparse_square_matrix<uint8_t> newCopies;

  indices.resize(numLocals);
  types.resize(numLocals);

  auto numParams = getFunction()->getNumParams();

  newInterferences.recreate(numLocals);
  newCopies.recreate(numLocals);

  Index nextFree = 0;
  removedCopies = 0;
  // we can't reorder parameters, they are fixed in order, and cannot coalesce
  Index i = 0;
  for (; i < numParams; i++) {
    assert(order[i] == i); // order must leave the params in place
    indices[i] = i;
    types[i] = getFunction()->getLocalType(i);
    for (Index j = numParams; j < numLocals; j++) {
      newInterferences.set(i, j, interferes(i, j));
      newCopies.set(i, j, getCopies(i, j));
    }
    nextFree++;
  }
  for (; i < numLocals; i++) {
    Index actual = order[i];
    Index found = -1;
    uint8_t foundCopies = -1;
    for (Index j = 0; j < nextFree; j++) {
      if (!newInterferences.get(j, actual) &&
          getFunction()->getLocalType(actual) == types[j]) {
        // this does not interfere, so it might be what we want. but pick the
        // one eliminating the most copies (we could stop looking forward when
        // there are no more items that have copies anyhow, but it doesn't seem
        // to help)
        auto currCopies = newCopies.get(j, actual);
        if (found == Index(-1) || currCopies > foundCopies) {
          indices[actual] = found = j;
          foundCopies = currCopies;
        }
      }
    }
    if (found == Index(-1)) {
      indices[actual] = found = nextFree;
      types[found] = getFunction()->getLocalType(actual);
      nextFree++;
      removedCopies += getCopies(found, actual);
    } else {
      removedCopies += foundCopies;
    }
#if CFG_DEBUG
    std::cerr << "set local $" << actual << " to $" << found << '\n';
#endif
    // merge new interferences and copies for the new index
    for (Index k = i + 1; k < numLocals; k++) {
      // go in the order, we only need to update for those we will see later
      auto j = order[k];
      newInterferences.set(
        found, j, newInterferences.get(found, j) || interferes(actual, j));
      newCopies.set(found, j, newCopies.get(found, j) + getCopies(actual, j));
    }
  }
}

// given a baseline order, adjust it based on an important order of priorities
// (higher values are higher priority). The priorities take precedence, unless
// they are equal and then the original order should be kept.
std::vector<Index> adjustOrderByPriorities(std::vector<Index>& baseline,
                                           std::vector<Index>& priorities) {
  std::vector<Index> ret = baseline;
  std::vector<Index> reversed = makeReversed(baseline);
  std::sort(ret.begin(), ret.end(), [&priorities, &reversed](Index x, Index y) {
    return priorities[x] > priorities[y] ||
           (priorities[x] == priorities[y] && reversed[x] < reversed[y]);
  });
  return ret;
}

void CoalesceLocals::pickIndices(std::vector<Index>& indices) {
  if (numLocals == 0) {
    return;
  }
  if (numLocals == 1) {
    indices.push_back(0);
    return;
  }
  // take into account total copies. but we must keep params in place, so give
  // them max priority
  auto adjustedTotalCopies = totalCopies;
  auto numParams = getFunction()->getNumParams();
  for (Index i = 0; i < numParams; i++) {
    adjustedTotalCopies[i] = std::numeric_limits<Index>::max();
  }
  // first try the natural order. this is less arbitrary than it seems, as the
  // program may have a natural order of locals inherent in it.
  auto order = makeIdentity(numLocals);
  order = adjustOrderByPriorities(order, adjustedTotalCopies);
  Index removedCopies;
  pickIndicesFromOrder(order, indices, removedCopies);
  auto maxIndex = *std::max_element(indices.begin(), indices.end());
  // next try the reverse order. this both gives us another chance at something
  // good, and also the very naturalness of the simple order may be quite
  // suboptimal
  setIdentity(order);
  for (Index i = numParams; i < numLocals; i++) {
    order[i] = numParams + numLocals - 1 - i;
  }
  order = adjustOrderByPriorities(order, adjustedTotalCopies);
  std::vector<Index> reverseIndices;
  Index reverseRemovedCopies;
  pickIndicesFromOrder(order, reverseIndices, reverseRemovedCopies);
  auto reverseMaxIndex =
    *std::max_element(reverseIndices.begin(), reverseIndices.end());
  // prefer to remove copies foremost, as it matters more for code size (minus
  // gzip), and improves throughput.
  if (reverseRemovedCopies > removedCopies ||
      (reverseRemovedCopies == removedCopies && reverseMaxIndex < maxIndex)) {
    indices.swap(reverseIndices);
  }
}

void CoalesceLocals::applyIndices(std::vector<Index>& indices,
                                  Expression* root) {
  assert(indices.size() == numLocals);
  for (auto& curr : basicBlocks) {
    auto& actions = curr->contents.actions;
    for (auto& action : actions) {
      if (action.isGet()) {
        auto* get = (*action.origin)->cast<LocalGet>();
        get->index = indices[get->index];
      } else if (action.isSet()) {
        auto* set = (*action.origin)->cast<LocalSet>();
        set->index = indices[set->index];
        // in addition, we can optimize out redundant copies and ineffective
        // sets
        if (auto* get = set->value->dynCast<LocalGet>()) {
          if (get->index == set->index) {
            action.removeCopy();
            continue;
          }
        }
        if (auto* subSet = set->value->dynCast<LocalSet>()) {
          // Only do so if not only the index matches but also the type. If the
          // inner type is more refined, leave that for other passes.
          if (subSet->index == set->index &&
              subSet->value->type == subSet->type) {
            set->value = subSet->value;
            continue;
          }
        }

        // Remove ineffective actions, that is, dead stores.
        //
        // Note that this may have downsides for non-nullable locals:
        //
        //   x = whatever; // dead set for validation
        //   if (..) {
        //     x = value1;
        //   } else {
        //     x = value2;
        //   }
        //
        // The dead set ensures validation, at the cost of extra code size and
        // slower speed in some tiers (the optimizing tier, at least, will
        // remove such dead sets anyhow). In theory keeping such a dead set may
        // be worthwhile, as it may save code size (by keeping the local
        // non-nullable and avoiding ref.as_non_nulls later). But the tradeoff
        // here isn't clear, so do the simple thing for now and remove all dead
        // sets.
        if (!action.effective) {
          // value may have no side effects, further optimizations can eliminate
          // it
          auto* value = set->value;
          if (!set->isTee()) {
            // we need to drop it
            Drop* drop = ExpressionManipulator::convert<LocalSet, Drop>(set);
            drop->value = value;
            *action.origin = drop;
          } else {
            auto originalType = (*action.origin)->type;
            if (originalType != set->value->type) {
              // The value had a more refined type, which we must propagate at
              // the end.
              refinalize = true;
            }
            *action.origin = set->value;
          }
          continue;
        }
      }
    }
  }
  // update type list
  auto numParams = getFunction()->getNumParams();
  Index newNumLocals = 0;
  for (auto index : indices) {
    newNumLocals = std::max(newNumLocals, index + 1);
  }
  auto oldVars = getFunction()->vars;
  getFunction()->vars.resize(newNumLocals - numParams);
  for (Index index = numParams; index < numLocals; index++) {
    Index newIndex = indices[index];
    if (newIndex >= numParams) {
      getFunction()->vars[newIndex - numParams] = oldVars[index - numParams];
    }
  }
  // names are gone
  getFunction()->localNames.clear();
  getFunction()->localIndices.clear();
}

struct CoalesceLocalsWithLearning : public CoalesceLocals {
  std::unique_ptr<Pass> create() override {
    return std::make_unique<CoalesceLocalsWithLearning>();
  }

  virtual void pickIndices(std::vector<Index>& indices) override;
};

void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) {
  if (getFunction()->getNumVars() <= 1) {
    // nothing to think about here
    CoalesceLocals::pickIndices(indices);
    return;
  }

  struct Order : public std::vector<Index> {
    void setFitness(double f) { fitness = f; }
    double getFitness() { return fitness; }
    void dump(std::string text) {
      std::cout << text + ": ( ";
      for (Index i = 0; i < size(); i++) {
        std::cout << (*this)[i] << " ";
      }
      std::cout << ")\n";
      std::cout << "of quality: " << getFitness() << "\n";
    }

  private:
    double fitness;
  };

  struct Generator {
    Generator(CoalesceLocalsWithLearning* parent) : parent(parent), noise(42) {}

    void calculateFitness(Order* order) {
      // apply the order
      std::vector<Index> indices; // the phenotype
      Index removedCopies;
      parent->pickIndicesFromOrder(*order, indices, removedCopies);
      auto maxIndex = *std::max_element(indices.begin(), indices.end());
      assert(maxIndex <= parent->numLocals);
      // main part of fitness is the number of locals
      double fitness = parent->numLocals - maxIndex; // higher fitness is better
      // secondarily, it is nice to not reorder locals unnecessarily
      double fragment = 1.0 / (2.0 * parent->numLocals);
      for (Index i = 0; i < parent->numLocals; i++) {
        if ((*order)[i] == i) {
          fitness += fragment; // boost for each that wasn't moved
        }
      }
      // removing copies is a secondary concern
      fitness = (100 * fitness) + removedCopies;
      order->setFitness(fitness);
    }

    Order* makeRandom() {
      auto* ret = new Order;
      ret->resize(parent->numLocals);
      for (Index i = 0; i < parent->numLocals; i++) {
        (*ret)[i] = i;
      }
      if (first) {
        // as the first guess, use the natural order. this is not arbitrary for
        // two reasons. first, there may be an inherent order in the input
        // (frequent indices are lower, etc.). second, by ensuring we start with
        // the natural order, we ensure we are at least as good as the
        // non-learning variant.
        // TODO: use ::pickIndices from the parent, so we literally get the
        //       simpler approach as our first option
        first = false;
      } else {
        // leave params alone, shuffle the rest
        std::shuffle(ret->begin() + parent->getFunction()->getNumParams(),
                     ret->end(),
                     noise);
      }
      calculateFitness(ret);
#ifdef CFG_LEARN_DEBUG
      order->dump("new rando");
#endif
      return ret;
    }

    Order* makeMixture(Order* left, Order* right) {
      // perturb left using right. this is useful since
      // we don't care about absolute locations, relative ones matter more,
      // and a true merge of two vectors could obscure that (e.g.
      // a.......... and ..........a would merge a into the middle, for no
      // reason), and cause a lot of unnecessary noise
      Index size = left->size();
      Order reverseRight; // reverseRight[x] is the index of x in right
      reverseRight.resize(size);
      for (Index i = 0; i < size; i++) {
        reverseRight[(*right)[i]] = i;
      }
      auto* ret = new Order;
      *ret = *left;
      assert(size >= 1);
      for (Index i = parent->getFunction()->getNumParams(); i < size - 1; i++) {
        // if (i, i + 1) is in reverse order in right, flip them
        if (reverseRight[(*ret)[i]] > reverseRight[(*ret)[i + 1]]) {
          std::swap((*ret)[i], (*ret)[i + 1]);
          // if we don't skip, we might end up pushing an element all the way to
          // the end, which is not very perturbation-y
          i++;
        }
      }
      calculateFitness(ret);
#ifdef CFG_LEARN_DEBUG
      ret->dump("new mixture");
#endif
      return ret;
    }

  private:
    CoalesceLocalsWithLearning* parent;
    std::mt19937 noise;
    bool first = true;
  };

#ifdef CFG_LEARN_DEBUG
  std::cout << "[learning for " << getFunction()->name << "]\n";
#endif
  auto numVars = this->getFunction()->getNumVars();
  const int GENERATION_SIZE =
    std::min(Index(numVars * (numVars - 1)), Index(20));
  Generator generator(this);
  GeneticLearner<Order, double, Generator> learner(generator, GENERATION_SIZE);
#ifdef CFG_LEARN_DEBUG
  learner.getBest()->dump("first best");
#endif
  // keep working while we see improvement
  auto oldBest = learner.getBest()->getFitness();
  while (1) {
    learner.runGeneration();
    auto newBest = learner.getBest()->getFitness();
    if (newBest == oldBest) {
      break; // unlikely we can improve
    }
    oldBest = newBest;
#ifdef CFG_LEARN_DEBUG
    learner.getBest()->dump("current best");
#endif
  }
#ifdef CFG_LEARN_DEBUG
  learner.getBest()->dump("the best");
#endif
  // TODO: cache indices in Orders, at the cost of more memory?
  this->pickIndicesFromOrder(*learner.getBest(), indices);
}

// declare passes

Pass* createCoalesceLocalsPass() { return new CoalesceLocals(); }

Pass* createCoalesceLocalsWithLearningPass() {
  return new CoalesceLocalsWithLearning();
}

} // namespace wasm