summaryrefslogtreecommitdiff
path: root/src/passes/ReorderGlobals.cpp
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2024-08-29 15:08:00 -0700
committerGitHub <noreply@github.com>2024-08-29 15:08:00 -0700
commit871ff0d4f910b565c15f82e8f3c9aa769b01d286 (patch)
treebe231d3804086dbb4a335a2fe0334475a5937130 /src/passes/ReorderGlobals.cpp
parentb63aeadb09a4450f55041dfb3fb7260807e91dfc (diff)
downloadbinaryen-871ff0d4f910b565c15f82e8f3c9aa769b01d286.tar.gz
binaryen-871ff0d4f910b565c15f82e8f3c9aa769b01d286.tar.bz2
binaryen-871ff0d4f910b565c15f82e8f3c9aa769b01d286.zip
Simplify ReorderGlobals using new topological sort utils (#6885)
Use the new TopologicalSort and MinTopologicalSortOf utilities instead of the old CRTP topological sort utility and a bespoke heap-based topological sort in ReorderGlobals. Since there is no longer a heap to pop from, the direction of the custom comparator is now much more intuitive. Further simplify the code by switching from tracking the new order of globals using a sequence of new indices to tracking the order using a sequence of old indices. This change also makes the pass about 20% faster on a large real-world module.
Diffstat (limited to 'src/passes/ReorderGlobals.cpp')
-rw-r--r--src/passes/ReorderGlobals.cpp203
1 files changed, 62 insertions, 141 deletions
diff --git a/src/passes/ReorderGlobals.cpp b/src/passes/ReorderGlobals.cpp
index 4602f3284..d17bb86cc 100644
--- a/src/passes/ReorderGlobals.cpp
+++ b/src/passes/ReorderGlobals.cpp
@@ -35,7 +35,7 @@
#include "ir/find_all.h"
#include "pass.h"
-#include "support/topological_sort.h"
+#include "support/topological_orders.h"
#include "wasm.h"
namespace wasm {
@@ -75,8 +75,8 @@ struct ReorderGlobals : public Pass {
// For efficiency we will use global indices rather than names. That is, we
// use the index of the global in the original ordering to identify each
- // global. A different ordering is then a vector of new indices, saying where
- // each one moves, which is logically a mapping between indices.
+ // global. A different ordering is then a vector of old indices, saying where
+ // each element comes from, which is logically a mapping between indices.
using IndexIndexMap = std::vector<Index>;
// We will also track counts of uses for each global. We use floating-point
@@ -190,26 +190,16 @@ struct ReorderGlobals : public Pass {
double const EXPONENTIAL_FACTOR = 0.095;
IndexCountMap sumCounts(globals.size()), exponentialCounts(globals.size());
- struct Sort : public TopologicalSort<Index, Sort> {
- const Dependencies& deps;
-
- Sort(Index numGlobals, const Dependencies& deps) : deps(deps) {
- for (Index i = 0; i < numGlobals; i++) {
- push(i);
+ std::vector<std::vector<size_t>> dependenceGraph(globals.size());
+ for (size_t i = 0; i < globals.size(); ++i) {
+ if (auto it = deps.dependsOn.find(i); it != deps.dependsOn.end()) {
+ for (auto dep : it->second) {
+ dependenceGraph[i].push_back(dep);
}
}
+ }
- void pushPredecessors(Index global) {
- auto iter = deps.dependedUpon.find(global);
- if (iter == deps.dependedUpon.end()) {
- return;
- }
- for (auto dep : iter->second) {
- push(dep);
- }
- }
- } sort(globals.size(), deps);
-
+ auto sort = *TopologicalSort(dependenceGraph);
for (auto global : sort) {
// We can compute this global's count as in the sorted order all the
// values it cares about are resolved. Start with the self-count, then
@@ -236,160 +226,91 @@ struct ReorderGlobals : public Pass {
}
// Apply the indices we computed.
- std::vector<std::unique_ptr<Global>> old(std::move(globals));
+ auto old = std::move(globals);
globals.resize(old.size());
for (Index i = 0; i < old.size(); i++) {
- globals[(*best)[i]] = std::move(old[i]);
+ globals[i] = std::move(old[(*best)[i]]);
}
module->updateMaps();
}
IndexIndexMap doSort(const IndexCountMap& counts,
- const Dependencies& originalDeps,
+ const Dependencies& deps,
Module* module) {
auto& globals = module->globals;
- // Copy the deps as we will operate on them as we go.
- auto deps = originalDeps;
-
// To sort the globals we do a simple greedy approach of always picking the
// global with the highest count at every point in time, subject to the
// constraint that we can only emit globals that have all of their
- // dependencies already emitted. To do so we keep a list of the "available"
- // globals, which are those with no remaining dependencies. Then by keeping
- // the list of available globals in heap form we can simply pop the largest
- // from the heap each time, and add new available ones as they become so.
+ // dependencies already emitted.
//
- // Other approaches here could be to do a topological sort, but the optimal
- // order may not require strict ordering by topological depth, e.g.:
- /*
- // $c - $a
- // /
- // $e
- // \
- // $d - $b
- */
- // Here $e depends on $c and $d, $c depends on $a, and $d on $b. This is a
- // partial order, as $d can be before or after $a, for example. As a result,
- // if we sorted topologically by sub-trees here then we'd keep $c and $a
- // together, and $d and $b, but a better order might interleave them. A good
- // order also may not keep topological depths separated, e.g. we may want to
- // put $a in between $c and $d despite it having a greater depth.
+ // The greedy approach here may also be suboptimal, however. Consider that
+ // we might see that the best available global is $a, but if we instead
+ // selected some other global $b, that would allow us to select a third
+ // global $c that depends on $b, and $c might have a much higher use count
+ // than $a. For that reason we try several variations of this with different
+ // counts, see earlier.
//
- // The greedy approach here may also be unoptimal, however. Consider that we
- // might see that the best available global is $a, but if we popped $b
- // instead that could unlock $c which depends on $b, and $c may have a much
- // higher use count than $a. For that reason we try several variations of
- // this with different counts, see earlier.
- std::vector<Index> availableHeap;
-
- // Comparison function. Given a and b, returns if a should be before b. This
- // is used in a heap, where "highest" means "popped first", so see the notes
- // below on how we order.
- auto cmp = [&](Index a, Index b) {
- // Imports always go first. The binary writer takes care of this itself
- // anyhow, but it is better to do it here in the IR so we can actually
- // see what the final layout will be.
- auto aImported = globals[a]->imported();
- auto bImported = globals[b]->imported();
- // The highest items will be popped first off the heap, so we want imports
- // to be at higher indexes, that is,
- //
- // unimported, unimported, imported, imported.
- //
- // Then the imports are popped first.
- if (aImported != bImported) {
- return bImported;
- }
-
- // Sort by the counts. We want higher counts at higher indexes so they are
- // popped first, that is,
- //
- // 10, 20, 30, 40
- //
- auto aCount = counts[a];
- auto bCount = counts[b];
- if (aCount != bCount) {
- return aCount < bCount;
- }
-
- // Break ties using the original order, which means just using the
- // indices we have. We need lower indexes at the top so they are popped
- // first, that is,
- //
- // 3, 2, 1, 0
- //
- return a > b;
- };
-
- // Push an item that just became available to the available heap.
- auto push = [&](Index global) {
- availableHeap.push_back(global);
- std::push_heap(availableHeap.begin(), availableHeap.end(), cmp);
- };
-
- // The initially available globals are those with no dependencies.
- for (Index i = 0; i < globals.size(); i++) {
- if (deps.dependsOn[i].empty()) {
- push(i);
- }
+ // Sort the globals into the optimal order based on the counts, ignoring
+ // dependencies for now.
+ std::vector<Index> sortedGlobals;
+ sortedGlobals.resize(globals.size());
+ for (Index i = 0; i < globals.size(); ++i) {
+ sortedGlobals[i] = i;
}
+ std::sort(
+ sortedGlobals.begin(), sortedGlobals.end(), [&](Index a, Index b) {
+ // Imports always go first. The binary writer takes care of this itself
+ // anyhow, but it is better to do it here in the IR so we can actually
+ // see what the final layout will be.
+ auto aImported = globals[a]->imported();
+ auto bImported = globals[b]->imported();
+ if (aImported != bImported) {
+ return aImported;
+ }
- // Pop off the heap: Emit the global and its final, sorted index. Keep
- // doing that until we finish processing all the globals.
- IndexIndexMap sortedindices(globals.size());
- Index numSortedindices = 0;
- while (!availableHeap.empty()) {
- std::pop_heap(availableHeap.begin(), availableHeap.end(), cmp);
- auto global = availableHeap.back();
- sortedindices[global] = numSortedindices++;
- availableHeap.pop_back();
-
- // Each time we pop we emit the global, which means anything that only
- // depended on it becomes available to be popped as well.
- for (auto other : deps.dependedUpon[global]) {
- assert(deps.dependsOn[other].count(global));
- deps.dependsOn[other].erase(global);
- if (deps.dependsOn[other].empty()) {
- push(other);
+ // Sort by the counts. Higher counts come first.
+ auto aCount = counts[a];
+ auto bCount = counts[b];
+ if (aCount != bCount) {
+ return aCount > bCount;
}
+
+ // Break ties using the original order, which means just using the
+ // indices we have.
+ return a < b;
+ });
+
+ // Now use that optimal order to create an ordered graph that includes the
+ // dependencies. The final order will be the minimum topological sort of
+ // this graph.
+ std::vector<std::pair<Index, std::vector<Index>>> graph;
+ graph.reserve(globals.size());
+ for (auto i : sortedGlobals) {
+ std::vector<Index> children;
+ if (auto it = deps.dependedUpon.find(i); it != deps.dependedUpon.end()) {
+ children = std::vector<Index>(it->second.begin(), it->second.end());
}
+ graph.emplace_back(i, std::move(children));
}
- // All globals must have been handled. Cycles would prevent this, but they
- // cannot exist in valid IR.
- assert(numSortedindices == globals.size());
-
- return sortedindices;
+ return *MinTopologicalSortOf<Index>(graph.begin(), graph.end());
}
// Given an indexing of the globals and the counts of how many times each is
// used, estimate the size of relevant parts of the wasm binary (that is, of
// LEBs in global.gets).
double computeSize(IndexIndexMap& indices, IndexCountMap& counts) {
- // |indices| maps each old index to its new position in the sort. We need
- // the reverse map here, which at index 0 has the old index of the global
- // that will be first, and so forth.
- IndexIndexMap actualOrder(indices.size());
- for (Index i = 0; i < indices.size(); i++) {
- // Each global has a unique index, so we only replace 0's here, and they
- // must be in bounds.
- assert(indices[i] < indices.size());
- assert(actualOrder[indices[i]] == 0);
-
- actualOrder[indices[i]] = i;
- }
-
if (always) {
// In this mode we gradually increase the cost of later globals, in an
// unrealistic but smooth manner.
double total = 0;
- for (Index i = 0; i < actualOrder.size(); i++) {
+ for (Index i = 0; i < indices.size(); i++) {
// Multiply the count for this global by a smoothed LEB factor, which
// starts at 1 (for 1 byte) at index 0, and then increases linearly with
// i, so that after 128 globals we reach 2 (which is the true index at
// which the LEB size normally jumps from 1 to 2), and so forth.
- total += counts[actualOrder[i]] * (1.0 + (i / 128.0));
+ total += counts[indices[i]] * (1.0 + (i / 128.0));
}
return total;
}
@@ -401,7 +322,7 @@ struct ReorderGlobals : public Pass {
// forth.
size_t sizeInBits = 0;
size_t nextSizeIncrease = 0;
- for (Index i = 0; i < actualOrder.size(); i++) {
+ for (Index i = 0; i < indices.size(); i++) {
if (i == nextSizeIncrease) {
sizeInBits++;
// At the current size we have 7 * sizeInBits bits to use. For example,
@@ -410,7 +331,7 @@ struct ReorderGlobals : public Pass {
// larger LEB.
nextSizeIncrease = 1 << (7 * sizeInBits);
}
- total += counts[actualOrder[i]] * sizeInBits;
+ total += counts[indices[i]] * sizeInBits;
}
return total;
}