From 9c64fcdf873faec6b6814436bce2db35484888d7 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 4 Sep 2024 17:16:09 -0700 Subject: Use TopologicalSort::minSort to order rec groups (#6892) Rec groups need to be topologically sorted for the output module to be valid, but the specific order of rec groups also affects the module size because types at lower indices requires fewer bytes to reference. We previously optimized for code size when gathering types by sorting the list of groups before doing the topological sort. This was brittle, though, and depended on implementation details of the topological sort to be correct. Replace the old topological sort with use of the new `TopologicalSort::minSort` utility, which is a more principled method of achieving a minimal topological sort with respect to some comparator. Also draw inspiration from ReorderGlobals and apply an exponential factor to take the users of a rec group into account when determining its weight. --- src/ir/module-utils.cpp | 132 ++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index da2d7bb9f..bd73a2be9 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -20,7 +20,7 @@ #include "ir/manipulation.h" #include "ir/properties.h" #include "support/insert_ordered.h" -#include "support/topological_sort.h" +#include "support/topological_orders.h" namespace wasm::ModuleUtils { @@ -684,95 +684,81 @@ std::vector getPrivateHeapTypes(Module& wasm) { IndexedHeapTypes getOptimizedIndexedHeapTypes(Module& wasm) { auto counts = getHeapTypeCounts(wasm); - // Types have to be arranged into topologically ordered recursion groups. - // Under isorecrsive typing, the topological sort has to take all referenced - // rec groups into account. First, sort the groups by average use count among - // their members so that the later topological sort will place frequently used - // types first. - struct GroupInfo { - size_t index; - double useCount = 0; - std::unordered_set preds; - std::vector sortedPreds; - GroupInfo(size_t index) : index(index) {} - bool operator<(const GroupInfo& other) const { - if (useCount != other.useCount) { - return useCount < other.useCount; - } - return index > other.index; + // Collect the rec groups. + std::unordered_map groupIndices; + std::vector groups; + for (auto& [type, _] : counts) { + auto group = type.getRecGroup(); + if (groupIndices.insert({group, groups.size()}).second) { + groups.push_back(group); } - }; + } - struct GroupInfoMap : std::unordered_map { - void sort(std::vector& groups) { - std::sort(groups.begin(), groups.end(), [&](auto& a, auto& b) { - return this->at(a) < this->at(b); - }); + // Collect the total use counts for each group. + std::vector groupCounts; + groupCounts.reserve(groups.size()); + for (auto group : groups) { + size_t count = 0; + for (auto type : group) { + count += counts.at(type); } - }; + groupCounts.push_back(count); + } - // Collect the information that will be used to sort the recursion groups. - GroupInfoMap groupInfos; - for (auto& [type, _] : counts) { - RecGroup group = type.getRecGroup(); - // Try to initialize a new info or get the existing info. - auto& info = groupInfos.insert({group, {groupInfos.size()}}).first->second; - // Update the reference count. - info.useCount += counts.at(type); - // Collect predecessor groups. - for (auto child : type.getReferencedHeapTypes()) { - if (!child.isBasic()) { - RecGroup otherGroup = child.getRecGroup(); - if (otherGroup != group) { - info.preds.insert(otherGroup); + // Collect the reverse dependencies of each group. + std::vector> depSets(groups.size()); + for (size_t i = 0; i < groups.size(); ++i) { + for (auto type : groups[i]) { + for (auto child : type.getReferencedHeapTypes()) { + if (child.isBasic()) { + continue; } + auto childGroup = child.getRecGroup(); + if (childGroup == groups[i]) { + continue; + } + depSets[groupIndices.at(childGroup)].insert(i); } } } - - // Fix up the use counts to be averages to ensure groups are used comensurate - // with the amount of index space they occupy. Skip this for nominal types - // since their internal group size is always 1. - for (auto& [group, info] : groupInfos) { - info.useCount /= group.size(); + TopologicalSort::Graph deps; + deps.reserve(groups.size()); + for (size_t i = 0; i < groups.size(); ++i) { + deps.emplace_back(depSets[i].begin(), depSets[i].end()); } - // Sort the predecessors so the most used will be visited first. - for (auto& [group, info] : groupInfos) { - info.sortedPreds.insert( - info.sortedPreds.end(), info.preds.begin(), info.preds.end()); - groupInfos.sort(info.sortedPreds); - info.preds.clear(); - } + // Experimentally determined to be pretty good for a variety of programs in + // different languages. + constexpr double childFactor = 0.25; - struct RecGroupSort : TopologicalSort { - GroupInfoMap& groupInfos; - RecGroupSort(GroupInfoMap& groupInfos) : groupInfos(groupInfos) { - // Sort all the groups so the topological sort visits the most used first. - std::vector sortedGroups; - sortedGroups.reserve(groupInfos.size()); - for (auto& [group, _] : groupInfos) { - sortedGroups.push_back(group); - } - groupInfos.sort(sortedGroups); - for (auto group : sortedGroups) { - push(group); - } + // Each rec group's weight, adjusted for its size and incorporating the weight + // of its users. + std::vector weights(groups.size()); + for (size_t i = 0; i < groups.size(); ++i) { + weights[i] = double(groupCounts[i]) / groups[i].size(); + } + auto sorted = TopologicalSort::sort(deps); + for (auto it = sorted.rbegin(); it != sorted.rend(); ++it) { + for (auto user : deps[*it]) { + weights[*it] += childFactor * weights[user]; } + } - void pushPredecessors(RecGroup group) { - for (auto pred : groupInfos.at(group).sortedPreds) { - push(pred); - } + auto order = TopologicalSort::minSort(deps, [&](size_t a, size_t b) { + auto weightA = weights[a]; + auto weightB = weights[b]; + if (weightA != weightB) { + return weightA > weightB; } - }; + return a < b; + }); - // Perform the topological sort and collect the types. IndexedHeapTypes indexedTypes; indexedTypes.types.reserve(counts.size()); - for (auto group : RecGroupSort(groupInfos)) { - for (auto member : group) { - indexedTypes.types.push_back(member); + + for (auto groupIndex : order) { + for (auto type : groups[groupIndex]) { + indexedTypes.types.push_back(type); } } setIndices(indexedTypes); -- cgit v1.2.3