diff options
author | Thomas Lively <tlively@google.com> | 2024-09-10 12:01:00 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-10 12:01:00 -0700 |
commit | b4a34d20c957404206875242781e61dc84a1cd28 (patch) | |
tree | 6975aa542889060b63751a27ddc97850d0722d54 | |
parent | 9d3f8e5603c42fdb2517d15d865f5cee06d21db5 (diff) | |
download | binaryen-b4a34d20c957404206875242781e61dc84a1cd28.tar.gz binaryen-b4a34d20c957404206875242781e61dc84a1cd28.tar.bz2 binaryen-b4a34d20c957404206875242781e61dc84a1cd28.zip |
Add a --preserve-type-order option (#6916)
Unlike other module elements, types are not stored on the `Module`.
Instead, they are collected by traversing the IR before printing and
binary writing. The code that collects the types tries to optimize the
order of rec groups based on the number of times each type is used. As a
result, the output order of types generally has no relation to the input
order of types. In addition, most type optimizations rewrite the types
into a single large rec group, and the order of types in that group is
essentially arbitrary. Changes to the code for counting type uses,
sorting types, or sorting rec groups can yield very large changes in the
output order of types, producing test diffs that are hard to review and
potentially harming the readability of tests by moving output types away
from the corresponding input types.
To help make test output more stable and readable, introduce a tool
option that causes the order of output types to match the order of input
types as closely as possible. It is implemented by having the parsers
record the indices of the input types on the `Module` just like they
already record the type names. The `GlobalTypeRewriter` infrastructure
used by type optimizations associates the new types with the old indices
just like it already does for names and also respects the input order
when rewriting types into a large recursion group.
By default, wasm-opt and other tools clear the recorded type indices
after parsing the module, so their default behavior is not modified by
this change.
Follow-on PRs will use the new flag in more tests, which will generate
large diffs but leave the tests in stable, more readable states that
will no longer change due to other changes to the optimizing type
sorting logic.
33 files changed, 381 insertions, 71 deletions
diff --git a/src/binaryen-c.cpp b/src/binaryen-c.cpp index 31378ce22..64462c354 100644 --- a/src/binaryen-c.cpp +++ b/src/binaryen-c.cpp @@ -5647,6 +5647,9 @@ BinaryenModuleRef BinaryenModuleReadWithFeatures(char* input, p.dump(std::cerr); Fatal() << "error in parsing wasm binary"; } + // Do not regress code size by maintaining type order. TODO: Add an option to + // control this. + wasm->typeIndices.clear(); return wasm; } diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index 58f7d4637..a16592d15 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -764,7 +764,39 @@ IndexedHeapTypes getOptimizedIndexedHeapTypes(Module& wasm) { } } + // If we've preserved the input type order on the module, we have to respect + // that first. Use the index of the first type from each group. In principle + // we could try to do something more robust like take the minimum index of all + // the types in the group, but if the groups haven't been preserved, then we + // won't be able to perfectly preserve the order anyway. + std::vector<std::optional<Index>> groupTypeIndices; + if (wasm.typeIndices.empty()) { + groupTypeIndices.resize(groups.size()); + } else { + groupTypeIndices.reserve(groups.size()); + for (auto group : groups) { + groupTypeIndices.emplace_back(); + if (auto it = wasm.typeIndices.find(group[0]); + it != wasm.typeIndices.end()) { + groupTypeIndices.back() = it->second; + } + } + } + auto order = TopologicalSort::minSort(deps, [&](size_t a, size_t b) { + auto indexA = groupTypeIndices[a]; + auto indexB = groupTypeIndices[b]; + // Groups with indices must be sorted before groups without indices to + // ensure transitivity of this comparison relation. + if (indexA.has_value() != indexB.has_value()) { + return indexA.has_value(); + } + // Sort by preserved index if we can. + if (indexA && *indexA != *indexB) { + return *indexA < *indexB; + } + // Otherwise sort by weight and break ties by the arbitrary deterministic + // order in which we've collected types. auto weightA = weights[a]; auto weightB = weights[b]; if (weightA != weightB) { diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp index 17437cf2e..467971989 100644 --- a/src/ir/type-updating.cpp +++ b/src/ir/type-updating.cpp @@ -20,6 +20,7 @@ #include "ir/module-utils.h" #include "ir/names.h" #include "ir/utils.h" +#include "support/topological_sort.h" #include "wasm-type-ordering.h" #include "wasm-type.h" #include "wasm.h" @@ -38,44 +39,125 @@ GlobalTypeRewriter::TypeMap GlobalTypeRewriter::rebuildTypes( // Find the heap types that are not publicly observable. Even in a closed // world scenario, don't modify public types because we assume that they may // be reflected on or used for linking. Figure out where each private type - // will be located in the builder. Sort the private types so that supertypes - // come before their subtypes. - Index i = 0; - auto privateTypes = ModuleUtils::getPrivateHeapTypes(wasm); + // will be located in the builder. + // + // There are two code paths here: a new one that is used when there are type + // indices to preserve and an old one that is used otherwise. The old code + // path is kept around to avoid unnecessary changes to test outputs while we + // incrementally add --preserve-type-order to tests that could benefit from + // it. Once we are done adding --preserve-type-order to tests, we should + // remove the old code path here since the new code path is strictly better. + if (wasm.typeIndices.size()) { + // New code path, currently used only with --preserve-type-order. + auto typeInfo = ModuleUtils::collectHeapTypeInfo( + wasm, + ModuleUtils::TypeInclusion::UsedIRTypes, + ModuleUtils::VisibilityHandling::FindVisibility); + + std::unordered_set<HeapType> additionalSet(additionalPrivateTypes.begin(), + additionalPrivateTypes.end()); + + std::vector<std::pair<HeapType, SmallVector<HeapType, 1>>> + privateSupertypes; + privateSupertypes.reserve(typeInfo.size()); + for (auto& [type, info] : typeInfo) { + if (info.visibility != ModuleUtils::Visibility::Private && + !additionalSet.count(type)) { + continue; + } + privateSupertypes.push_back({type, {}}); + + if (auto super = getDeclaredSuperType(type)) { + auto it = typeInfo.find(*super); + // Record the supertype only if it is among the private types. + if ((it != typeInfo.end() && + it->second.visibility == ModuleUtils::Visibility::Private) || + additionalSet.count(*super)) { + privateSupertypes.back().second.push_back(*super); + } + } + } + + // Topological sort to have subtypes first. This is the opposite of the + // order we need, so the comparison is the opposite of what we ultimately + // want. + std::vector<HeapType> sorted; + if (wasm.typeIndices.empty()) { + sorted = TopologicalSort::sortOf(privateSupertypes.begin(), + privateSupertypes.end()); + } else { + sorted = + TopologicalSort::minSortOf(privateSupertypes.begin(), + privateSupertypes.end(), + [&](Index a, Index b) { + auto typeA = privateSupertypes[a].first; + auto typeB = privateSupertypes[b].first; + // Preserve type order. + auto itA = wasm.typeIndices.find(typeA); + auto itB = wasm.typeIndices.find(typeB); + bool hasA = itA != wasm.typeIndices.end(); + bool hasB = itB != wasm.typeIndices.end(); + if (hasA != hasB) { + // Types with preserved indices must be + // sorted before (after in this reversed + // comparison) types without indices to + // maintain transitivity. + return !hasA; + } + if (hasA && *itA != *itB) { + return !(itA->second < itB->second); + } + // Break ties by the arbitrary order we + // have collected the types in. + return a > b; + }); + } + std::reverse(sorted.begin(), sorted.end()); + Index i = 0; + for (auto type : sorted) { + typeIndices[type] = i++; + } + } else { + // Old code path. - if (!additionalPrivateTypes.empty()) { - // Only add additional private types that are not already in the list. - std::unordered_set<HeapType> privateTypesSet(privateTypes.begin(), - privateTypes.end()); + auto privateTypes = ModuleUtils::getPrivateHeapTypes(wasm); - for (auto t : additionalPrivateTypes) { - if (!privateTypesSet.count(t)) { - privateTypes.push_back(t); - privateTypesSet.insert(t); + if (!additionalPrivateTypes.empty()) { + // Only add additional private types that are not already in the list. + std::unordered_set<HeapType> privateTypesSet(privateTypes.begin(), + privateTypes.end()); + + for (auto t : additionalPrivateTypes) { + if (!privateTypesSet.count(t)) { + privateTypes.push_back(t); + privateTypesSet.insert(t); + } } } - } - // Topological sort to have supertypes first, but we have to account for the - // fact that we may be replacing the supertypes to get the order correct. - struct SupertypesFirst - : HeapTypeOrdering::SupertypesFirstBase<SupertypesFirst> { - GlobalTypeRewriter& parent; + // Topological sort to have supertypes first, but we have to account for the + // fact that we may be replacing the supertypes to get the order correct. + struct SupertypesFirst + : HeapTypeOrdering::SupertypesFirstBase<SupertypesFirst> { + GlobalTypeRewriter& parent; - SupertypesFirst(GlobalTypeRewriter& parent) : parent(parent) {} - std::optional<HeapType> getDeclaredSuperType(HeapType type) { - return parent.getDeclaredSuperType(type); - } - }; + SupertypesFirst(GlobalTypeRewriter& parent) : parent(parent) {} + std::optional<HeapType> getDeclaredSuperType(HeapType type) { + return parent.getDeclaredSuperType(type); + } + }; - SupertypesFirst sortedTypes(*this); - for (auto type : sortedTypes.sort(privateTypes)) { - typeIndices[type] = i++; + SupertypesFirst sortedTypes(*this); + Index i = 0; + for (auto type : sortedTypes.sort(privateTypes)) { + typeIndices[type] = i++; + } } if (typeIndices.size() == 0) { return {}; } + typeBuilder.grow(typeIndices.size()); // All the input types are distinct, so we need to make sure the output types @@ -86,7 +168,7 @@ GlobalTypeRewriter::TypeMap GlobalTypeRewriter::rebuildTypes( typeBuilder.createRecGroup(0, typeBuilder.size()); // Create the temporary heap types. - i = 0; + Index i = 0; auto map = [&](HeapType type) -> HeapType { if (auto it = typeIndices.find(type); it != typeIndices.end()) { return typeBuilder[it->second]; @@ -144,7 +226,7 @@ GlobalTypeRewriter::TypeMap GlobalTypeRewriter::rebuildTypes( for (auto [type, index] : typeIndices) { oldToNewTypes[type] = newTypes[index]; } - mapTypeNames(oldToNewTypes); + mapTypeNamesAndIndices(oldToNewTypes); return oldToNewTypes; } @@ -268,7 +350,7 @@ void GlobalTypeRewriter::mapTypes(const TypeMap& oldToNewTypes) { } } -void GlobalTypeRewriter::mapTypeNames(const TypeMap& oldToNewTypes) { +void GlobalTypeRewriter::mapTypeNamesAndIndices(const TypeMap& oldToNewTypes) { // Update type names to avoid duplicates. std::unordered_set<Name> typeNames; for (auto& [type, info] : wasm.typeNames) { @@ -281,16 +363,21 @@ void GlobalTypeRewriter::mapTypeNames(const TypeMap& oldToNewTypes) { } if (auto it = wasm.typeNames.find(old); it != wasm.typeNames.end()) { - wasm.typeNames[new_] = wasm.typeNames[old]; + auto& oldNames = it->second; + wasm.typeNames[new_] = oldNames; // Use the existing name in the new type, as usually it completely // replaces the old. Rename the old name in a unique way to avoid // confusion in the case that it remains used. - auto deduped = - Names::getValidName(wasm.typeNames[old].name, - [&](Name test) { return !typeNames.count(test); }); - wasm.typeNames[old].name = deduped; + auto deduped = Names::getValidName( + oldNames.name, [&](Name test) { return !typeNames.count(test); }); + oldNames.name = deduped; typeNames.insert(deduped); } + if (auto it = wasm.typeIndices.find(old); it != wasm.typeIndices.end()) { + // It's ok if we end up with duplicate indices. Ties will be resolved in + // some arbitrary manner. + wasm.typeIndices[new_] = it->second; + } } } diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h index a8e071fb6..fe1cd2806 100644 --- a/src/ir/type-updating.h +++ b/src/ir/type-updating.h @@ -371,8 +371,9 @@ public: // Users of `mapTypes` may want to update the type names according to their // mapping. This is not done automatically in `mapTypes` because other users - // may want the names to reflect that types have been replaced. - void mapTypeNames(const TypeMap& oldToNewTypes); + // may want the names to reflect that types have been replaced. Do the same + // mapping for recorded type indices. + void mapTypeNamesAndIndices(const TypeMap& oldToNewTypes); // Subclasses can implement these methods to modify the new set of types that // we map to. By default, we simply copy over the types, and these functions diff --git a/src/parser/parse-3-implicit-types.cpp b/src/parser/parse-3-implicit-types.cpp index 3a3a867e1..cf13ae0f7 100644 --- a/src/parser/parse-3-implicit-types.cpp +++ b/src/parser/parse-3-implicit-types.cpp @@ -29,6 +29,10 @@ parseImplicitTypeDefs(ParseDeclsCtx& decls, WithPosition with(ctx, pos); CHECK_ERR(typeuse(ctx)); } + // Record type indices now that all the types have been parsed. + for (Index i = 0; i < types.size(); ++i) { + decls.wasm.typeIndices.insert({types[i], i}); + } return Ok{}; } diff --git a/src/passes/MinimizeRecGroups.cpp b/src/passes/MinimizeRecGroups.cpp index e89d98442..0faf297f5 100644 --- a/src/passes/MinimizeRecGroups.cpp +++ b/src/passes/MinimizeRecGroups.cpp @@ -857,7 +857,7 @@ struct MinimizeRecGroups : Pass { } GlobalTypeRewriter rewriter(wasm); rewriter.mapTypes(oldToNew); - rewriter.mapTypeNames(oldToNew); + rewriter.mapTypeNamesAndIndices(oldToNew); } }; diff --git a/src/passes/RoundTrip.cpp b/src/passes/RoundTrip.cpp index 129b89ac8..123752f79 100644 --- a/src/passes/RoundTrip.cpp +++ b/src/passes/RoundTrip.cpp @@ -40,6 +40,11 @@ struct RoundTrip : public Pass { // the target features section has been stripped. We also need them in order // to tell the builder which features to build with. auto features = module->features; + + // We need to know whether we should preserve the type order when we read + // the module back in. + bool preserveTypeOrder = !module->typeIndices.empty(); + // Write, clear, and read the module WasmBinaryWriter(module, buffer, getPassOptions()).write(); ModuleUtils::clearModule(*module); @@ -53,6 +58,10 @@ struct RoundTrip : public Pass { std::cerr << '\n'; Fatal() << "error in parsing wasm binary"; } + + if (!preserveTypeOrder) { + module->typeIndices.clear(); + } } }; diff --git a/src/support/topological_sort.h b/src/support/topological_sort.h index b75f6b78d..be5e49b8c 100644 --- a/src/support/topological_sort.h +++ b/src/support/topological_sort.h @@ -42,10 +42,17 @@ using Graph = std::vector<std::vector<Index>>; // Return a topological sort of the vertices in the given adjacency graph. inline std::vector<Index> sort(const Graph& graph); +// Return the topological sort of the vertices in the given adjacency graph that +// is lexicographically minimal with respect to the provided comparator on +// vertex indices. Implemented using a min-heap internally. +template<typename F = std::less<Index>> +std::vector<Index> minSort(const Graph& graph, F cmp = std::less<Index>{}); + // A utility that finds a topological sort of a graph with arbitrary element // types. The provided iterators must be to pairs of elements and collections of // their children. -template<typename It> decltype(auto) sortOf(It begin, It end) { +template<typename It, typename SortT, SortT Sort, typename... Args> +decltype(auto) sortOfImpl(It begin, It end, Args... args) { using T = std::remove_cv_t<typename It::value_type::first_type>; std::unordered_map<T, Index> indices; std::vector<T> elements; @@ -67,17 +74,23 @@ template<typename It> decltype(auto) sortOf(It begin, It end) { // Compute the topological order and convert back to original elements. std::vector<T> order; order.reserve(elements.size()); - for (auto i : sort(indexGraph)) { + for (auto i : Sort(indexGraph, std::forward<Args>(args)...)) { order.emplace_back(std::move(elements[i])); } return order; } -// Return the topological sort of the vertices in the given adjacency graph that -// is lexicographically minimal with respect to the provided comparator on -// vertex indices. Implemented using a min-heap internally. -template<typename F = std::less<Index>> -std::vector<Index> minSort(const Graph& graph, F cmp = std::less<Index>{}); +template<typename It> decltype(auto) sortOf(It begin, It end) { + return sortOfImpl<It, std::vector<Index> (&)(const Graph&), sort>(begin, end); +} + +template<typename It, typename Cmp> +decltype(auto) minSortOf(It begin, It end, Cmp cmp) { + return sortOfImpl<It, + std::vector<Index> (&)(const Graph&, Cmp), + minSort, + Cmp>(begin, end, cmp); +} } // namespace TopologicalSort diff --git a/src/tools/tool-options.h b/src/tools/tool-options.h index bfa9a9b5c..f900d76ba 100644 --- a/src/tools/tool-options.h +++ b/src/tools/tool-options.h @@ -31,6 +31,7 @@ struct ToolOptions : public Options { PassOptions passOptions; bool quiet = false; + bool preserveTypeOrder = false; IRProfile profile = IRProfile::Normal; constexpr static const char* ToolOptionsCategory = "Tool options"; @@ -158,6 +159,16 @@ struct ToolOptions : public Options { [this](Options*, const std::string&) { passOptions.closedWorld = true; }) + .add( + "--preserve-type-order", + "", + "Preserve the order of types from the input (useful for debugging and " + "testing)", + ToolOptionsCategory, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { + preserveTypeOrder = true; + }) .add("--generate-stack-ir", "", "generate StackIR during writing", @@ -213,11 +224,17 @@ struct ToolOptions : public Options { return *this; } - void applyFeatures(Module& module) const { + void applyOptionsBeforeParse(Module& module) const { module.features.enable(enabledFeatures); module.features.disable(disabledFeatures); } + void applyOptionsAfterParse(Module& module) const { + if (!preserveTypeOrder) { + module.typeIndices.clear(); + } + } + virtual void addPassArg(const std::string& key, const std::string& value) { passOptions.arguments[key] = value; } diff --git a/src/tools/wasm-as.cpp b/src/tools/wasm-as.cpp index a767e6908..73ae82134 100644 --- a/src/tools/wasm-as.cpp +++ b/src/tools/wasm-as.cpp @@ -107,13 +107,15 @@ int main(int argc, const char* argv[]) { auto input(read_file<std::string>(options.extra["infile"], Flags::Text)); Module wasm; - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); auto parsed = WATParser::parseModule(wasm, input); if (auto* err = parsed.getErr()) { Fatal() << err->msg; } + options.applyOptionsAfterParse(wasm); + if (options.extra["validate"] != "none") { if (options.debug) { std::cerr << "Validating..." << std::endl; diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp index 6809fa32a..d74790b2a 100644 --- a/src/tools/wasm-ctor-eval.cpp +++ b/src/tools/wasm-ctor-eval.cpp @@ -1445,7 +1445,7 @@ int main(int argc, const char* argv[]) { options.parse(argc, argv); Module wasm; - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); { if (options.debug) { @@ -1460,6 +1460,8 @@ int main(int argc, const char* argv[]) { } } + options.applyOptionsAfterParse(wasm); + if (!WasmValidator().validate(wasm)) { std::cout << wasm << '\n'; Fatal() << "error in validating input"; diff --git a/src/tools/wasm-dis.cpp b/src/tools/wasm-dis.cpp index f9f303359..1603736ce 100644 --- a/src/tools/wasm-dis.cpp +++ b/src/tools/wasm-dis.cpp @@ -64,7 +64,7 @@ int main(int argc, const char* argv[]) { std::cerr << "parsing binary..." << std::endl; } Module wasm; - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); try { ModuleReader().readBinary(options.extra["infile"], wasm, sourceMapFilename); } catch (ParseException& p) { @@ -82,6 +82,8 @@ int main(int argc, const char* argv[]) { Fatal() << "error in parsing wasm source mapping"; } + options.applyOptionsAfterParse(wasm); + // TODO: Validation. However, validating would mean that users are forced to // run with wasm-dis -all or such, to enable the features (unless the // features section is present, but that's rare in general). It would be diff --git a/src/tools/wasm-emscripten-finalize.cpp b/src/tools/wasm-emscripten-finalize.cpp index 6b4e994ac..505e78349 100644 --- a/src/tools/wasm-emscripten-finalize.cpp +++ b/src/tools/wasm-emscripten-finalize.cpp @@ -196,7 +196,7 @@ int main(int argc, const char* argv[]) { auto writeOutput = outfile.size() > 0 || !emitBinary; Module wasm; - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); ModuleReader reader; // If we are not writing output then we definitely don't need to read debug // info. However, if we emit output then definitely load the names section so @@ -226,6 +226,8 @@ int main(int argc, const char* argv[]) { Fatal() << "error in parsing wasm source map"; } + options.applyOptionsAfterParse(wasm); + BYN_TRACE_WITH_TYPE("emscripten-dump", "Module before:\n"); BYN_DEBUG_WITH_TYPE("emscripten-dump", std::cerr << &wasm); diff --git a/src/tools/wasm-merge.cpp b/src/tools/wasm-merge.cpp index 449f0cfdb..3de228350 100644 --- a/src/tools/wasm-merge.cpp +++ b/src/tools/wasm-merge.cpp @@ -695,7 +695,7 @@ Input source maps can be specified by adding an -ism option right after the modu currModule = laterInput.get(); } - options.applyFeatures(*currModule); + options.applyOptionsBeforeParse(*currModule); ModuleReader reader; try { @@ -705,6 +705,8 @@ Input source maps can be specified by adding an -ism option right after the modu Fatal() << "error in parsing wasm input: " << inputFile; } + options.applyOptionsAfterParse(*currModule); + if (options.passOptions.validate) { if (!WasmValidator().validate(*currModule)) { std::cout << *currModule << '\n'; diff --git a/src/tools/wasm-metadce.cpp b/src/tools/wasm-metadce.cpp index 9cc06375e..41dcf6ad4 100644 --- a/src/tools/wasm-metadce.cpp +++ b/src/tools/wasm-metadce.cpp @@ -486,7 +486,7 @@ int main(int argc, const char* argv[]) { } Module wasm; - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); { if (options.debug) { @@ -502,6 +502,8 @@ int main(int argc, const char* argv[]) { } } + options.applyOptionsAfterParse(wasm); + if (options.passOptions.validate) { if (!WasmValidator().validate(wasm)) { std::cout << wasm << '\n'; diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index d488171fb..3e1152179 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -243,7 +243,7 @@ int main(int argc, const char* argv[]) { options.parse(argc, argv); Module wasm; - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); BYN_TRACE("reading...\n"); @@ -294,6 +294,8 @@ int main(int argc, const char* argv[]) { "request for silly amounts of memory)"; } + options.applyOptionsAfterParse(wasm); + if (options.passOptions.validate) { if (!WasmValidator().validate(wasm, options.passOptions)) { exitOnInvalidWasm("error validating input"); diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp index c276296ad..8d9858b78 100644 --- a/src/tools/wasm-reduce.cpp +++ b/src/tools/wasm-reduce.cpp @@ -362,6 +362,9 @@ struct Reducer void loadWorking() { module = std::make_unique<Module>(); + + toolOptions.applyOptionsBeforeParse(*module); + ModuleReader reader; try { reader.read(working, *module); @@ -371,15 +374,14 @@ struct Reducer Fatal() << "error in parsing working wasm binary"; } + toolOptions.applyOptionsAfterParse(*module); + // If there is no features section, assume we may need them all (without // this, a module with no features section but that uses e.g. atomics and // bulk memory would not work). if (!module->hasFeaturesSection) { module->features = FeatureSet::All; } - // Apply features the user passed on the commandline. - toolOptions.applyFeatures(*module); - builder = std::make_unique<Builder>(*module); setModule(module.get()); } diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp index eed26f1e1..c1ec6052f 100644 --- a/src/tools/wasm-split/wasm-split.cpp +++ b/src/tools/wasm-split/wasm-split.cpp @@ -38,7 +38,7 @@ using namespace wasm; namespace { void parseInput(Module& wasm, const WasmSplitOptions& options) { - options.applyFeatures(wasm); + options.applyOptionsBeforeParse(wasm); ModuleReader reader; reader.setProfile(options.profile); try { @@ -52,6 +52,8 @@ void parseInput(Module& wasm, const WasmSplitOptions& options) { "request for silly amounts of memory)"; } + options.applyOptionsAfterParse(wasm); + if (options.passOptions.validate && !WasmValidator().validate(wasm)) { Fatal() << "error validating input"; } diff --git a/src/tools/wasm2js.cpp b/src/tools/wasm2js.cpp index 286f89890..2c48e5be0 100644 --- a/src/tools/wasm2js.cpp +++ b/src/tools/wasm2js.cpp @@ -786,7 +786,9 @@ void AssertionEmitter::emit() { if (auto* mod = std::get_if<WASTModule>(&cmd)) { if (auto* w = std::get_if<std::shared_ptr<Module>>(mod)) { wasm = *w; - options.applyFeatures(*wasm); + // We have already done the parse, but we still do this to apply the + // features from the command line. + options.applyOptionsBeforeParse(*wasm); std::stringstream funcNameS; funcNameS << ASM_FUNC << i; std::stringstream moduleNameS; @@ -928,6 +930,7 @@ int main(int argc, const char* argv[]) { // is defined. if (binaryInput) { wasm = std::make_shared<Module>(); + options.applyOptionsBeforeParse(*wasm); ModuleReader reader; reader.read(input, *wasm, ""); } else { @@ -946,6 +949,9 @@ int main(int argc, const char* argv[]) { if (auto* mod = std::get_if<WASTModule>(&(*script)[0].cmd)) { if (auto* w = std::get_if<std::shared_ptr<Module>>(mod)) { wasm = *w; + // This isn't actually before the parse, but we can't apply the + // feature options any earlier. FIXME. + options.applyOptionsBeforeParse(*wasm); } } if (!wasm) { @@ -965,7 +971,7 @@ int main(int argc, const char* argv[]) { Fatal() << "error: modules with multiple tables are not supported yet."; } - options.applyFeatures(*wasm); + options.applyOptionsAfterParse(*wasm); if (options.passOptions.validate) { if (!WasmValidator().validate(*wasm)) { std::cout << *wasm << '\n'; diff --git a/src/wasm.h b/src/wasm.h index a7ad6ec6c..a86f77013 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2308,6 +2308,7 @@ public: Name name; std::unordered_map<HeapType, TypeNames> typeNames; + std::unordered_map<HeapType, Index> typeIndices; MixedArena allocator; diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 3c8cc86df..151a7f911 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2464,7 +2464,12 @@ void WasmBinaryReader::readTypes() { if (auto* err = result.getError()) { Fatal() << "Invalid type: " << err->reason << " at index " << err->index; } - types = *result; + types = std::move(*result); + + // Record the type indices. + for (Index i = 0; i < types.size(); ++i) { + wasm.typeIndices.insert({types[i], i}); + } } Name WasmBinaryReader::getFunctionName(Index index) { diff --git a/test/example/module-splitting.txt b/test/example/module-splitting.txt index 69fabc816..b72867dda 100644 --- a/test/example/module-splitting.txt +++ b/test/example/module-splitting.txt @@ -629,10 +629,10 @@ Before: Keeping: null After: (module - (type $0 (func (param i32) (result i32))) - (type $1 (func)) + (type $0 (func)) + (type $1 (func (param i32) (result i32))) (import "env" "base" (global $base i32)) - (import "placeholder" "0" (func $placeholder_0 (type $0) (param i32) (result i32))) + (import "placeholder" "0" (func $placeholder_0 (type $1) (param i32) (result i32))) (table $table 1000 funcref) (table $0 1 funcref) (elem $0 (table $table) (global.get $base) func $null $trampoline_foo) @@ -641,17 +641,17 @@ After: (export "%table" (table $table)) (export "%table_2" (table $0)) (export "%global" (global $base)) - (func $null (type $1) + (func $null (type $0) (nop) ) - (func $foo (type $0) (param $0 i32) (result i32) - (call_indirect $0 (type $0) + (func $foo (type $1) (param $0 i32) (result i32) + (call_indirect $0 (type $1) (local.get $0) (i32.const 0) ) ) - (func $trampoline_foo (type $0) (param $0 i32) (result i32) - (call_indirect $0 (type $0) + (func $trampoline_foo (type $1) (param $0 i32) (result i32) + (call_indirect $0 (type $1) (local.get $0) (i32.const 0) ) diff --git a/test/lit/help/wasm-as.test b/test/lit/help/wasm-as.test index a2ee3d45e..5dd4b1515 100644 --- a/test/lit/help/wasm-as.test +++ b/test/lit/help/wasm-as.test @@ -139,6 +139,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-ctor-eval.test b/test/lit/help/wasm-ctor-eval.test index 4ad75a8d0..abb80b12e 100644 --- a/test/lit/help/wasm-ctor-eval.test +++ b/test/lit/help/wasm-ctor-eval.test @@ -146,6 +146,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-dis.test b/test/lit/help/wasm-dis.test index 96e999c2c..90bc12b99 100644 --- a/test/lit/help/wasm-dis.test +++ b/test/lit/help/wasm-dis.test @@ -132,6 +132,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-emscripten-finalize.test b/test/lit/help/wasm-emscripten-finalize.test index fbf86209c..cdd378d40 100644 --- a/test/lit/help/wasm-emscripten-finalize.test +++ b/test/lit/help/wasm-emscripten-finalize.test @@ -174,6 +174,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-merge.test b/test/lit/help/wasm-merge.test index 108fd3673..ee1fed13a 100644 --- a/test/lit/help/wasm-merge.test +++ b/test/lit/help/wasm-merge.test @@ -162,6 +162,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test index 4ddf55203..50295b1f0 100644 --- a/test/lit/help/wasm-metadce.test +++ b/test/lit/help/wasm-metadce.test @@ -248,9 +248,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --merge-blocks merges blocks to their parents ;; CHECK-NEXT: -;; CHECK-NEXT: --merge-j2cl-itables Merges itable structures into +;; CHECK-NEXT: --merge-j2cl-itables Merges itable structures into ;; CHECK-NEXT: vtables to make types more -;; CHECK-NEXT: compact +;; CHECK-NEXT: compact ;; CHECK-NEXT: ;; CHECK-NEXT: --merge-locals merges locals when beneficial ;; CHECK-NEXT: @@ -771,6 +771,10 @@ ;; CHECK-NEXT: in, but not inspect their ;; CHECK-NEXT: contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from +;; CHECK-NEXT: the input (useful for debugging +;; CHECK-NEXT: and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index f55798253..0691d1c18 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -257,7 +257,7 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --merge-blocks merges blocks to their parents ;; CHECK-NEXT: -;; CHECK-NEXT: --merge-j2cl-itables Merges itable structures into +;; CHECK-NEXT: --merge-j2cl-itables Merges itable structures into ;; CHECK-NEXT: vtables to make types more ;; CHECK-NEXT: compact ;; CHECK-NEXT: @@ -780,6 +780,10 @@ ;; CHECK-NEXT: in, but not inspect their ;; CHECK-NEXT: contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from +;; CHECK-NEXT: the input (useful for debugging +;; CHECK-NEXT: and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-reduce.test b/test/lit/help/wasm-reduce.test index 61e171ba9..cc75aed2b 100644 --- a/test/lit/help/wasm-reduce.test +++ b/test/lit/help/wasm-reduce.test @@ -168,6 +168,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm-split.test b/test/lit/help/wasm-split.test index a6074c85d..dc521a82f 100644 --- a/test/lit/help/wasm-split.test +++ b/test/lit/help/wasm-split.test @@ -249,6 +249,9 @@ ;; CHECK-NEXT: them and pass them back in, but not ;; CHECK-NEXT: inspect their contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from the +;; CHECK-NEXT: input (useful for debugging and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 39c4d5f5e..89dcaa028 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -211,7 +211,7 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --merge-blocks merges blocks to their parents ;; CHECK-NEXT: -;; CHECK-NEXT: --merge-j2cl-itables Merges itable structures into +;; CHECK-NEXT: --merge-j2cl-itables Merges itable structures into ;; CHECK-NEXT: vtables to make types more ;; CHECK-NEXT: compact ;; CHECK-NEXT: @@ -734,6 +734,10 @@ ;; CHECK-NEXT: in, but not inspect their ;; CHECK-NEXT: contents or call them. ;; CHECK-NEXT: +;; CHECK-NEXT: --preserve-type-order Preserve the order of types from +;; CHECK-NEXT: the input (useful for debugging +;; CHECK-NEXT: and testing) +;; CHECK-NEXT: ;; CHECK-NEXT: --generate-stack-ir generate StackIR during writing ;; CHECK-NEXT: ;; CHECK-NEXT: --optimize-stack-ir optimize StackIR during writing diff --git a/test/lit/passes/remove-unused-types-preserve-order.wast b/test/lit/passes/remove-unused-types-preserve-order.wast new file mode 100644 index 000000000..12f467e58 --- /dev/null +++ b/test/lit/passes/remove-unused-types-preserve-order.wast @@ -0,0 +1,81 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-opt %s -all --closed-world --preserve-type-order \ +;; RUN: --remove-unused-types -S -o - | filecheck %s +;; RUN: wasm-opt %s -all --closed-world --preserve-type-order \ +;; RUN: --remove-unused-types --roundtrip -S -o - | filecheck %s + +(module + (rec + (type $unused-1 (struct)) + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (struct)) + (type $A (struct)) + (type $unused-2 (struct)) + ) + ;; CHECK: (type $B (struct)) + (type $B (struct)) + (type $unused-3 (struct (field i32))) + (rec + ;; CHECK: (type $C (struct (field (ref null $D)))) + (type $C (struct (field (ref null $D)))) + (type $unused-4 (struct)) + ;; CHECK: (type $D (struct (field (ref null $C)))) + (type $D (struct (field (ref null $C)))) + ) + ;; CHECK: (type $E (struct (field (ref null $E)))) + (type $E (struct (field (ref null $E)))) + + ;; Use the types in a shuffled order, using later types the most. If we + ;; weren't deliberately and correctly preserving type order, we would end up + ;; with some other order. + + ;; CHECK: (global $E1 (ref null $E) (ref.null none)) + (global $E1 (ref null $E) (ref.null none)) + ;; CHECK: (global $E2 (ref null $E) (ref.null none)) + (global $E2 (ref null $E) (ref.null none)) + ;; CHECK: (global $E3 (ref null $E) (ref.null none)) + (global $E3 (ref null $E) (ref.null none)) + ;; CHECK: (global $E4 (ref null $E) (ref.null none)) + (global $E4 (ref null $E) (ref.null none)) + ;; CHECK: (global $E5 (ref null $E) (ref.null none)) + (global $E5 (ref null $E) (ref.null none)) + ;; CHECK: (global $E6 (ref null $E) (ref.null none)) + (global $E6 (ref null $E) (ref.null none)) + ;; CHECK: (global $E7 (ref null $E) (ref.null none)) + (global $E7 (ref null $E) (ref.null none)) + ;; CHECK: (global $E8 (ref null $E) (ref.null none)) + (global $E8 (ref null $E) (ref.null none)) + ;; CHECK: (global $E9 (ref null $E) (ref.null none)) + (global $E9 (ref null $E) (ref.null none)) + + ;; CHECK: (global $C1 (ref null $C) (ref.null none)) + (global $C1 (ref null $C) (ref.null none)) + ;; CHECK: (global $C2 (ref null $C) (ref.null none)) + (global $C2 (ref null $C) (ref.null none)) + ;; CHECK: (global $C3 (ref null $C) (ref.null none)) + (global $C3 (ref null $C) (ref.null none)) + ;; CHECK: (global $C4 (ref null $C) (ref.null none)) + (global $C4 (ref null $C) (ref.null none)) + ;; CHECK: (global $C5 (ref null $C) (ref.null none)) + (global $C5 (ref null $C) (ref.null none)) + ;; CHECK: (global $C6 (ref null $C) (ref.null none)) + (global $C6 (ref null $C) (ref.null none)) + + ;; CHECK: (global $A1 (ref null $A) (ref.null none)) + (global $A1 (ref null $A) (ref.null none)) + ;; CHECK: (global $A2 (ref null $A) (ref.null none)) + (global $A2 (ref null $A) (ref.null none)) + ;; CHECK: (global $A3 (ref null $A) (ref.null none)) + (global $A3 (ref null $A) (ref.null none)) + ;; CHECK: (global $A4 (ref null $A) (ref.null none)) + (global $A4 (ref null $A) (ref.null none)) + + ;; CHECK: (global $D1 (ref null $D) (ref.null none)) + (global $D1 (ref null $D) (ref.null none)) + ;; CHECK: (global $D2 (ref null $D) (ref.null none)) + (global $D2 (ref null $D) (ref.null none)) + + ;; CHECK: (global $B1 (ref null $B) (ref.null none)) + (global $B1 (ref null $B) (ref.null none)) +) |