diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm/wasm-type.cpp | 472 |
1 files changed, 219 insertions, 253 deletions
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index 8bfd31127..1b6313ad7 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -22,6 +22,7 @@ #include <sstream> #include <unordered_map> #include <unordered_set> +#include <variant> #include "compiler-support.h" #include "support/hash.h" @@ -2357,6 +2358,116 @@ void TypeBuilder::setNominal(size_t i) { namespace { +// Helper for TypeBuilder::build() that keeps track of temporary types and +// provides logic for replacing them gradually with more canonical types. +struct CanonicalizationState { + // The list of types being built and canonicalized. Will eventually be + // returned to the TypeBuilder user. + std::vector<HeapType> results; + // The newly constructed, temporary HeapTypeInfos that still need to be + // canonicalized. + std::vector<std::unique_ptr<HeapTypeInfo>> newInfos; + + // Either the fully canonical HeapType or a new temporary HeapTypeInfo that a + // previous HeapTypeInfo will be replaced with. + struct Replacement : std::variant<HeapType, std::unique_ptr<HeapTypeInfo>> { + using Super = std::variant<HeapType, std::unique_ptr<HeapTypeInfo>>; + Replacement(std::unique_ptr<HeapTypeInfo>&& info) + : Super(std::move(info)) {} + Replacement(HeapType type) : Super(type) {} + HeapType* getHeapType() { return std::get_if<HeapType>(this); } + std::unique_ptr<HeapTypeInfo>* getHeapTypeInfo() { + return std::get_if<std::unique_ptr<HeapTypeInfo>>(this); + } + HeapType getAsHeapType() { + if (auto* type = getHeapType()) { + return *type; + } + return asHeapType(*getHeapTypeInfo()); + } + }; + + using ReplacementMap = std::unordered_map<HeapType, Replacement>; + void update(ReplacementMap& replacements); + +#if TRACE_CANONICALIZATION + void dump() { + std::cerr << "Results:\n"; + for (size_t i = 0; i < results.size(); ++i) { + std::cerr << i << ": " << results[i] << "\n"; + } + std::cerr << "NewInfos:\n"; + for (size_t i = 0; i < newInfos.size(); ++i) { + std::cerr << asHeapType(newInfos[i]) << "\n"; + } + std::cerr << '\n'; + } +#endif // TRACE_CANONICALIZATION +}; + +void CanonicalizationState::update(ReplacementMap& replacements) { + if (replacements.empty()) { + return; + } + + // Update the results vector. + for (auto& type : results) { + if (auto it = replacements.find(type); it != replacements.end()) { + type = it->second.getAsHeapType(); + } + } + + // Remove replaced types from newInfos. + bool needsConsolidation = false; + for (auto& info : newInfos) { + HeapType oldType = asHeapType(info); + if (auto it = replacements.find(oldType); it != replacements.end()) { + auto& replacement = it->second; + if (auto* newInfo = replacement.getHeapTypeInfo()) { + // Replace the old info with the new info in `newInfos`, replacing the + // moved info in `replacement` with the corresponding new HeapType so we + // can still perform the correct replacement in the next step. + HeapType newType = asHeapType(*newInfo); + info = std::move(*newInfo); + replacement = {newType}; + } else { + // The old info is replaced, but there is no new Info to replace it, so + // just delete the old info. We will remove the holes in `newInfos` + // afterwards. + info = nullptr; + needsConsolidation = true; + } + } + } + if (needsConsolidation) { + newInfos.erase(std::remove(newInfos.begin(), newInfos.end(), nullptr), + newInfos.end()); + } + + // Replace all old types reachable from the new set of temporary types. + for (auto& info : newInfos) { + struct ChildUpdater : HeapTypeChildWalker<ChildUpdater> { + ReplacementMap& replacements; + ChildUpdater(ReplacementMap& replacements) : replacements(replacements) {} + void noteChild(HeapType* child) { + if (auto it = replacements.find(*child); it != replacements.end()) { + *child = it->second.getAsHeapType(); + } + } + }; + HeapType root = asHeapType(info); + ChildUpdater(replacements).walkRoot(&root); + + // If this is a nominal type, we may need to update its supertype as well. + if (info->supertype) { + HeapType super(uintptr_t(info->supertype)); + if (auto it = replacements.find(super); it != replacements.end()) { + info->supertype = getHeapTypeInfo(it->second.getAsHeapType()); + } + } + } +} + // A wrapper around a HeapType that provides equality and hashing based only on // its top-level shape, up to but not including its closest HeapType // descendants. This is the shape that determines the most fine-grained initial @@ -2519,20 +2630,8 @@ size_t Partitions::Set::split() { // minimal type definition graph from an input graph. See // https://arxiv.org/pdf/0802.2826.pdf. struct ShapeCanonicalizer { - // The minimized HeapTypes, possibly including both new temporary HeapTypes as - // well as globally canonical HeapTypes that were reachable from the input - // roots. - std::vector<HeapType> results; - - // The new, temporary, minimal HeapTypeInfos. Contains empty unique_ptrs at - // indices corresponding to globally canonical HeapTypes. - std::vector<std::unique_ptr<HeapTypeInfo>> infos; - - // Returns the partition index for an input root HeapType. This index is also - // the index of its minimized version in `minimized`, and if that minimized - // version is not globally canonical, also the index of the minimized - // HeapTypeInfo in `infos`. - size_t getIndex(HeapType type); + // The results of shape canonicalization. + CanonicalizationState::ReplacementMap replacements; ShapeCanonicalizer(std::vector<HeapType>& roots); @@ -2559,8 +2658,7 @@ private: Partitions splitters; void initialize(std::vector<HeapType>& roots); - bool replaceHeapType(HeapType* heapType); - void translatePartitionsToTypes(); + void createReplacements(); // Return pointers to the non-basic HeapType children of `ht`, including // BasicKind children. @@ -2583,10 +2681,6 @@ private: #endif }; -size_t ShapeCanonicalizer::getIndex(HeapType type) { - return partitions.getSetForElem(states.at(type)).index; -} - ShapeCanonicalizer::ShapeCanonicalizer(std::vector<HeapType>& roots) { #if TRACE_CANONICALIZATION std::cerr << "Root HeapTypes:\n"; @@ -2675,7 +2769,7 @@ ShapeCanonicalizer::ShapeCanonicalizer(std::vector<HeapType>& roots) { dumpPartitions(); #endif - translatePartitionsToTypes(); + createReplacements(); } void ShapeCanonicalizer::initialize(std::vector<HeapType>& roots) { @@ -2806,18 +2900,7 @@ void ShapeCanonicalizer::initialize(std::vector<HeapType>& roots) { } } -bool ShapeCanonicalizer::replaceHeapType(HeapType* heapType) { - auto it = states.find(*heapType); - if (it != states.end()) { - // heapType hasn't already been replaced; replace it. - auto set = partitions.getSetForElem(it->second); - *heapType = results.at(set.index); - return true; - } - return false; -} - -void ShapeCanonicalizer::translatePartitionsToTypes() { +void ShapeCanonicalizer::createReplacements() { // Create a single new HeapTypeInfo for each partition. Initialize each new // HeapTypeInfo as a copy of a representative HeapTypeInfo from its partition, // then patch all the children of the new HeapTypeInfos to refer to other new @@ -2836,106 +2919,61 @@ void ShapeCanonicalizer::translatePartitionsToTypes() { auto it = std::find_if(partition.begin(), partition.end(), [this](size_t i) { return !isTemp(heapTypes[i]); }); - if (it == partition.end()) { - // We do not already know about a globally canonical type for this - // partition. Create a copy. - const auto& representative = - *getHeapTypeInfo(heapTypes[*partition.begin()]); - infos.push_back(std::make_unique<HeapTypeInfo>(representative)); - infos.back()->isTemp = true; - results.push_back(asHeapType(infos.back())); + HeapType newType; + auto partitionIt = partition.begin(); + if (it != partition.end()) { + newType = heapTypes[*it]; } else { - // We already have a globally canonical type for this partition. - results.push_back(heapTypes[*it]); - infos.push_back({}); - } - } - for (auto& info : infos) { - if (!info) { - // No need to replace the children of globally canonical HeapTypes. - continue; + // We do not already know about a globally canonical type for this + // partition. Create a copy and a replacement that owns it. + HeapType oldType = heapTypes[*partitionIt++]; + const auto& representative = *getHeapTypeInfo(oldType); + auto info = std::make_unique<HeapTypeInfo>(representative); + info->isTemp = true; + newType = asHeapType(info); + replacements.insert({oldType, std::move(info)}); } - - struct ChildUpdater : HeapTypeChildWalker<ChildUpdater> { - ShapeCanonicalizer& canonicalizer; - ChildUpdater(ShapeCanonicalizer& canonicalizer) - : canonicalizer(canonicalizer) {} - void noteChild(HeapType* child) { - if (child->isBasic() || !isTemp(*child)) { - // Child doesn't need replacement. - return; - } - canonicalizer.replaceHeapType(child); + // Create replacements for all the other types in the partition. + for (; partitionIt != partition.end(); ++partitionIt) { + if (partitionIt != it) { + replacements.insert({heapTypes[*partitionIt], newType}); } - }; - HeapType root = asHeapType(info); - ChildUpdater(*this).walkRoot(&root); - - // If this is a nominal type, we may need to update its supertype as well. - if (info->supertype) { - HeapType heapType(uintptr_t(info->supertype)); - replaceHeapType(&heapType); - info->supertype = getHeapTypeInfo(heapType); } } - -#if TRACE_CANONICALIZATION - std::cerr << "Minimization results:\n"; - for (HeapType ht : results) { - std::cerr << ht << '\n'; - } - std::cerr << '\n'; -#endif } -// Map each temporary Type and HeapType to the locations where they will -// have to be replaced with canonical Types and HeapTypes. -struct Locations : TypeGraphWalker<Locations> { - std::unordered_map<Type, std::unordered_set<Type*>> types; - std::unordered_map<HeapType, std::unordered_set<HeapType*>> heapTypes; - - void preVisitType(Type* type) { - if (!type->isBasic()) { - types[*type].insert(type); - } - } - void preVisitHeapType(HeapType* ht) { - if (!ht->isBasic()) { - heapTypes[*ht].insert(ht); - } - } -}; - // Replaces temporary types and heap types in a type definition graph with their // globally canonical versions to prevent temporary types or heap type from // leaking into the global stores. -std::vector<HeapType> -globallyCanonicalize(std::vector<std::unique_ptr<HeapTypeInfo>>& infos) { - Locations locations; - std::vector<HeapType> results; - results.reserve(infos.size()); - for (auto& info : infos) { - if (!info) { - // TODO: That we have to deal with null info pointers here is a sign of a - // very leaky abstraction. Hack around it by for now to keep the diff for - // this change easier to reason about, but fix this in a followup to make - // the code itself easier to reason about. - - // Produce an arbitrary HeapType that will not be used. - results.push_back(HeapType(0)); - continue; +void globallyCanonicalize(CanonicalizationState& state) { + // Map each temporary Type and HeapType to the locations where they will + // have to be replaced with canonical Types and HeapTypes. + struct Locations : TypeGraphWalker<Locations> { + std::unordered_map<Type, std::unordered_set<Type*>> types; + std::unordered_map<HeapType, std::unordered_set<HeapType*>> heapTypes; + + void preVisitType(Type* type) { + if (!type->isBasic()) { + types[*type].insert(type); + } + } + void preVisitHeapType(HeapType* ht) { + if (!ht->isBasic()) { + heapTypes[*ht].insert(ht); + } } + }; - results.push_back(asHeapType(info)); - locations.walkRoot(&results.back()); + Locations locations; + for (auto& type : state.results) { + if (!type.isBasic() && getHeapTypeInfo(type)->isTemp) { + locations.walkRoot(&type); + } } #if TRACE_CANONICALIZATION std::cerr << "Initial Types:\n"; - for (HeapType type : results) { - std::cerr << type << '\n'; - } - std::cerr << '\n'; + state.dump(); #endif // Canonicalize HeapTypes at all their use sites. HeapTypes for which there @@ -2952,10 +2990,7 @@ globallyCanonicalize(std::vector<std::unique_ptr<HeapTypeInfo>>& infos) { // not yet escaped the builder, rather than shape. std::lock_guard<std::recursive_mutex> lock(globalHeapTypeStore.mutex); std::unordered_map<HeapType, HeapType> canonicalHeapTypes; - for (auto& info : infos) { - if (!info) { - continue; - } + for (auto& info : state.newInfos) { HeapType original = asHeapType(info); HeapType canonical = globalHeapTypeStore.insert(std::move(info)); if (original != canonical) { @@ -2986,23 +3021,16 @@ globallyCanonicalize(std::vector<std::unique_ptr<HeapTypeInfo>>& infos) { #if TRACE_CANONICALIZATION std::cerr << "Final Types:\n"; - for (HeapType type : results) { - std::cerr << type << '\n'; - } - std::cerr << '\n'; + state.dump(); #endif - - return results; } -std::vector<HeapType> -buildEquirecursive(std::vector<std::unique_ptr<HeapTypeInfo>> infos) { - std::vector<HeapType> heapTypes; - for (auto& info : infos) { +void canonicalizeEquirecursive(CanonicalizationState& state) { + // Equirecursive types always have null supertypes. + for (auto& info : state.newInfos) { if (!info->isNominal) { info->supertype = nullptr; } - heapTypes.push_back(asHeapType(info)); } #if TIME_CANONICALIZATION @@ -3010,58 +3038,37 @@ buildEquirecursive(std::vector<std::unique_ptr<HeapTypeInfo>> infos) { #endif // Canonicalize the shape of the type definition graph. - ShapeCanonicalizer minimized(heapTypes); + ShapeCanonicalizer minimized(state.results); + state.update(minimized.replacements); #if TIME_CANONICALIZATION - auto afterShape = std::chrono::steady_clock::now(); -#endif - - // The shape of the definition graph is now canonicalized, but it is still - // comprised of temporary types and heap types. Get or create their globally - // canonical versions. - std::vector<HeapType> canonical = globallyCanonicalize(minimized.infos); - -#if TIME_CANONICALIZATION - auto afterGlobal = std::chrono::steady_clock::now(); - - std::cerr << "Starting types: " << heapTypes.size() << '\n'; - std::cerr << "Minimized types: " << minimized.results.size() << '\n'; - + auto end = std::chrono::steady_clock::now(); std::cerr << "Shape canonicalization: " - << std::chrono::duration_cast<std::chrono::milliseconds>( - afterShape - start) - .count() - << " ms\n"; - std::cerr << "Global canonicalization: " - << std::chrono::duration_cast<std::chrono::milliseconds>( - afterGlobal - afterShape) + << std::chrono::duration_cast<std::chrono::milliseconds>(end - + start) .count() << " ms\n"; #endif +} - // Map the original heap types to their minimized and globally canonical - // versions. - for (auto& type : heapTypes) { - size_t index = minimized.getIndex(type); - // TODO: This is messy. Clean it up. - if (minimized.infos.at(index)) { - type = canonical.at(index); - } else { - type = minimized.results.at(index); - } - } +void canonicalizeNominal(CanonicalizationState& state) { + // Nominal types do not require separate canonicalization, so just validate + // that their subtyping is correct. - return heapTypes; -} +#if TIME_CANONICALIZATION + auto start = std::chrono::steady_clock::now(); +#endif -void validateNominalSubTyping(const std::vector<HeapType>& heapTypes) { assert(typeSystem == TypeSystem::Nominal); // Ensure there are no cycles in the subtype graph. This is the classic DFA // algorithm for detecting cycles, but in the form of a simple loop because // each node (type) has at most one child (supertype). std::unordered_set<HeapTypeInfo*> seen; - for (auto type : heapTypes) { + for (auto type : state.results) { + if (type.isBasic()) { + continue; + } std::unordered_set<HeapTypeInfo*> path; for (auto* curr = getHeapTypeInfo(type); seen.insert(curr).second && curr->supertype != nullptr; @@ -3074,7 +3081,10 @@ void validateNominalSubTyping(const std::vector<HeapType>& heapTypes) { } // Ensure that all the subtype relations are valid. - for (HeapType type : heapTypes) { + for (HeapType type : state.results) { + if (type.isBasic()) { + continue; + } auto* sub = getHeapTypeInfo(type); auto* super = sub->supertype; if (super == nullptr) { @@ -3110,145 +3120,101 @@ void validateNominalSubTyping(const std::vector<HeapType>& heapTypes) { break; } } -} - -std::vector<HeapType> -buildNominal(std::vector<std::unique_ptr<HeapTypeInfo>> infos) { -#if TIME_CANONICALIZATION - auto start = std::chrono::steady_clock::now(); -#endif - - // Move the HeapTypes and the Types they reach to the global stores. - std::vector<HeapType> heapTypes = globallyCanonicalize(infos); - -#if TIME_CANONICALIZATION - auto afterMove = std::chrono::steady_clock::now(); -#endif - -#if TRACE_CANONICALIZATION - std::cerr << "After building:\n"; - for (size_t i = 0; i < heapTypes.size(); ++i) { - std::cerr << i << ": " << heapTypes[i] << "\n"; - } -#endif // TRACE_CANONICALIZATION - - validateNominalSubTyping(heapTypes); #if TIME_CANONICALIZATION auto end = std::chrono::steady_clock::now(); - std::cerr << "Moving types took " - << std::chrono::duration_cast<std::chrono::milliseconds>(afterMove - - start) - .count() - << " ms\n"; std::cerr << "Validating subtyping took " << std::chrono::duration_cast<std::chrono::milliseconds>(end - - afterMove) + start) .count() << " ms\n"; #endif - - return heapTypes; } -void replaceBasicHeapTypes(std::vector<std::unique_ptr<HeapTypeInfo>>& infos) { +void canonicalizeBasicHeapTypes(CanonicalizationState& state) { // Replace heap types backed by BasicKind HeapTypeInfos with their // corresponding BasicHeapTypes. The heap types backed by BasicKind // HeapTypeInfos exist only to support building basic types in a TypeBuilder // and are never canonical. - for (auto& info : infos) { - struct BasicTypeReplacer : HeapTypeChildWalker<BasicTypeReplacer> { - void noteChild(HeapType* child) { - if (child->isBasic()) { - // This is already a real basic type. No canonicalization necessary. - return; - } - auto* info = getHeapTypeInfo(*child); - if (info->kind == HeapTypeInfo::BasicKind) { - *child = info->basic; - } - } - }; - HeapType type = asHeapType(info); - BasicTypeReplacer().walkRoot(&type); + CanonicalizationState::ReplacementMap replacements; + for (auto& info : state.newInfos) { + if (info->kind == HeapTypeInfo::BasicKind) { + replacements.insert({asHeapType(info), HeapType(info->basic)}); + } + // Basic supertypes should be implicit. if (info->supertype && info->supertype->kind == HeapTypeInfo::BasicKind) { info->supertype = nullptr; } } + state.update(replacements); } } // anonymous namespace std::vector<HeapType> TypeBuilder::build() { size_t entryCount = impl->entries.size(); - std::vector<std::optional<HeapType>> basicHeapTypes(entryCount); - std::vector<std::unique_ptr<HeapTypeInfo>> toBuild; - // Mark the entries finalized and "build" basic heap types. + // Initialize the canonicalization state using the HeapTypeInfos from the + // builder, marking entries finalized. + CanonicalizationState state; + state.results.reserve(entryCount); + state.newInfos.reserve(entryCount); for (size_t i = 0; i < entryCount; ++i) { assert(impl->entries[i].initialized && "Cannot access uninitialized HeapType"); auto& info = impl->entries[i].info; info->isFinalized = true; - if (info->kind == HeapTypeInfo::BasicKind) { - basicHeapTypes[i] = info->basic; - } else { - toBuild.emplace_back(std::move(info)); - } + state.results.push_back(asHeapType(info)); + state.newInfos.emplace_back(std::move(info)); } #if TRACE_CANONICALIZATION - auto dumpTypes = [&]() { - for (size_t i = 0, j = 0; i < basicHeapTypes.size(); ++i) { - if (basicHeapTypes[i]) { - std::cerr << i << ": " << *basicHeapTypes[i] << "\n"; - } else { - std::cerr << i << ": " << asHeapType(toBuild[j++]) << "\n"; - } - } - }; std::cerr << "Before replacing basic heap types:\n"; - dumpTypes(); -#endif // TRACE_CANONICALIZATION + state.dump(); +#endif // Eagerly replace references to built basic heap types so the more // complicated canonicalization algorithms don't need to consider them. - replaceBasicHeapTypes(toBuild); + canonicalizeBasicHeapTypes(state); #if TRACE_CANONICALIZATION std::cerr << "After replacing basic heap types:\n"; - dumpTypes(); -#endif // TRACE_CANONICALIZATION + state.dump(); +#endif - std::vector<HeapType> built; switch (typeSystem) { case TypeSystem::Equirecursive: - built = buildEquirecursive(std::move(toBuild)); + canonicalizeEquirecursive(state); break; case TypeSystem::Nominal: - built = buildNominal(std::move(toBuild)); + canonicalizeNominal(state); break; } - // Combine the basic types with the built types. - std::vector<HeapType> results(entryCount); - for (size_t i = 0, builtIndex = 0; i < entryCount; ++i) { - if (basicHeapTypes[i]) { - results[i] = *basicHeapTypes[i]; - } else { - results[i] = built[builtIndex++]; - } - } +#if TIME_CANONICALIZATION + auto start = std::chrono::steady_clock::now(); +#endif + + globallyCanonicalize(state); + +#if TIME_CANONICALIZATION + auto end = std::chrono::steady_clock::now(); + std::cerr << "Global canonicalization took " + << std::chrono::duration_cast<std::chrono::milliseconds>(end - + start) + .count() + << " ms\n"; +#endif // Note built signature types. See comment in `HeapType::HeapType(Signature)`. - for (auto type : results) { + for (auto type : state.results) { if (type.isSignature() && (type.isNominal() || getTypeSystem() == TypeSystem::Nominal)) { nominalSignatureCache.insertType(type); } } - return results; + return state.results; } } // namespace wasm |