summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2023-06-15 13:58:19 -0700
committerGitHub <noreply@github.com>2023-06-15 13:58:19 -0700
commit599a9627ce88b66eeb4e2be24163fa62f0533743 (patch)
tree1133da8d2c66466c1d2dda80959fe85bda8c40a7 /src
parent00211a90878cf0f37fbfb124332c4abc9863032b (diff)
downloadbinaryen-599a9627ce88b66eeb4e2be24163fa62f0533743.tar.gz
binaryen-599a9627ce88b66eeb4e2be24163fa62f0533743.tar.bz2
binaryen-599a9627ce88b66eeb4e2be24163fa62f0533743.zip
[NFC] Rewrite isorecursive canonicalization (#5774)
Rewrite the type canonicalization algorithm to fully canonicalize a single rec group at a time rather than canonicalizing multiple rec groups at once in multiple stages. The previous code was useful when it had to be shared with equirecursive and nominal canonicalization, but was much more complicated than necessary for just isorecursive canonicalization, which is all we support today.
Diffstat (limited to 'src')
-rw-r--r--src/wasm-type.h1
-rw-r--r--src/wasm/wasm-type.cpp499
2 files changed, 159 insertions, 341 deletions
diff --git a/src/wasm-type.h b/src/wasm-type.h
index 65fdd5650..095186c3c 100644
--- a/src/wasm-type.h
+++ b/src/wasm-type.h
@@ -631,6 +631,7 @@ struct TypeBuilder {
const std::vector<HeapType>& operator*() const {
return std::get<std::vector<HeapType>>(*this);
}
+ const std::vector<HeapType>* operator->() const { return &*(*this); }
const Error* getError() const { return std::get_if<Error>(this); }
};
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index 6a3ae3c92..c2c78792f 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -32,16 +32,11 @@
#include "wasm-type.h"
#define TRACE_CANONICALIZATION 0
-#define TIME_CANONICALIZATION 0
-#if TRACE_CANONICALIZATION || TIME_CANONICALIZATION
+#if TRACE_CANONICALIZATION
#include <iostream>
#endif
-#if TIME_CANONICALIZATION
-#include <chrono>
-#endif
-
namespace wasm {
namespace {
@@ -2174,7 +2169,7 @@ struct TypeBuilder::Impl {
// Store of temporary recursion groups, which will be moved to the global
// collection of recursion groups as part of building.
- std::vector<std::unique_ptr<RecGroupInfo>> recGroups;
+ std::unordered_map<RecGroup, std::unique_ptr<RecGroupInfo>> recGroups;
struct Entry {
std::unique_ptr<HeapTypeInfo> info;
@@ -2276,191 +2271,174 @@ void TypeBuilder::createRecGroup(size_t index, size_t length) {
if (length < 2) {
return;
}
- auto& groups = impl->recGroups;
- groups.emplace_back(std::make_unique<RecGroupInfo>());
- groups.back()->reserve(length);
+ auto groupInfo = std::make_unique<RecGroupInfo>();
+ groupInfo->reserve(length);
for (size_t i = 0; i < length; ++i) {
auto& info = impl->entries[index + i].info;
assert(info->recGroup == nullptr && "group already assigned");
- auto* recGroup = groups.back().get();
- recGroup->push_back(asHeapType(info));
- info->recGroup = recGroup;
+ groupInfo->push_back(asHeapType(info));
+ info->recGroup = groupInfo.get();
info->recGroupIndex = i;
}
+ impl->recGroups.insert(
+ {RecGroup(uintptr_t(groupInfo.get())), std::move(groupInfo)});
}
namespace {
-// Helper for TypeBuilder::build() that keeps track of temporary types and
-// provides logic for replacing them gradually with more canonical types.
-struct CanonicalizationState {
- // The list of types being built and canonicalized. Will eventually be
- // returned to the TypeBuilder user.
- std::vector<HeapType> results;
- // The newly constructed, temporary HeapTypeInfos that still need to be
- // canonicalized.
- std::vector<std::unique_ptr<HeapTypeInfo>> newInfos;
-
- std::unordered_map<RecGroup, RecGroup> canonGroups;
-
- // Either the fully canonical HeapType or a new temporary HeapTypeInfo that a
- // previous HeapTypeInfo will be replaced with.
- struct Replacement : std::variant<HeapType, std::unique_ptr<HeapTypeInfo>> {
- using Super = std::variant<HeapType, std::unique_ptr<HeapTypeInfo>>;
- Replacement(std::unique_ptr<HeapTypeInfo>&& info)
- : Super(std::move(info)) {}
- Replacement(HeapType type) : Super(type) {}
- HeapType* getHeapType() { return std::get_if<HeapType>(this); }
- std::unique_ptr<HeapTypeInfo>* getHeapTypeInfo() {
- return std::get_if<std::unique_ptr<HeapTypeInfo>>(this);
+bool isValidSupertype(const HeapTypeInfo& sub, const HeapTypeInfo& super) {
+ if (sub.kind != super.kind) {
+ return false;
+ }
+ SubTyper typer;
+ switch (sub.kind) {
+ case HeapTypeInfo::SignatureKind:
+ return typer.isSubType(sub.signature, super.signature);
+ case HeapTypeInfo::StructKind:
+ return typer.isSubType(sub.struct_, super.struct_);
+ case HeapTypeInfo::ArrayKind:
+ return typer.isSubType(sub.array, super.array);
+ }
+ WASM_UNREACHABLE("unknown kind");
+}
+
+void updateReferencedHeapTypes(
+ std::unique_ptr<HeapTypeInfo>& info,
+ const std::unordered_map<HeapType, HeapType>& canonicalized) {
+ // Update the reference types that refer to canonicalized heap types to be
+ // their canonical versions. Update the Types rather than the HeapTypes so
+ // that the validation of supertypes sees canonical types.
+ struct ChildUpdater : TypeGraphWalkerBase<ChildUpdater> {
+ const std::unordered_map<HeapType, HeapType>& canonicalized;
+ bool isTopLevel = true;
+
+ ChildUpdater(const std::unordered_map<HeapType, HeapType>& canonicalized)
+ : canonicalized(canonicalized) {}
+
+ void scanType(Type* type) {
+ isTopLevel = false;
+ if (type->isRef()) {
+ auto ht = type->getHeapType();
+ if (auto it = canonicalized.find(ht); it != canonicalized.end()) {
+ *type = Type(it->second, type->getNullability());
+ } else if (isTemp(*type) && !isTemp(ht)) {
+ *type = Type(ht, type->getNullability());
+ }
+ } else if (type->isTuple()) {
+ TypeGraphWalkerBase<ChildUpdater>::scanType(type);
+ }
}
- HeapType getAsHeapType() {
- if (auto* type = getHeapType()) {
- return *type;
+
+ void scanHeapType(HeapType* type) {
+ if (isTopLevel) {
+ TypeGraphWalkerBase<ChildUpdater>::scanHeapType(type);
}
- return asHeapType(*getHeapTypeInfo());
}
};
- using ReplacementMap = std::unordered_map<HeapType, Replacement>;
-
- // Updates the `results` and `newInfo` lists, but does not modify any of the
- // infos to update HeapType use sites.
- void updateShallow(ReplacementMap& replacements);
- // Updates the HeapType use sites within `info`.
- void updateUses(ReplacementMap& replacements,
- std::unique_ptr<HeapTypeInfo>& info);
+ // Update the children.
+ ChildUpdater updater(canonicalized);
+ auto root = asHeapType(info);
+ updater.walkRoot(&root);
-#if TRACE_CANONICALIZATION
- void dump() {
- IndexedTypeNameGenerator print(results);
- std::cerr << "Results:\n";
- for (size_t i = 0; i < results.size(); ++i) {
- std::cerr << i << ": " << print(results[i]) << "\n";
- }
- std::cerr << "NewInfos:\n";
- for (size_t i = 0; i < newInfos.size(); ++i) {
- std::cerr << print(asHeapType(newInfos[i])) << "\n";
+ // Update the supertype.
+ if (info->supertype) {
+ HeapType super(uintptr_t(info->supertype));
+ if (auto it = canonicalized.find(super); it != canonicalized.end()) {
+ info->supertype = getHeapTypeInfo(it->second);
}
- std::cerr << '\n';
}
-#endif // TRACE_CANONICALIZATION
-};
+}
-void CanonicalizationState::updateShallow(ReplacementMap& replacements) {
- if (replacements.empty()) {
- return;
+TypeBuilder::BuildResult
+buildRecGroup(std::unique_ptr<RecGroupInfo>&& groupInfo,
+ std::vector<std::unique_ptr<HeapTypeInfo>>&& typeInfos,
+ std::unordered_map<HeapType, HeapType>& canonicalized) {
+ // First, we need to replace any referenced temporary HeapTypes from
+ // previously built groups with their canonicalized versions.
+ for (auto& info : typeInfos) {
+ updateReferencedHeapTypes(info, canonicalized);
}
- // Update the results vector.
- for (auto& type : results) {
- if (auto it = replacements.find(type); it != replacements.end()) {
- type = it->second.getAsHeapType();
+ // Collect the types and check supertype validity.
+ std::unordered_set<HeapType> seenTypes;
+ for (size_t i = 0; i < typeInfos.size(); ++i) {
+ auto& info = typeInfos[i];
+ if (auto* super = info->supertype) {
+ // The supertype must be canonical (i.e. defined in a previous rec group)
+ // or have already been defined in this rec group.
+ if (super->isTemp && !seenTypes.count(HeapType(uintptr_t(super)))) {
+ return {TypeBuilder::Error{
+ i, TypeBuilder::ErrorReason::ForwardSupertypeReference}};
+ }
+ // The supertype must have a valid structure.
+ if (!isValidSupertype(*info, *super)) {
+ return {
+ TypeBuilder::Error{i, TypeBuilder::ErrorReason::InvalidSupertype}};
+ }
}
+ seenTypes.insert(asHeapType(info));
}
- // Remove replaced types from newInfos.
- bool needsConsolidation = false;
- for (auto& info : newInfos) {
- HeapType oldType = asHeapType(info);
- if (auto it = replacements.find(oldType); it != replacements.end()) {
- auto& replacement = it->second;
- if (auto* newInfo = replacement.getHeapTypeInfo()) {
- // Replace the old info with the new info in `newInfos`, replacing the
- // moved info in `replacement` with the corresponding new HeapType so we
- // can still perform the correct replacement in the next step.
- HeapType newType = asHeapType(*newInfo);
- info = std::move(*newInfo);
- replacement = {newType};
- } else {
- // The old info is replaced, but there is no new Info to replace it, so
- // just delete the old info. We will remove the holes in `newInfos`
- // afterwards.
- info = nullptr;
- needsConsolidation = true;
+ // Check that children are either already canonical (i.e. defined in a
+ // previous rec group) or are defined in this current rec group.
+ for (size_t i = 0; i < typeInfos.size(); ++i) {
+ auto type = asHeapType(typeInfos[i]);
+ for (auto child : type.getHeapTypeChildren()) {
+ if (isTemp(child) && !seenTypes.count(child)) {
+ return {TypeBuilder::Error{
+ i, TypeBuilder::ErrorReason::ForwardChildReference}};
}
}
}
- if (needsConsolidation) {
- newInfos.erase(std::remove(newInfos.begin(), newInfos.end(), nullptr),
- newInfos.end());
- }
-}
-void CanonicalizationState::updateUses(ReplacementMap& replacements,
- std::unique_ptr<HeapTypeInfo>& info) {
- if (replacements.empty()) {
- return;
- }
- // Replace all old types reachable from `info`.
- struct ChildUpdater : HeapTypeChildWalker<ChildUpdater> {
- ReplacementMap& replacements;
- ChildUpdater(ReplacementMap& replacements) : replacements(replacements) {}
- void noteChild(HeapType* child) {
- if (auto it = replacements.find(*child); it != replacements.end()) {
- *child = it->second.getAsHeapType();
- }
+ // The rec group is valid, so we can try to move the group into the global rec
+ // group store. If the returned canonical group is not the same as the input
+ // group, then there is already a canonical version of this rec group. Lock
+ // the global rec group store here to avoid leaking temporary types to other
+ // threads that may be trying to build an identical group.
+ auto group = asHeapType(typeInfos[0]).getRecGroup();
+ std::lock_guard<std::mutex> lock(globalRecGroupStore.mutex);
+ auto canonical = groupInfo ? globalRecGroupStore.insert(std::move(groupInfo))
+ : globalRecGroupStore.insert(group);
+ if (group != canonical) {
+ // Replace the non-canonical types with their canonical equivalents.
+ assert(canonical.size() == group.size());
+ for (size_t i = 0; i < group.size(); ++i) {
+ canonicalized.insert({group[i], canonical[i]});
}
- };
- HeapType root = asHeapType(info);
- ChildUpdater(replacements).walkRoot(&root);
+ // Return the canonical types.
+ return {std::vector<HeapType>(canonical.begin(), canonical.end())};
+ }
- // We may need to update its supertype as well.
- if (info->supertype) {
- HeapType super(uintptr_t(info->supertype));
- if (auto it = replacements.find(super); it != replacements.end()) {
- info->supertype = getHeapTypeInfo(it->second.getAsHeapType());
+ // The group was successfully moved to the global rec group store, so it is
+ // now canonical. We need to move its heap types to the heap type store so
+ // they become canonical as well.
+ {
+ std::lock_guard<std::recursive_mutex> lock(globalHeapTypeStoreMutex);
+ for (auto& info : typeInfos) {
+ info->isTemp = false;
+ globalHeapTypeStore.emplace_back(std::move(info));
}
}
-}
-// Replaces temporary types and heap types in a type definition graph with their
-// globally canonical versions to prevent temporary types or heap type from
-// leaking into the global stores.
-void globallyCanonicalize(CanonicalizationState& state) {
- // Map each temporary Type and HeapType to the locations where they will
- // have to be replaced with canonical Types and HeapTypes.
+ std::vector<HeapType> results(group.begin(), group.end());
+
+ // We need to make the Types canonical as well, but right now there is no way
+ // to move them to their global store, so we have to create new types and
+ // replace the old ones. TODO simplify this.
struct Locations : TypeGraphWalker<Locations> {
std::unordered_map<Type, std::unordered_set<Type*>> types;
-
void preVisitType(Type* type) {
- if (!type->isBasic()) {
+ if (isTemp(*type)) {
types[*type].insert(type);
}
}
};
Locations locations;
- for (auto& type : state.results) {
- if (!type.isBasic() && getHeapTypeInfo(type)->isTemp) {
- locations.walkRoot(&type);
- }
- }
-
-#if TRACE_CANONICALIZATION
- std::cerr << "Initial Types:\n";
- state.dump();
-#endif
-
- // Canonicalize HeapTypes at all their use sites. HeapTypes whose rec groups
- // are newly canonical are moved to the globalHeapTypeStore so that their
- // lifetimes are extended beyond the life of the TypeBuilder.
- //
- // Keep a lock on the global HeapType store as long as it can reach temporary
- // types to ensure that no other threads observe the temporary types, for
- // example if another thread concurrently constructs a new HeapType with the
- // same shape as one being canonicalized here. This cannot happen with Types
- // because they are hashed in the global store by pointer identity, which has
- // not yet escaped the builder, rather than shape.
- std::lock_guard<std::recursive_mutex> lock(globalHeapTypeStoreMutex);
- for (auto& info : state.newInfos) {
- auto original = asHeapType(info);
- auto canonical =
- state.canonGroups.at(original.getRecGroup())[info->recGroupIndex];
- if (original == canonical) {
- info->isTemp = false;
- globalHeapTypeStore.emplace_back(std::move(info));
- }
+ for (auto& type : results) {
+ locations.walkRoot(&type);
}
// Canonicalize non-tuple Types (which never directly refer to other Types)
@@ -2479,212 +2457,51 @@ void globallyCanonicalize(CanonicalizationState& state) {
canonicalizeTypes(false);
canonicalizeTypes(true);
-#if TRACE_CANONICALIZATION
- std::cerr << "Final Types:\n";
- state.dump();
-#endif
+ return {results};
}
-std::optional<TypeBuilder::Error>
-validateSubtyping(const std::vector<HeapType>& types) {
- for (size_t i = 0; i < types.size(); ++i) {
- HeapType type = types[i];
- if (type.isBasic()) {
- continue;
- }
- auto* sub = getHeapTypeInfo(type);
- auto* super = sub->supertype;
- if (super == nullptr) {
- continue;
- }
-
- auto fail = [&]() {
- return TypeBuilder::Error{i, TypeBuilder::ErrorReason::InvalidSupertype};
- };
+} // anonymous namespace
- if (sub->kind != super->kind) {
- return fail();
- }
- SubTyper typer;
- switch (sub->kind) {
- case HeapTypeInfo::SignatureKind:
- if (!typer.isSubType(sub->signature, super->signature)) {
- return fail();
- }
- break;
- case HeapTypeInfo::StructKind:
- if (!typer.isSubType(sub->struct_, super->struct_)) {
- return fail();
- }
- break;
- case HeapTypeInfo::ArrayKind:
- if (!typer.isSubType(sub->array, super->array)) {
- return fail();
- }
- break;
- }
- }
- return {};
-}
+TypeBuilder::BuildResult TypeBuilder::build() {
+ size_t entryCount = impl->entries.size();
-std::optional<TypeBuilder::Error> canonicalizeIsorecursive(
- CanonicalizationState& state,
- std::vector<std::unique_ptr<RecGroupInfo>>& recGroupInfos) {
+ std::vector<HeapType> results;
+ results.reserve(entryCount);
- // Map rec groups to the unique pointers to their infos.
- std::unordered_map<RecGroup, std::unique_ptr<RecGroupInfo>> groupInfoMap;
- for (auto& info : recGroupInfos) {
- RecGroup group{uintptr_t(info.get())};
- groupInfoMap[group] = std::move(info);
- }
+ // Map temporary HeapTypes to their canonicalized versions so they can be
+ // replaced in later rec groups.
+ std::unordered_map<HeapType, HeapType> canonicalized;
- // Check that supertypes precede their subtypes and that other child types
- // either precede their parents or appear later in the same recursion group.
- // `indexOfType` both maps types to their indices and keeps track of which
- // types we have seen so far.
- std::unordered_map<HeapType, size_t> indexOfType;
- std::vector<RecGroup> groups;
+ // Canonicalize each rec group, one at a time.
size_t groupStart = 0;
+ while (groupStart < entryCount) {
+ auto group = asHeapType(impl->entries[groupStart].info).getRecGroup();
- // Validate the children of all types in a recursion group after all the types
- // have been registered in `indexOfType`.
- auto finishGroup = [&](size_t groupEnd) -> std::optional<TypeBuilder::Error> {
- for (size_t index = groupStart; index < groupEnd; ++index) {
- HeapType type = state.results[index];
- for (HeapType child : type.getHeapTypeChildren()) {
- // Only basic children, globally canonical children, and children
- // defined in this or previous recursion groups are allowed.
- if (isTemp(child) && !indexOfType.count(child)) {
- return {{index, TypeBuilder::ErrorReason::ForwardChildReference}};
- }
- }
+ std::unique_ptr<RecGroupInfo> groupInfo;
+ if (auto it = impl->recGroups.find(group); it != impl->recGroups.end()) {
+ groupInfo = std::move(it->second);
}
- groupStart = groupEnd;
- return {};
- };
- for (size_t index = 0; index < state.results.size(); ++index) {
- HeapType type = state.results[index];
- if (type.isBasic()) {
- continue;
- }
- // Validate the supertype. Temporary supertypes must precede their subtypes.
- if (auto super = type.getSuperType()) {
- if (isTemp(*super) && !indexOfType.count(*super)) {
- return {{index, TypeBuilder::ErrorReason::ForwardSupertypeReference}};
- }
+ size_t groupSize = group.size();
+ std::vector<std::unique_ptr<HeapTypeInfo>> typeInfos;
+ typeInfos.reserve(groupSize);
+ for (size_t i = 0; i < groupSize; ++i) {
+ typeInfos.emplace_back(std::move(impl->entries[groupStart + i].info));
}
- // Check whether we have finished a rec group.
- auto newGroup = type.getRecGroup();
- if (groups.empty() || groups.back() != newGroup) {
- if (auto error = finishGroup(index)) {
- return *error;
- }
- groups.push_back(newGroup);
- }
- // Register this type as seen.
- indexOfType.insert({type, index});
- }
- if (auto error = finishGroup(state.results.size())) {
- return *error;
- }
-
- // Now that we know everything is valid, start canonicalizing recursion
- // groups. Before canonicalizing each group, update all the HeapType use sites
- // within it to make sure it only refers to other canonical groups or to
- // itself (since other groups it refers to may have been duplicates of
- // previously canonicalized groups and therefore would not be canonical). To
- // canonicalize the group, try to insert it into the global store. If that
- // fails, we already have an isorecursively equivalent group, so update the
- // replacements accordingly.
- CanonicalizationState::ReplacementMap replacements;
- {
- std::lock_guard<std::mutex> lock(globalRecGroupStore.mutex);
- groupStart = 0;
- for (auto group : groups) {
- size_t size = group.size();
- for (size_t i = 0; i < size; ++i) {
- state.updateUses(replacements, state.newInfos[groupStart + i]);
- }
- groupStart += size;
- RecGroup canonical(0);
- // There may or may not be a `RecGroupInfo` for this group depending on
- // whether it is a singleton group or not. If there is one, it is in the
- // `groupInfoMap`. Make the info available for the global store to take
- // ownership of it in case this group is made the canonical group for its
- // structure.
- if (auto it = groupInfoMap.find(group); it != groupInfoMap.end()) {
- canonical = globalRecGroupStore.insert(std::move(it->second));
- } else {
- canonical = globalRecGroupStore.insert(group);
- }
- if (group != canonical) {
- // Replace the non-canonical types with their canonical equivalents.
- assert(canonical.size() == size);
- for (size_t i = 0; i < size; ++i) {
- replacements.insert({group[i], canonical[i]});
- }
- }
- state.canonGroups.insert({group, canonical});
- }
- }
- state.updateShallow(replacements);
- return {};
-}
-
-} // anonymous namespace
-TypeBuilder::BuildResult TypeBuilder::build() {
- size_t entryCount = impl->entries.size();
-
- // Initialize the canonicalization state using the HeapTypeInfos from the
- // builder, marking entries finalized.
- CanonicalizationState state;
- state.results.reserve(entryCount);
- state.newInfos.reserve(entryCount);
- for (size_t i = 0; i < entryCount; ++i) {
- assert(impl->entries[i].initialized &&
- "Cannot access uninitialized HeapType");
- auto& info = impl->entries[i].info;
- state.results.push_back(asHeapType(info));
- state.newInfos.emplace_back(std::move(info));
- }
-
-#if TIME_CANONICALIZATION
- using instant_t = std::chrono::time_point<std::chrono::steady_clock>;
- auto getMillis = [&](instant_t start, instant_t end) {
- return std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
- .count();
- };
- auto start = std::chrono::steady_clock::now();
-#endif
-
- if (auto error = canonicalizeIsorecursive(state, impl->recGroups)) {
- return {*error};
- }
-
-#if TIME_CANONICALIZATION
- auto afterStructureCanonicalization = std::chrono::steady_clock::now();
-#endif
-
- globallyCanonicalize(state);
+ auto built =
+ buildRecGroup(std::move(groupInfo), std::move(typeInfos), canonicalized);
+ if (auto* error = built.getError()) {
+ return {TypeBuilder::Error{groupStart + error->index, error->reason}};
+ }
-#if TIME_CANONICALIZATION
- auto end = std::chrono::steady_clock::now();
- std::cerr << "Total canonicalization time was " << getMillis(start, end)
- << " ms\n";
- std::cerr << "Structure canonicalization took "
- << getMillis(start, afterStructureCanonicalization) << " ms\n";
- std::cerr << "Global canonicalization took "
- << getMillis(afterStructureCanonicalization, end) << " ms\n";
-#endif
+ assert(built->size() == groupSize);
+ results.insert(results.end(), built->begin(), built->end());
- // Check that the declared supertypes are structurally valid.
- if (auto error = validateSubtyping(state.results)) {
- return {*error};
+ groupStart += groupSize;
}
- return {state.results};
+ return {results};
}
} // namespace wasm