diff options
author | Thomas Lively <tlively@google.com> | 2023-03-03 15:18:36 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-03 21:18:36 +0000 |
commit | e13ad341c86ded9c646b7106e76472d6343156d9 (patch) | |
tree | 2ecbbc42381ca9c6cde6e414c91314313d4db625 /src | |
parent | dc8f514bfa4617861b51b6cef23af73464d3b650 (diff) | |
download | binaryen-e13ad341c86ded9c646b7106e76472d6343156d9.tar.gz binaryen-e13ad341c86ded9c646b7106e76472d6343156d9.tar.bz2 binaryen-e13ad341c86ded9c646b7106e76472d6343156d9.zip |
Add a fuzzer utility for ensuring types are inhabitable (#5541)
Some valid GC types, such as non-nullable references to bottom heap types and
types that contain non-nullable references to themselves, are uninhabitable,
meaning it is not possible to construct values of those types. This can cause
problems for the fuzzer, which generally needs to be able to construct values of
arbitrary types.
To simplify things for the fuzzer, introduce a utility for transforming type
graphs such that all their types are inhabitable. The utility performs a DFS to
find cycles of non-nullable references and breaks those cycles by introducing
nullability.
The new utility is itself fuzzed in the type fuzzer.
Diffstat (limited to 'src')
-rw-r--r-- | src/tools/fuzzing/heap-types.cpp | 364 | ||||
-rw-r--r-- | src/tools/fuzzing/heap-types.h | 6 | ||||
-rw-r--r-- | src/tools/wasm-fuzz-types.cpp | 115 | ||||
-rw-r--r-- | src/wasm-type.h | 2 | ||||
-rw-r--r-- | src/wasm/wasm-type.cpp | 27 |
5 files changed, 511 insertions, 3 deletions
diff --git a/src/tools/fuzzing/heap-types.cpp b/src/tools/fuzzing/heap-types.cpp index a356e8b5b..cb33a65ed 100644 --- a/src/tools/fuzzing/heap-types.cpp +++ b/src/tools/fuzzing/heap-types.cpp @@ -16,6 +16,8 @@ #include <variant> +#include "ir/subtypes.h" +#include "support/insert_ordered.h" #include "tools/fuzzing/heap-types.h" #include "tools/fuzzing/parameters.h" @@ -654,4 +656,366 @@ HeapTypeGenerator::create(Random& rand, FeatureSet features, size_t n) { return HeapTypeGeneratorImpl(rand, features, n).result; } +namespace { + +// `makeInhabitable` implementation. +// +// There are two root causes of uninhabitability: First, a non-nullable +// reference to a bottom type is always uninhabitable. Second, a cycle in the +// type graph formed from non-nullable references makes all the types involved +// in that cycle uninhabitable because there is no way to construct the types +// one at a time. All types that reference uninhabitable types via non-nullable +// references are also themselves uninhabitable, but these transitively +// uninhabitable types will become inhabitable once we fix the root causes, so +// we don't worry about them. +// +// To modify uninhabitable types to make them habitable, it suffices to make all +// non-nullable references to bottom types nullable and to break all cycles of +// non-nullable references by making one of the references nullable. To preserve +// valid subtyping, the newly nullable fields must also be made nullable in any +// supertypes in which they appear. +struct Inhabitator { + // Uniquely identify fields as an index into a type. + using FieldPos = std::pair<HeapType, Index>; + + // When we make a reference nullable, we typically need to make the same + // reference in other types nullable to maintain valid subtyping. Which types + // we need to update depends on the variance of the reference, which is + // determined by how it is used in its enclosing heap type. 
+ // + // An invariant field of a heaptype must have the same type in subtypes of + // that heaptype. A covariant field of a heaptype must be typed with a subtype + // of its original type in subtypes of the heaptype. A contravariant field of + // a heap type must be typed with a supertype of its original type in subtypes + // of the heaptype. + enum Variance { Invariant, Covariant, Contravariant }; + + // The input types. + const std::vector<HeapType>& types; + + // The fields we will make nullable. + std::unordered_set<FieldPos> nullables; + + SubTypes subtypes; + + Inhabitator(const std::vector<HeapType>& types) + : types(types), subtypes(types) {} + + Variance getVariance(FieldPos field); + void markNullable(FieldPos field); + void markBottomRefsNullable(); + void markExternRefsNullable(); + void breakNonNullableCycles(); + + std::vector<HeapType> build(); +}; + +Inhabitator::Variance Inhabitator::getVariance(FieldPos field) { + auto [type, idx] = field; + assert(!type.isBasic()); + if (type.isStruct()) { + if (type.getStruct().fields[idx].mutable_ == Mutable) { + return Invariant; + } else { + return Covariant; + } + } + if (type.isArray()) { + if (type.getArray().element.mutable_ == Mutable) { + return Invariant; + } else { + return Covariant; + } + } + if (type.isSignature()) { + if (idx < type.getSignature().params.size()) { + return Contravariant; + } else { + return Covariant; + } + } + WASM_UNREACHABLE("unexpected kind"); +} + +void Inhabitator::markNullable(FieldPos field) { + // Mark the given field nullable in the original type and in other types + // necessary to keep subtyping valid. + nullables.insert(field); + auto [curr, idx] = field; + switch (getVariance(field)) { + case Covariant: + // Mark the field null in all supertypes. If the supertype field is + // already nullable or does not exist, that's ok and this will have no + // effect. 
+ while (auto super = curr.getSuperType()) { + nullables.insert({*super, idx}); + curr = *super; + } + break; + case Invariant: + // Find the top type for which this field exists and mark the field + // nullable in all of its subtypes. + if (curr.isArray()) { + while (auto super = curr.getSuperType()) { + curr = *super; + } + } else { + assert(curr.isStruct()); + while (auto super = curr.getSuperType()) { + if (super->getStruct().fields.size() <= idx) { + break; + } + curr = *super; + } + } + [[fallthrough]]; + case Contravariant: + // Mark the field nullable in all subtypes. If the subtype field is + // already nullable, that's ok and this will have no effect. TODO: Remove + // this extra `index` variable once we have C++20. It's a workaround for + // lambdas being unable to capture structured bindings. + const size_t index = idx; + subtypes.iterSubTypes(curr, [&](HeapType type, Index) { + nullables.insert({type, index}); + }); + break; + } +} + +void Inhabitator::markBottomRefsNullable() { + for (auto type : types) { + auto children = type.getTypeChildren(); + for (size_t i = 0; i < children.size(); ++i) { + auto child = children[i]; + if (child.isRef() && child.getHeapType().isBottom() && + child.isNonNullable()) { + markNullable({type, i}); + } + } + } +} + +void Inhabitator::markExternRefsNullable() { + // The fuzzer cannot instantiate non-nullable externrefs, so make sure they + // are all nullable. + // TODO: Remove this once the fuzzer imports externref globals or gets some + // other way to instantiate externrefs. + for (auto type : types) { + auto children = type.getTypeChildren(); + for (size_t i = 0; i < children.size(); ++i) { + auto child = children[i]; + if (child.isRef() && child.getHeapType() == HeapType::ext && + child.isNonNullable()) { + markNullable({type, i}); + } + } + } +} + +// Use a depth-first search to find cycles, marking the last found reference in +// the cycle to be made nullable. 
+void Inhabitator::breakNonNullableCycles() { + // Types we've finished visiting. We don't need to visit them again. + std::unordered_set<HeapType> visited; + + // The path of types we are currently visiting. If one of them comes back up, + // we've found a cycle. Map the types to the other types they reference and + // our current index into that list so we can track where we are in each level + // of the search. + InsertOrderedMap<HeapType, std::pair<std::vector<Type>, Index>> visiting; + + for (auto root : types) { + if (visited.count(root)) { + continue; + } + assert(visiting.size() == 0); + visiting.insert({root, {root.getTypeChildren(), 0}}); + + while (visiting.size()) { + auto& [curr, state] = *std::prev(visiting.end()); + auto& [children, idx] = state; + + while (idx < children.size()) { + // Skip non-reference children because they cannot refer to other types. + if (!children[idx].isRef()) { + ++idx; + continue; + } + // Skip nullable references because they don't cause uninhabitable + // cycles. + if (children[idx].isNullable()) { + ++idx; + continue; + } + // Skip references that we have already marked nullable to satisfy + // subtyping constraints. TODO: We could take such nullable references + // into account when detecting cycles by tracking where in the current + // search path we have made references nullable. + if (nullables.count({curr, idx})) { + ++idx; + continue; + } + // Skip references to types that we have finished visiting. We have + // visited the full graph reachable from such references, so we know + // they cannot cycle back to anything we are currently visiting. + auto heapType = children[idx].getHeapType(); + if (visited.count(heapType)) { + ++idx; + continue; + } + // If this ref forms a cycle, break the cycle by marking it nullable and + // continue. 
+ if (auto it = visiting.find(heapType); it != visiting.end()) { + markNullable({curr, idx}); + ++idx; + continue; + } + break; + } + + // If we've finished the DFS on the current type, pop it off the search + // path and continue searching the previous type. + if (idx == children.size()) { + visited.insert(curr); + visiting.erase(std::prev(visiting.end())); + continue; + } + + // Otherwise we have a non-nullable reference we need to search. + assert(children[idx].isRef() && children[idx].isNonNullable()); + auto next = children[idx++].getHeapType(); + visiting.insert({next, {next.getTypeChildren(), 0}}); + } + } +} + +std::vector<HeapType> Inhabitator::build() { + std::unordered_map<HeapType, size_t> typeIndices; + for (size_t i = 0; i < types.size(); ++i) { + typeIndices.insert({types[i], i}); + } + + TypeBuilder builder(types.size()); + + // Copy types. Update references to point to the corresponding new type and + // make them nullable where necessary. + auto updateType = [&](FieldPos pos, Type& type) { + if (!type.isRef()) { + return; + } + auto heapType = type.getHeapType(); + auto nullability = type.getNullability(); + if (auto it = typeIndices.find(heapType); it != typeIndices.end()) { + heapType = builder[it->second]; + } + if (nullables.count(pos)) { + nullability = Nullable; + } + type = builder.getTempRefType(heapType, nullability); + }; + + for (size_t i = 0; i < types.size(); ++i) { + auto type = types[i]; + if (type.isStruct()) { + Struct copy = type.getStruct(); + for (size_t j = 0; j < copy.fields.size(); ++j) { + updateType({type, j}, copy.fields[j].type); + } + builder[i] = copy; + continue; + } + if (type.isArray()) { + Array copy = type.getArray(); + updateType({type, 0}, copy.element.type); + builder[i] = copy; + continue; + } + if (type.isSignature()) { + auto sig = type.getSignature(); + size_t j = 0; + std::vector<Type> params; + for (auto param : sig.params) { + params.push_back(param); + updateType({type, j++}, params.back()); + } + 
std::vector<Type> results; + for (auto result : sig.results) { + results.push_back(result); + updateType({type, j++}, results.back()); + } + builder[i] = Signature(builder.getTempTupleType(params), + builder.getTempTupleType(results)); + continue; + } + WASM_UNREACHABLE("unexpected type kind"); + } + + // Establish rec groups. + for (size_t start = 0; start < types.size();) { + size_t size = types[start].getRecGroup().size(); + builder.createRecGroup(start, size); + start += size; + } + + // Establish supertypes. + for (size_t i = 0; i < types.size(); ++i) { + if (auto super = types[i].getSuperType()) { + if (auto it = typeIndices.find(*super); it != typeIndices.end()) { + builder[i].subTypeOf(builder[it->second]); + } else { + builder[i].subTypeOf(*super); + } + } + } + + auto built = builder.build(); + assert(!built.getError() && "unexpected build error"); + return *built; +} + +} // anonymous namespace + +std::vector<HeapType> +HeapTypeGenerator::makeInhabitable(const std::vector<HeapType>& types) { + if (types.empty()) { + return {}; + } + + // Remove duplicate and basic types. We will insert them back at the end. + std::unordered_map<HeapType, size_t> typeIndices; + std::vector<size_t> deduplicatedIndices; + std::vector<HeapType> deduplicated; + for (auto type : types) { + if (type.isBasic()) { + deduplicatedIndices.push_back(-1); + continue; + } + auto [it, inserted] = typeIndices.insert({type, deduplicated.size()}); + if (inserted) { + deduplicated.push_back(type); + } + deduplicatedIndices.push_back(it->second); + } + assert(deduplicatedIndices.size() == types.size()); + + // Construct the new types. + Inhabitator inhabitator(deduplicated); + inhabitator.markBottomRefsNullable(); + inhabitator.markExternRefsNullable(); + inhabitator.breakNonNullableCycles(); + deduplicated = inhabitator.build(); + + // Re-duplicate and re-insert basic types as necessary. 
+ std::vector<HeapType> result; + for (size_t i = 0; i < types.size(); ++i) { + if (deduplicatedIndices[i] == (size_t)-1) { + assert(types[i].isBasic()); + result.push_back(types[i]); + } else { + result.push_back(deduplicated[deduplicatedIndices[i]]); + } + } + return result; +} + } // namespace wasm diff --git a/src/tools/fuzzing/heap-types.h b/src/tools/fuzzing/heap-types.h index c54e3053d..7759a88f0 100644 --- a/src/tools/fuzzing/heap-types.h +++ b/src/tools/fuzzing/heap-types.h @@ -38,6 +38,12 @@ struct HeapTypeGenerator { // Create a populated `HeapTypeGenerator` with `n` random HeapTypes with // interesting subtyping. static HeapTypeGenerator create(Random& rand, FeatureSet features, size_t n); + + // Given a sequence of newly-built heap types, produce a sequence of similar + // or identical types that are all inhabitable, i.e. that are possible to + // create values for. + static std::vector<HeapType> + makeInhabitable(const std::vector<HeapType>& types); }; } // namespace wasm diff --git a/src/tools/wasm-fuzz-types.cpp b/src/tools/wasm-fuzz-types.cpp index 0e40e7a15..4dfa1cf62 100644 --- a/src/tools/wasm-fuzz-types.cpp +++ b/src/tools/wasm-fuzz-types.cpp @@ -47,12 +47,13 @@ struct Fuzzer { // Generate types and run checkers on them. void run(uint64_t seed); - void printTypes(); + static void printTypes(const std::vector<HeapType>&); // Checkers for various properties. 
void checkSubtypes() const; void checkLUBs() const; void checkCanonicalization(); + void checkInhabitable(); }; void Fuzzer::run(uint64_t seed) { @@ -80,15 +81,16 @@ void Fuzzer::run(uint64_t seed) { subtypeIndices = std::move(generator.subtypeIndices); if (verbose) { - printTypes(); + printTypes(types); } checkSubtypes(); checkLUBs(); checkCanonicalization(); + checkInhabitable(); } -void Fuzzer::printTypes() { +void Fuzzer::printTypes(const std::vector<HeapType>& types) { std::cout << "Built " << types.size() << " types:\n"; struct FatalTypeNameGenerator : TypeNameGeneratorBase<FatalTypeNameGenerator> { @@ -489,6 +491,113 @@ void Fuzzer::checkCanonicalization() { } } +static std::optional<HeapType> +findUninhabitable(HeapType parent, + Type type, + std::unordered_set<HeapType>& visited, + std::unordered_set<HeapType>& visiting); + +// Simple recursive DFS through non-nullable references to see if we find any +// cycles. +static std::optional<HeapType> +findUninhabitable(HeapType type, + std::unordered_set<HeapType>& visited, + std::unordered_set<HeapType>& visiting) { + if (type.isBasic() || visited.count(type)) { + return std::nullopt; + } else if (type.isBasic()) { + return std::nullopt; + } + + if (!visiting.insert(type).second) { + return type; + } + + if (type.isStruct()) { + for (auto& field : type.getStruct().fields) { + if (auto t = findUninhabitable(type, field.type, visited, visiting)) { + return t; + } + } + } else if (type.isArray()) { + if (auto t = findUninhabitable( + type, type.getArray().element.type, visited, visiting)) { + return t; + } + } else if (type.isSignature()) { + auto sig = type.getSignature(); + for (auto types : {sig.params, sig.results}) { + for (auto child : types) { + if (auto t = findUninhabitable(type, child, visited, visiting)) { + return t; + } + } + } + } else { + WASM_UNREACHABLE("unexpected type kind"); + } + visiting.erase(type); + visited.insert(type); + return {}; +} + +static std::optional<HeapType> 
+findUninhabitable(HeapType parent, + Type type, + std::unordered_set<HeapType>& visited, + std::unordered_set<HeapType>& visiting) { + if (type.isRef() && type.isNonNullable()) { + if (type.getHeapType().isBottom() || type.getHeapType() == HeapType::ext) { + return parent; + } + return findUninhabitable(type.getHeapType(), visited, visiting); + } + return std::nullopt; +} + +void Fuzzer::checkInhabitable() { + std::vector<HeapType> inhabitable = HeapTypeGenerator::makeInhabitable(types); + if (verbose) { + std::cout << "\nInhabitable types:\n\n"; + printTypes(inhabitable); + } + + // Check whether any of the original types are uninhabitable. + std::unordered_set<HeapType> visited, visiting; + bool haveUninhabitable = false; + for (auto type : types) { + if (findUninhabitable(type, visited, visiting)) { + haveUninhabitable = true; + break; + } + } + visited.clear(); + visiting.clear(); + + if (haveUninhabitable) { + // Verify that the transformed types are inhabitable. + for (auto type : inhabitable) { + if (auto uninhabitable = findUninhabitable(type, visited, visiting)) { + IndexedTypeNameGenerator print(inhabitable); + Fatal() << "Found uninhabitable type: " << print(*uninhabitable); + } + } + } else if (getTypeSystem() == TypeSystem::Isorecursive) { + // Verify the produced inhabitable types are the same as the original types. + if (types.size() != inhabitable.size()) { + Fatal() << "Number of inhabitable types does not match number of " + "original types"; + } + for (size_t i = 0; i < types.size(); ++i) { + if (types[i] != inhabitable[i]) { + IndexedTypeNameGenerator print(types); + Fatal() << "makeInhabitable incorrectly changed type " + << print(types[i]); + } + } + } +} + } // namespace wasm int main(int argc, const char* argv[]) { diff --git a/src/wasm-type.h b/src/wasm-type.h index 67223705e..d72f5477d 100644 --- a/src/wasm-type.h +++ b/src/wasm-type.h @@ -405,6 +405,8 @@ public: // Returns true if left is a subtype of right. Subtype includes itself. 
static bool isSubType(HeapType left, HeapType right); + std::vector<Type> getTypeChildren() const; + // Return the ordered HeapType children, looking through child Types. std::vector<HeapType> getHeapTypeChildren() const; diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index e46e0c6ae..2c7c3e232 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -1450,6 +1450,33 @@ bool HeapType::isSubType(HeapType left, HeapType right) { return SubTyper().isSubType(left, right); } +std::vector<Type> HeapType::getTypeChildren() const { + if (isBasic()) { + return {}; + } + if (isStruct()) { + std::vector<Type> children; + for (auto& field : getStruct().fields) { + children.push_back(field.type); + } + return children; + } + if (isArray()) { + return {getArray().element.type}; + } + if (isSignature()) { + std::vector<Type> children; + auto sig = getSignature(); + for (auto tuple : {sig.params, sig.results}) { + for (auto t : tuple) { + children.push_back(t); + } + } + return children; + } + WASM_UNREACHABLE("unexpected kind"); +} + std::vector<HeapType> HeapType::getHeapTypeChildren() const { HeapTypeChildCollector collector; collector.walkRoot(const_cast<HeapType*>(this)); |