diff options
author | Thomas Lively <tlively@google.com> | 2023-07-06 15:55:16 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-06 12:55:16 -0700 |
commit | 73f24c0af330578e9fa01e3d5fc6391619baaa9e (patch) | |
tree | a503cf40ed9a873a0f18d9bb8f6d51f71b91a9b9 /src | |
parent | 195e18d0602108b18f305711f8ea1c902b729cb0 (diff) | |
download | binaryen-73f24c0af330578e9fa01e3d5fc6391619baaa9e.tar.gz binaryen-73f24c0af330578e9fa01e3d5fc6391619baaa9e.tar.bz2 binaryen-73f24c0af330578e9fa01e3d5fc6391619baaa9e.zip |
Initial support for `final` types (#5803)
Implement support in the type system for final types, which are not allowed to
have any subtypes. Final types are syntactically different from similar
non-final types, so type canonicalization is made aware of finality. Similarly,
TypeMerging and TypeSSA are updated to work correctly in the presence of final
types as well.
Implement binary and text parsing and emitting of final types. Use the standard
text format to represent final types and interpret the non-standard
"struct_subtype" and friends as non-final. This allows a graceful upgrade path
for users currently using the non-standard text format, where they can update
their code to use final types correctly at the point when they update to use the
standard format. Once users have migrated to using the fully expanded standard
text format, we can update update Binaryen's parsers to interpret the MVP
shorthands as final types to match the spec without breaking those users.
To make it safe for V8 to independently start interpreting types declared
without `sub` as final, also reserve that shorthand encoding only for types that
have no strict subtypes.
Diffstat (limited to 'src')
-rw-r--r-- | src/ir/type-updating.cpp | 1 | ||||
-rw-r--r-- | src/passes/Print.cpp | 21 | ||||
-rw-r--r-- | src/passes/TypeMerging.cpp | 11 | ||||
-rw-r--r-- | src/passes/TypeSSA.cpp | 6 | ||||
-rw-r--r-- | src/tools/fuzzing/heap-types.cpp | 10 | ||||
-rw-r--r-- | src/tools/wasm-fuzz-types.cpp | 5 | ||||
-rw-r--r-- | src/wasm-binary.h | 9 | ||||
-rw-r--r-- | src/wasm-type.h | 7 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 39 | ||||
-rw-r--r-- | src/wasm/wasm-s-parser.cpp | 17 | ||||
-rw-r--r-- | src/wasm/wasm-type.cpp | 90 |
11 files changed, 152 insertions, 64 deletions
diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp index aa042d86e..e11705812 100644 --- a/src/ir/type-updating.cpp +++ b/src/ir/type-updating.cpp @@ -70,6 +70,7 @@ void GlobalTypeRewriter::update() { // Create the temporary heap types. i = 0; for (auto [type, _] : typeIndices) { + typeBuilder[i].setFinal(type.isFinal()); if (type.isSignature()) { auto sig = type.getSignature(); TypeList newParams, newResults; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 5d49ad900..67d0dbdcc 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -3017,13 +3017,20 @@ struct PrintSExpression : public UnifiedExpressionVisitor<PrintSExpression> { o << ')'; } void handleHeapType(HeapType type) { - bool hasSuper = false; - // TODO: Consider finality once we support that. - if (auto super = type.getSuperType()) { - hasSuper = true; + auto super = type.getSuperType(); + bool useSub = false; + // TODO: Once we parse MVP signature types as final, use the MVP shorthand + // for final types without supertypes. + if (super || type.isFinal()) { + useSub = true; o << "(sub "; - TypeNamePrinter(o, currModule).print(*super); - o << ' '; + if (type.isFinal()) { + o << "final "; + } + if (super) { + TypeNamePrinter(o, currModule).print(*super); + o << ' '; + } } if (type.isSignature()) { handleSignature(type); @@ -3034,7 +3041,7 @@ struct PrintSExpression : public UnifiedExpressionVisitor<PrintSExpression> { } else { o << type; } - if (hasSuper) { + if (useSub) { o << ')'; } } diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp index 1897adc3c..fa36eb767 100644 --- a/src/passes/TypeMerging.cpp +++ b/src/passes/TypeMerging.cpp @@ -467,6 +467,9 @@ bool shapeEq(HeapType a, HeapType b) { // Check whether `a` and `b` have the same top-level structure, including the // position and identity of any children that are not included as transitions // in the DFA, i.e. any children that are not nontrivial references. + if (a.isFinal() != b.isFinal()) { + return false; + } if (a.isStruct() && b.isStruct()) { return shapeEq(a.getStruct(), b.getStruct()); } @@ -480,15 +483,15 @@ bool shapeEq(HeapType a, HeapType b) { } size_t shapeHash(HeapType a) { - size_t digest; + size_t digest = hash(a.isFinal()); if (a.isStruct()) { - digest = hash(0); + rehash(digest, 0); hash_combine(digest, shapeHash(a.getStruct())); } else if (a.isArray()) { - digest = hash(1); + rehash(digest, 1); hash_combine(digest, shapeHash(a.getArray())); } else if (a.isSignature()) { - digest = hash(2); + rehash(digest, 2); hash_combine(digest, shapeHash(a.getSignature())); } else { WASM_UNREACHABLE("unexpected kind"); diff --git a/src/passes/TypeSSA.cpp b/src/passes/TypeSSA.cpp index 33433083f..25575e70e 100644 --- a/src/passes/TypeSSA.cpp +++ b/src/passes/TypeSSA.cpp @@ -109,6 +109,7 @@ std::vector<HeapType> ensureTypesAreInNewRecGroup(RecGroup recGroup, if (auto super = type.getSuperType()) { builder[i].subTypeOf(*super); } + builder[i].setFinal(type.isFinal()); } // Implement the hash as a struct with "random" fields, and add it. @@ -306,6 +307,11 @@ struct TypeSSA : public Pass { return false; } + if (curr->type.getHeapType().isFinal()) { + // We can't create new subtypes of a final type anyway. + return false; + } + // Look for at least one interesting operand. We will consider each operand // against the declared type, that is, the type declared for where it is // stored. If it has more information than the declared type then it is diff --git a/src/tools/fuzzing/heap-types.cpp b/src/tools/fuzzing/heap-types.cpp index 26cc518be..2f77c5765 100644 --- a/src/tools/fuzzing/heap-types.cpp +++ b/src/tools/fuzzing/heap-types.cpp @@ -85,6 +85,13 @@ struct HeapTypeGeneratorImpl { } } + // Types without nontrivial subtypes may be marked final. + for (Index i = 0; i < builder.size(); ++i) { + if (subtypeIndices[i].size() == 1 && rand.oneIn(2)) { + builder[i].setFinal(); + } + } + // Initialize the recursion groups. recGroupEnds.reserve(builder.size()); // Create isorecursive recursion groups. Choose an expected group size @@ -878,7 +885,7 @@ std::vector<HeapType> Inhabitator::build() { start += size; } - // Establish supertypes. + // Establish supertypes and finality. for (size_t i = 0; i < types.size(); ++i) { if (auto super = types[i].getSuperType()) { if (auto it = typeIndices.find(*super); it != typeIndices.end()) { @@ -887,6 +894,7 @@ std::vector<HeapType> Inhabitator::build() { builder[i].subTypeOf(*super); } } + builder[i].setFinal(types[i].isFinal()); } auto built = builder.build(); diff --git a/src/tools/wasm-fuzz-types.cpp b/src/tools/wasm-fuzz-types.cpp index 4f480ba9d..50f1e1e3c 100644 --- a/src/tools/wasm-fuzz-types.cpp +++ b/src/tools/wasm-fuzz-types.cpp @@ -262,6 +262,11 @@ void Fuzzer::checkCanonicalization() { } } + // Set finality + for (size_t i = 0; i < types.size(); ++i) { + builder[i].setFinal(types[i].isFinal()); + } + // Set up recursion groups and record group ends to ensure we only select // valid children. recGroupEnds.reserve(builder.size()); diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 745358cbc..c6b3e7b6f 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -384,10 +384,11 @@ enum EncodedType { nullfuncref = -0x18, // 0x68 nullref = -0x1b, // 0x65 // type forms - Func = -0x20, // 0x60 - Struct = -0x21, // 0x5f - Array = -0x22, // 0x5e - Sub = -0x30, // 0x50 + Func = -0x20, // 0x60 + Struct = -0x21, // 0x5f + Array = -0x22, // 0x5e + Sub = -0x30, // 0x50 + SubFinal = -0x32, // 0x4e // prototype nominal forms we still parse FuncSubtype = -0x23, // 0x5d StructSubtype = -0x24, // 0x5c diff --git a/src/wasm-type.h b/src/wasm-type.h index a8a331ace..323ef0207 100644 --- a/src/wasm-type.h +++ b/src/wasm-type.h @@ -359,6 +359,7 @@ public: bool isArray() const; bool isString() const; bool isBottom() const; + bool isFinal() const; Signature getSignature() const; const Struct& getStruct() const; @@ -584,6 +585,8 @@ struct TypeBuilder { // not overlap or go out of bounds. void createRecGroup(size_t i, size_t length); + void setFinal(size_t i, bool final = true); + enum class ErrorReason { // There is a cycle in the supertype relation. SelfSupertype, @@ -645,6 +648,10 @@ struct TypeBuilder { builder.setSubType(index, other); return *this; } + Entry& setFinal(bool final = true) { + builder.setFinal(index, final); + return *this; + } }; Entry operator[](size_t i) { return Entry{*this, i}; } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index d9b7e1a84..639f4b848 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -236,6 +236,19 @@ void WasmBinaryWriter::writeTypes() { lastGroup = currGroup; } } + + // As a temporary measure, detect which types have subtypes and always use + // `sub` or `sub final` for these types. The standard says that types without + // `sub` or `sub final` are final, but we currently treat them as non-final. + // To avoid unsafe ambiguity, only use the short form for types that it would + // be safe to treat as final, i.e. types without subtypes. + std::vector<bool> hasSubtypes(indexedTypes.types.size()); + for (auto type : indexedTypes.types) { + if (auto super = type.getSuperType()) { + hasSubtypes[indexedTypes.indices[*super]] = true; + } + } + BYN_TRACE("== writeTypes\n"); auto start = startSection(BinaryConsts::Section::Type); o << U32LEB(numGroups); @@ -251,10 +264,21 @@ void WasmBinaryWriter::writeTypes() { lastGroup = currGroup; // Emit the type definition. BYN_TRACE("write " << type << std::endl); - if (auto super = type.getSuperType()) { - // Subtype constructor and vector of 1 supertype. - o << S32LEB(BinaryConsts::EncodedType::Sub) << U32LEB(1); - writeHeapType(*super); + auto super = type.getSuperType(); + // TODO: Use the binary shorthand for final types once we parse MVP + // signatures as final. + if (type.isFinal() || super || hasSubtypes[i]) { + if (type.isFinal()) { + o << S32LEB(BinaryConsts::EncodedType::SubFinal); + } else { + o << S32LEB(BinaryConsts::EncodedType::Sub); + } + if (super) { + o << U32LEB(1); + writeHeapType(*super); + } else { + o << U32LEB(0); + } } if (type.isSignature()) { o << S32LEB(BinaryConsts::EncodedType::Func); @@ -2210,7 +2234,12 @@ void WasmBinaryReader::readTypes() { form = getS32LEB(); } std::optional<uint32_t> superIndex; - if (form == BinaryConsts::EncodedType::Sub) { + if (form == BinaryConsts::EncodedType::Sub || + form == BinaryConsts::EncodedType::SubFinal) { + if (form == BinaryConsts::EncodedType::SubFinal) { + // TODO: Interpret type definitions without any `sub` as final as well. + builder[i].setFinal(); + } uint32_t supers = getU32LEB(); if (supers > 0) { if (supers != 1) { diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index e99200d58..9b7a1a90d 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -52,7 +52,8 @@ namespace wasm { static Name STRUCT("struct"), FIELD("field"), ARRAY("array"), FUNC_SUBTYPE("func_subtype"), STRUCT_SUBTYPE("struct_subtype"), ARRAY_SUBTYPE("array_subtype"), EXTENDS("extends"), REC("rec"), I8("i8"), - I16("i16"), DECLARE("declare"), ITEM("item"), OFFSET("offset"), SUB("sub"); + I16("i16"), DECLARE("declare"), ITEM("item"), OFFSET("offset"), SUB("sub"), + FINAL("final"); static Address getAddress(const Element* s) { return std::stoll(s->toString()); @@ -926,11 +927,19 @@ void SExpressionWasmBuilder::preParseHeapTypes(Element& module) { Element& kind = *def[0]; Element* super = nullptr; if (kind == SUB) { - if (def.size() != 3) { + Index i = 1; + if (*def[i] == FINAL) { + builder[index].setFinal(); + ++i; + } + if (def[i]->dollared()) { + super = def[i]; + ++i; + } + Element& subtype = *def[i++]; + if (i != def.size()) { throw ParseException("invalid 'sub' form", kind.line, kind.col); } - super = def[1]; - Element& subtype = *def[2]; if (!subtype.isList() || subtype.size() < 1) { throw ParseException( "invalid subtype definition", subtype.line, subtype.col); diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index aa08c52c2..6baf0feb0 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -85,6 +85,7 @@ struct HeapTypeInfo { // Used in assertions to ensure that temporary types don't leak into the // global store. bool isTemp = false; + bool isFinal = false; // The supertype of this HeapType, if it exists. HeapTypeInfo* supertype = nullptr; // The recursion group of this type or null if the recursion group is trivial @@ -153,12 +154,9 @@ struct TypePrinter { std::ostream& print(HeapType type); std::ostream& print(const Tuple& tuple); std::ostream& print(const Field& field); - std::ostream& print(const Signature& sig, - std::optional<HeapType> super = std::nullopt); - std::ostream& print(const Struct& struct_, - std::optional<HeapType> super = std::nullopt); - std::ostream& print(const Array& array, - std::optional<HeapType> super = std::nullopt); + std::ostream& print(const Signature& sig); + std::ostream& print(const Struct& struct_); + std::ostream& print(const Array& array); }; struct RecGroupHasher { @@ -1156,6 +1154,14 @@ bool HeapType::isBottom() const { return false; } +bool HeapType::isFinal() const { + if (isBasic()) { + return false; + } else { + return getHeapTypeInfo(*this)->isFinal; + } +} + Signature HeapType::getSignature() const { assert(isSignature()); return getHeapTypeInfo(*this)->signature; @@ -1735,19 +1741,38 @@ std::ostream& TypePrinter::print(HeapType type) { if (isTemp(type)) { os << "(; temp ;) "; } + #if TRACE_CANONICALIZATION os << "(;" << ((type.getID() >> 4) % 1000) << ";)"; #endif + + // TODO: Use shorthand for final types once we parse MVP signatures as final. + bool useSub = false; + auto super = type.getSuperType(); + if (super || type.isFinal()) { + useSub = true; + os << "(sub "; + if (type.isFinal()) { + os << "final "; + } + if (super) { + printHeapTypeName(*super); + os << ' '; + } + } if (type.isSignature()) { - print(type.getSignature(), type.getSuperType()); + print(type.getSignature()); } else if (type.isStruct()) { - print(type.getStruct(), type.getSuperType()); + print(type.getStruct()); } else if (type.isArray()) { - print(type.getArray(), type.getSuperType()); + print(type.getArray()); } else { WASM_UNREACHABLE("unexpected type"); } - return os << ")"; + if (useSub) { + os << ')'; + } + return os << ')'; } std::ostream& TypePrinter::print(const Tuple& tuple) { @@ -1783,8 +1808,7 @@ std::ostream& TypePrinter::print(const Field& field) { return os; } -std::ostream& TypePrinter::print(const Signature& sig, - std::optional<HeapType> super) { +std::ostream& TypePrinter::print(const Signature& sig) { auto printPrefixed = [&](const char* prefix, Type type) { os << '(' << prefix; for (Type t : type) { @@ -1795,9 +1819,6 @@ std::ostream& TypePrinter::print(const Signature& sig, }; os << "(func"; - if (super) { - os << "_subtype"; - } if (sig.params.getID() != Type::none) { os << ' '; printPrefixed("param", sig.params); @@ -1806,19 +1827,11 @@ std::ostream& TypePrinter::print(const Signature& sig, os << ' '; printPrefixed("result", sig.results); } - if (super) { - os << ' '; - printHeapTypeName(*super); - } return os << ')'; } -std::ostream& TypePrinter::print(const Struct& struct_, - std::optional<HeapType> super) { +std::ostream& TypePrinter::print(const Struct& struct_) { os << "(struct"; - if (super) { - os << "_subtype"; - } if (struct_.fields.size()) { os << " (field"; } @@ -1829,25 +1842,12 @@ std::ostream& TypePrinter::print(const Struct& struct_, if (struct_.fields.size()) { os << ')'; } - if (super) { - os << ' '; - printHeapTypeName(*super); - } return os << ')'; } -std::ostream& TypePrinter::print(const Array& array, - std::optional<HeapType> super) { - os << "(array"; - if (super) { - os << "_subtype"; - } - os << ' '; +std::ostream& TypePrinter::print(const Array& array) { + os << "(array "; print(array.element); - if (super) { - os << ' '; - printHeapTypeName(*super); - } return os << ')'; } @@ -1916,6 +1916,7 @@ size_t RecGroupHasher::hash(const HeapTypeInfo& info) const { if (info.supertype) { hash_combine(digest, hash(HeapType(uintptr_t(info.supertype)))); } + wasm::rehash(digest, info.isFinal); wasm::rehash(digest, info.kind); switch (info.kind) { case HeapTypeInfo::SignatureKind: @@ -2038,6 +2039,9 @@ bool RecGroupEquator::eq(const HeapTypeInfo& a, const HeapTypeInfo& b) const { return false; } } + if (a.isFinal != b.isFinal) { + return false; + } if (a.kind != b.kind) { return false; } @@ -2291,9 +2295,17 @@ void TypeBuilder::createRecGroup(size_t index, size_t length) { {RecGroup(uintptr_t(groupInfo.get())), std::move(groupInfo)}); } +void TypeBuilder::setFinal(size_t i, bool final) { + assert(i < size() && "index out of bounds"); + impl->entries[i].info->isFinal = final; +} + namespace { bool isValidSupertype(const HeapTypeInfo& sub, const HeapTypeInfo& super) { + if (super.isFinal) { + return false; + } if (sub.kind != super.kind) { return false; } |