summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2023-07-06 15:55:16 -0400
committerGitHub <noreply@github.com>2023-07-06 12:55:16 -0700
commit73f24c0af330578e9fa01e3d5fc6391619baaa9e (patch)
treea503cf40ed9a873a0f18d9bb8f6d51f71b91a9b9 /src
parent195e18d0602108b18f305711f8ea1c902b729cb0 (diff)
downloadbinaryen-73f24c0af330578e9fa01e3d5fc6391619baaa9e.tar.gz
binaryen-73f24c0af330578e9fa01e3d5fc6391619baaa9e.tar.bz2
binaryen-73f24c0af330578e9fa01e3d5fc6391619baaa9e.zip
Initial support for `final` types (#5803)
Implement support in the type system for final types, which are not allowed to have any subtypes. Final types are syntactically different from similar non-final types, so type canonicalization is made aware of finality. Similarly, TypeMerging and TypeSSA are updated to work correctly in the presence of final types as well. Implement binary and text parsing and emitting of final types. Use the standard text format to represent final types and interpret the non-standard "struct_subtype" and friends as non-final. This allows a graceful upgrade path for users currently using the non-standard text format, where they can update their code to use final types correctly at the point when they update to use the standard format. Once users have migrated to using the fully expanded standard text format, we can update update Binaryen's parsers to interpret the MVP shorthands as final types to match the spec without breaking those users. To make it safe for V8 to independently start interpreting types declared without `sub` as final, also reserve that shorthand encoding only for types that have no strict subtypes.
Diffstat (limited to 'src')
-rw-r--r--src/ir/type-updating.cpp1
-rw-r--r--src/passes/Print.cpp21
-rw-r--r--src/passes/TypeMerging.cpp11
-rw-r--r--src/passes/TypeSSA.cpp6
-rw-r--r--src/tools/fuzzing/heap-types.cpp10
-rw-r--r--src/tools/wasm-fuzz-types.cpp5
-rw-r--r--src/wasm-binary.h9
-rw-r--r--src/wasm-type.h7
-rw-r--r--src/wasm/wasm-binary.cpp39
-rw-r--r--src/wasm/wasm-s-parser.cpp17
-rw-r--r--src/wasm/wasm-type.cpp90
11 files changed, 152 insertions, 64 deletions
diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp
index aa042d86e..e11705812 100644
--- a/src/ir/type-updating.cpp
+++ b/src/ir/type-updating.cpp
@@ -70,6 +70,7 @@ void GlobalTypeRewriter::update() {
// Create the temporary heap types.
i = 0;
for (auto [type, _] : typeIndices) {
+ typeBuilder[i].setFinal(type.isFinal());
if (type.isSignature()) {
auto sig = type.getSignature();
TypeList newParams, newResults;
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 5d49ad900..67d0dbdcc 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -3017,13 +3017,20 @@ struct PrintSExpression : public UnifiedExpressionVisitor<PrintSExpression> {
o << ')';
}
void handleHeapType(HeapType type) {
- bool hasSuper = false;
- // TODO: Consider finality once we support that.
- if (auto super = type.getSuperType()) {
- hasSuper = true;
+ auto super = type.getSuperType();
+ bool useSub = false;
+ // TODO: Once we parse MVP signature types as final, use the MVP shorthand
+ // for final types without supertypes.
+ if (super || type.isFinal()) {
+ useSub = true;
o << "(sub ";
- TypeNamePrinter(o, currModule).print(*super);
- o << ' ';
+ if (type.isFinal()) {
+ o << "final ";
+ }
+ if (super) {
+ TypeNamePrinter(o, currModule).print(*super);
+ o << ' ';
+ }
}
if (type.isSignature()) {
handleSignature(type);
@@ -3034,7 +3041,7 @@ struct PrintSExpression : public UnifiedExpressionVisitor<PrintSExpression> {
} else {
o << type;
}
- if (hasSuper) {
+ if (useSub) {
o << ')';
}
}
diff --git a/src/passes/TypeMerging.cpp b/src/passes/TypeMerging.cpp
index 1897adc3c..fa36eb767 100644
--- a/src/passes/TypeMerging.cpp
+++ b/src/passes/TypeMerging.cpp
@@ -467,6 +467,9 @@ bool shapeEq(HeapType a, HeapType b) {
// Check whether `a` and `b` have the same top-level structure, including the
// position and identity of any children that are not included as transitions
// in the DFA, i.e. any children that are not nontrivial references.
+ if (a.isFinal() != b.isFinal()) {
+ return false;
+ }
if (a.isStruct() && b.isStruct()) {
return shapeEq(a.getStruct(), b.getStruct());
}
@@ -480,15 +483,15 @@ bool shapeEq(HeapType a, HeapType b) {
}
size_t shapeHash(HeapType a) {
- size_t digest;
+ size_t digest = hash(a.isFinal());
if (a.isStruct()) {
- digest = hash(0);
+ rehash(digest, 0);
hash_combine(digest, shapeHash(a.getStruct()));
} else if (a.isArray()) {
- digest = hash(1);
+ rehash(digest, 1);
hash_combine(digest, shapeHash(a.getArray()));
} else if (a.isSignature()) {
- digest = hash(2);
+ rehash(digest, 2);
hash_combine(digest, shapeHash(a.getSignature()));
} else {
WASM_UNREACHABLE("unexpected kind");
diff --git a/src/passes/TypeSSA.cpp b/src/passes/TypeSSA.cpp
index 33433083f..25575e70e 100644
--- a/src/passes/TypeSSA.cpp
+++ b/src/passes/TypeSSA.cpp
@@ -109,6 +109,7 @@ std::vector<HeapType> ensureTypesAreInNewRecGroup(RecGroup recGroup,
if (auto super = type.getSuperType()) {
builder[i].subTypeOf(*super);
}
+ builder[i].setFinal(type.isFinal());
}
// Implement the hash as a struct with "random" fields, and add it.
@@ -306,6 +307,11 @@ struct TypeSSA : public Pass {
return false;
}
+ if (curr->type.getHeapType().isFinal()) {
+ // We can't create new subtypes of a final type anyway.
+ return false;
+ }
+
// Look for at least one interesting operand. We will consider each operand
// against the declared type, that is, the type declared for where it is
// stored. If it has more information than the declared type then it is
diff --git a/src/tools/fuzzing/heap-types.cpp b/src/tools/fuzzing/heap-types.cpp
index 26cc518be..2f77c5765 100644
--- a/src/tools/fuzzing/heap-types.cpp
+++ b/src/tools/fuzzing/heap-types.cpp
@@ -85,6 +85,13 @@ struct HeapTypeGeneratorImpl {
}
}
+ // Types without nontrivial subtypes may be marked final.
+ for (Index i = 0; i < builder.size(); ++i) {
+ if (subtypeIndices[i].size() == 1 && rand.oneIn(2)) {
+ builder[i].setFinal();
+ }
+ }
+
// Initialize the recursion groups.
recGroupEnds.reserve(builder.size());
// Create isorecursive recursion groups. Choose an expected group size
@@ -878,7 +885,7 @@ std::vector<HeapType> Inhabitator::build() {
start += size;
}
- // Establish supertypes.
+ // Establish supertypes and finality.
for (size_t i = 0; i < types.size(); ++i) {
if (auto super = types[i].getSuperType()) {
if (auto it = typeIndices.find(*super); it != typeIndices.end()) {
@@ -887,6 +894,7 @@ std::vector<HeapType> Inhabitator::build() {
builder[i].subTypeOf(*super);
}
}
+ builder[i].setFinal(types[i].isFinal());
}
auto built = builder.build();
diff --git a/src/tools/wasm-fuzz-types.cpp b/src/tools/wasm-fuzz-types.cpp
index 4f480ba9d..50f1e1e3c 100644
--- a/src/tools/wasm-fuzz-types.cpp
+++ b/src/tools/wasm-fuzz-types.cpp
@@ -262,6 +262,11 @@ void Fuzzer::checkCanonicalization() {
}
}
+ // Set finality
+ for (size_t i = 0; i < types.size(); ++i) {
+ builder[i].setFinal(types[i].isFinal());
+ }
+
// Set up recursion groups and record group ends to ensure we only select
// valid children.
recGroupEnds.reserve(builder.size());
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 745358cbc..c6b3e7b6f 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -384,10 +384,11 @@ enum EncodedType {
nullfuncref = -0x18, // 0x68
nullref = -0x1b, // 0x65
// type forms
- Func = -0x20, // 0x60
- Struct = -0x21, // 0x5f
- Array = -0x22, // 0x5e
- Sub = -0x30, // 0x50
+ Func = -0x20, // 0x60
+ Struct = -0x21, // 0x5f
+ Array = -0x22, // 0x5e
+ Sub = -0x30, // 0x50
+ SubFinal = -0x32, // 0x4e
// prototype nominal forms we still parse
FuncSubtype = -0x23, // 0x5d
StructSubtype = -0x24, // 0x5c
diff --git a/src/wasm-type.h b/src/wasm-type.h
index a8a331ace..323ef0207 100644
--- a/src/wasm-type.h
+++ b/src/wasm-type.h
@@ -359,6 +359,7 @@ public:
bool isArray() const;
bool isString() const;
bool isBottom() const;
+ bool isFinal() const;
Signature getSignature() const;
const Struct& getStruct() const;
@@ -584,6 +585,8 @@ struct TypeBuilder {
// not overlap or go out of bounds.
void createRecGroup(size_t i, size_t length);
+ void setFinal(size_t i, bool final = true);
+
enum class ErrorReason {
// There is a cycle in the supertype relation.
SelfSupertype,
@@ -645,6 +648,10 @@ struct TypeBuilder {
builder.setSubType(index, other);
return *this;
}
+ Entry& setFinal(bool final = true) {
+ builder.setFinal(index, final);
+ return *this;
+ }
};
Entry operator[](size_t i) { return Entry{*this, i}; }
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index d9b7e1a84..639f4b848 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -236,6 +236,19 @@ void WasmBinaryWriter::writeTypes() {
lastGroup = currGroup;
}
}
+
+ // As a temporary measure, detect which types have subtypes and always use
+ // `sub` or `sub final` for these types. The standard says that types without
+ // `sub` or `sub final` are final, but we currently treat them as non-final.
+ // To avoid unsafe ambiguity, only use the short form for types that it would
+ // be safe to treat as final, i.e. types without subtypes.
+ std::vector<bool> hasSubtypes(indexedTypes.types.size());
+ for (auto type : indexedTypes.types) {
+ if (auto super = type.getSuperType()) {
+ hasSubtypes[indexedTypes.indices[*super]] = true;
+ }
+ }
+
BYN_TRACE("== writeTypes\n");
auto start = startSection(BinaryConsts::Section::Type);
o << U32LEB(numGroups);
@@ -251,10 +264,21 @@ void WasmBinaryWriter::writeTypes() {
lastGroup = currGroup;
// Emit the type definition.
BYN_TRACE("write " << type << std::endl);
- if (auto super = type.getSuperType()) {
- // Subtype constructor and vector of 1 supertype.
- o << S32LEB(BinaryConsts::EncodedType::Sub) << U32LEB(1);
- writeHeapType(*super);
+ auto super = type.getSuperType();
+ // TODO: Use the binary shorthand for final types once we parse MVP
+ // signatures as final.
+ if (type.isFinal() || super || hasSubtypes[i]) {
+ if (type.isFinal()) {
+ o << S32LEB(BinaryConsts::EncodedType::SubFinal);
+ } else {
+ o << S32LEB(BinaryConsts::EncodedType::Sub);
+ }
+ if (super) {
+ o << U32LEB(1);
+ writeHeapType(*super);
+ } else {
+ o << U32LEB(0);
+ }
}
if (type.isSignature()) {
o << S32LEB(BinaryConsts::EncodedType::Func);
@@ -2210,7 +2234,12 @@ void WasmBinaryReader::readTypes() {
form = getS32LEB();
}
std::optional<uint32_t> superIndex;
- if (form == BinaryConsts::EncodedType::Sub) {
+ if (form == BinaryConsts::EncodedType::Sub ||
+ form == BinaryConsts::EncodedType::SubFinal) {
+ if (form == BinaryConsts::EncodedType::SubFinal) {
+ // TODO: Interpret type definitions without any `sub` as final as well.
+ builder[i].setFinal();
+ }
uint32_t supers = getU32LEB();
if (supers > 0) {
if (supers != 1) {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index e99200d58..9b7a1a90d 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -52,7 +52,8 @@ namespace wasm {
static Name STRUCT("struct"), FIELD("field"), ARRAY("array"),
FUNC_SUBTYPE("func_subtype"), STRUCT_SUBTYPE("struct_subtype"),
ARRAY_SUBTYPE("array_subtype"), EXTENDS("extends"), REC("rec"), I8("i8"),
- I16("i16"), DECLARE("declare"), ITEM("item"), OFFSET("offset"), SUB("sub");
+ I16("i16"), DECLARE("declare"), ITEM("item"), OFFSET("offset"), SUB("sub"),
+ FINAL("final");
static Address getAddress(const Element* s) {
return std::stoll(s->toString());
@@ -926,11 +927,19 @@ void SExpressionWasmBuilder::preParseHeapTypes(Element& module) {
Element& kind = *def[0];
Element* super = nullptr;
if (kind == SUB) {
- if (def.size() != 3) {
+ Index i = 1;
+ if (*def[i] == FINAL) {
+ builder[index].setFinal();
+ ++i;
+ }
+ if (def[i]->dollared()) {
+ super = def[i];
+ ++i;
+ }
+ Element& subtype = *def[i++];
+ if (i != def.size()) {
throw ParseException("invalid 'sub' form", kind.line, kind.col);
}
- super = def[1];
- Element& subtype = *def[2];
if (!subtype.isList() || subtype.size() < 1) {
throw ParseException(
"invalid subtype definition", subtype.line, subtype.col);
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index aa08c52c2..6baf0feb0 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -85,6 +85,7 @@ struct HeapTypeInfo {
// Used in assertions to ensure that temporary types don't leak into the
// global store.
bool isTemp = false;
+ bool isFinal = false;
// The supertype of this HeapType, if it exists.
HeapTypeInfo* supertype = nullptr;
// The recursion group of this type or null if the recursion group is trivial
@@ -153,12 +154,9 @@ struct TypePrinter {
std::ostream& print(HeapType type);
std::ostream& print(const Tuple& tuple);
std::ostream& print(const Field& field);
- std::ostream& print(const Signature& sig,
- std::optional<HeapType> super = std::nullopt);
- std::ostream& print(const Struct& struct_,
- std::optional<HeapType> super = std::nullopt);
- std::ostream& print(const Array& array,
- std::optional<HeapType> super = std::nullopt);
+ std::ostream& print(const Signature& sig);
+ std::ostream& print(const Struct& struct_);
+ std::ostream& print(const Array& array);
};
struct RecGroupHasher {
@@ -1156,6 +1154,14 @@ bool HeapType::isBottom() const {
return false;
}
+bool HeapType::isFinal() const {
+ if (isBasic()) {
+ return false;
+ } else {
+ return getHeapTypeInfo(*this)->isFinal;
+ }
+}
+
Signature HeapType::getSignature() const {
assert(isSignature());
return getHeapTypeInfo(*this)->signature;
@@ -1735,19 +1741,38 @@ std::ostream& TypePrinter::print(HeapType type) {
if (isTemp(type)) {
os << "(; temp ;) ";
}
+
#if TRACE_CANONICALIZATION
os << "(;" << ((type.getID() >> 4) % 1000) << ";)";
#endif
+
+ // TODO: Use shorthand for final types once we parse MVP signatures as final.
+ bool useSub = false;
+ auto super = type.getSuperType();
+ if (super || type.isFinal()) {
+ useSub = true;
+ os << "(sub ";
+ if (type.isFinal()) {
+ os << "final ";
+ }
+ if (super) {
+ printHeapTypeName(*super);
+ os << ' ';
+ }
+ }
if (type.isSignature()) {
- print(type.getSignature(), type.getSuperType());
+ print(type.getSignature());
} else if (type.isStruct()) {
- print(type.getStruct(), type.getSuperType());
+ print(type.getStruct());
} else if (type.isArray()) {
- print(type.getArray(), type.getSuperType());
+ print(type.getArray());
} else {
WASM_UNREACHABLE("unexpected type");
}
- return os << ")";
+ if (useSub) {
+ os << ')';
+ }
+ return os << ')';
}
std::ostream& TypePrinter::print(const Tuple& tuple) {
@@ -1783,8 +1808,7 @@ std::ostream& TypePrinter::print(const Field& field) {
return os;
}
-std::ostream& TypePrinter::print(const Signature& sig,
- std::optional<HeapType> super) {
+std::ostream& TypePrinter::print(const Signature& sig) {
auto printPrefixed = [&](const char* prefix, Type type) {
os << '(' << prefix;
for (Type t : type) {
@@ -1795,9 +1819,6 @@ std::ostream& TypePrinter::print(const Signature& sig,
};
os << "(func";
- if (super) {
- os << "_subtype";
- }
if (sig.params.getID() != Type::none) {
os << ' ';
printPrefixed("param", sig.params);
@@ -1806,19 +1827,11 @@ std::ostream& TypePrinter::print(const Signature& sig,
os << ' ';
printPrefixed("result", sig.results);
}
- if (super) {
- os << ' ';
- printHeapTypeName(*super);
- }
return os << ')';
}
-std::ostream& TypePrinter::print(const Struct& struct_,
- std::optional<HeapType> super) {
+std::ostream& TypePrinter::print(const Struct& struct_) {
os << "(struct";
- if (super) {
- os << "_subtype";
- }
if (struct_.fields.size()) {
os << " (field";
}
@@ -1829,25 +1842,12 @@ std::ostream& TypePrinter::print(const Struct& struct_,
if (struct_.fields.size()) {
os << ')';
}
- if (super) {
- os << ' ';
- printHeapTypeName(*super);
- }
return os << ')';
}
-std::ostream& TypePrinter::print(const Array& array,
- std::optional<HeapType> super) {
- os << "(array";
- if (super) {
- os << "_subtype";
- }
- os << ' ';
+std::ostream& TypePrinter::print(const Array& array) {
+ os << "(array ";
print(array.element);
- if (super) {
- os << ' ';
- printHeapTypeName(*super);
- }
return os << ')';
}
@@ -1916,6 +1916,7 @@ size_t RecGroupHasher::hash(const HeapTypeInfo& info) const {
if (info.supertype) {
hash_combine(digest, hash(HeapType(uintptr_t(info.supertype))));
}
+ wasm::rehash(digest, info.isFinal);
wasm::rehash(digest, info.kind);
switch (info.kind) {
case HeapTypeInfo::SignatureKind:
@@ -2038,6 +2039,9 @@ bool RecGroupEquator::eq(const HeapTypeInfo& a, const HeapTypeInfo& b) const {
return false;
}
}
+ if (a.isFinal != b.isFinal) {
+ return false;
+ }
if (a.kind != b.kind) {
return false;
}
@@ -2291,9 +2295,17 @@ void TypeBuilder::createRecGroup(size_t index, size_t length) {
{RecGroup(uintptr_t(groupInfo.get())), std::move(groupInfo)});
}
+void TypeBuilder::setFinal(size_t i, bool final) {
+ assert(i < size() && "index out of bounds");
+ impl->entries[i].info->isFinal = final;
+}
+
namespace {
bool isValidSupertype(const HeapTypeInfo& sub, const HeapTypeInfo& super) {
+ if (super.isFinal) {
+ return false;
+ }
if (sub.kind != super.kind) {
return false;
}