summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2021-06-11 09:41:09 -0400
committerGitHub <noreply@github.com>2021-06-11 06:41:09 -0700
commit446634a503116186a025d23375c430cb837e00f0 (patch)
tree4538df79e3c7cabec072ff90053c30ea6309a0f5
parentca263c00ec8ff3b7c51d066b273eeee50180091b (diff)
downloadbinaryen-446634a503116186a025d23375c430cb837e00f0.tar.gz
binaryen-446634a503116186a025d23375c430cb837e00f0.tar.bz2
binaryen-446634a503116186a025d23375c430cb837e00f0.zip
Nominal subtyping (#3927)
Add methods to the TypeBuilder interface to declare subtyping relationships between the built types. These relationships are validated and recorded globally as part of type building. If the relationships are not valid, a fatal error is produced. In the future, it would be better to report the error to the TypeBuilder client code, but this behavior is sufficient for now. Also updates SubTyper and TypeBounder to be aware of nominal mode so that subtyping and LUBs are correctly calculated. Tests of the failing behavior will be added in a future PR that exposes this functionality to the command line, since the current `example` testing infrastructure cannot handle testing fatal errors.
-rw-r--r--src/wasm-type.h14
-rw-r--r--src/wasm/wasm-type.cpp302
-rw-r--r--test/example/type-builder-nominal.cpp143
-rw-r--r--test/example/type-builder-nominal.txt2
4 files changed, 386 insertions, 75 deletions
diff --git a/src/wasm-type.h b/src/wasm-type.h
index 11b23ed52..ff7e9af9c 100644
--- a/src/wasm-type.h
+++ b/src/wasm-type.h
@@ -550,9 +550,16 @@ struct TypeBuilder {
Type getTempRefType(HeapType heapType, Nullability nullable);
Type getTempRttType(Rtt rtt);
+ // In nominal mode, declare the HeapType being built at index `i` to be an
+ // immediate subtype of the HeapType being built at index `j`. Does nothing in
+ // equirecursive mode.
+ void setSubType(size_t i, size_t j);
+
// Returns all of the newly constructed heap types. May only be called once
// all of the heap types have been initialized with `setHeapType`. In nominal
- // mode, all of the constructed HeapTypes will be fresh and distinct.
+ // mode, all of the constructed HeapTypes will be fresh and distinct. In
+ // nominal mode, will also produce a fatal error if the declared subtype
+ // relationships are not valid.
std::vector<HeapType> build();
// Utility for ergonomically using operator[] instead of explicit setHeapType
@@ -581,6 +588,11 @@ struct TypeBuilder {
builder.setHeapType(index, array);
return *this;
}
+ Entry& subTypeOf(Entry other) {
+ assert(&builder == &other.builder);
+ builder.setSubType(index, other.index);
+ return *this;
+ }
};
Entry operator[](size_t i) { return Entry{*this, i}; }
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index 8d107a833..96cb0b557 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -101,6 +101,8 @@ struct HeapTypeInfo {
// Otherwise, the type definition tree is still being constructed via the
// TypeBuilder interface, so hashing and equality use pointer identity.
bool isFinalized = true;
+ // In nominal mode, the supertype of this HeapType, if it exists.
+ HeapTypeInfo* supertype = nullptr;
enum Kind {
BasicKind,
SignatureKind,
@@ -172,6 +174,8 @@ private:
// temporary types, so they should never be used directly.
bool lub(Type a, Type b, Type& out);
HeapType lub(HeapType a, HeapType b);
+ HeapType::BasicHeapType lub(HeapType::BasicHeapType a,
+ HeapType::BasicHeapType b);
bool lub(const Tuple& a, const Tuple& b, Tuple& out);
bool lub(const Field& a, const Field& b, Field& out);
bool lub(const Signature& a, const Signature& b, Signature& out);
@@ -582,6 +586,7 @@ bool TypeInfo::operator==(const TypeInfo& other) const {
HeapTypeInfo::HeapTypeInfo(const HeapTypeInfo& other) {
kind = other.kind;
+ supertype = other.supertype;
switch (kind) {
case BasicKind:
new (&basic) auto(other.basic);
@@ -1262,30 +1267,45 @@ bool SubTyper::isSubType(HeapType a, HeapType b) {
if (a == b) {
return true;
}
- if (seen.count({a, b})) {
- // We weren't able to disprove that a == b since we last saw them, so the
- // relation holds coinductively.
- return true;
- }
- // Everything is a subtype of any.
- if (b == HeapType::any) {
- return true;
- }
- // Various things are subtypes of eq.
- if (b == HeapType::eq) {
- return a == HeapType::i31 || a.isData();
+ if (b.isBasic()) {
+ switch (b.getBasic()) {
+ case HeapType::func:
+ return a.isSignature();
+ case HeapType::ext:
+ return false;
+ case HeapType::any:
+ return true;
+ case HeapType::eq:
+ return a == HeapType::i31 || a.isData();
+ case HeapType::i31:
+ return false;
+ case HeapType::data:
+ return a.isData();
+ }
}
- // Some are also subtypes of data.
- if (b == HeapType::data) {
- return a.isData();
+ if (a.isBasic()) {
+ // Basic HeapTypes are never subtypes of compound HeapTypes.
+ return false;
}
- // Signatures are subtypes of funcref.
- if (b == HeapType::func) {
- return a.isSignature();
+ if (typeSystem == TypeSystem::Nominal) {
+ // Subtyping must be declared in a nominal system, not derived from
+ // structure, so we will not recurse. TODO: optimize this search with some
+ // form of caching.
+ HeapTypeInfo* curr = getHeapTypeInfo(a);
+ while ((curr = curr->supertype)) {
+ if (curr == getHeapTypeInfo(b)) {
+ return true;
+ }
+ }
+ return false;
}
// As we recurse, we will coinductively assume that a == b unless proven
// otherwise.
- seen.insert({a, b});
+ if (!seen.insert({a, b}).second) {
+ // We weren't able to disprove that a == b since we last saw them, so the
+ // relation holds coinductively.
+ return true;
+ }
if (a.isSignature() && b.isSignature()) {
return isSubType(a.getSignature(), b.getSignature());
}
@@ -1416,44 +1436,71 @@ HeapType TypeBounder::lub(HeapType a, HeapType b) {
if (a == b) {
return a;
}
- // Canonicalize to have the basic HeapType on the left.
- if (b.isBasic()) {
- std::swap(a, b);
+
+ auto getBasicApproximation = [](HeapType x) {
+ if (x.isBasic()) {
+ return x.getBasic();
+ }
+ auto* info = getHeapTypeInfo(x);
+ switch (info->kind) {
+ case HeapTypeInfo::BasicKind:
+ break;
+ case HeapTypeInfo::SignatureKind:
+ return HeapType::func;
+ case HeapTypeInfo::StructKind:
+ case HeapTypeInfo::ArrayKind:
+ return HeapType::data;
+ }
+ WASM_UNREACHABLE("unexpected kind");
+ };
+
+ auto getBasicLUB = [&]() {
+ return lub(getBasicApproximation(a), getBasicApproximation(b));
+ };
+
+ if (a.isBasic() || b.isBasic()) {
+ return getBasicLUB();
}
- if (a.isBasic()) {
- switch (a.getBasic()) {
- case HeapType::func:
- if (b.isFunction()) {
- return HeapType::func;
- } else {
- return HeapType::any;
- }
- case HeapType::ext:
- return HeapType::any;
- case HeapType::any:
- return HeapType::any;
- case HeapType::eq:
- if (b == HeapType::i31 || b.isData()) {
- return HeapType::eq;
- } else {
- return HeapType::any;
- }
- case HeapType::i31:
- if (b.isData()) {
- return HeapType::eq;
- } else {
- return HeapType::any;
+
+ HeapTypeInfo* infoA = getHeapTypeInfo(a);
+ HeapTypeInfo* infoB = getHeapTypeInfo(b);
+
+ if (infoA->kind != infoB->kind) {
+ return getBasicLUB();
+ }
+
+ if (typeSystem == TypeSystem::Nominal) {
+ // Walk up the subtype tree to find the LUB. Ascend the tree from both `a`
+ // and `b` in lockstep. The first type we see for a second time must be the
+ // LUB because there are no cycles and the only way to encounter a type
+ // twice is for it to be on the path above both `a` and `b`.
+ std::unordered_set<HeapTypeInfo*> seen;
+ auto* currA = infoA;
+ auto* currB = infoB;
+ seen.insert(currA);
+ seen.insert(currB);
+ while (true) {
+ auto* nextA = currA->supertype;
+ auto* nextB = currB->supertype;
+ if (nextA == nullptr && nextB == nullptr) {
+ // Did not find a LUB in the subtype tree.
+ return getBasicLUB();
+ }
+ if (nextA) {
+ if (!seen.insert(nextA).second) {
+ return HeapType(uintptr_t(nextA));
}
- case HeapType::data:
- if (b.isData()) {
- return HeapType::data;
- } else if (b == HeapType::i31) {
- return HeapType::eq;
- } else {
- return HeapType::any;
+ currA = nextA;
+ }
+ if (currB) {
+ if (!seen.insert(nextB).second) {
+ return HeapType(uintptr_t(nextB));
}
+ currB = nextB;
+ }
}
}
+
// Allocate a new slot to construct the LUB of this pair if we have not
// already seen it before. Canonicalize the pair to have the element with the
// smaller ID first since order does not matter.
@@ -1467,30 +1514,60 @@ HeapType TypeBounder::lub(HeapType a, HeapType b) {
}
builder.grow(1);
- if (a.isSignature() && b.isSignature()) {
- Signature sig;
- if (lub(a.getSignature(), b.getSignature(), sig)) {
- return builder[index] = sig;
- } else {
- return builder[index] = HeapType::func;
+ switch (infoA->kind) {
+ case HeapTypeInfo::BasicKind:
+ WASM_UNREACHABLE("unexpected kind");
+ case HeapTypeInfo::SignatureKind: {
+ Signature sig;
+ if (lub(infoA->signature, infoB->signature, sig)) {
+ return builder[index] = sig;
+ } else {
+ return builder[index] = HeapType::func;
+ }
}
- } else if (a.isStruct() && b.isStruct()) {
- return builder[index] = lub(a.getStruct(), b.getStruct());
- } else if (a.isArray() && b.isArray()) {
- Array array;
- if (lub(a.getArray(), b.getArray(), array)) {
- return builder[index] = array;
- } else {
- return builder[index] = HeapType::data;
+ case HeapTypeInfo::StructKind: {
+ return builder[index] = lub(infoA->struct_, infoB->struct_);
}
- } else {
- // The types are not of the same kind, so the LUB is either `data` or `any`.
- if (a.isSignature() || b.isSignature()) {
- return builder[index] = HeapType::any;
- } else {
- return builder[index] = HeapType::data;
+ case HeapTypeInfo::ArrayKind: {
+ Array array;
+ if (lub(infoA->array, infoB->array, array)) {
+ return builder[index] = array;
+ } else {
+ return builder[index] = HeapType::data;
+ }
}
}
+ WASM_UNREACHABLE("unexpected kind");
+}
+
+HeapType::BasicHeapType TypeBounder::lub(HeapType::BasicHeapType a,
+ HeapType::BasicHeapType b) {
+ if (a == b) {
+ return a;
+ }
+ // Canonicalize to have `x` be the lesser type.
+ if (unsigned(a) > unsigned(b)) {
+ std::swap(a, b);
+ }
+ switch (a) {
+ case HeapType::func:
+ case HeapType::ext:
+ case HeapType::any:
+ return HeapType::any;
+ case HeapType::eq:
+ if (b == HeapType::i31 || b == HeapType::data) {
+ return HeapType::eq;
+ }
+ return HeapType::any;
+ case HeapType::i31:
+ if (b == HeapType::data) {
+ return HeapType::eq;
+ }
+ return HeapType::any;
+ case HeapType::data:
+ return HeapType::any;
+ }
+ WASM_UNREACHABLE("unexpected basic type");
}
bool TypeBounder::lub(const Tuple& a, const Tuple& b, Tuple& out) {
@@ -2102,6 +2179,7 @@ struct TypeBuilder::Impl {
set(Signature());
}
void set(HeapTypeInfo&& hti) {
+ hti.supertype = info->supertype;
*info = std::move(hti);
info->isTemp = true;
info->isFinalized = false;
@@ -2186,6 +2264,13 @@ Type TypeBuilder::getTempRttType(Rtt rtt) {
return markTemp(impl->typeStore.insert(rtt));
}
+void TypeBuilder::setSubType(size_t i, size_t j) {
+ assert(i < size() && j < size() && "index out of bounds");
+ HeapTypeInfo* sub = impl->entries[i].info.get();
+ HeapTypeInfo* super = impl->entries[j].info.get();
+ sub->supertype = super;
+}
+
namespace {
// A wrapper around a HeapType that provides equality and hashing based only on
@@ -2872,6 +2957,64 @@ std::vector<HeapType> buildEquirecursive(TypeBuilder& builder) {
return heapTypes;
}
+void validateNominalSubTyping(const std::vector<HeapType>& heapTypes) {
+ assert(typeSystem == TypeSystem::Nominal);
+
+ // Ensure there are no cycles in the subtype graph. This is the classic DFA
+ // algorithm for detecting cycles, but in the form of a simple loop because
+ // each node (type) has at most one child (supertype).
+ std::unordered_set<HeapTypeInfo*> seen;
+ for (auto type : heapTypes) {
+ std::unordered_set<HeapTypeInfo*> path;
+ for (auto* curr = getHeapTypeInfo(type);
+ seen.insert(curr).second && curr->supertype != nullptr;
+ curr = curr->supertype) {
+ if (!path.insert(curr).second) {
+ Fatal() << HeapType(uintptr_t(curr))
+ << " cannot be a subtype of itself";
+ }
+ }
+ }
+
+ // Ensure that all the subtype relations are valid.
+ for (HeapType type : heapTypes) {
+ auto* sub = getHeapTypeInfo(type);
+ auto* super = sub->supertype;
+ if (super == nullptr) {
+ continue;
+ }
+
+ auto fail = [&]() {
+ Fatal() << type << " cannot be a subtype of "
+ << HeapType(uintptr_t(super));
+ };
+
+ if (sub->kind != super->kind) {
+ fail();
+ }
+ SubTyper typer;
+ switch (sub->kind) {
+ case HeapTypeInfo::BasicKind:
+ WASM_UNREACHABLE("unexpected kind");
+ case HeapTypeInfo::SignatureKind:
+ if (!typer.isSubType(sub->signature, super->signature)) {
+ fail();
+ }
+ break;
+ case HeapTypeInfo::StructKind:
+ if (!typer.isSubType(sub->struct_, super->struct_)) {
+ fail();
+ }
+ break;
+ case HeapTypeInfo::ArrayKind:
+ if (!typer.isSubType(sub->array, super->array)) {
+ fail();
+ }
+ break;
+ }
+ }
+}
+
std::vector<HeapType> buildNominal(TypeBuilder& builder) {
#if TIME_CANONICALIZATION
auto start = std::chrono::steady_clock::now();
@@ -2887,12 +3030,23 @@ std::vector<HeapType> buildNominal(TypeBuilder& builder) {
}
#if TIME_CANONICALIZATION
+ auto afterMove = std::chrono::steady_clock::now();
+#endif
+
+ validateNominalSubTyping(heapTypes);
+
+#if TIME_CANONICALIZATION
auto end = std::chrono::steady_clock::now();
- std::cerr << "Building took "
- << std::chrono::duration_cast<std::chrono::milliseconds>(end -
+ std::cerr << "Moving types took "
+ << std::chrono::duration_cast<std::chrono::milliseconds>(afterMove -
start)
.count()
<< " ms\n";
+ std::cerr << "Validating subtyping took "
+ << std::chrono::duration_cast<std::chrono::milliseconds>(end -
+ afterMove)
+ .count()
+ << " ms\n";
#endif
return heapTypes;
diff --git a/test/example/type-builder-nominal.cpp b/test/example/type-builder-nominal.cpp
index 8da80d4bb..bd0791ae9 100644
--- a/test/example/type-builder-nominal.cpp
+++ b/test/example/type-builder-nominal.cpp
@@ -252,6 +252,148 @@ void test_recursive() {
}
}
+void test_subtypes() {
+ std::cout << ";; Test subtyping\n";
+
+ auto LUB = [&](HeapType a, HeapType b) {
+ Type refA = Type(a, Nullable);
+ Type refB = Type(b, Nullable);
+ Type lubAB = Type::getLeastUpperBound(refA, refB);
+ Type lubBA = Type::getLeastUpperBound(refB, refA);
+ assert(lubAB == lubBA);
+ assert(lubAB != Type::none);
+ HeapType lub = lubAB.getHeapType();
+ assert(Type::hasLeastUpperBound(refA, refB));
+ assert(Type::hasLeastUpperBound(refB, refA));
+ assert(Type::isSubType(refA, lubAB));
+ assert(Type::isSubType(refB, lubAB));
+ assert(HeapType::isSubType(a, lub));
+ assert(HeapType::isSubType(b, lub));
+ assert(lub == a || !HeapType::isSubType(lub, a));
+ assert(lub == b || !HeapType::isSubType(lub, b));
+ return lub;
+ };
+
+ {
+ // Basic Types
+ for (auto other : {HeapType::func,
+ HeapType::ext,
+ HeapType::any,
+ HeapType::eq,
+ HeapType::i31,
+ HeapType::data}) {
+ assert(LUB(HeapType::any, other) == HeapType::any);
+ }
+ assert(LUB(HeapType::eq, HeapType::func) == HeapType::any);
+ assert(LUB(HeapType::i31, HeapType::data) == HeapType::eq);
+ }
+
+ {
+ // Identity
+ std::vector<HeapType> built;
+ {
+ TypeBuilder builder(3);
+ builder[0] = Signature(Type::none, Type::none);
+ builder[1] = Struct{};
+ builder[2] = Array(Field(Type::i32, Mutable));
+ built = builder.build();
+ }
+ assert(LUB(built[0], built[0]) == built[0]);
+ assert(LUB(built[1], built[1]) == built[1]);
+ assert(LUB(built[2], built[2]) == built[2]);
+ }
+
+ {
+ // No subtype declarations mean no subtypes
+ std::vector<HeapType> built;
+ {
+ TypeBuilder builder(5);
+ Type structRef0 = builder.getTempRefType(builder[0], Nullable);
+ Type structRef1 = builder.getTempRefType(builder[1], Nullable);
+ builder[0] = Struct{};
+ builder[1] = Struct{};
+ builder[2] = Signature(Type::none, Type::anyref);
+ builder[3] = Signature(Type::none, structRef0);
+ builder[4] = Signature(Type::none, structRef1);
+ built = builder.build();
+ }
+ assert(LUB(built[0], built[1]) == HeapType::data);
+ assert(LUB(built[2], built[3]) == HeapType::func);
+ assert(LUB(built[2], built[4]) == HeapType::func);
+ }
+
+ {
+ // Subtyping of identical types
+ std::vector<HeapType> built;
+ {
+ TypeBuilder builder(6);
+ builder[0].subTypeOf(builder[1]);
+ builder[2].subTypeOf(builder[3]);
+ builder[4].subTypeOf(builder[5]);
+ builder[0] =
+ Struct({Field(Type::i32, Mutable), Field(Type::anyref, Mutable)});
+ builder[1] =
+ Struct({Field(Type::i32, Mutable), Field(Type::anyref, Mutable)});
+ builder[2] = Signature(Type::i32, Type::anyref);
+ builder[3] = Signature(Type::i32, Type::anyref);
+ builder[4] = Array(Field(Type::i32, Mutable));
+ builder[5] = Array(Field(Type::i32, Mutable));
+ built = builder.build();
+ }
+ assert(LUB(built[0], built[1]) == built[1]);
+ assert(LUB(built[2], built[3]) == built[3]);
+ assert(LUB(built[4], built[5]) == built[5]);
+ }
+
+ {
+ // Width subtyping
+ std::vector<HeapType> built;
+ {
+ TypeBuilder builder(2);
+ builder[0] = Struct({Field(Type::i32, Immutable)});
+ builder[1] =
+ Struct({Field(Type::i32, Immutable), Field(Type::i32, Immutable)});
+ builder[1].subTypeOf(builder[0]);
+ built = builder.build();
+ }
+ assert(LUB(built[1], built[0]) == built[0]);
+ }
+
+ {
+ // Depth subtyping
+ std::vector<HeapType> built;
+ {
+ TypeBuilder builder(2);
+ builder[0] = Struct({Field(Type::anyref, Immutable)});
+ builder[1] = Struct({Field(Type::funcref, Immutable)});
+ builder[1].subTypeOf(builder[0]);
+ built = builder.build();
+ }
+ assert(LUB(built[1], built[0]) == built[0]);
+ }
+
+ {
+ // Mutually recursive subtyping
+ std::vector<HeapType> built;
+ {
+ TypeBuilder builder(4);
+ Type a = builder.getTempRefType(builder[0], Nullable);
+ Type b = builder.getTempRefType(builder[1], Nullable);
+ Type c = builder.getTempRefType(builder[2], Nullable);
+ Type d = builder.getTempRefType(builder[3], Nullable);
+ builder[1].subTypeOf(builder[0]);
+ builder[3].subTypeOf(builder[2]);
+ builder[0] = Struct({Field(c, Immutable)});
+ builder[1] = Struct({Field(d, Immutable)});
+ builder[2] = Struct({Field(a, Immutable)});
+ builder[3] = Struct({Field(b, Immutable)});
+ built = builder.build();
+ }
+ assert(LUB(built[0], built[1]) == built[0]);
+ assert(LUB(built[2], built[3]) == built[2]);
+ }
+}
+
int main() {
wasm::setTypeSystem(TypeSystem::Nominal);
@@ -262,5 +404,6 @@ int main() {
test_canonicalization();
test_signatures(i == 1);
test_recursive();
+ test_subtypes();
}
}
diff --git a/test/example/type-builder-nominal.txt b/test/example/type-builder-nominal.txt
index fd3c4cddb..2ecb08c89 100644
--- a/test/example/type-builder-nominal.txt
+++ b/test/example/type-builder-nominal.txt
@@ -44,6 +44,7 @@ After building types:
(func (result (ref null ...1)))
(func (result (ref null (func (result ...1)))))
+;; Test subtyping
;; Test TypeBuilder
Before setting heap types:
(ref $sig) => (ref [T](func))
@@ -90,3 +91,4 @@ After building types:
(func (result (ref null ...1)))
(func (result (ref null (func (result ...1)))))
+;; Test subtyping