summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2020-12-15 14:37:52 -0800
committerGitHub <noreply@github.com>2020-12-15 14:37:52 -0800
commitb50bdf3491dcd8f1c379df6d64bc8fbc10518326 (patch)
tree89e6bd0e94cdee3e090eb89f645580945d08fe01 /src
parenteff70e05b38e4e86ccbae169dbd400711f2fd561 (diff)
downloadbinaryen-b50bdf3491dcd8f1c379df6d64bc8fbc10518326.tar.gz
binaryen-b50bdf3491dcd8f1c379df6d64bc8fbc10518326.tar.bz2
binaryen-b50bdf3491dcd8f1c379df6d64bc8fbc10518326.zip
[GC] Fully implement RTT semantics (#3441)
This adds info to RTT literals so that they can represent the chain of rtt.canon/sub commands that generated them, and it adds an internal RTT for each GC allocation (array or struct). The approach taken is to simply store the full chain of rtt.sub types that led to each literal. This is not efficient, but it is simple and seems sufficient for the semantics described in the GC MVP doc - specifically, only the types matter, in that repeated executions of rtt.canon/sub on the same inputs yield equal outputs. This PR fixes a bunch of minor issues regarding that, enough to allow testing of the optimization and execution of ref.test/cast.
Diffstat (limited to 'src')
-rw-r--r--src/ir/global-utils.h5
-rw-r--r--src/ir/properties.h7
-rw-r--r--src/literal.h55
-rw-r--r--src/passes/Precompute.cpp4
-rw-r--r--src/wasm-interpreter.h97
-rw-r--r--src/wasm/literal.cpp86
-rw-r--r--src/wasm/wasm-type.cpp9
7 files changed, 227 insertions, 36 deletions
diff --git a/src/ir/global-utils.h b/src/ir/global-utils.h
index 7dc4c6af3..053eb0456 100644
--- a/src/ir/global-utils.h
+++ b/src/ir/global-utils.h
@@ -56,13 +56,14 @@ getGlobalInitializedToImport(Module& wasm, Name module, Name base) {
inline bool canInitializeGlobal(const Expression* curr) {
if (auto* tuple = curr->dynCast<TupleMake>()) {
for (auto* op : tuple->operands) {
- if (!Properties::isSingleConstantExpression(op) && !op->is<GlobalGet>()) {
+ if (!canInitializeGlobal(op)) {
return false;
}
}
return true;
}
- return Properties::isSingleConstantExpression(curr) || curr->is<GlobalGet>();
+ return Properties::isSingleConstantExpression(curr) ||
+ curr->is<GlobalGet>() || curr->is<RttCanon>() || curr->is<RttSub>();
}
} // namespace GlobalUtils
diff --git a/src/ir/properties.h b/src/ir/properties.h
index 485a37e22..b60c74d98 100644
--- a/src/ir/properties.h
+++ b/src/ir/properties.h
@@ -80,10 +80,13 @@ inline bool isNamedControlFlow(Expression* curr) {
return false;
}
+// A constant expression is something like a Const: it has a fixed value known
+// at compile time, and passes that propagate constants can try to propagate it.
+// Constant expressions are also allowed in global initializers in wasm.
+// TODO: look into adding more things here like RttCanon.
inline bool isSingleConstantExpression(const Expression* curr) {
return curr->is<Const>() || curr->is<RefNull>() || curr->is<RefFunc>() ||
- (curr->is<I31New>() && curr->cast<I31New>()->value->is<Const>()) ||
- curr->is<RttCanon>() || curr->is<RttSub>();
+ (curr->is<I31New>() && curr->cast<I31New>()->value->is<Const>());
}
inline bool isConstantExpression(const Expression* curr) {
diff --git a/src/literal.h b/src/literal.h
index 8a1829d10..bf8937533 100644
--- a/src/literal.h
+++ b/src/literal.h
@@ -31,6 +31,10 @@ namespace wasm {
class Literals;
struct ExceptionPackage;
+struct GCData;
+// Subclass the vector type so that this is not easily confused with a vector of
+// types (which could be confusing on the Literal constructor).
+struct RttSupers : std::vector<Type> {};
class Literal {
// store only integers, whose bits are deterministic. floats
@@ -47,7 +51,20 @@ class Literal {
// we store the referred data as a Literals object (which is natural for an
// Array, and for a Struct, is just the fields in order). The type is used
// to indicate whether this is a Struct or an Array, and of what type.
- std::shared_ptr<Literals> gcData;
+ std::shared_ptr<GCData> gcData;
+ // RTT values are "structural" in that the MVP doc says that multiple
+ // invocations of ref.canon return things that are observably identical, and
+ // the same is true for ref.sub. That is, what matters is the types; there
+ // is no unique identifier created in each ref.canon/sub. To track the
+ // types, we maintain a simple vector of the supertypes. Thus, an rtt.canon
+ // of type A will have an empty vector; an rtt.sub of type B of that initial
+ // canon would have a vector of size 1 containing A; a subsequent rtt.sub
+ // would have A, B, and so forth.
+ // (This encoding is very inefficient and not at all what a production VM
+ // would do, but it is simple.)
+ // The unique_ptr here is to avoid increasing the size of the union as well
+ // as the Literal class itself.
+ std::unique_ptr<RttSupers> rttSupers;
// TODO: Literals of type `externref` can only be `null` currently but we
// will need to represent extern values eventually, to
// 1) run the spec tests and fuzzer with reference types enabled and
@@ -79,18 +96,11 @@ public:
explicit Literal(Name func, Type type) : func(func), type(type) {}
explicit Literal(std::unique_ptr<ExceptionPackage>&& exn)
: exn(std::move(exn)), type(Type::exnref) {}
- explicit Literal(std::shared_ptr<Literals> gcData, Type type)
- : gcData(gcData), type(type) {}
+ explicit Literal(std::shared_ptr<GCData> gcData, Type type);
+ explicit Literal(std::unique_ptr<RttSupers>&& rttSupers, Type type);
Literal(const Literal& other);
Literal& operator=(const Literal& other);
- ~Literal() {
- if (type.isException()) {
- exn.~unique_ptr();
- }
- if (type.isStruct() || type.isArray()) {
- gcData.~shared_ptr();
- }
- }
+ ~Literal();
bool isConcrete() const { return type != Type::none; }
bool isNone() const { return type == Type::none; }
@@ -290,7 +300,8 @@ public:
return func;
}
ExceptionPackage getExceptionPackage() const;
- std::shared_ptr<Literals> getGCData() const;
+ std::shared_ptr<GCData> getGCData() const;
+ const RttSupers& getRttSupers() const;
// careful!
int32_t* geti32Ptr() {
@@ -629,6 +640,11 @@ public:
Literal widenHighUToVecI32x4() const;
Literal swizzleVec8x16(const Literal& other) const;
+ // Checks if an RTT value is a sub-rtt of another, that is, whether GC data
+ // with this object's RTT can be successfuly cast using the other RTT
+ // according to the wasm rules for that.
+ bool isSubRtt(const Literal& other) const;
+
private:
Literal addSatSI8(const Literal& other) const;
Literal addSatUI8(const Literal& other) const;
@@ -686,6 +702,14 @@ std::ostream& operator<<(std::ostream& o, wasm::Literal literal);
std::ostream& operator<<(std::ostream& o, wasm::Literals literals);
std::ostream& operator<<(std::ostream& o, const ExceptionPackage& exn);
+// A GC Struct or Array is a set of values with a run-time type saying what it
+// is.
+struct GCData {
+ Literal rtt;
+ Literals values;
+ GCData(Literal rtt, Literals values) : rtt(rtt), values(values) {}
+};
+
} // namespace wasm
namespace std {
@@ -748,7 +772,12 @@ template<> struct hash<wasm::Literal> {
} else if (a.type.isRef()) {
return hashRef();
} else if (a.type.isRtt()) {
- WASM_UNREACHABLE("TODO: rtt literals");
+ const auto& supers = a.getRttSupers();
+ wasm::rehash(digest, supers.size());
+ for (auto super : supers) {
+ wasm::rehash(digest, super.getID());
+ }
+ return digest;
}
WASM_UNREACHABLE("unexpected type");
}
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index 355af7ed0..dceb45fb2 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -229,6 +229,10 @@ private:
if (curr->type.isRef()) {
return Flow(NONCONSTANT_FLOW);
}
+ // Don't try to precompute an Rtt. TODO figure out when that would be safe
+ if (curr->type.isRtt()) {
+ return Flow(NONCONSTANT_FLOW);
+ }
try {
return PrecomputingExpressionRunner(
getModule(), getValues, replaceExpression)
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index dbc14de5f..223d02d87 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1385,13 +1385,80 @@ public:
NOTE_EVAL1(value);
return Literal(value.geti31(curr->signed_));
}
+
+ // Helper for ref.test, ref.cast, and br_on_cast, which share almost all their
+ // logic except for what they return.
+ struct Cast {
+ enum Outcome {
+ // We took a break before doing anything.
+ Break,
+ // The input was null.
+ Null,
+ // The cast succeeded.
+ Success,
+ // The cast failed.
+ Failure
+ } outcome;
+
+ Flow breaking;
+ Literal originalRef;
+ Literal castRef;
+ };
+
+ template<typename T> Cast doCast(T* curr) {
+ Cast cast;
+ Flow ref = this->visit(curr->ref);
+ if (ref.breaking()) {
+ cast.outcome = cast.Break;
+ cast.breaking = ref;
+ return cast;
+ }
+ Flow rtt = this->visit(curr->rtt);
+ if (rtt.breaking()) {
+ cast.outcome = cast.Break;
+ cast.breaking = rtt;
+ return cast;
+ }
+ cast.originalRef = ref.getSingleValue();
+ auto gcData = cast.originalRef.getGCData();
+ if (!gcData) {
+ cast.outcome = cast.Null;
+ return cast;
+ }
+ auto refRtt = gcData->rtt;
+ auto intendedRtt = rtt.getSingleValue();
+ if (!refRtt.isSubRtt(intendedRtt)) {
+ cast.outcome = cast.Failure;
+ } else {
+ cast.outcome = cast.Success;
+ cast.castRef =
+ Literal(gcData, Type(intendedRtt.type.getHeapType(), Nullable));
+ }
+ return cast;
+ }
+
Flow visitRefTest(RefTest* curr) {
NOTE_ENTER("RefTest");
- WASM_UNREACHABLE("TODO (gc): ref.test");
+ auto cast = doCast(curr);
+ if (cast.outcome == cast.Break) {
+ return cast.breaking;
+ }
+ return Literal(int32_t(cast.outcome == cast.Success));
}
Flow visitRefCast(RefCast* curr) {
NOTE_ENTER("RefCast");
- WASM_UNREACHABLE("TODO (gc): ref.cast");
+ auto cast = doCast(curr);
+ if (cast.outcome == cast.Break) {
+ return cast.breaking;
+ }
+ if (cast.outcome == cast.Null) {
+ return Literal::makeNull(curr->type);
+ }
+ if (cast.outcome == cast.Failure) {
+ trap("cast error");
+ }
+ assert(cast.outcome == cast.Success);
+ return cast.castRef;
}
Flow visitBrOnCast(BrOnCast* curr) {
NOTE_ENTER("BrOnCast");
@@ -1403,7 +1470,10 @@ public:
if (parent.breaking()) {
return parent;
}
- return Literal(curr->type);
+ auto parentValue = parent.getSingleValue();
+ auto newSupers = std::make_unique<RttSupers>(parentValue.getRttSupers());
+ newSupers->push_back(parentValue.type);
+ return Literal(std::move(newSupers), curr->type);
}
Flow visitStructNew(StructNew* curr) {
NOTE_ENTER("StructNew");
@@ -1424,7 +1494,8 @@ public:
data[i] = value.getSingleValue();
}
}
- return Flow(Literal(std::make_shared<Literals>(data), curr->type));
+ return Flow(Literal(std::make_shared<GCData>(rtt.getSingleValue(), data),
+ curr->type));
}
Flow visitStructGet(StructGet* curr) {
NOTE_ENTER("StructGet");
@@ -1437,7 +1508,7 @@ public:
trap("null ref");
}
auto field = curr->ref->type.getHeapType().getStruct().fields[curr->index];
- return extendForPacking((*data)[curr->index], field, curr->signed_);
+ return extendForPacking(data->values[curr->index], field, curr->signed_);
}
Flow visitStructSet(StructSet* curr) {
NOTE_ENTER("StructSet");
@@ -1454,7 +1525,8 @@ public:
trap("null ref");
}
auto field = curr->ref->type.getHeapType().getStruct().fields[curr->index];
- (*data)[curr->index] = truncateForPacking(value.getSingleValue(), field);
+ data->values[curr->index] =
+ truncateForPacking(value.getSingleValue(), field);
return Flow();
}
Flow visitArrayNew(ArrayNew* curr) {
@@ -1484,7 +1556,8 @@ public:
data[i] = value;
}
}
- return Flow(Literal(std::make_shared<Literals>(data), curr->type));
+ return Flow(Literal(std::make_shared<GCData>(rtt.getSingleValue(), data),
+ curr->type));
}
Flow visitArrayGet(ArrayGet* curr) {
NOTE_ENTER("ArrayGet");
@@ -1501,11 +1574,11 @@ public:
trap("null ref");
}
Index i = index.getSingleValue().geti32();
- if (i >= data->size()) {
+ if (i >= data->values.size()) {
trap("array oob");
}
auto field = curr->ref->type.getHeapType().getArray().element;
- return extendForPacking((*data)[i], field, curr->signed_);
+ return extendForPacking(data->values[i], field, curr->signed_);
}
Flow visitArraySet(ArraySet* curr) {
NOTE_ENTER("ArraySet");
@@ -1526,11 +1599,11 @@ public:
trap("null ref");
}
Index i = index.getSingleValue().geti32();
- if (i >= data->size()) {
+ if (i >= data->values.size()) {
trap("array oob");
}
auto field = curr->ref->type.getHeapType().getArray().element;
- (*data)[i] = truncateForPacking(value.getSingleValue(), field);
+ data->values[i] = truncateForPacking(value.getSingleValue(), field);
return Flow();
}
Flow visitArrayLen(ArrayLen* curr) {
@@ -1543,7 +1616,7 @@ public:
if (!data) {
trap("null ref");
}
- return Literal(int32_t(data->size()));
+ return Literal(int32_t(data->values.size()));
}
virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); }
diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp
index 9bdf886b3..f526cebf3 100644
--- a/src/wasm/literal.cpp
+++ b/src/wasm/literal.cpp
@@ -37,6 +37,11 @@ Literal::Literal(Type type) : type(type) {
assert(type != Type::unreachable && (!type.isRef() || type.isNullable()));
if (type.isException()) {
new (&exn) std::unique_ptr<ExceptionPackage>();
+ } else if (isGCData()) {
+ new (&gcData) std::shared_ptr<GCData>();
+ } else if (type.isRtt()) {
+ // Allocate a new RttSupers (with no data).
+ new (&rttSupers) auto(std::make_unique<RttSupers>());
} else {
memset(&v128, 0, 16);
}
@@ -47,6 +52,19 @@ Literal::Literal(const uint8_t init[16]) : type(Type::v128) {
memcpy(&v128, init, 16);
}
+Literal::Literal(std::shared_ptr<GCData> gcData, Type type)
+ : gcData(gcData), type(type) {
+ // Null data is only allowed if nullable.
+ assert(gcData || type.isNullable());
+ // The type must be a proper type for GC data.
+ assert(isGCData());
+}
+
+Literal::Literal(std::unique_ptr<RttSupers>&& rttSupers, Type type)
+ : rttSupers(std::move(rttSupers)), type(type) {
+ assert(type.isRtt());
+}
+
Literal::Literal(const Literal& other) : type(other.type) {
if (type.isException()) {
// Avoid calling the destructor on an uninitialized value
@@ -56,13 +74,12 @@ Literal::Literal(const Literal& other) : type(other.type) {
new (&exn) std::unique_ptr<ExceptionPackage>();
}
} else if (other.isGCData()) {
- // Avoid calling the destructor on an uninitialized value
- new (&gcData) std::shared_ptr<Literals>(other.gcData);
+ new (&gcData) std::shared_ptr<GCData>(other.gcData);
} else if (type.isFunction()) {
func = other.func;
} else if (type.isRtt()) {
- // Nothing to do: Rtts help JITs optimize, but are not used in the
- // interpreter yet, and they are opaque to the wasm itself.
+ // Allocate a new RttSupers with a copy of the other's data.
+ new (&rttSupers) auto(std::make_unique<RttSupers>(*other.rttSupers));
} else {
TODO_SINGLE_COMPOUND(type);
switch (type.getBasic()) {
@@ -92,6 +109,21 @@ Literal::Literal(const Literal& other) : type(other.type) {
}
}
+Literal::~Literal() {
+ if (type.isException()) {
+ exn.~unique_ptr();
+ } else if (isGCData()) {
+ gcData.~shared_ptr();
+ } else if (type.isRtt()) {
+ rttSupers.~unique_ptr();
+ } else if (type.isFunction()) {
+ // Nothing special to do.
+ } else {
+ // Basic types need no special handling.
+ assert(type.isBasic());
+ }
+}
+
Literal& Literal::operator=(const Literal& other) {
if (this != &other) {
this->~Literal();
@@ -168,6 +200,8 @@ Literal Literal::makeZero(Type type) {
} else {
return makeNull(type);
}
+ } else if (type.isRtt()) {
+ return Literal(type);
} else {
return makeFromInt32(0, type);
}
@@ -195,11 +229,16 @@ ExceptionPackage Literal::getExceptionPackage() const {
return *exn;
}
-std::shared_ptr<Literals> Literal::getGCData() const {
+std::shared_ptr<GCData> Literal::getGCData() const {
assert(isGCData());
return gcData;
}
+const RttSupers& Literal::getRttSupers() const {
+ assert(type.isRtt());
+ return *rttSupers;
+}
+
Literal Literal::castToF32() {
assert(type == Type::i32);
Literal ret(Type::f32);
@@ -333,7 +372,7 @@ bool Literal::operator==(const Literal& other) const {
} else if (type.isRef()) {
return compareRef();
} else if (type.isRtt()) {
- WASM_UNREACHABLE("TODO: rtt literals");
+ return *rttSupers == *other.rttSupers;
}
WASM_UNREACHABLE("unexpected type");
}
@@ -442,10 +481,16 @@ std::ostream& operator<<(std::ostream& o, Literal literal) {
} else if (literal.isGCData()) {
auto data = literal.getGCData();
if (data) {
- o << "[ref " << *data << ']';
+ o << "[ref " << data->rtt << ' ' << data->values << ']';
} else {
o << "[ref null " << literal.type << ']';
}
+ } else if (literal.type.isRtt()) {
+ o << "[rtt ";
+ for (Type super : literal.getRttSupers()) {
+ o << super << " :> ";
+ }
+ o << literal.type << ']';
} else {
TODO_SINGLE_COMPOUND(literal.type);
switch (literal.type.getBasic()) {
@@ -2362,4 +2407,31 @@ Literal Literal::swizzleVec8x16(const Literal& other) const {
return Literal(result);
}
+bool Literal::isSubRtt(const Literal& other) const {
+ assert(type.isRtt() && other.type.isRtt());
+ // For this literal to be a sub-rtt of the other rtt, the supers must be a
+ // superset. That is, if other is a->b->c then we should be a->b->c as well
+ // with possibly ->d->.. added. The rttSupers array represents those chains,
+ // but only the supers, which means the last item in the chain is simply the
+ // type of the literal.
+ const auto& supers = getRttSupers();
+ const auto& otherSupers = other.getRttSupers();
+ if (otherSupers.size() > supers.size()) {
+ return false;
+ }
+ for (Index i = 0; i < otherSupers.size(); i++) {
+ if (supers[i] != otherSupers[i]) {
+ return false;
+ }
+ }
+ // If we have more supers than other, compare that extra super. Otherwise,
+ // we have the same amount of supers, and must be completely identical to
+ // other.
+ if (otherSupers.size() < supers.size()) {
+ return other.type == supers[otherSupers.size()];
+ } else {
+ return other.type == type;
+ }
+}
+
} // namespace wasm
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index 95d351ba2..2eef7eb3d 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -622,6 +622,15 @@ bool Type::isSubType(Type left, Type right) {
}
return true;
}
+ if (left.isRtt() && right.isRtt()) {
+ auto leftRtt = left.getRtt();
+ auto rightRtt = right.getRtt();
+ // (rtt n $x) is a subtype of (rtt $x), that is, if the only difference in
+ // information is that the left side specifies a depth while the right side
+ // allows any depth.
+ return leftRtt.heapType == rightRtt.heapType && leftRtt.hasDepth() &&
+ !rightRtt.hasDepth();
+ }
return false;
}