summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Thomas Lively <7121787+tlively@users.noreply.github.com> 2021-02-18 21:48:19 -0800
committer: GitHub <noreply@github.com> 2021-02-18 21:48:19 -0800
commit: b6c094c8760b3c3e7ffbc54c46c6329b22d88cfe (patch)
tree: 7174a70f0a73974b3b472eb0a18d175abf7e9b5f
parent: 22fc60d86538a6111f7b953fd70362ee73dce7d8 (diff)
download: binaryen-b6c094c8760b3c3e7ffbc54c46c6329b22d88cfe.tar.gz
binaryen-b6c094c8760b3c3e7ffbc54c46c6329b22d88cfe.tar.bz2
binaryen-b6c094c8760b3c3e7ffbc54c46c6329b22d88cfe.zip
Support type uses before definitions in text parser (#3584)
Traverses the module to find type definitions and uses a TypeBuilder to construct the corresponding HeapTypes rather than constructing them directly. This allows types to be used in the definitions of other types before they themselves are defined, which is an important step toward supporting recursive types. After this PR, no further text parsing changes will be necessary to support recursive types. Beyond allowing types to be used before their definitions, this PR also makes a couple of incidental changes to the parser's behavior. First, compound heap types can now only be declared in `(type ...)` elements and cannot be declared inline at their site of use. This reduces the flexibility of the parser, but is in line with what the text format spec will probably look like eventually (see https://github.com/WebAssembly/function-references/issues/42). The second change is that `(type ...)` elements are now all parsed before `(func ...)` elements rather than in text order with them, so the type indices will be different and `.wast` files using numeric type indices will be broken. Note, however, that we were already not completely spec-compliant in this regard because we parsed types defined by `(type ...)` and `(func ...)` elements before types defined by the type uses of `call_indirect` instructions.
-rw-r--r-- src/passes/Print.cpp 2
-rw-r--r-- src/wasm-s-parser.h 4
-rw-r--r-- src/wasm/wasm-s-parser.cpp 263
-rw-r--r-- test/lit/forward-declared-types.wast 21
-rw-r--r-- test/spec/old_func.wast 10
-rw-r--r-- test/typed-function-references.wast 14
-rw-r--r-- test/typed-function-references.wast.from-wast 8
-rw-r--r-- test/typed-function-references.wast.fromBinary 8
-rw-r--r-- test/typed-function-references.wast.fromBinary.noDebugInfo 8
9 files changed, 230 insertions, 108 deletions
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index b067e43d5..a75c15534 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -175,7 +175,7 @@ static std::ostream& operator<<(std::ostream& o, const SExprType& sType) {
o << '(';
auto sep = "";
for (const auto& t : type) {
- o << sep << t;
+ o << sep << SExprType(t);
sep = " ";
}
o << ')';
diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h
index bb436bbc4..25efd1fc1 100644
--- a/src/wasm-s-parser.h
+++ b/src/wasm-s-parser.h
@@ -70,6 +70,8 @@ public:
List& list();
Element* operator[](unsigned i);
size_t size() { return list().size(); }
+ List::Iterator begin() { return list().begin(); }
+ List::Iterator end() { return list().end(); }
// string methods
cashew::IString str() const;
@@ -140,6 +142,7 @@ public:
SExpressionWasmBuilder(Module& wasm, Element& module, IRProfile profile);
private:
+ void preParseHeapTypes(Element& module);
// pre-parse types and function definitions, so we know function return types
// before parsing their contents
void preParseFunctionType(Element& s);
@@ -308,7 +311,6 @@ private:
// Parses something like (func ..), (array ..), (struct)
HeapType parseHeapType(Element& s);
- void parseType(Element& s);
void parseEvent(Element& s, bool preParseImport = false);
Function::DebugLocation getDebugLocation(const SourceLocation& loc);
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index 13b76801b..4e9ebcc45 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -351,6 +351,9 @@ SExpressionWasmBuilder::SExpressionWasmBuilder(Module& wasm,
binaryBuilder.read();
return;
}
+
+ preParseHeapTypes(module);
+
Index implementedFunctions = 0;
functionCounter = 0;
for (unsigned j = i; j < module.size(); j++) {
@@ -657,11 +660,193 @@ size_t SExpressionWasmBuilder::parseTypeUse(Element& s,
return parseTypeUse(s, startPos, functionSignature, params);
}
+void SExpressionWasmBuilder::preParseHeapTypes(Element& module) {
+ auto forEachType = [&](auto f) {
+ for (auto* elemPtr : module) {
+ auto& elem = *elemPtr;
+ if (elementStartsWith(elem, TYPE)) {
+ f(elem);
+ }
+ }
+ };
+
+ size_t numTypes = 0;
+ forEachType([&](Element& elem) {
+ // Map type names to indices
+ if (elem[1]->dollared()) {
+ std::string name = elem[1]->c_str();
+ if (!typeIndices.insert({name, numTypes}).second) {
+ throw ParseException("duplicate function type", elem.line, elem.col);
+ }
+ }
+ ++numTypes;
+ });
+
+ TypeBuilder builder(numTypes);
+
+ auto parseRefType = [&](Element& elem) -> Type {
+ // '(' 'ref' 'null'? ht ')'
+ bool nullable = elem[1]->isStr() && *elem[1] == NULL_;
+ auto& referent = nullable ? *elem[2] : *elem[1];
+ const char* name = referent.c_str();
+ if (referent.dollared()) {
+ // TODO: Support non-nullable types
+ return builder.getTempRefType(typeIndices[name], Nullable);
+ } else if (String::isNumber(name)) {
+ // TODO: Support non-nullable types
+ return builder.getTempRefType(atoi(name), Nullable);
+ } else {
+ // TODO: Support non-nullable types
+ return Type(stringToHeapType(name), Nullable);
+ }
+ };
+
+ auto parseRttType = [&](Element& elem) -> Type {
+ // '(' 'rtt' depth? typeidx ')'
+ uint32_t depth;
+ Element* idx;
+ switch (elem.size()) {
+ default:
+ throw ParseException(
+ "unexpected number of rtt parameters", elem.line, elem.col);
+ case 2:
+ depth = Rtt::NoDepth;
+ idx = elem[1];
+ break;
+ case 3:
+ if (!String::isNumber(elem[1]->c_str())) {
+ throw ParseException(
+ "invalid rtt depth", elem[1]->line, elem[1]->col);
+ }
+ depth = atoi(elem[1]->c_str());
+ idx = elem[2];
+ break;
+ }
+ if (idx->dollared()) {
+ return builder.getTempRttType(typeIndices[idx->c_str()], depth);
+ } else if (String::isNumber(idx->c_str())) {
+ return builder.getTempRttType(atoi(idx->c_str()), depth);
+ } else {
+ throw ParseException("invalid type index", idx->line, idx->col);
+ }
+ };
+
+ auto parseValType = [&](Element& elem) {
+ if (elem.isStr()) {
+ return stringToType(elem.c_str());
+ } else if (*elem[0] == REF) {
+ return parseRefType(elem);
+ } else if (*elem[0] == RTT) {
+ return parseRttType(elem);
+ } else {
+ throw ParseException("unknown valtype kind", elem[0]->line, elem[0]->col);
+ }
+ };
+
+ auto parseParams = [&](Element& elem) {
+ auto it = ++elem.begin();
+ if (it != elem.end() && (*it)->dollared()) {
+ ++it;
+ }
+ std::vector<Type> params;
+ for (auto end = elem.end(); it != end; ++it) {
+ params.push_back(parseValType(**it));
+ }
+ return params;
+ };
+
+ auto parseResults = [&](Element& elem) {
+ std::vector<Type> results;
+ for (auto it = ++elem.begin(); it != elem.end(); ++it) {
+ results.push_back(parseValType(**it));
+ }
+ return results;
+ };
+
+ auto parseSignatureDef = [&](Element& elem) {
+ // '(' 'func' vec(param) vec(result) ')'
+ // param ::= '(' 'param' id? valtype ')'
+ // result ::= '(' 'result' valtype ')'
+ std::vector<Type> params, results;
+ for (auto it = ++elem.begin(), end = elem.end(); it != end; ++it) {
+ Element& curr = **it;
+ if (elementStartsWith(curr, PARAM)) {
+ auto newParams = parseParams(curr);
+ params.insert(params.end(), newParams.begin(), newParams.end());
+ } else if (elementStartsWith(curr, RESULT)) {
+ auto newResults = parseResults(curr);
+ results.insert(results.end(), newResults.begin(), newResults.end());
+ }
+ }
+ return Signature(builder.getTempTupleType(params),
+ builder.getTempTupleType(results));
+ };
+
+ auto parseField = [&](Element* elem) {
+ Mutability mutable_ = Immutable;
+ // elem is a list, containing either
+ // TYPE
+ // or
+ // (field TYPE)
+ // or
+ // (field $name TYPE)
+ Name name;
+ if (elementStartsWith(elem, FIELD)) {
+ if (elem->size() == 3) {
+ name = (*elem)[1]->str();
+ }
+ elem = (*elem)[elem->size() - 1];
+ }
+ // The element may also be (mut (..)).
+ if (elementStartsWith(elem, MUT)) {
+ mutable_ = Mutable;
+ elem = (*elem)[1];
+ }
+ if (elem->isStr()) {
+ // elem is a simple string name like "i32". It can be a normal wasm type,
+ // or one of the special types only available in fields.
+ if (*elem == I8) {
+ return Field(Field::i8, mutable_, name);
+ } else if (*elem == I16) {
+ return Field(Field::i16, mutable_, name);
+ }
+ }
+ // Otherwise it's an arbitrary type.
+ return Field(parseValType(*elem), mutable_, name);
+ };
+
+ auto parseStructDef = [&](Element& elem) {
+ FieldList fields;
+ for (auto it = ++elem.begin(); it != elem.end(); ++it) {
+ fields.emplace_back(parseField(*it));
+ }
+ return Struct(fields);
+ };
+
+ auto parseArrayDef = [&](Element& elem) {
+ return Array(parseField(elem[1]));
+ };
+
+ size_t index = 0;
+ forEachType([&](Element& elem) {
+ Element& def = elem[1]->dollared() ? *elem[2] : *elem[1];
+ Element& kind = *def[0];
+ if (kind == FUNC) {
+ builder.setHeapType(index++, parseSignatureDef(def));
+ } else if (kind == STRUCT) {
+ builder.setHeapType(index++, parseStructDef(def));
+ } else if (kind == ARRAY) {
+ builder.setHeapType(index++, parseArrayDef(def));
+ } else {
+ throw ParseException("unknown heaptype kind", kind.line, kind.col);
+ }
+ });
+
+ types = builder.build();
+}
+
void SExpressionWasmBuilder::preParseFunctionType(Element& s) {
IString id = s[0]->str();
- if (id == TYPE) {
- return parseType(s);
- }
if (id != FUNC) {
return;
}
@@ -2935,81 +3120,9 @@ HeapType SExpressionWasmBuilder::parseHeapType(Element& s) {
return stringToHeapType(str, /* prefix = */ false);
}
}
- // It's a list.
- if (*s[0] == FUNC) {
- std::vector<Type> params;
- std::vector<Type> results;
- for (size_t k = 1; k < s.size(); k++) {
- Element& curr = *s[k];
- if (elementStartsWith(curr, PARAM)) {
- auto newParams = parseParamOrLocal(curr);
- params.insert(params.end(), newParams.begin(), newParams.end());
- } else if (elementStartsWith(curr, RESULT)) {
- auto newResults = parseResults(curr);
- results.insert(results.end(), newResults.begin(), newResults.end());
- }
- }
- return Signature(Type(params), Type(results));
- }
- // It's a struct or an array.
- auto parseField = [&](Element* t) {
- Mutability mutable_ = Immutable;
- // t is a list, containing either
- // TYPE
- // or
- // (field TYPE)
- // or
- // (field $name TYPE)
- Name name;
- if (elementStartsWith(t, FIELD)) {
- if (t->size() == 3) {
- name = (*t)[1]->str();
- }
- t = (*t)[t->size() - 1];
- }
- // The element may also be (mut (..)).
- if (elementStartsWith(t, MUT)) {
- mutable_ = Mutable;
- t = (*t)[1];
- }
- if (t->isStr()) {
- // t is a simple string name like "i32". It can be a normal wasm type, or
- // one of the special types only available in fields.
- if (*t == I8) {
- return Field(Field::i8, mutable_, name);
- } else if (*t == I16) {
- return Field(Field::i16, mutable_, name);
- }
- }
- // Otherwise it's an arbitrary type.
- return Field(elementToType(*t), mutable_, name);
- };
- if (elementStartsWith(s, STRUCT)) {
- FieldList fields;
- for (size_t k = 1; k < s.size(); k++) {
- fields.emplace_back(parseField(s[k]));
- }
- return Struct(fields);
- }
- if (elementStartsWith(s, ARRAY)) {
- return Array(parseField(s[1]));
- }
throw ParseException("invalid heap type", s.line, s.col);
}
-void SExpressionWasmBuilder::parseType(Element& s) {
- size_t i = 1;
- if (s[i]->isStr()) {
- std::string name = s[i]->str().str;
- if (typeIndices.find(name) != typeIndices.end()) {
- throw ParseException("duplicate function type", s.line, s.col);
- }
- typeIndices[name] = types.size();
- i++;
- }
- types.emplace_back(parseHeapType(*s[i]));
-}
-
void SExpressionWasmBuilder::parseEvent(Element& s, bool preParseImport) {
auto event = make_unique<Event>();
size_t i = 1;
diff --git a/test/lit/forward-declared-types.wast b/test/lit/forward-declared-types.wast
new file mode 100644
index 000000000..e9b66175b
--- /dev/null
+++ b/test/lit/forward-declared-types.wast
@@ -0,0 +1,21 @@
+;; Test that types can be used before they are defined
+
+;; RUN: wasm-opt %s -all -S -o - | filecheck %s
+
+;; CHECK: (type $none_=>_none (func))
+;; CHECK: (type $[rtt_2_$none_=>_none] (array (rtt 2 $none_=>_none)))
+;; CHECK: (type ${ref?|[rtt_2_$none_=>_none]|_ref?|none_->_none|} (struct (field (ref null $[rtt_2_$none_=>_none])) (field (ref null $none_=>_none))))
+;; CHECK: (type $none_=>_ref?|{ref?|[rtt_2_$none_=>_none]|_ref?|none_->_none|}| (func (result (ref null ${ref?|[rtt_2_$none_=>_none]|_ref?|none_->_none|}))))
+
+(module
+ (type $struct (struct
+ (field (ref $array))
+ (field (ref null $func))
+ ))
+ (type $array (array (field (rtt 2 $func))))
+ (type $func (func))
+
+ (func (result (ref null $struct))
+ (unreachable)
+ )
+)
diff --git a/test/spec/old_func.wast b/test/spec/old_func.wast
index ba06d3a65..5a2aa8165 100644
--- a/test/spec/old_func.wast
+++ b/test/spec/old_func.wast
@@ -170,19 +170,23 @@
)
(func (export "signature-implicit-reused")
+
+ ;; XXX: Use numeric indices in this test again once we have a
+ ;; spec-compliant text parser. Original comment follows.
+
;; The implicit index 16 in this test depends on the function and
;; type definitions, and may need adapting if they change.
- (call_indirect (type 16)
+ (call_indirect (type 2) ;; XXX: was `(type 16)`
(f64.const 0) (i64.const 0) (f64.const 0) (i64.const 0)
(f64.const 0) (i64.const 0) (f32.const 0) (i32.const 0)
(i32.const 0)
)
- (call_indirect (type 16)
+ (call_indirect (type 2) ;; XXX: was `(type 16)`
(f64.const 0) (i64.const 0) (f64.const 0) (i64.const 0)
(f64.const 0) (i64.const 0) (f32.const 0) (i32.const 0)
(i32.const 2)
)
- (call_indirect (type 16)
+ (call_indirect (type 2) ;; XXX: was `(type 16)`
(f64.const 0) (i64.const 0) (f64.const 0) (i64.const 0)
(f64.const 0) (i64.const 0) (f32.const 0) (i32.const 0)
(i32.const 3)
diff --git a/test/typed-function-references.wast b/test/typed-function-references.wast
index 45120192d..1057d4487 100644
--- a/test/typed-function-references.wast
+++ b/test/typed-function-references.wast
@@ -1,7 +1,10 @@
(module
;; inline ref type in result
- (type $f64_=>_ref_null<_->_eqref> (func (param f64) (result (ref null (func (result eqref))))))
+ (type $_=>_eqref (func (result eqref)))
+ (type $f64_=>_ref_null<_->_eqref> (func (param f64) (result (ref null $_=>_eqref))))
(type $=>eqref (func (result eqref)))
+ (type $=>anyref (func (result anyref)))
+ (type $mixed_results (func (result anyref f32 anyref f32)))
(type $i32-i32 (func (param i32) (result i32)))
@@ -25,20 +28,17 @@
(local.set $f (ref.func $call-ref-more))
(call_ref (i32.const 42) (local.get $f))
)
- (func $ref-in-sig (param $0 f64) (result (ref null (func (result eqref))))
+ (func $ref-in-sig (param $0 f64) (result (ref null $=>eqref))
(ref.null $=>eqref)
)
(func $type-only-in-tuple-local
- (local $x (i32 (ref null (func (result anyref))) f64))
+ (local $x (i32 (ref null $=>anyref) f64))
)
(func $type-only-in-tuple-block
(drop
- (block (result i32 (ref null (func (result anyref f32 anyref f32))) f64)
+ (block (result i32 (ref null $mixed_results) f64)
(unreachable)
)
)
)
- (func $nested-type-only-there (result (ref (func (result (ref (func (param i32 i32 i32 i32 i32)))))))
- (unreachable)
- )
)
diff --git a/test/typed-function-references.wast.from-wast b/test/typed-function-references.wast.from-wast
index f287bdb7c..a31eeecea 100644
--- a/test/typed-function-references.wast.from-wast
+++ b/test/typed-function-references.wast.from-wast
@@ -2,15 +2,12 @@
(type $none_=>_none (func))
(type $i32_=>_i32 (func (param i32) (result i32)))
(type $none_=>_eqref (func (result eqref)))
- (type $i32_i32_i32_i32_i32_=>_none (func (param i32 i32 i32 i32 i32)))
(type $none_=>_i32 (func (result i32)))
(type $none_=>_anyref (func (result anyref)))
(type $none_=>_anyref_f32_anyref_f32 (func (result anyref f32 anyref f32)))
(type $ref?|i32_->_i32|_=>_i32 (func (param (ref null $i32_=>_i32)) (result i32)))
(type $none_=>_i32_ref?|none_->_anyref_f32_anyref_f32|_f64 (func (result i32 (ref null $none_=>_anyref_f32_anyref_f32) f64)))
- (type $none_=>_ref?|i32_i32_i32_i32_i32_->_none| (func (result (ref null $i32_i32_i32_i32_i32_=>_none))))
(type $f64_=>_ref?|none_->_eqref| (func (param f64) (result (ref null $none_=>_eqref))))
- (type $none_=>_ref?|none_->_ref?|i32_i32_i32_i32_i32_->_none|| (func (result (ref null $none_=>_ref?|i32_i32_i32_i32_i32_->_none|))))
(func $call-ref
(call_ref
(ref.func $call-ref)
@@ -53,7 +50,7 @@
(ref.null $none_=>_eqref)
)
(func $type-only-in-tuple-local
- (local $x (i32 (ref null (func (result anyref))) f64))
+ (local $x (i32 (ref null $none_=>_anyref) f64))
(nop)
)
(func $type-only-in-tuple-block
@@ -63,7 +60,4 @@
)
)
)
- (func $nested-type-only-there (result (ref null $none_=>_ref?|i32_i32_i32_i32_i32_->_none|))
- (unreachable)
- )
)
diff --git a/test/typed-function-references.wast.fromBinary b/test/typed-function-references.wast.fromBinary
index 1e3b8b64c..0e4385c84 100644
--- a/test/typed-function-references.wast.fromBinary
+++ b/test/typed-function-references.wast.fromBinary
@@ -3,14 +3,11 @@
(type $none_=>_anyref_f32_anyref_f32 (func (result anyref f32 anyref f32)))
(type $i32_=>_i32 (func (param i32) (result i32)))
(type $none_=>_eqref (func (result eqref)))
- (type $i32_i32_i32_i32_i32_=>_none (func (param i32 i32 i32 i32 i32)))
(type $none_=>_i32 (func (result i32)))
(type $none_=>_anyref (func (result anyref)))
(type $ref?|i32_->_i32|_=>_i32 (func (param (ref null $i32_=>_i32)) (result i32)))
(type $none_=>_i32_ref?|none_->_anyref_f32_anyref_f32|_f64 (func (result i32 (ref null $none_=>_anyref_f32_anyref_f32) f64)))
- (type $none_=>_ref?|i32_i32_i32_i32_i32_->_none| (func (result (ref null $i32_i32_i32_i32_i32_=>_none))))
(type $f64_=>_ref?|none_->_eqref| (func (param f64) (result (ref null $none_=>_eqref))))
- (type $none_=>_ref?|none_->_ref?|i32_i32_i32_i32_i32_->_none|| (func (result (ref null $none_=>_ref?|i32_i32_i32_i32_i32_->_none|))))
(func $call-ref
(call_ref
(ref.func $call-ref)
@@ -59,7 +56,7 @@
(nop)
)
(func $type-only-in-tuple-block
- (local $0 (i32 (ref null (func (result anyref f32 anyref f32))) f64))
+ (local $0 (i32 (ref null $none_=>_anyref_f32_anyref_f32) f64))
(local $1 (ref null $none_=>_anyref_f32_anyref_f32))
(local $2 i32)
(local.set $0
@@ -93,8 +90,5 @@
)
)
)
- (func $nested-type-only-there (result (ref null $none_=>_ref?|i32_i32_i32_i32_i32_->_none|))
- (unreachable)
- )
)
diff --git a/test/typed-function-references.wast.fromBinary.noDebugInfo b/test/typed-function-references.wast.fromBinary.noDebugInfo
index a9d95b8d5..ad50ea1f7 100644
--- a/test/typed-function-references.wast.fromBinary.noDebugInfo
+++ b/test/typed-function-references.wast.fromBinary.noDebugInfo
@@ -3,14 +3,11 @@
(type $none_=>_anyref_f32_anyref_f32 (func (result anyref f32 anyref f32)))
(type $i32_=>_i32 (func (param i32) (result i32)))
(type $none_=>_eqref (func (result eqref)))
- (type $i32_i32_i32_i32_i32_=>_none (func (param i32 i32 i32 i32 i32)))
(type $none_=>_i32 (func (result i32)))
(type $none_=>_anyref (func (result anyref)))
(type $ref?|i32_->_i32|_=>_i32 (func (param (ref null $i32_=>_i32)) (result i32)))
(type $none_=>_i32_ref?|none_->_anyref_f32_anyref_f32|_f64 (func (result i32 (ref null $none_=>_anyref_f32_anyref_f32) f64)))
- (type $none_=>_ref?|i32_i32_i32_i32_i32_->_none| (func (result (ref null $i32_i32_i32_i32_i32_=>_none))))
(type $f64_=>_ref?|none_->_eqref| (func (param f64) (result (ref null $none_=>_eqref))))
- (type $none_=>_ref?|none_->_ref?|i32_i32_i32_i32_i32_->_none|| (func (result (ref null $none_=>_ref?|i32_i32_i32_i32_i32_->_none|))))
(func $0
(call_ref
(ref.func $0)
@@ -59,7 +56,7 @@
(nop)
)
(func $8
- (local $0 (i32 (ref null (func (result anyref f32 anyref f32))) f64))
+ (local $0 (i32 (ref null $none_=>_anyref_f32_anyref_f32) f64))
(local $1 (ref null $none_=>_anyref_f32_anyref_f32))
(local $2 i32)
(local.set $0
@@ -93,8 +90,5 @@
)
)
)
- (func $9 (result (ref null $none_=>_ref?|i32_i32_i32_i32_i32_->_none|))
- (unreachable)
- )
)