From 4a83a0fe2943e2db9941391c3d08eb3b6fdf2310 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 6 Dec 2023 16:13:12 -0800 Subject: [Parser] Parse tables and element segments (#6147) These module fields are especially complex to parse because they contain both nontrivial types and instructions, so their parsing logic needs to be spread out across the ParseDecls, ParseModuleTypes, and ParseDefs phases of parsing. This applies to in-line elements in table definitions as well, which means we need to be able to match a table to its in-line element segment across multiple phases. --- src/parser/contexts.h | 163 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 150 insertions(+), 13 deletions(-) (limited to 'src/parser/contexts.h') diff --git a/src/parser/contexts.h b/src/parser/contexts.h index adb7694f6..8395f4dc4 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -41,7 +41,7 @@ inline std::vector getUnnamedTypes(const std::vector& named) { struct Limits { uint64_t initial; - uint64_t max; + std::optional max; }; struct MemType { @@ -94,6 +94,7 @@ struct NullTypeParserCtx { using GlobalTypeT = Ok; using TypeUseT = Ok; using LocalsT = Ok; + using ElemListT = Ok; using DataStringT = Ok; HeapTypeT makeFunc() { return Ok{}; } @@ -265,7 +266,7 @@ template struct TypeParserCtx { DataStringT makeDataString() { return Ok{}; } void appendDataString(DataStringT&, std::string_view) {} - LimitsT makeLimits(uint64_t, std::optional) { return Ok{}; } + Result makeLimits(uint64_t, std::optional) { return Ok{}; } LimitsT getLimitsFromData(DataStringT) { return Ok{}; } MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } @@ -282,6 +283,7 @@ struct NullInstrParserCtx { using FieldIdxT = Ok; using FuncIdxT = Ok; using LocalIdxT = Ok; + using TableIdxT = Ok; using GlobalIdxT = Ok; using MemoryIdxT = Ok; using DataIdxT = Ok; @@ -304,6 +306,8 @@ struct NullInstrParserCtx { LocalIdxT getLocalFromName(Name) { return Ok{}; } GlobalIdxT 
getGlobalFromIdx(uint32_t) { return Ok{}; } GlobalIdxT getGlobalFromName(Name) { return Ok{}; } + TableIdxT getTableFromIdx(uint32_t) { return Ok{}; } + TableIdxT getTableFromName(Name) { return Ok{}; } MemoryIdxT getMemoryFromIdx(uint32_t) { return Ok{}; } MemoryIdxT getMemoryFromName(Name) { return Ok{}; } DataIdxT getDataFromIdx(uint32_t) { return Ok{}; } @@ -470,8 +474,11 @@ struct NullInstrParserCtx { // Phase 1: Parse definition spans for top-level module elements and determine // their indices and names. struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { - using DataStringT = std::vector; + using ExprT = Ok; using LimitsT = Limits; + using ElemListT = Index; + using DataStringT = std::vector; + using TableTypeT = Limits; using MemTypeT = MemType; ParseInput in; @@ -488,18 +495,27 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { std::vector typeDefs; std::vector subtypeDefs; std::vector funcDefs; + std::vector tableDefs; std::vector memoryDefs; std::vector globalDefs; + std::vector elemDefs; std::vector dataDefs; std::vector tagDefs; // Positions of typeuses that might implicitly define new types. std::vector implicitTypeDefs; + // Map table indices to the indices of their implicit, in-line element + // segments. We need these to find associated segments in later parsing phases + // where we can parse their types and instructions. + std::unordered_map implicitElemIndices; + // Counters used for generating names for module elements. 
int funcCounter = 0; + int tableCounter = 0; int memoryCounter = 0; int globalCounter = 0; + int elemCounter = 0; int dataCounter = 0; int tagCounter = 0; @@ -534,14 +550,24 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { typeDefs.push_back({{}, pos, Index(typeDefs.size())}); } + Limits makeLimits(uint64_t n, std::optional m) { + return Limits{n, m}; + } + + Index makeElemList(TypeT) { return 0; } + Index makeFuncElemList() { return 0; } + void appendElem(Index& elems, ExprT) { ++elems; } + void appendFuncElem(Index& elems, FuncIdxT) { ++elems; } + + Limits getLimitsFromElems(Index elems) { return {elems, elems}; } + + Limits makeTableType(Limits limits, TypeT) { return limits; } + std::vector makeDataString() { return {}; } void appendDataString(std::vector& data, std::string_view str) { data.insert(data.end(), str.begin(), str.end()); } - Limits makeLimits(uint64_t n, std::optional m) { - return m ? Limits{n, *m} : Limits{n, Memory::kUnlimitedSize}; - } Limits getLimitsFromData(const std::vector& data) { uint64_t size = (data.size() + Memory::kPageSize - 1) / Memory::kPageSize; return {size, size}; @@ -567,6 +593,14 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { std::optional, Index pos); + Result + addTableDecl(Index pos, Name name, ImportNames* importNames, Limits limits); + Result<> + addTable(Name, const std::vector&, ImportNames*, Limits, Index); + + // TODO: Record index of implicit elem for use when parsing types and instrs. 
+ Result<> addImplicitElems(TypeT, ElemListT&& elems); + Result addMemoryDecl(Index pos, Name name, ImportNames* importNames, MemType type); @@ -587,6 +621,10 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { std::optional, Index pos); + Result<> addElem(Name, TableIdxT*, std::optional, ElemListT&&, Index); + + Result<> addDeclareElem(Name, ElemListT&&, Index) { return Ok{}; } + Result<> addData(Name name, MemoryIdxT*, std::optional, @@ -741,25 +779,32 @@ struct ParseModuleTypesCtx : TypeParserCtx, // validate them when they are used. using GlobalTypeT = GlobalType; + using TableTypeT = Type; using TypeUseT = TypeUse; + using ElemListT = Type; + ParseInput in; Module& wasm; const std::vector& types; const std::unordered_map& implicitTypes; + const std::unordered_map& implicitElemIndices; // The index of the current type. Index index = 0; - ParseModuleTypesCtx(std::string_view in, - Module& wasm, - const std::vector& types, - const std::unordered_map& implicitTypes, - const IndexMap& typeIndices) + ParseModuleTypesCtx( + std::string_view in, + Module& wasm, + const std::vector& types, + const std::unordered_map& implicitTypes, + const std::unordered_map& implicitElemIndices, + const IndexMap& typeIndices) : TypeParserCtx(typeIndices), in(in), wasm(wasm), - types(types), implicitTypes(implicitTypes) {} + types(types), implicitTypes(implicitTypes), + implicitElemIndices(implicitElemIndices) {} Result getHeapTypeFromIdx(Index idx) { if (idx >= types.size()) { @@ -804,6 +849,18 @@ struct ParseModuleTypesCtx : TypeParserCtx, return {mutability, type}; } + Type makeElemList(Type type) { return type; } + Type makeFuncElemList() { return Type(HeapType::func, Nullable); } + void appendElem(ElemListT&, ExprT) {} + void appendFuncElem(ElemListT&, FuncIdxT) {} + + LimitsT getLimitsFromElems(ElemListT) { return Ok{}; } + + Type makeTableType(LimitsT, Type type) { return type; } + + LimitsT getLimitsFromData(DataStringT) { return Ok{}; } + MemTypeT 
makeMemType(Type, LimitsT, bool) { return Ok{}; } + Result<> addFunc(Name name, const std::vector&, ImportNames*, @@ -828,6 +885,23 @@ struct ParseModuleTypesCtx : TypeParserCtx, return Ok{}; } + Result<> addTable( + Name, const std::vector&, ImportNames*, Type ttype, Index pos) { + auto& t = wasm.tables[index]; + if (!ttype.isRef()) { + return in.err(pos, "expected reference type"); + } + t->type = ttype; + return Ok{}; + } + + Result<> addImplicitElems(Type type, ElemListT&&) { + auto& t = wasm.tables[index]; + auto& e = wasm.elementSegments[implicitElemIndices.at(index)]; + e->type = t->type; + return Ok{}; + } + Result<> addMemory(Name, const std::vector&, ImportNames*, MemTypeT, Index) { return Ok{}; @@ -847,6 +921,15 @@ struct ParseModuleTypesCtx : TypeParserCtx, return Ok{}; } + Result<> + addElem(Name, TableIdxT*, std::optional, ElemListT&& type, Index) { + auto& e = wasm.elementSegments[index]; + e->type = type; + return Ok{}; + } + + Result<> addDeclareElem(Name, ElemListT&&, Index) { return Ok{}; } + Result<> addTag(Name, const std::vector&, ImportNames*, TypeUse use, Index pos) { auto& t = wasm.tags[index]; @@ -861,15 +944,18 @@ struct ParseModuleTypesCtx : TypeParserCtx, // Phase 5: Parse module element definitions, including instructions. struct ParseDefsCtx : TypeParserCtx { using GlobalTypeT = Ok; + using TableTypeT = Ok; using TypeUseT = HeapType; using ExprT = Expression*; + using ElemListT = std::vector; using FieldIdxT = Index; using FuncIdxT = Name; using LocalIdxT = Index; using LabelIdxT = Index; using GlobalIdxT = Name; + using TableIdxT = Name; using MemoryIdxT = Name; using DataIdxT = Name; using TagIdxT = Name; @@ -883,6 +969,7 @@ struct ParseDefsCtx : TypeParserCtx { const std::vector& types; const std::unordered_map& implicitTypes; + const std::unordered_map& implicitElemIndices; // The index of the current module element. 
Index index = 0; @@ -903,9 +990,11 @@ struct ParseDefsCtx : TypeParserCtx { Module& wasm, const std::vector& types, const std::unordered_map& implicitTypes, + const std::unordered_map& implicitElemIndices, const IndexMap& typeIndices) : TypeParserCtx(typeIndices), in(in), wasm(wasm), builder(wasm), - types(types), implicitTypes(implicitTypes), irBuilder(wasm) {} + types(types), implicitTypes(implicitTypes), + implicitElemIndices(implicitElemIndices), irBuilder(wasm) {} template Result withLoc(Index pos, Result res) { if (auto err = res.getErr()) { @@ -929,6 +1018,20 @@ struct ParseDefsCtx : TypeParserCtx { GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } + std::vector makeElemList(TypeT) { return {}; } + std::vector makeFuncElemList() { return {}; } + void appendElem(std::vector& elems, Expression* expr) { + elems.push_back(expr); + } + void appendFuncElem(std::vector& elems, Name func) { + auto type = wasm.getFunction(func)->type; + elems.push_back(builder.makeRefFunc(func, type)); + } + + LimitsT getLimitsFromElems(std::vector& elems) { return Ok{}; } + + TableTypeT makeTableType(LimitsT, Type) { return Ok{}; } + Result getHeapTypeFromIdx(Index idx) { if (idx >= types.size()) { return in.err("type index out of bounds"); @@ -999,6 +1102,20 @@ struct ParseDefsCtx : TypeParserCtx { return name; } + Result getTableFromIdx(uint32_t idx) { + if (idx >= wasm.tables.size()) { + return in.err("table index out of bounds"); + } + return wasm.tables[idx]->name; + } + + Result getTableFromName(Name name) { + if (!wasm.getTableOrNull(name)) { + return in.err("table $" + name.toString() + " does not exist"); + } + return name; + } + Result getMemoryFromIdx(uint32_t idx) { if (idx >= wasm.memories.size()) { return in.err("memory index out of bounds"); @@ -1058,12 +1175,32 @@ struct ParseDefsCtx : TypeParserCtx { std::optional, Index pos); + Result<> + addTable(Name, const std::vector&, ImportNames*, TableTypeT, Index) { + return Ok{}; + } + Result<> addGlobal(Name, 
const std::vector&, ImportNames*, GlobalTypeT, std::optional exp, Index); + + Result<> addImplicitElems(Type type, std::vector&& elems); + + Result<> addDeclareElem(Name, std::vector&&, Index) { + // TODO: Validate that referenced functions appear in a declarative element + // segment. + return Ok{}; + } + + Result<> addElem(Name, + Name* table, + std::optional offset, + std::vector&& elems, + Index pos); + Result<> addData(Name, Name* mem, std::optional offset, DataStringT, Index pos); -- cgit v1.2.3