From 657431cdc36272a64b6b77465b68eaed2c7dfe31 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 3 Jan 2023 14:46:54 -0600 Subject: [Parser] Parse data segments (#5373) * [NFC][Parser] Track definition indices For each definition in a module, record that definition's index in the relevant index space. Previously the index was inferred from its position in a list of module definitions, but that scheme does not scale to data segments defined inline inside memory definitions because these data segments occupy a slot in the data segment index space but do not have their own independent definitions. * clarify comment * [Parser] Parse data segments Parse active and passive data segments, including all their variations and abbreviations as well as data segments declared inline in memory declarations. Switch to parsing data strings, memory limits, and memory types during the ParseDecls phase so that the inline data segments can be completely parsed during that phase and never revisited. Parsing the inline data segments in a later phase would not work because they would be incorrectly inserted at the end of the data segment index space. Also update the printer to print a memory use on active data segments that are initialized in a non-default memory. 
--- src/wasm/wat-parser.cpp | 223 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 188 insertions(+), 35 deletions(-) (limited to 'src/wasm/wat-parser.cpp') diff --git a/src/wasm/wat-parser.cpp b/src/wasm/wat-parser.cpp index 7914c327e..f4057a732 100644 --- a/src/wasm/wat-parser.cpp +++ b/src/wasm/wat-parser.cpp @@ -532,9 +532,6 @@ struct NullTypeParserCtx { DataStringT makeDataString() { return Ok{}; } void appendDataString(DataStringT&, std::string_view) {} - LimitsT makeLimits(uint64_t, std::optional) { return Ok{}; } - LimitsT getLimitsFromData(DataStringT) { return Ok{}; } - MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } }; @@ -550,10 +547,10 @@ template struct TypeParserCtx { using FieldsT = std::pair, std::vector>; using StructT = std::pair, Struct>; using ArrayT = Array; - using LimitsT = Limits; - using MemTypeT = MemType; + using LimitsT = Ok; + using MemTypeT = Ok; using LocalsT = std::vector; - using DataStringT = std::vector; + using DataStringT = Ok; // Map heap type names to their indices. const IndexMap& typeIndices; @@ -638,22 +635,13 @@ template struct TypeParserCtx { return it->second; } - std::vector makeDataString() { return {}; } - void appendDataString(std::vector& data, std::string_view str) { - data.insert(data.end(), str.begin(), str.end()); - } + DataStringT makeDataString() { return Ok{}; } + void appendDataString(DataStringT&, std::string_view) {} - Limits makeLimits(uint64_t n, std::optional m) { - return m ? 
Limits{n, *m} : Limits{n, Memory::kUnlimitedSize}; - } - Limits getLimitsFromData(const std::vector& data) { - uint64_t size = (data.size() + Memory::kPageSize - 1) / Memory::kPageSize; - return {size, size}; - } + LimitsT makeLimits(uint64_t, std::optional) { return Ok{}; } + LimitsT getLimitsFromData(DataStringT) { return Ok{}; } - MemType makeMemType(Type type, Limits limits, bool shared) { - return {type, limits, shared}; - } + MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } }; struct NullInstrParserCtx { @@ -673,6 +661,7 @@ struct NullInstrParserCtx { InstrsT finishInstrs(InstrsT&) { return Ok{}; } ExprT makeExpr(InstrsT) { return Ok{}; } + ExprT instrToExpr(InstrT) { return Ok{}; } template FieldIdxT getFieldFromIdx(HeapTypeT, uint32_t) { return Ok{}; @@ -768,6 +757,10 @@ struct NullInstrParserCtx { // Phase 1: Parse definition spans for top-level module elements and determine // their indices and names. struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { + using DataStringT = std::vector; + using LimitsT = Limits; + using MemTypeT = MemType; + ParseInput in; // At this stage we only look at types to find implicit type definitions, @@ -784,6 +777,7 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { std::vector funcDefs; std::vector memoryDefs; std::vector globalDefs; + std::vector dataDefs; // Positions of typeuses that might implicitly define new types. std::vector implicitTypeDefs; @@ -792,6 +786,7 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { int funcCounter = 0; int memoryCounter = 0; int globalCounter = 0; + int dataCounter = 0; // Used to verify that all imports come before all non-imports. 
bool hasNonImport = false; @@ -811,6 +806,23 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { typeDefs.push_back({{}, pos, Index(typeDefs.size())}); } + std::vector makeDataString() { return {}; } + void appendDataString(std::vector& data, std::string_view str) { + data.insert(data.end(), str.begin(), str.end()); + } + + Limits makeLimits(uint64_t n, std::optional m) { + return m ? Limits{n, *m} : Limits{n, Memory::kUnlimitedSize}; + } + Limits getLimitsFromData(const std::vector& data) { + uint64_t size = (data.size() + Memory::kPageSize - 1) / Memory::kPageSize; + return {size, size}; + } + + MemType makeMemType(Type type, Limits limits, bool shared) { + return {type, limits, shared}; + } + Result makeTypeUse(Index pos, std::optional type, ParamsT*, ResultsT*) { if (!type) { @@ -856,8 +868,12 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { } Result - addMemoryDecl(Index pos, Name name, ImportNames* importNames) { + addMemoryDecl(Index pos, Name name, ImportNames* importNames, MemType type) { auto m = std::make_unique(); + m->indexType = type.type; + m->initial = type.limits.initial; + m->max = type.limits.max; + m->shared = type.shared; if (name) { // TODO: if the existing memory is not explicitly named, fix its name // and continue. 
@@ -877,18 +893,30 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { Result<> addMemory(Name name, const std::vector& exports, ImportNames* import, - MemTypeT, + MemType type, Index pos) { if (import && hasNonImport) { return in.err(pos, "import after non-import"); } - auto m = addMemoryDecl(pos, name, import); + auto m = addMemoryDecl(pos, name, import, type); CHECK_ERR(m); CHECK_ERR(addExports(in, wasm, *m, exports, ExternalKind::Memory)); memoryDefs.push_back({name, pos, Index(memoryDefs.size())}); return Ok{}; } + Result<> addImplicitData(DataStringT&& data) { + auto& mem = *wasm.memories.back(); + auto d = std::make_unique(); + d->memory = mem.name; + d->isPassive = false; + d->offset = Builder(wasm).makeConstPtr(0, mem.indexType); + d->data = std::move(data); + d->name = Names::getValidDataSegmentName(wasm, "implicit-data"); + wasm.addDataSegment(std::move(d)); + return Ok{}; + } + Result addGlobalDecl(Index pos, Name name, ImportNames* importNames) { auto g = std::make_unique(); @@ -923,6 +951,30 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { globalDefs.push_back({name, pos, Index(globalDefs.size())}); return Ok{}; } + + Result<> addData(Name name, + MemoryIdxT*, + std::optional, + std::vector&& data, + Index pos) { + auto d = std::make_unique(); + if (name) { + if (wasm.getDataSegmentOrNull(name)) { + // TODO: if the existing segment is not explicitly named, fix its name + // and continue. + return in.err(pos, "repeated data segment name"); + } + d->setExplicitName(name); + } else { + name = std::to_string(dataCounter++); + name = Names::getValidDataSegmentName(wasm, name); + d->name = name; + } + d->data = std::move(data); + dataDefs.push_back({name, pos, Index(wasm.dataSegments.size())}); + wasm.addDataSegment(std::move(d)); + return Ok{}; + } }; // Phase 2: Parse type definitions into a TypeBuilder. 
@@ -1139,16 +1191,13 @@ struct ParseModuleTypesCtx : TypeParserCtx, return Ok{}; } - Result<> addMemory( - Name, const std::vector&, ImportNames*, MemType type, Index pos) { - auto& m = wasm.memories[index]; - m->indexType = type.type; - m->initial = type.limits.initial; - m->max = type.limits.max; - m->shared = type.shared; + Result<> + addMemory(Name, const std::vector&, ImportNames*, MemTypeT, Index) { return Ok{}; } + Result<> addImplicitData(DataStringT&& data) { return Ok{}; } + Result<> addGlobal(Name, const std::vector&, ImportNames*, @@ -1321,6 +1370,14 @@ struct ParseDefsCtx : TypeParserCtx { return std::move(exprStack); } + Expression* instrToExpr(Ok&) { + assert(exprStack.size() == 1); + auto e = exprStack.back(); + exprStack.clear(); + unreachable = false; + return e; + } + GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } Result getHeapTypeFromIdx(Index idx) { @@ -1463,6 +1520,25 @@ struct ParseDefsCtx : TypeParserCtx { return Ok{}; } + Result<> addData( + Name, Name* mem, std::optional offset, DataStringT, Index pos) { + auto& d = wasm.dataSegments[index]; + if (offset) { + d->isPassive = false; + d->offset = *offset; + if (mem) { + d->memory = *mem; + } else if (wasm.memories.size() > 0) { + d->memory = wasm.memories[0]->name; + } else { + return in.err(pos, "active segment with no memory"); + } + } else { + d->isPassive = true; + } + return Ok{}; + } + Result addScratchLocal(Index pos, Type type) { if (!func) { return in.err(pos, @@ -2080,6 +2156,7 @@ template Result fieldidx(Ctx&, typename Ctx::HeapTypeT); template MaybeResult maybeMemidx(Ctx&); template Result memidx(Ctx&); +template MaybeResult maybeMemuse(Ctx&); template Result globalidx(Ctx&); template Result localidx(Ctx&); template Result typeuse(Ctx&); @@ -2093,6 +2170,7 @@ template MaybeResult<> func(Ctx&); template MaybeResult<> memory(Ctx&); template MaybeResult<> global(Ctx&); template Result datastring(Ctx&); +template MaybeResult<> data(Ctx&); MaybeResult<> 
modulefield(ParseDeclsCtx&); Result<> module(ParseDeclsCtx&); @@ -3187,6 +3265,20 @@ template Result memidx(Ctx& ctx) { return ctx.in.err("expected memory index or identifier"); } +// memuse ::= '(' 'memory' x:memidx ')' => x +template +MaybeResult maybeMemuse(Ctx& ctx) { + if (!ctx.in.takeSExprStart("memory"sv)) { + return {}; + } + auto idx = memidx(ctx); + CHECK_ERR(idx); + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected end of memory use"); + } + return *idx; +} + // globalidx ::= x:u32 => x // | v:id => x (if globals[x] = v) template Result globalidx(Ctx& ctx) { @@ -3462,18 +3554,18 @@ template MaybeResult<> memory(Ctx& ctx) { CHECK_ERR(import); std::optional mtype; - + std::optional data; if (ctx.in.takeSExprStart("data"sv)) { if (import) { return ctx.in.err("imported memories cannot have inline data"); } - auto data = datastring(ctx); - CHECK_ERR(data); + auto datastr = datastring(ctx); + CHECK_ERR(datastr); if (!ctx.in.takeRParen()) { return ctx.in.err("expected end of inline data"); } - mtype = ctx.makeMemType(Type::i32, ctx.getLimitsFromData(*data), false); - // TODO: addDataSegment as well. + mtype = ctx.makeMemType(Type::i32, ctx.getLimitsFromData(*datastr), false); + data = *datastr; } else { auto type = memtype(ctx); CHECK_ERR(type); @@ -3485,6 +3577,11 @@ template MaybeResult<> memory(Ctx& ctx) { } CHECK_ERR(ctx.addMemory(name, *exports, import.getPtr(), *mtype, pos)); + + if (data) { + CHECK_ERR(ctx.addImplicitData(std::move(*data))); + } + return Ok{}; } @@ -3535,6 +3632,57 @@ template Result datastring(Ctx& ctx) { return data; } +// data ::= '(' 'data' id? b*:datastring ')' => {init b*, mode passive} +// | '(' 'data' id? x:memuse? 
('(' 'offset' e:expr ')' | e:instr) +// b*:datastring ')' +// => {init b*, mode active {memory x, offset e}} +template MaybeResult<> data(Ctx& ctx) { + auto pos = ctx.in.getPos(); + if (!ctx.in.takeSExprStart("data"sv)) { + return {}; + } + + Name name; + if (auto id = ctx.in.takeID()) { + name = *id; + } + + auto mem = maybeMemuse(ctx); + CHECK_ERR(mem); + + std::optional offset; + if (ctx.in.takeSExprStart("offset"sv)) { + auto e = expr(ctx); + CHECK_ERR(e); + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected end of offset expression"); + } + offset = *e; + } else if (ctx.in.takeLParen()) { + auto inst = instr(ctx); + CHECK_ERR(inst); + offset = ctx.instrToExpr(*inst); + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected end of offset instruction"); + } + } + + if (mem && !offset) { + return ctx.in.err("expected offset for active segment"); + } + + auto str = datastring(ctx); + CHECK_ERR(str); + + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected end of data segment"); + } + + CHECK_ERR(ctx.addData(name, mem.getPtr(), offset, std::move(*str), pos)); + + return Ok{}; +} + // modulefield ::= deftype // | import // | func @@ -3565,6 +3713,10 @@ MaybeResult<> modulefield(ParseDeclsCtx& ctx) { CHECK_ERR(res); return Ok{}; } + if (auto res = data(ctx)) { + CHECK_ERR(res); + return Ok{}; + } return ctx.in.err("unrecognized module field"); } @@ -3652,6 +3804,7 @@ Result<> parseModule(Module& wasm, std::string_view input) { // TODO: Parallelize this. ParseDefsCtx ctx(input, wasm, types, implicitTypes, *typeIndices); CHECK_ERR(parseDefs(ctx, decls.globalDefs, global)); + CHECK_ERR(parseDefs(ctx, decls.dataDefs, data)); for (Index i = 0; i < decls.funcDefs.size(); ++i) { ctx.index = i; -- cgit v1.2.3