From 5bf548b32e30fbae16dde5df703541ca7ac72f15 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 5 Jan 2023 09:48:51 -0600 Subject: [Parser] Parse blocks (#5393) Parse both the folded and unfolded forms of blocks and structure the code to make supporting additional block instructions like if-else and try-catch relatively simple. Parsing block types is extra fun because they may implicitly define new signature heap types via a typeuse, but only if their types are not given by a single result type. To figuring out whether a new type may be introduced in all the relevant parsing stages, always track at least the arity of parsed results. The parser parses block labels, but more work will be required to support branch instructions that use them. --- src/wasm/wat-parser.cpp | 248 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 231 insertions(+), 17 deletions(-) (limited to 'src/wasm/wat-parser.cpp') diff --git a/src/wasm/wat-parser.cpp b/src/wasm/wat-parser.cpp index b1be9b75e..409d51052 100644 --- a/src/wasm/wat-parser.cpp +++ b/src/wasm/wat-parser.cpp @@ -471,7 +471,8 @@ struct NullTypeParserCtx { using HeapTypeT = Ok; using TypeT = Ok; using ParamsT = Ok; - using ResultsT = Ok; + using ResultsT = size_t; + using BlockTypeT = Ok; using SignatureT = Ok; using StorageT = Ok; using FieldT = Ok; @@ -504,8 +505,12 @@ struct NullTypeParserCtx { ParamsT makeParams() { return Ok{}; } void appendParam(ParamsT&, Name, TypeT) {} - ResultsT makeResults() { return Ok{}; } - void appendResult(ResultsT&, TypeT) {} + // We have to count results because whether or not a block introduces a + // typeuse that may implicitly define a type depends on how many results it + // has. + size_t makeResults() { return 0; } + void appendResult(size_t& results, TypeT) { ++results; } + size_t getResultsSize(size_t results) { return results; } SignatureT makeFuncType(ParamsT*, ResultsT*) { return Ok{}; } @@ -534,6 +539,10 @@ struct NullTypeParserCtx { void appendDataString(DataStringT&, std::string_view) {} MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } + + BlockTypeT getBlockTypeFromResult(size_t results) { return Ok{}; } + + Result<> getBlockTypeFromTypeUse(Index, TypeUseT) { return Ok{}; } }; template struct TypeParserCtx { @@ -542,6 +551,7 @@ template struct TypeParserCtx { using TypeT = Type; using ParamsT = std::vector; using ResultsT = std::vector; + using BlockTypeT = HeapType; using SignatureT = Signature; using StorageT = Field; using FieldT = Field; @@ -587,6 +597,7 @@ template struct TypeParserCtx { ResultsT makeResults() { return {}; } void appendResult(ResultsT& results, TypeT type) { results.push_back(type); } + size_t getResultsSize(const ResultsT& results) { return results.size(); } SignatureT makeFuncType(ParamsT* params, ResultsT* results) { std::vector empty; @@ -644,6 +655,11 @@ template struct TypeParserCtx { LimitsT getLimitsFromData(DataStringT) { return Ok{}; } MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } + + HeapType getBlockTypeFromResult(const std::vector results) { + assert(results.size() == 1); + return HeapType(Signature(Type::none, results[0])); + } }; struct NullInstrParserCtx { @@ -683,6 +699,10 @@ struct NullInstrParserCtx { MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; } + template + InstrT makeBlock(Index, std::optional, BlockTypeT, InstrsT) { + return Ok{}; + } InstrT makeUnreachable(Index) { return Ok{}; } InstrT makeNop(Index) { return Ok{}; } InstrT makeBinary(Index, BinaryOp) { return Ok{}; } @@ -792,7 +812,7 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { ParseInput in; // At this stage we only look at types to find implicit type definitions, - // which are inserted directly in to the context. We cannot materialize or + // which are inserted directly into the context. We cannot materialize or // validate any types because we don't know what types exist yet. // // Declared module elements are inserted into the module, but their bodies are @@ -1190,6 +1210,16 @@ struct ParseModuleTypesCtx : TypeParserCtx, return TypeUse{it->second, ids}; } + Result getBlockTypeFromTypeUse(Index pos, TypeUse use) { + assert(use.type.isSignature()); + if (use.type.getSignature().params != Type::none) { + return in.err(pos, "block parameters not yet supported"); + } + // TODO: Once we support block parameters, return an error here if any of + // them are named. + return use.type; + } + GlobalTypeT makeGlobalType(Mutability mutability, TypeT type) { return {mutability, type}; } @@ -1273,17 +1303,21 @@ struct ParseDefsCtx : TypeParserCtx { // local.get, etc. Function* func = nullptr; - // The stack of parsed expressions, used as the children of newly parsed - // expressions. - std::vector exprStack; + // The context for a single block scope, including the instructions parsed + // inside that scope so far and the ultimate result type we expect this block + // to have. + struct BlockCtx { + std::vector exprStack; + Type type; + }; + + // The stack of block contexts currently being parsed. + std::vector scopeStack; // Whether we have seen an unreachable instruction and are in // stack-polymorphic unreachable mode. bool unreachable = false; - // The expected result type of the instruction sequence we are parsing. - Type type; - ParseDefsCtx(std::string_view in, Module& wasm, const std::vector& types, @@ -1292,7 +1326,23 @@ struct ParseDefsCtx : TypeParserCtx { : TypeParserCtx(typeIndices), in(in), wasm(wasm), builder(wasm), types(types), implicitTypes(implicitTypes) {} + std::vector& getExprStack() { + if (scopeStack.empty()) { + // We are not in a function, so push a dummy scope. + scopeStack.push_back({{}, Type::none}); + } + return scopeStack.back().exprStack; + } + + void pushScope(Type type) { scopeStack.push_back({{}, type}); } + + Type getResultType() { + assert(!scopeStack.empty()); + return scopeStack.back().type; + } + Result<> push(Index pos, Expression* expr) { + auto& exprStack = getExprStack(); if (expr->type == Type::unreachable) { // We want to avoid popping back past this most recent unreachable // instruction. Drop all prior instructions so they won't be consumed by @@ -1310,7 +1360,7 @@ struct ParseDefsCtx : TypeParserCtx { for (Index i = 0; i < expr->type.size(); ++i) { CHECK_ERR(push(pos, builder.makeTupleExtract( - builder.makeLocalGet(*scratchIdx, expr->type[i]), i))); + builder.makeLocalGet(*scratchIdx, expr->type), i))); } } else { exprStack.push_back(expr); @@ -1319,6 +1369,8 @@ struct ParseDefsCtx : TypeParserCtx { } Result pop(Index pos) { + auto& exprStack = getExprStack(); + // Find the suffix of expressions that do not produce values. auto firstNone = exprStack.size(); for (; firstNone > 0; --firstNone) { @@ -1363,11 +1415,25 @@ struct ParseDefsCtx : TypeParserCtx { return builder.makeBlock(exprs, expr->type); } + HeapType getBlockTypeFromResult(const std::vector results) { + assert(results.size() == 1); + pushScope(results[0]); + return HeapType(Signature(Type::none, results[0])); + } + + Result getBlockTypeFromTypeUse(Index pos, HeapType type) { + pushScope(type.getSignature().results); + return type; + } + Ok makeInstrs() { return Ok{}; } void appendInstr(Ok&, InstrT instr) {} Result finishInstrs(Ok&) { + auto& exprStack = getExprStack(); + auto type = getResultType(); + // We have finished parsing a sequence of instructions. Fix up the parsed // instructions and reset the context for the next sequence. if (type.isTuple()) { @@ -1396,11 +1462,16 @@ struct ParseDefsCtx : TypeParserCtx { exprStack.push_back(*expr); } unreachable = false; - return std::move(exprStack); + auto ret = std::move(exprStack); + scopeStack.pop_back(); + return ret; } Expression* instrToExpr(Ok&) { + auto& exprStack = getExprStack(); + assert(scopeStack.size() == 1); assert(exprStack.size() == 1); + auto e = exprStack.back(); exprStack.clear(); unreachable = false; @@ -1433,7 +1504,7 @@ struct ParseDefsCtx : TypeParserCtx { Result getLocalFromIdx(uint32_t idx) { if (!func) { - return in.err("cannot access locals outside of a funcion"); + return in.err("cannot access locals outside of a function"); } if (idx >= func->getNumLocals()) { return in.err("local index out of bounds"); @@ -1627,6 +1698,20 @@ struct ParseDefsCtx : TypeParserCtx { return wasm.memories[0]->name; } + Result<> makeBlock(Index pos, + std::optional label, + HeapType type, + const std::vector& instrs) { + // TODO: validate labels? + // TODO: Move error on input types to here? + auto results = type.getSignature().results; + if (label) { + return push(pos, builder.makeBlock(*label, instrs, results)); + } else { + return push(pos, builder.makeBlock(instrs, results)); + } + } + Result<> makeUnreachable(Index pos) { return push(pos, builder.makeUnreachable()); } @@ -2153,10 +2238,17 @@ template Result memtype(Ctx&); template Result globaltype(Ctx&); // Instructions +template MaybeResult foldedBlockinstr(Ctx&); +template +MaybeResult unfoldedBlockinstr(Ctx&); +template MaybeResult blockinstr(Ctx&); +template MaybeResult plaininstr(Ctx&); template MaybeResult instr(Ctx&); template Result instrs(Ctx&); template Result expr(Ctx&); template Result memarg(Ctx&, uint32_t); +template Result blocktype(Ctx&); +template MaybeResult block(Ctx&, bool); template Result makeUnreachable(Ctx&, Index); template Result makeNop(Ctx&, Index); @@ -2665,7 +2757,37 @@ template Result globaltype(Ctx& ctx) { // Instructions // ============ -template MaybeResult instr(Ctx& ctx) { +// blockinstr ::= block | loop | if-else | try-catch +template +MaybeResult foldedBlockinstr(Ctx& ctx) { + if (auto i = block(ctx, true)) { + return i; + } + // TODO: Other block instructions + return {}; +} + +template +MaybeResult unfoldedBlockinstr(Ctx& ctx) { + if (auto i = block(ctx, false)) { + return i; + } + // TODO: Other block instructions + return {}; +} + +template MaybeResult blockinstr(Ctx& ctx) { + if (auto i = foldedBlockinstr(ctx)) { + return i; + } + if (auto i = unfoldedBlockinstr(ctx)) { + return i; + } + return {}; +} + +// plaininstr ::= ... all plain instructions ... +template MaybeResult plaininstr(Ctx& ctx) { auto pos = ctx.in.getPos(); auto keyword = ctx.in.takeKeyword(); if (!keyword) { @@ -2677,10 +2799,36 @@ template MaybeResult instr(Ctx& ctx) { #include } +// instr ::= plaininstr | blockinstr +template MaybeResult instr(Ctx& ctx) { + // Check for valid strings that are not instructions. + if (auto tok = ctx.in.peek()) { + if (auto keyword = tok->getKeyword()) { + if (keyword == "end"sv) { + return {}; + } + } + } + if (auto i = blockinstr(ctx)) { + return i; + } + if (auto i = plaininstr(ctx)) { + return i; + } + // TODO: Handle folded plain instructions as well. + return {}; +} + template Result instrs(Ctx& ctx) { auto insts = ctx.makeInstrs(); while (true) { + if (auto blockinst = foldedBlockinstr(ctx)) { + CHECK_ERR(blockinst); + ctx.appendInstr(insts, *blockinst); + continue; + } + // Parse an arbitrary number of folded instructions. if (ctx.in.takeLParen()) { // A stack of (start, end) position pairs defining the positions of // instructions that need to be parsed after their folded children. @@ -2704,7 +2852,10 @@ template Result instrs(Ctx& ctx) { // We have either the start of a new folded child or the end of the last // one. - if (ctx.in.takeLParen()) { + if (auto blockinst = foldedBlockinstr(ctx)) { + CHECK_ERR(blockinst); + ctx.appendInstr(insts, *blockinst); + } else if (ctx.in.takeLParen()) { foldedInstrs.push_back({ctx.in.getPos(), {}}); } else if (ctx.in.takeRParen()) { auto [start, end] = foldedInstrs.back(); @@ -2712,7 +2863,7 @@ template Result instrs(Ctx& ctx) { foldedInstrs.pop_back(); WithPosition with(ctx, start); - if (auto inst = instr(ctx)) { + if (auto inst = plaininstr(ctx)) { CHECK_ERR(inst); ctx.appendInstr(insts, *inst); } else { @@ -2763,6 +2914,69 @@ Result memarg(Ctx& ctx, uint32_t n) { return ctx.getMemarg(offset, align); } +// blocktype ::= (t:result)? => t? | x,I:typeuse => x if I = {} +template Result blocktype(Ctx& ctx) { + auto pos = ctx.in.getPos(); + + if (auto res = results(ctx)) { + CHECK_ERR(res); + if (ctx.getResultsSize(*res) == 1) { + return ctx.getBlockTypeFromResult(*res); + } + } + + // We either had no results or multiple results. Reset and parse again as a + // type use. + ctx.in.lexer.setIndex(pos); + auto use = typeuse(ctx); + CHECK_ERR(use); + + auto type = ctx.getBlockTypeFromTypeUse(pos, *use); + CHECK_ERR(type); + return *type; +} + +// block ::= 'block' label blocktype instr* 'end' id? if id = {} or id = label +// | '(' 'block' label blocktype instr* ')' +template +MaybeResult block(Ctx& ctx, bool folded) { + auto pos = ctx.in.getPos(); + + if (folded) { + if (!ctx.in.takeSExprStart("block"sv)) { + return {}; + } + } else { + if (!ctx.in.takeKeyword("block"sv)) { + return {}; + } + } + + auto label = ctx.in.takeID(); + + auto type = blocktype(ctx); + CHECK_ERR(type); + + auto insts = instrs(ctx); + CHECK_ERR(insts); + + if (folded) { + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected ')' at end of block"); + } + } else { + if (!ctx.in.takeKeyword("end"sv)) { + return ctx.in.err("expected 'end' at end of block"); + } + auto id = ctx.in.takeID(); + if (id && id != label) { + return ctx.in.err("end label does not match block label"); + } + } + + return ctx.makeBlock(pos, label, *type, std::move(*insts)); +} + template Result makeUnreachable(Ctx& ctx, Index pos) { return ctx.makeUnreachable(pos); @@ -4023,7 +4237,7 @@ Result<> parseModule(Module& wasm, std::string_view input) { for (Index i = 0; i < decls.funcDefs.size(); ++i) { ctx.index = i; ctx.func = wasm.functions[i].get(); - ctx.type = ctx.func->getResults(); + ctx.pushScope(ctx.func->getResults()); WithPosition with(ctx, decls.funcDefs[i].pos); auto parsed = func(ctx); CHECK_ERR(parsed); -- cgit v1.2.3