From d491136eeb94b748225be50bdcc86c74cdbd154e Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 21 Sep 2023 12:42:07 -0700 Subject: [Parser] Parse if-else in the new wat parser and IRBuilder (#5963) Parse both the straight-line and folded versions of if, including the abbreviations that allow omitting the else clause. In the IRBuilder, generalize the scope stack to be able to track scopes other than blocks and add methods for visiting the beginnings of ifs and elses. --- src/gen-s-parser.inc | 56 ++++++-------------- src/parser/contexts.h | 19 ++++++- src/parser/parsers.h | 122 ++++++++++++++++++++++++++++++++++++++---- src/wasm-ir-builder.h | 93 +++++++++++++++++++++++++++++--- src/wasm/wasm-ir-builder.cpp | 123 +++++++++++++++++++++++++++++++++++++------ 5 files changed, 341 insertions(+), 72 deletions(-) (limited to 'src') diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 416349032..5867913ea 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -3875,33 +3875,21 @@ switch (buf[0]) { } } case 'e': { - switch (buf[1]) { - case 'l': - if (op == "else"sv) { - auto ret = makeThenOrElse(ctx, pos); + switch (buf[7]) { + case 'e': + if (op == "extern.externalize"sv) { + auto ret = makeRefAs(ctx, pos, ExternExternalize); CHECK_ERR(ret); return *ret; } goto parse_error; - case 'x': { - switch (buf[7]) { - case 'e': - if (op == "extern.externalize"sv) { - auto ret = makeRefAs(ctx, pos, ExternExternalize); - CHECK_ERR(ret); - return *ret; - } - goto parse_error; - case 'i': - if (op == "extern.internalize"sv) { - auto ret = makeRefAs(ctx, pos, ExternInternalize); - CHECK_ERR(ret); - return *ret; - } - goto parse_error; - default: goto parse_error; + case 'i': + if (op == "extern.internalize"sv) { + auto ret = makeRefAs(ctx, pos, ExternInternalize); + CHECK_ERR(ret); + return *ret; } - } + goto parse_error; default: goto parse_error; } } @@ -9241,25 +9229,13 @@ switch (buf[0]) { default: goto parse_error; } } - case 'h': { - switch (buf[2]) { - case 'e': - if (op == "then"sv) { - auto ret = makeThenOrElse(ctx, pos); - CHECK_ERR(ret); - return *ret; - } - goto parse_error; - case 'r': - if (op == "throw"sv) { - auto ret = makeThrow(ctx, pos); - CHECK_ERR(ret); - return *ret; - } - goto parse_error; - default: goto parse_error; + case 'h': + if (op == "throw"sv) { + auto ret = makeThrow(ctx, pos); + CHECK_ERR(ret); + return *ret; } - } + goto parse_error; case 'r': if (op == "try"sv) { auto ret = makeTry(ctx, pos); diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 210945e8d..dae938f7f 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -312,7 +312,12 @@ struct NullInstrParserCtx { InstrT makeBlock(Index, std::optional, BlockTypeT) { return Ok{}; } - InstrT finishBlock(Index, InstrsT) { return Ok{}; } + template + InstrT makeIf(Index, std::optional, BlockTypeT) { + return Ok{}; + } + InstrT visitEnd(Index, InstrsT) { return Ok{}; } + InstrT visitElse(Index) { return Ok{}; } InstrT makeUnreachable(Index) { return Ok{}; } InstrT makeNop(Index) { return Ok{}; } @@ -994,10 +999,20 @@ struct ParseDefsCtx : TypeParserCtx { type.getSignature().results)); } - Result<> finishBlock(Index pos, InstrsT) { + Result<> makeIf(Index pos, std::optional label, HeapType type) { + // TODO: validate labels? + // TODO: Move error on input types to here? + return withLoc( + pos, + irBuilder.makeIf(label ? *label : Name{}, type.getSignature().results)); + } + + Result<> visitEnd(Index pos, InstrsT) { return withLoc(pos, irBuilder.visitEnd()); } + Result<> visitElse(Index pos) { return withLoc(pos, irBuilder.visitElse()); } + Result<> makeUnreachable(Index pos) { return withLoc(pos, irBuilder.makeUnreachable()); } diff --git a/src/parser/parsers.h b/src/parser/parsers.h index 5f9f23a2a..d162de58c 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -48,11 +48,14 @@ MaybeResult unfoldedBlockinstr(Ctx&); template MaybeResult blockinstr(Ctx&); template MaybeResult plaininstr(Ctx&); template MaybeResult instr(Ctx&); -template Result instrs(Ctx&); +template +Result instrs(Ctx&, bool requireFolded = false); +template Result foldedinstrs(Ctx&); template Result expr(Ctx&); template Result memarg(Ctx&, uint32_t); template Result blocktype(Ctx&); template MaybeResult block(Ctx&, bool); +template MaybeResult ifelse(Ctx&, bool); template Result makeUnreachable(Ctx&, Index); template Result makeNop(Ctx&, Index); @@ -594,6 +597,9 @@ MaybeResult foldedBlockinstr(Ctx& ctx) { if (auto i = block(ctx, true)) { return i; } + if (auto i = ifelse(ctx, true)) { + return i; + } // TODO: Other block instructions return {}; } @@ -603,6 +609,9 @@ MaybeResult unfoldedBlockinstr(Ctx& ctx) { if (auto i = block(ctx, false)) { return i; } + if (auto i = ifelse(ctx, false)) { + return i; + } // TODO: Other block instructions return {}; } @@ -635,7 +644,7 @@ template MaybeResult instr(Ctx& ctx) { // Check for valid strings that are not instructions. if (auto tok = ctx.in.peek()) { if (auto keyword = tok->getKeyword()) { - if (keyword == "end"sv) { + if (keyword == "end"sv || keyword == "then"sv || keyword == "else"sv) { return {}; } } @@ -650,13 +659,20 @@ template MaybeResult instr(Ctx& ctx) { return {}; } -template Result instrs(Ctx& ctx) { +template +Result instrs(Ctx& ctx, bool requireFolded) { auto insts = ctx.makeInstrs(); + bool parsedFolded = false; while (true) { if (auto blockinst = foldedBlockinstr(ctx)) { CHECK_ERR(blockinst); ctx.appendInstr(insts, *blockinst); + parsedFolded = true; + if (requireFolded) { + // Do not continue to parse another sibling folded instruction. + break; + } continue; } // Parse an arbitrary number of folded instructions. @@ -686,6 +702,7 @@ template Result instrs(Ctx& ctx) { if (auto blockinst = foldedBlockinstr(ctx)) { CHECK_ERR(blockinst); ctx.appendInstr(insts, *blockinst); + parsedFolded = true; } else if (ctx.in.takeLParen()) { foldedInstrs.push_back({ctx.in.getPos(), {}}); } else if (ctx.in.takeRParen()) { @@ -697,6 +714,7 @@ template Result instrs(Ctx& ctx) { if (auto inst = plaininstr(ctx)) { CHECK_ERR(inst); ctx.appendInstr(insts, *inst); + parsedFolded = true; } else { return ctx.in.err(start, "expected folded instruction"); } @@ -708,9 +726,18 @@ template Result instrs(Ctx& ctx) { WASM_UNREACHABLE("expected paren"); } } + if (requireFolded) { + // Do not continue to parse another sibling folded instruction. + break; + } continue; } + if (requireFolded) { + // Do not continue to parse a non-folded instruction. + break; + } + // A non-folded instruction. if (auto inst = instr(ctx)) { CHECK_ERR(inst); @@ -720,9 +747,17 @@ template Result instrs(Ctx& ctx) { } } + if (requireFolded && !parsedFolded) { + return ctx.in.err("expected folded instructions"); + } + return ctx.finishInstrs(insts); } +template Result foldedinstrs(Ctx& ctx) { + return instrs(ctx, true); +} + template Result expr(Ctx& ctx) { auto insts = instrs(ctx); CHECK_ERR(insts); @@ -807,7 +842,81 @@ MaybeResult block(Ctx& ctx, bool folded) { } } - return ctx.finishBlock(pos, std::move(*insts)); + return ctx.visitEnd(pos, std::move(*insts)); +} + +// if ::= 'if' label blocktype instr1* ('else' id1? instr2*)? 'end' id2? +// | '(' 'if' label blocktype instr* '(' 'then' instr1* ')' +// ('(' 'else' instr2* ')')? ')' +template +MaybeResult ifelse(Ctx& ctx, bool folded) { + auto pos = ctx.in.getPos(); + + if (folded) { + if (!ctx.in.takeSExprStart("if"sv)) { + return {}; + } + } else { + if (!ctx.in.takeKeyword("if"sv)) { + return {}; + } + } + + auto label = ctx.in.takeID(); + + auto type = blocktype(ctx); + CHECK_ERR(type); + + if (folded) { + CHECK_ERR(foldedinstrs(ctx)); + } + + ctx.makeIf(pos, label, *type); + + if (folded && !ctx.in.takeSExprStart("then"sv)) { + return ctx.in.err("expected 'then' before if instructions"); + } + + auto insts = instrs(ctx); + CHECK_ERR(insts); + + if (folded && !ctx.in.takeRParen()) { + return ctx.in.err("expected ')' at end of then block"); + } + + if ((folded && ctx.in.takeSExprStart("else"sv)) || + (!folded && ctx.in.takeKeyword("else"sv))) { + auto id1 = ctx.in.takeID(); + if (id1 && id1 != label) { + return ctx.in.err("else label does not match if label"); + } + + ctx.visitElse(pos); + + auto elseInsts = instrs(ctx); + CHECK_ERR(elseInsts); + + if (folded && !ctx.in.takeRParen()) { + return ctx.in.err("expected ')' at end of else block"); + } + } + + if (folded) { + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected ')' at end of if"); + } + } else { + if (!ctx.in.takeKeyword("end"sv)) { + return ctx.in.err("expected 'end' at end of if"); + } + } + + auto id2 = ctx.in.takeID(); + if (id2 && id2 != label) { + return ctx.in.err("end label does not match if label"); + } + + return ctx.visitEnd(pos, std::move(*insts)); } template @@ -896,11 +1005,6 @@ Result makeBlock(Ctx& ctx, Index pos) { return ctx.in.err("unimplemented instruction"); } -template -Result makeThenOrElse(Ctx& ctx, Index pos) { - return ctx.in.err("unimplemented instruction"); -} - template Result makeConst(Ctx& ctx, Index pos, Type type) { assert(type.isBasic()); diff --git a/src/wasm-ir-builder.h b/src/wasm-ir-builder.h index 1a19f62e8..c7185ebf0 100644 --- a/src/wasm-ir-builder.h +++ b/src/wasm-ir-builder.h @@ -52,6 +52,8 @@ public: // the corresponding `makeXYZ` function below instead of `visitXYZStart`, but // either way must call `visitEnd` and friends at the appropriate times. [[nodiscard]] Result<> visitBlockStart(Block* block); + [[nodiscard]] Result<> visitIfStart(If* iff, Name label = {}); + [[nodiscard]] Result<> visitElse(); [[nodiscard]] Result<> visitEnd(); // Alternatively, call makeXYZ to have the IRBuilder allocate the nodes. This @@ -59,7 +61,7 @@ public: // ensure that there are no missing fields. [[nodiscard]] Result<> makeNop(); [[nodiscard]] Result<> makeBlock(Name label, Type type); - // [[nodiscard]] Result<> makeIf(); + [[nodiscard]] Result<> makeIf(Name label, Type type); // [[nodiscard]] Result<> makeLoop(); // [[nodiscard]] Result<> makeBreak(); // [[nodiscard]] Result<> makeSwitch(); @@ -184,25 +186,104 @@ private: // The context for a single block scope, including the instructions parsed // inside that scope so far and the ultimate result type we expect this block // to have. - struct BlockCtx { + struct ScopeCtx { + struct NoScope {}; + struct BlockScope { + Block* block; + }; + struct IfScope { + If* iff; + Name label; + }; + struct ElseScope { + If* iff; + Name label; + }; + using Scope = std::variant; + + // The control flow structure we are building expressions for. + Scope scope; + std::vector exprStack; - Block* block; // Whether we have seen an unreachable instruction and are in // stack-polymorphic unreachable mode. bool unreachable = false; + + ScopeCtx() : scope(NoScope{}) {} + ScopeCtx(Scope scope) : scope(scope) {} + + static ScopeCtx makeBlock(Block* block) { + return ScopeCtx(BlockScope{block}); + } + static ScopeCtx makeIf(If* iff, Name label = {}) { + return ScopeCtx(IfScope{iff, label}); + } + static ScopeCtx makeElse(If* iff, Name label = {}) { + return ScopeCtx(ElseScope{iff, label}); + } + + bool isNone() { return std::get_if(&scope); } + Block* getBlock() { + if (auto* blockScope = std::get_if(&scope)) { + return blockScope->block; + } + return nullptr; + } + If* getIf() { + if (auto* ifScope = std::get_if(&scope)) { + return ifScope->iff; + } + return nullptr; + } + If* getElse() { + if (auto* elseScope = std::get_if(&scope)) { + return elseScope->iff; + } + return nullptr; + } + Type getResultType() { + if (auto* block = getBlock()) { + return block->type; + } + if (auto* iff = getIf()) { + return iff->type; + } + if (auto* iff = getElse()) { + return iff->type; + } + WASM_UNREACHABLE("unexpected scope kind"); + } + Name getLabel() { + if (auto* block = getBlock()) { + return block->name; + } + if (auto* ifScope = std::get_if(&scope)) { + return ifScope->label; + } + if (auto* elseScope = std::get_if(&scope)) { + return elseScope->label; + } + WASM_UNREACHABLE("unexpected scope kind"); + } }; // The stack of block contexts currently being parsed. - std::vector scopeStack; + std::vector scopeStack; - BlockCtx& getScope() { + ScopeCtx& getScope() { if (scopeStack.empty()) { // We are not in a block context, so push a dummy scope. - scopeStack.push_back({{}, nullptr}); + scopeStack.push_back({}); } return scopeStack.back(); } + // Collect the current scope into a single expression. If it has multiple + // top-level expressions, this requires collecting them into a block. If we + // are in a block context, we can collect them directly into the destination + // `block`, but otherwise we will have to allocate a new block. + Result finishScope(Block* block = nullptr); + [[nodiscard]] Result addScratchLocal(Type); [[nodiscard]] Result pop(); void push(Expression*); diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index bc3180423..b8d742ae8 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -172,7 +172,7 @@ Result IRBuilder::build() { if (scopeStack.empty()) { return builder.makeNop(); } - if (scopeStack.size() > 1 || scopeStack.back().block != nullptr) { + if (scopeStack.size() > 1 || !scopeStack.back().isNone()) { return Err{"unfinished block context"}; } if (scopeStack.back().exprStack.size() > 1) { @@ -282,18 +282,26 @@ Result<> IRBuilder::visitArrayNew(ArrayNew* curr) { } Result<> IRBuilder::visitBlockStart(Block* curr) { - scopeStack.push_back({{}, curr}); + scopeStack.push_back(ScopeCtx::makeBlock(curr)); return Ok{}; } -Result<> IRBuilder::visitEnd() { - if (scopeStack.empty() || !scopeStack.back().block) { - return Err{"unexpected end"}; +Result<> IRBuilder::visitIfStart(If* iff, Name label) { + auto cond = pop(); + CHECK_ERR(cond); + iff->condition = *cond; + scopeStack.push_back(ScopeCtx::makeIf(iff, label)); + return Ok{}; +} + +Result IRBuilder::finishScope(Block* block) { + if (scopeStack.empty() || scopeStack.back().isNone()) { + return Err{"unexpected end of scope"}; } auto& scope = scopeStack.back(); - Block* block = scope.block; - if (block->type.isTuple()) { + auto type = scope.getResultType(); + if (type.isTuple()) { if (scope.unreachable) { // We may not have enough concrete values on the stack to construct the // full tuple, and if we tried to fill out the beginning of a tuple.make @@ -328,18 +336,98 @@ Result<> IRBuilder::visitEnd() { scope.exprStack.push_back(builder.makeTupleMake(std::move(elems))); } } - } else if (block->type.isConcrete()) { + } else if (type.isConcrete()) { // If the value is buried in none-typed expressions, we have to bring it to // the top. auto hoisted = hoistLastValue(); CHECK_ERR(hoisted); } - block->list.set(scope.exprStack); - // TODO: Track branches so we can know whether this block is a target and - // finalize more efficiently. - block->finalize(block->type); + Expression* ret = nullptr; + if (scope.exprStack.size() == 0) { + // No expressions for this scope, but we need something. If we were given a + // block, we can empty it out and return it, but otherwise we need a nop. + if (block) { + block->list.clear(); + ret = block; + } else { + ret = builder.makeNop(); + } + } else if (scope.exprStack.size() == 1) { + // We can put our single expression directly into the surrounding scope. + if (block) { + block->list.resize(1); + block->list[0] = scope.exprStack.back(); + ret = block; + } else { + ret = scope.exprStack.back(); + } + } else { + // More than one expression, so we need a block. Allocate one if we weren't + // already given one. + if (!block) { + block = wasm.allocator.alloc(); + block->type = type; + } + block->list.set(scope.exprStack); + ret = block; + } scopeStack.pop_back(); - push(block); + return ret; +} + +Result<> IRBuilder::visitElse() { + auto& scope = getScope(); + auto* iff = scope.getIf(); + if (!iff) { + return Err{"unexpected else"}; + } + auto label = scope.getLabel(); + auto expr = finishScope(); + CHECK_ERR(expr); + iff->ifTrue = *expr; + scopeStack.push_back(ScopeCtx::makeElse(iff, label)); + return Ok{}; +} + +Result<> IRBuilder::visitEnd() { + auto& scope = getScope(); + if (scope.isNone()) { + return Err{"unexpected end"}; + } + if (auto* block = scope.getBlock()) { + auto expr = finishScope(block); + CHECK_ERR(expr); + assert(*expr == block); + // TODO: Track branches so we can know whether this block is a target and + // finalize more efficiently. + block->finalize(block->type); + push(block); + return Ok{}; + } + auto label = scope.getLabel(); + Expression* scopeExpr = nullptr; + if (auto* iff = scope.getIf()) { + auto expr = finishScope(); + CHECK_ERR(expr); + iff->ifTrue = *expr; + iff->ifFalse = nullptr; + iff->finalize(); + scopeExpr = iff; + } else if (auto* iff = scope.getElse()) { + auto expr = finishScope(); + CHECK_ERR(expr); + iff->ifFalse = *expr; + iff->finalize(); + scopeExpr = iff; + } + assert(scopeExpr && "unexpected scope kind"); + if (label) { + // We cannot directly name an If in Binaryen IR, so we need to wrap it in + // a block. + push(builder.makeBlock(label, {scopeExpr}, scopeExpr->type)); + } else { + push(scopeExpr); + } return Ok{}; } @@ -352,8 +440,13 @@ Result<> IRBuilder::makeBlock(Name label, Type type) { auto* block = wasm.allocator.alloc(); block->name = label; block->type = type; - CHECK_ERR(visitBlockStart(block)); - return Ok{}; + return visitBlockStart(block); +} + +Result<> IRBuilder::makeIf(Name label, Type type) { + auto* iff = wasm.allocator.alloc(); + iff->type = type; + return visitIfStart(iff, label); } // Result<> IRBuilder::makeIf() {} -- cgit v1.2.3