diff options
author | Thomas Lively <tlively@google.com> | 2024-01-03 12:55:00 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-03 12:55:00 -0800 |
commit | a6bc9542e98b4164d3a26c67c94b1136b4fc8b86 (patch) | |
tree | 50c3408c4a9a0d9a6cef73780364caff8826fe90 /src | |
parent | 260fdfcdaaeba3f4a47ef057db28c61203c8d3b1 (diff) | |
download | binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.tar.gz binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.tar.bz2 binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.zip |
[Parser] Parse folded instructions that contain parentheses (#6196)
To parse folded instructions in the right order, we need to defer parsing each
instruction until we have parsed each of its children and found its closing
parenthesis. Previously we naively scanned for parentheses to determine where
instructions began and ended before parsing them, but that scheme did not
correctly handle instructions whose immediates can themselves contain
parentheses, such as call_indirect: in `(call_indirect (type $t) ...)`, the
`(type $t)` is an immediate, not a folded child instruction.
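Fix the problem by running the actual instruction parser functions with a
placeholder context (NullCtx) to find where each instruction ends, including
any immediates it might have. The instruction is then parsed for real with the
real context once its folded children have been parsed and its closing
parenthesis has been reached.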
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/contexts.h | 8 | ||||
-rw-r--r-- | src/parser/parsers.h | 81 |
2 files changed, 51 insertions, 38 deletions
diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 503f4dc3a..2565a4817 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -517,6 +517,14 @@ struct NullInstrParserCtx { Result<> makeStringSliceIter(Index) { return Ok{}; } }; +struct NullCtx : NullTypeParserCtx, NullInstrParserCtx { + ParseInput in; + NullCtx(const ParseInput& in) : in(in) {} + Result<> makeTypeUse(Index, std::optional<HeapTypeT>, ParamsT*, ResultsT*) { + return Ok{}; + } +}; + // Phase 1: Parse definition spans for top-level module elements and determine // their indices and names. struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { diff --git a/src/parser/parsers.h b/src/parser/parsers.h index 6dd617309..9302f4fc9 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -18,6 +18,7 @@ #define parser_parsers_h #include "common.h" +#include "contexts.h" #include "input.h" namespace wasm::WATParser { @@ -706,14 +707,13 @@ template<typename Ctx> MaybeResult<> instr(Ctx& ctx) { } template<typename Ctx> MaybeResult<> foldedinstr(Ctx& ctx) { - // Check for valid strings that are not instructions. - if (ctx.in.peekSExprStart("then"sv) || ctx.in.peekSExprStart("else")) { + // We must have an '(' to start a folded instruction. + if (auto tok = ctx.in.peek(); !tok || !tok->isLParen()) { return {}; } - if (auto inst = foldedBlockinstr(ctx)) { - return inst; - } - if (!ctx.in.takeLParen()) { + + // Check for valid strings that look like folded instructions but are not. + if (ctx.in.peekSExprStart("then"sv) || ctx.in.peekSExprStart("else")) { return {}; } @@ -721,47 +721,52 @@ template<typename Ctx> MaybeResult<> foldedinstr(Ctx& ctx) { // instructions that need to be parsed after their folded children. std::vector<std::pair<Index, std::optional<Index>>> foldedInstrs; - // Begin a folded instruction. Push its start position and a placeholder - // end position. 
- foldedInstrs.push_back({ctx.in.getPos(), {}}); - while (!foldedInstrs.empty()) { - // Consume everything up to the next paren. This span will be parsed as - // an instruction later after its folded children have been parsed. - if (!ctx.in.takeUntilParen()) { - return ctx.in.err(foldedInstrs.back().first, - "unterminated folded instruction"); - } + do { + if (ctx.in.takeRParen()) { + // We've reached the end of a folded instruction. Parse it for real. + auto [start, end] = foldedInstrs.back(); + if (!end) { + return ctx.in.err("unexpected end of folded instruction"); + } + foldedInstrs.pop_back(); - if (!foldedInstrs.back().second) { - // The folded instruction we just started should end here. - foldedInstrs.back().second = ctx.in.getPos(); + WithPosition with(ctx, start); + auto inst = plaininstr(ctx); + assert(inst && "unexpectedly failed to parse instruction"); + CHECK_ERR(inst); + assert(ctx.in.getPos() == *end && "expected end of instruction"); + continue; } - // We have either the start of a new folded child or the end of the last - // one. + // We're not ending an instruction, so we must be starting a new one. Maybe + // it is a block instruction. if (auto blockinst = foldedBlockinstr(ctx)) { CHECK_ERR(blockinst); - } else if (ctx.in.takeLParen()) { - foldedInstrs.push_back({ctx.in.getPos(), {}}); - } else if (ctx.in.takeRParen()) { - auto [start, end] = foldedInstrs.back(); - assert(end && "Should have found end of instruction"); - foldedInstrs.pop_back(); + continue; + } - WithPosition with(ctx, start); - if (auto inst = plaininstr(ctx)) { - CHECK_ERR(inst); - } else { - return ctx.in.err(start, "expected folded instruction"); - } + // We must be starting a new plain instruction. 
+ if (!ctx.in.takeLParen()) { + return ctx.in.err("expected folded instruction"); + } + foldedInstrs.push_back({ctx.in.getPos(), {}}); - if (ctx.in.getPos() != *end) { - return ctx.in.err("expected end of instruction"); - } + // Consume the span for the instruction without meaningfully parsing it yet. + // It will be parsed for real using the real context after its s-expression + // children have been found and parsed. + NullCtx nullCtx(ctx.in); + if (auto inst = plaininstr(nullCtx)) { + CHECK_ERR(inst); + ctx.in = nullCtx.in; } else { - WASM_UNREACHABLE("expected paren"); + return ctx.in.err("expected instruction"); } - } + + // The folded instruction we just started ends here. + assert(!foldedInstrs.back().second); + foldedInstrs.back().second = ctx.in.getPos(); + } while (!foldedInstrs.empty()); + return Ok{}; } |