diff options
author | Thomas Lively <tlively@google.com> | 2022-10-11 10:23:40 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-11 08:23:40 -0700 |
commit | 6d4ac3162c290e32a98de349d49e26e904a40414 (patch) | |
tree | fa233520ed19f98700ab04462d8dbe75eefc353b /src | |
parent | 802cb0bc9407c3e1f46cf426873cdbfeed42edaf (diff) | |
download | binaryen-6d4ac3162c290e32a98de349d49e26e904a40414.tar.gz binaryen-6d4ac3162c290e32a98de349d49e26e904a40414.tar.bz2 binaryen-6d4ac3162c290e32a98de349d49e26e904a40414.zip |
[Parser] Parse folded instructions (#5124)
Parse folded expressions as described in the spec:
https://webassembly.github.io/spec/core/text/instructions.html#folded-instructions.
The old binaryen parser _only_ parses folded expressions, and furthermore
requires them to be folded such that a parent instruction consumes the values
produced by its children and only those values. The standard format is much more
general and allows folded instructions to have an arbitrary number of children
independent of dataflow.
To prevent the rest of the parser from having to know or care about the
difference between folded and unfolded instructions, parse folded instructions
after their children have been parsed. This means that a sequence of
instructions is always parsed in the order they would appear in a binary no
matter how they are folded (or not folded).
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm/wat-parser.cpp | 78 |
1 files changed, 75 insertions, 3 deletions
diff --git a/src/wasm/wat-parser.cpp b/src/wasm/wat-parser.cpp index 9e843271f..80e537a0e 100644 --- a/src/wasm/wat-parser.cpp +++ b/src/wasm/wat-parser.cpp @@ -81,6 +81,10 @@ struct ParseInput { lexer.setIndex(index); } + ParseInput(const ParseInput& other, size_t index) : lexer(other.lexer) { + lexer.setIndex(index); + } + bool empty() { return lexer.empty(); } std::optional<Token> peek() { @@ -108,6 +112,19 @@ struct ParseInput { return true; } + bool takeUntilParen() { + while (true) { + auto t = peek(); + if (!t) { + return false; + } + if (t->isLParen() || t->isRParen()) { + return true; + } + ++lexer; + } + } + std::optional<Name> takeID() { if (auto t = peek()) { if (auto id = t->getID()) { @@ -1610,10 +1627,65 @@ MaybeResult<typename Ctx::InstrT> instr(Ctx& ctx, ParseInput& in) { template<typename Ctx> Result<typename Ctx::InstrsT> instrs(Ctx& ctx, ParseInput& in) { auto insts = ctx.makeInstrs(); - while (auto inst = instr(ctx, in)) { - CHECK_ERR(inst); - ctx.appendInstr(insts, *inst); + + while (true) { + if (in.takeLParen()) { + // A stack of (start, end) position pairs defining the positions of + // instructions that need to be parsed after their folded children. + std::vector<std::pair<Index, std::optional<Index>>> foldedInstrs; + + // Begin a folded instruction. Push its start position and a placeholder + // end position. + foldedInstrs.push_back({in.getPos(), {}}); + while (!foldedInstrs.empty()) { + // Consume everything up to the next paren. This span will be parsed as + // an instruction later after its folded children have been parsed. + if (!in.takeUntilParen()) { + return ParseInput(in, foldedInstrs.back().first) + .err("unterminated folded instruction"); + } + + if (!foldedInstrs.back().second) { + // The folded instruction we just started should end here. + foldedInstrs.back().second = in.getPos(); + } + + // We have either the start of a new folded child or the end of the last + // one. + if (in.takeLParen()) { + foldedInstrs.push_back({in.getPos(), {}}); + } else if (in.takeRParen()) { + auto [start, end] = foldedInstrs.back(); + assert(end && "Should have found end of instruction"); + foldedInstrs.pop_back(); + + ParseInput foldedIn(in, start); + if (auto inst = instr(ctx, foldedIn)) { + CHECK_ERR(inst); + ctx.appendInstr(insts, *inst); + } else { + return foldedIn.err("expected folded instruction"); + } + + if (foldedIn.getPos() != *end) { + return foldedIn.err("expected end of instruction"); + } + } else { + WASM_UNREACHABLE("expected paren"); + } + } + continue; + } + + // A non-folded instruction. + if (auto inst = instr(ctx, in)) { + CHECK_ERR(inst); + ctx.appendInstr(insts, *inst); + } else { + break; + } } + return insts; } |