summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2022-10-11 10:23:40 -0500
committerGitHub <noreply@github.com>2022-10-11 08:23:40 -0700
commit6d4ac3162c290e32a98de349d49e26e904a40414 (patch)
treefa233520ed19f98700ab04462d8dbe75eefc353b /src
parent802cb0bc9407c3e1f46cf426873cdbfeed42edaf (diff)
downloadbinaryen-6d4ac3162c290e32a98de349d49e26e904a40414.tar.gz
binaryen-6d4ac3162c290e32a98de349d49e26e904a40414.tar.bz2
binaryen-6d4ac3162c290e32a98de349d49e26e904a40414.zip
[Parser] Parse folded instructions (#5124)
Parse folded expressions as described in the spec: https://webassembly.github.io/spec/core/text/instructions.html#folded-instructions. The old binaryen parser _only_ parses folded expressions, and furthermore requires them to be folded such that a parent instruction consumes the values produced by its children and only those values. The standard format is much more general and allows folded instructions to have an arbitrary number of children independent of dataflow. To prevent the rest of the parser from having to know or care about the difference between folded and unfolded instructions, parse folded instructions after their children have been parsed. This means that a sequence of instructions is always parsed in the order they would appear in a binary no matter how they are folded (or not folded).
Diffstat (limited to 'src')
-rw-r--r--src/wasm/wat-parser.cpp78
1 files changed, 75 insertions, 3 deletions
diff --git a/src/wasm/wat-parser.cpp b/src/wasm/wat-parser.cpp
index 9e843271f..80e537a0e 100644
--- a/src/wasm/wat-parser.cpp
+++ b/src/wasm/wat-parser.cpp
@@ -81,6 +81,10 @@ struct ParseInput {
lexer.setIndex(index);
}
+ ParseInput(const ParseInput& other, size_t index) : lexer(other.lexer) {
+ lexer.setIndex(index);
+ }
+
bool empty() { return lexer.empty(); }
std::optional<Token> peek() {
@@ -108,6 +112,19 @@ struct ParseInput {
return true;
}
+ bool takeUntilParen() {
+ while (true) {
+ auto t = peek();
+ if (!t) {
+ return false;
+ }
+ if (t->isLParen() || t->isRParen()) {
+ return true;
+ }
+ ++lexer;
+ }
+ }
+
std::optional<Name> takeID() {
if (auto t = peek()) {
if (auto id = t->getID()) {
@@ -1610,10 +1627,65 @@ MaybeResult<typename Ctx::InstrT> instr(Ctx& ctx, ParseInput& in) {
template<typename Ctx>
Result<typename Ctx::InstrsT> instrs(Ctx& ctx, ParseInput& in) {
auto insts = ctx.makeInstrs();
- while (auto inst = instr(ctx, in)) {
- CHECK_ERR(inst);
- ctx.appendInstr(insts, *inst);
+
+ while (true) {
+ if (in.takeLParen()) {
+ // A stack of (start, end) position pairs defining the positions of
+ // instructions that need to be parsed after their folded children.
+ std::vector<std::pair<Index, std::optional<Index>>> foldedInstrs;
+
+ // Begin a folded instruction. Push its start position and a placeholder
+ // end position.
+ foldedInstrs.push_back({in.getPos(), {}});
+ while (!foldedInstrs.empty()) {
+ // Consume everything up to the next paren. This span will be parsed as
+ // an instruction later after its folded children have been parsed.
+ if (!in.takeUntilParen()) {
+ return ParseInput(in, foldedInstrs.back().first)
+ .err("unterminated folded instruction");
+ }
+
+ if (!foldedInstrs.back().second) {
+ // The folded instruction we just started should end here.
+ foldedInstrs.back().second = in.getPos();
+ }
+
+ // We have either the start of a new folded child or the end of the last
+ // one.
+ if (in.takeLParen()) {
+ foldedInstrs.push_back({in.getPos(), {}});
+ } else if (in.takeRParen()) {
+ auto [start, end] = foldedInstrs.back();
+ assert(end && "Should have found end of instruction");
+ foldedInstrs.pop_back();
+
+ ParseInput foldedIn(in, start);
+ if (auto inst = instr(ctx, foldedIn)) {
+ CHECK_ERR(inst);
+ ctx.appendInstr(insts, *inst);
+ } else {
+ return foldedIn.err("expected folded instruction");
+ }
+
+ if (foldedIn.getPos() != *end) {
+ return foldedIn.err("expected end of instruction");
+ }
+ } else {
+ WASM_UNREACHABLE("expected paren");
+ }
+ }
+ continue;
+ }
+
+ // A non-folded instruction.
+ if (auto inst = instr(ctx, in)) {
+ CHECK_ERR(inst);
+ ctx.appendInstr(insts, *inst);
+ } else {
+ break;
+ }
}
+
return insts;
}