[Parser] Parse folded instructions that contain parentheses (#6196)

To parse folded instructions in the right order, we need to defer parsing each instruction until we have parsed each of its children and found its closing parenthesis. Previously, we naively scanned for parentheses to determine where instructions began and ended before parsing them, but that scheme did not correctly handle instructions that can contain parentheses in their immediates, such as `call_indirect`. Fix the problem by running the actual instruction parser functions with a placeholder context (`NullCtx`) to find the end of each instruction, including any kind of immediates it might have.
author: Thomas Lively <tlively@google.com> 2024-01-03 12:55:00 -0800
committer: GitHub <noreply@github.com> 2024-01-03 12:55:00 -0800
commit: a6bc9542e98b4164d3a26c67c94b1136b4fc8b86 (patch)
tree: 50c3408c4a9a0d9a6cef73780364caff8826fe90 /src
parent: 260fdfcdaaeba3f4a47ef057db28c61203c8d3b1 (diff)
download: binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.tar.gz
binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.tar.bz2
binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.zip
2 files changed, 51 insertions, 38 deletions
diff --git a/src/parser/contexts.h b/src/parser/contexts.h
index 503f4dc3a..2565a4817 100644
--- a/src/parser/contexts.h
+++ b/src/parser/contexts.h
@@ -517,6 +517,14 @@ struct NullInstrParserCtx {
   Result<> makeStringSliceIter(Index) { return Ok{}; }
 };
 
+struct NullCtx : NullTypeParserCtx, NullInstrParserCtx {
+  ParseInput in;
+  NullCtx(const ParseInput& in) : in(in) {}
+  Result<> makeTypeUse(Index, std::optional<HeapTypeT>, ParamsT*, ResultsT*) {
+    return Ok{};
+  }
+};
+
 // Phase 1: Parse definition spans for top-level module elements and determine
 // their indices and names.
 struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx {
diff --git a/src/parser/parsers.h b/src/parser/parsers.h
index 6dd617309..9302f4fc9 100644
--- a/src/parser/parsers.h
+++ b/src/parser/parsers.h
@@ -18,6 +18,7 @@
 #define parser_parsers_h
 
 #include "common.h"
+#include "contexts.h"
 #include "input.h"
 
 namespace wasm::WATParser {
@@ -706,14 +707,13 @@ template<typename Ctx> MaybeResult<> instr(Ctx& ctx) {
 }
 
 template<typename Ctx> MaybeResult<> foldedinstr(Ctx& ctx) {
-  // Check for valid strings that are not instructions.
-  if (ctx.in.peekSExprStart("then"sv) || ctx.in.peekSExprStart("else")) {
+  // We must have an '(' to start a folded instruction.
+  if (auto tok = ctx.in.peek(); !tok || !tok->isLParen()) {
     return {};
   }
-  if (auto inst = foldedBlockinstr(ctx)) {
-    return inst;
-  }
-  if (!ctx.in.takeLParen()) {
+
+  // Check for valid strings that look like folded instructions but are not.
+  if (ctx.in.peekSExprStart("then"sv) || ctx.in.peekSExprStart("else")) {
     return {};
   }
 
@@ -721,47 +721,52 @@ template<typename Ctx> MaybeResult<> foldedinstr(Ctx& ctx) {
   // instructions that need to be parsed after their folded children.
   std::vector<std::pair<Index, std::optional<Index>>> foldedInstrs;
 
-  // Begin a folded instruction. Push its start position and a placeholder
-  // end position.
-  foldedInstrs.push_back({ctx.in.getPos(), {}});
-  while (!foldedInstrs.empty()) {
-    // Consume everything up to the next paren. This span will be parsed as
-    // an instruction later after its folded children have been parsed.
-    if (!ctx.in.takeUntilParen()) {
-      return ctx.in.err(foldedInstrs.back().first,
-                        "unterminated folded instruction");
-    }
+  do {
+    if (ctx.in.takeRParen()) {
+      // We've reached the end of a folded instruction. Parse it for real.
+      auto [start, end] = foldedInstrs.back();
+      if (!end) {
+        return ctx.in.err("unexpected end of folded instruction");
+      }
+      foldedInstrs.pop_back();
 
-    if (!foldedInstrs.back().second) {
-      // The folded instruction we just started should end here.
-      foldedInstrs.back().second = ctx.in.getPos();
+      WithPosition with(ctx, start);
+      auto inst = plaininstr(ctx);
+      assert(inst && "unexpectedly failed to parse instruction");
+      CHECK_ERR(inst);
+      assert(ctx.in.getPos() == *end && "expected end of instruction");
+      continue;
     }
 
-    // We have either the start of a new folded child or the end of the last
-    // one.
+    // We're not ending an instruction, so we must be starting a new one. Maybe
+    // it is a block instruction.
     if (auto blockinst = foldedBlockinstr(ctx)) {
       CHECK_ERR(blockinst);
-    } else if (ctx.in.takeLParen()) {
-      foldedInstrs.push_back({ctx.in.getPos(), {}});
-    } else if (ctx.in.takeRParen()) {
-      auto [start, end] = foldedInstrs.back();
-      assert(end && "Should have found end of instruction");
-      foldedInstrs.pop_back();
+      continue;
+    }
 
-      WithPosition with(ctx, start);
-      if (auto inst = plaininstr(ctx)) {
-        CHECK_ERR(inst);
-      } else {
-        return ctx.in.err(start, "expected folded instruction");
-      }
+    // We must be starting a new plain instruction.
+    if (!ctx.in.takeLParen()) {
+      return ctx.in.err("expected folded instruction");
+    }
+    foldedInstrs.push_back({ctx.in.getPos(), {}});
 
-      if (ctx.in.getPos() != *end) {
-        return ctx.in.err("expected end of instruction");
-      }
+    // Consume the span for the instruction without meaningfully parsing it yet.
+    // It will be parsed for real using the real context after its s-expression
+    // children have been found and parsed.
+    NullCtx nullCtx(ctx.in);
+    if (auto inst = plaininstr(nullCtx)) {
+      CHECK_ERR(inst);
+      ctx.in = nullCtx.in;
     } else {
-      WASM_UNREACHABLE("expected paren");
+      return ctx.in.err("expected instruction");
     }
-  }
+
+    // The folded instruction we just started ends here.
+    assert(!foldedInstrs.back().second);
+    foldedInstrs.back().second = ctx.in.getPos();
+  } while (!foldedInstrs.empty());
+
   return Ok{};
 }
author	Thomas Lively <tlively@google.com>	2024-01-03 12:55:00 -0800
committer	GitHub <noreply@github.com>	2024-01-03 12:55:00 -0800
commit	a6bc9542e98b4164d3a26c67c94b1136b4fc8b86 (patch)
tree	50c3408c4a9a0d9a6cef73780364caff8826fe90 /src
parent	260fdfcdaaeba3f4a47ef057db28c61203c8d3b1 (diff)
download	binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.tar.gz binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.tar.bz2 binaryen-a6bc9542e98b4164d3a26c67c94b1136b4fc8b86.zip