[Parser] Parse annotations, including source map comments (#6345)

Parse annotations using the standards-track `(@annotation ...)` format as well as the `;;@ source-map:0:1` format. Have the lexer implicitly collect annotations while it skips whitespace, and add lexer APIs to access the annotations collected since the last token was parsed. Collect annotations before parsing each instruction and pass the annotations explicitly to the parser and parser context functions for instructions. Add an API to `IRBuilder` to set a debug location to be attached to the next visited or created instruction and use it from the parser.
author: Thomas Lively <tlively@google.com> 2024-02-26 17:55:18 -0800
committer: GitHub <noreply@github.com> 2024-02-26 17:55:18 -0800
commit: f8b07f75996b34142450435c75a811aa946a6d3b (patch)
tree: a6f2c9b0c602acf5e1b07acde2c3c491d0f91076 /src/parser/lexer.cpp
parent: 55c206216ea93bd84de8f68b81fd903724006b50 (diff)
download: binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.gz
binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.bz2
binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.zip
1 file changed, 150 insertions, 3 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
index 07931d69b..8c7542dd7 100644
--- a/src/parser/lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -28,6 +28,8 @@ using namespace std::string_view_literals;
 
 namespace wasm::WATParser {
 
+Name srcAnnotationKind("src");
+
 namespace {
 
 // ================
@@ -348,6 +350,47 @@ struct LexIdCtx : LexCtx {
   }
 };
 
+struct LexAnnotationResult : LexResult {
+  Annotation annotation;
+};
+
+struct LexAnnotationCtx : LexCtx {
+  std::string_view kind;
+  size_t kindSize = 0;
+  std::string_view contents;
+  size_t contentsSize = 0;
+
+  explicit LexAnnotationCtx(std::string_view in) : LexCtx(in) {}
+
+  void startKind() { kind = next(); }
+
+  void takeKind(size_t size) {
+    kindSize += size;
+    take(size);
+  }
+
+  void setKind(std::string_view kind) {
+    this->kind = kind;
+    kindSize = kind.size();
+  }
+
+  void startContents() { contents = next(); }
+
+  void takeContents(size_t size) {
+    contentsSize += size;
+    take(size);
+  }
+
+  std::optional<LexAnnotationResult> lexed() {
+    if (auto basic = LexCtx::lexed()) {
+      return LexAnnotationResult{
+        *basic,
+        {Name(kind.substr(0, kindSize)), contents.substr(0, contentsSize)}};
+    }
+    return std::nullopt;
+  }
+};
+
 std::optional<LexResult> lparen(std::string_view in) {
   LexCtx ctx(in);
   ctx.takePrefix("("sv);
@@ -360,6 +403,101 @@ std::optional<LexResult> rparen(std::string_view in) {
   return ctx.lexed();
 }
 
+std::optional<LexResult> idchar(std::string_view);
+std::optional<LexResult> space(std::string_view);
+std::optional<LexResult> keyword(std::string_view);
+std::optional<LexIntResult> integer(std::string_view);
+std::optional<LexFloatResult> float_(std::string_view);
+std::optional<LexStrResult> str(std::string_view);
+std::optional<LexIdResult> ident(std::string_view);
+
+// annotation ::= ';;@' [^\n]* | '(@'idchar+ annotelem* ')'
+// annotelem  ::= keyword | reserved | uN | sN | fN | string | id
+//              | '(' annotelem* ')' | '(@'idchar+ annotelem* ')'
+std::optional<LexAnnotationResult> annotation(std::string_view in) {
+  LexAnnotationCtx ctx(in);
+  if (ctx.takePrefix(";;@"sv)) {
+    ctx.setKind(srcAnnotationKind.str);
+    ctx.startContents();
+    if (auto size = ctx.next().find('\n'); size != ""sv.npos) {
+      ctx.takeContents(size);
+    } else {
+      ctx.takeContents(ctx.next().size());
+    }
+  } else if (ctx.takePrefix("(@"sv)) {
+    ctx.startKind();
+    bool hasIdchar = false;
+    while (auto lexed = idchar(ctx.next())) {
+      ctx.takeKind(1);
+      hasIdchar = true;
+    }
+    if (!hasIdchar) {
+      return std::nullopt;
+    }
+    ctx.startContents();
+    size_t depth = 1;
+    while (true) {
+      if (ctx.empty()) {
+        return std::nullopt;
+      }
+      if (auto lexed = space(ctx.next())) {
+        ctx.takeContents(lexed->span.size());
+        continue;
+      }
+      if (auto lexed = keyword(ctx.next())) {
+        ctx.takeContents(lexed->span.size());
+        continue;
+      }
+      if (auto lexed = integer(ctx.next())) {
+        ctx.takeContents(lexed->span.size());
+        continue;
+      }
+      if (auto lexed = float_(ctx.next())) {
+        ctx.takeContents(lexed->span.size());
+        continue;
+      }
+      if (auto lexed = str(ctx.next())) {
+        ctx.takeContents(lexed->span.size());
+        continue;
+      }
+      if (auto lexed = ident(ctx.next())) {
+        ctx.takeContents(lexed->span.size());
+        continue;
+      }
+      if (ctx.startsWith("(@"sv)) {
+        ctx.takeContents(2);
+        bool hasIdchar = false;
+        while (auto lexed = idchar(ctx.next())) {
+          ctx.takeContents(1);
+          hasIdchar = true;
+        }
+        if (!hasIdchar) {
+          return std::nullopt;
+        }
+        ++depth;
+        continue;
+      }
+      if (ctx.startsWith("("sv)) {
+        ctx.takeContents(1);
+        ++depth;
+        continue;
+      }
+      if (ctx.startsWith(")"sv)) {
+        --depth;
+        if (depth == 0) {
+          ctx.take(1);
+          break;
+        }
+        ctx.takeContents(1);
+        continue;
+      }
+      // Unrecognized token.
+      return std::nullopt;
+    }
+  }
+  return ctx.lexed();
+}
+
 // comment      ::= linecomment | blockcomment
 // linecomment  ::= ';;' linechar* ('\n' | eof)
 // linechar     ::= c:char                      (if c != '\n')
@@ -375,7 +513,7 @@ std::optional<LexResult> comment(std::string_view in) {
   }
 
   // Line comment
-  if (ctx.takePrefix(";;"sv)) {
+  if (!ctx.startsWith(";;@"sv) && ctx.takePrefix(";;"sv)) {
     if (auto size = ctx.next().find('\n'); size != ""sv.npos) {
       ctx.take(size);
     } else {
@@ -934,8 +1072,17 @@ std::optional<std::string_view> Token::getID() const {
 }
 
 void Lexer::skipSpace() {
-  if (auto ctx = space(next())) {
-    index += ctx->span.size();
+  while (true) {
+    if (auto ctx = annotation(next())) {
+      index += ctx->span.size();
+      annotations.push_back(ctx->annotation);
+      continue;
+    }
+    if (auto ctx = space(next())) {
+      index += ctx->span.size();
+      continue;
+    }
+    break;
   }
 }
author	Thomas Lively <tlively@google.com>	2024-02-26 17:55:18 -0800
committer	GitHub <noreply@github.com>	2024-02-26 17:55:18 -0800
commit	f8b07f75996b34142450435c75a811aa946a6d3b (patch)
tree	a6f2c9b0c602acf5e1b07acde2c3c491d0f91076 /src/parser/lexer.cpp
parent	55c206216ea93bd84de8f68b81fd903724006b50 (diff)
download	binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.gz binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.bz2 binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.zip