summaryrefslogtreecommitdiff
path: root/src/parser/lexer.cpp
diff options
context:
space:
mode:
author Thomas Lively <tlively@google.com> 2024-02-26 17:55:18 -0800
committer GitHub <noreply@github.com> 2024-02-26 17:55:18 -0800
commit f8b07f75996b34142450435c75a811aa946a6d3b (patch)
tree a6f2c9b0c602acf5e1b07acde2c3c491d0f91076 /src/parser/lexer.cpp
parent 55c206216ea93bd84de8f68b81fd903724006b50 (diff)
download binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.gz
binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.bz2
binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.zip
[Parser] Parse annotations, including source map comments (#6345)
Parse annotations using the standards-track `(@annotation ...)` format as well as the `;;@ source-map:0:1` format. Have the lexer implicitly collect annotations while it skips whitespace and add lexer APIs to access the annotations since the last token was parsed. Collect annotations before parsing each instruction and pass the annotations explicitly to the parser and parser context functions for instructions. Add an API to `IRBuilder` to set a debug location to be attached to the next visited or created instruction and use it from the parser.
Diffstat (limited to 'src/parser/lexer.cpp')
-rw-r--r-- src/parser/lexer.cpp 153
1 files changed, 150 insertions, 3 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
index 07931d69b..8c7542dd7 100644
--- a/src/parser/lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -28,6 +28,8 @@ using namespace std::string_view_literals;
namespace wasm::WATParser {
+Name srcAnnotationKind("src"); // Kind given to annotations lexed from ';;@' line comments (see annotation()).
+
namespace {
// ================
@@ -348,6 +350,47 @@ struct LexIdCtx : LexCtx {
}
};
+struct LexAnnotationResult : LexResult { // Result of lexing an annotation: the base LexResult plus the annotation itself.
+ Annotation annotation; // Kind and contents, as assembled by LexAnnotationCtx::lexed().
+};
+
+struct LexAnnotationCtx : LexCtx { // Lexing context that additionally tracks an annotation's kind and contents.
+ std::string_view kind; // View whose first kindSize chars are the annotation kind.
+ size_t kindSize = 0; // Length of the kind within `kind`.
+ std::string_view contents; // View whose first contentsSize chars are the annotation contents.
+ size_t contentsSize = 0; // Length of the contents within `contents`.
+
+ explicit LexAnnotationCtx(std::string_view in) : LexCtx(in) {}
+
+ void startKind() { kind = next(); } // The kind begins at next() (presumably the unconsumed input -- confirm in LexCtx).
+
+ void takeKind(size_t size) { // Consume `size` chars and count them toward the kind.
+ kindSize += size;
+ take(size);
+ }
+
+ void setKind(std::string_view kind) { // Use a kind supplied by the caller rather than read from the input (used for ';;@').
+ this->kind = kind;
+ kindSize = kind.size();
+ }
+
+ void startContents() { contents = next(); } // The contents begin at next() (same assumption as startKind).
+
+ void takeContents(size_t size) { // Consume `size` chars and count them toward the contents.
+ contentsSize += size;
+ take(size);
+ }
+
+ std::optional<LexAnnotationResult> lexed() { // Build the annotation result, or nullopt when LexCtx::lexed() has none.
+ if (auto basic = LexCtx::lexed()) {
+ return LexAnnotationResult{
+ *basic,
+ {Name(kind.substr(0, kindSize)), contents.substr(0, contentsSize)}}; // Trim both views to their recorded lengths.
+ }
+ return std::nullopt;
+ }
+};
+
std::optional<LexResult> lparen(std::string_view in) {
LexCtx ctx(in);
ctx.takePrefix("("sv);
@@ -360,6 +403,101 @@ std::optional<LexResult> rparen(std::string_view in) {
return ctx.lexed();
}
+std::optional<LexResult> idchar(std::string_view); // Forward declarations of the lexers that annotation() below calls.
+std::optional<LexResult> space(std::string_view);
+std::optional<LexResult> keyword(std::string_view);
+std::optional<LexIntResult> integer(std::string_view);
+std::optional<LexFloatResult> float_(std::string_view);
+std::optional<LexStrResult> str(std::string_view);
+std::optional<LexIdResult> ident(std::string_view);
+
+// annotation ::= ';;@' [^\n]* | '(@'idchar+ annotelem* ')'
+// annotelem ::= keyword | reserved | uN | sN | fN | string | id
+// | '(' annotelem* ')' | '(@'idchar+ annotelem* ')'
+std::optional<LexAnnotationResult> annotation(std::string_view in) { // Lex one annotation at the start of `in`; nullopt if it is malformed (and presumably if none is present -- depends on LexCtx::lexed()).
+ LexAnnotationCtx ctx(in);
+ if (ctx.takePrefix(";;@"sv)) { // Line-comment form: ';;@' followed by the rest of the line.
+ ctx.setKind(srcAnnotationKind.str);
+ ctx.startContents();
+ if (auto size = ctx.next().find('\n'); size != ""sv.npos) { // Contents run up to, but not including, the newline...
+ ctx.takeContents(size);
+ } else {
+ ctx.takeContents(ctx.next().size()); // ...or to the end of the input when there is no newline.
+ }
+ } else if (ctx.takePrefix("(@"sv)) { // Parenthesized form: '(@' idchar+ annotelem* ')'.
+ ctx.startKind();
+ bool hasIdchar = false;
+ while (auto lexed = idchar(ctx.next())) {
+ ctx.takeKind(1); // Each idchar match is consumed as one character of the kind.
+ hasIdchar = true;
+ }
+ if (!hasIdchar) { // The kind must contain at least one idchar.
+ return std::nullopt;
+ }
+ ctx.startContents();
+ size_t depth = 1; // Open-paren nesting depth; 1 accounts for the opening '(@'.
+ while (true) {
+ if (ctx.empty()) { // Input ended before the closing ')'.
+ return std::nullopt;
+ }
+ if (auto lexed = space(ctx.next())) { // Anything space() or an element lexer below matches becomes part of the contents.
+ ctx.takeContents(lexed->span.size());
+ continue;
+ }
+ if (auto lexed = keyword(ctx.next())) {
+ ctx.takeContents(lexed->span.size());
+ continue;
+ }
+ if (auto lexed = integer(ctx.next())) {
+ ctx.takeContents(lexed->span.size());
+ continue;
+ }
+ if (auto lexed = float_(ctx.next())) {
+ ctx.takeContents(lexed->span.size());
+ continue;
+ }
+ if (auto lexed = str(ctx.next())) {
+ ctx.takeContents(lexed->span.size());
+ continue;
+ }
+ if (auto lexed = ident(ctx.next())) {
+ ctx.takeContents(lexed->span.size());
+ continue;
+ }
+ if (ctx.startsWith("(@"sv)) { // Nested annotation opener: needs at least one idchar and adds a nesting level.
+ ctx.takeContents(2);
+ bool hasIdchar = false;
+ while (auto lexed = idchar(ctx.next())) {
+ ctx.takeContents(1);
+ hasIdchar = true;
+ }
+ if (!hasIdchar) {
+ return std::nullopt;
+ }
+ ++depth;
+ continue;
+ }
+ if (ctx.startsWith("("sv)) { // A plain '(' also adds a nesting level.
+ ctx.takeContents(1);
+ ++depth;
+ continue;
+ }
+ if (ctx.startsWith(")"sv)) {
+ --depth;
+ if (depth == 0) { // The final ')' ends the annotation: it is consumed but kept out of the contents.
+ ctx.take(1);
+ break;
+ }
+ ctx.takeContents(1); // An inner ')' stays part of the contents.
+ continue;
+ }
+ // Unrecognized token.
+ return std::nullopt;
+ }
+ }
+ return ctx.lexed();
+}
+
// comment ::= linecomment | blockcomment
// linecomment ::= ';;' linechar* ('\n' | eof)
// linechar ::= c:char (if c != '\n')
@@ -375,7 +513,7 @@ std::optional<LexResult> comment(std::string_view in) {
}
// Line comment
- if (ctx.takePrefix(";;"sv)) {
+ if (!ctx.startsWith(";;@"sv) && ctx.takePrefix(";;"sv)) {
if (auto size = ctx.next().find('\n'); size != ""sv.npos) {
ctx.take(size);
} else {
@@ -934,8 +1072,17 @@ std::optional<std::string_view> Token::getID() const {
}
void Lexer::skipSpace() {
- if (auto ctx = space(next())) {
- index += ctx->span.size();
+ while (true) { // Repeat until neither an annotation nor anything space() matches is at the current position.
+ if (auto ctx = annotation(next())) { // Annotations are tried first; note comment() refuses ';;@' line comments.
+ index += ctx->span.size();
+ annotations.push_back(ctx->annotation); // Keep the annotation in `annotations` instead of discarding it.
+ continue;
+ }
+ if (auto ctx = space(next())) { // Otherwise skip whatever space() matches.
+ index += ctx->span.size();
+ continue;
+ }
+ break; // Nothing more to skip.
}
}