diff options
author | Thomas Lively <tlively@google.com> | 2024-02-26 17:55:18 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-26 17:55:18 -0800 |
commit | f8b07f75996b34142450435c75a811aa946a6d3b (patch) | |
tree | a6f2c9b0c602acf5e1b07acde2c3c491d0f91076 /src/parser/lexer.cpp | |
parent | 55c206216ea93bd84de8f68b81fd903724006b50 (diff) | |
download | binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.gz binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.tar.bz2 binaryen-f8b07f75996b34142450435c75a811aa946a6d3b.zip |
[Parser] Parse annotations, including source map comments (#6345)
Parse annotations using the standards-track `(@annotation ...)` format as well
as the `;;@ source-map:0:1` format. Have the lexer implicitly collect
annotations while it skips whitespace and add lexer APIs to access the
annotations collected since the last token was parsed. Collect annotations before parsing
each instruction and pass the annotations explicitly to the parser and parser
context functions for instructions. Add an API to `IRBuilder` to set a debug
location to be attached to the next visited or created instruction and use it
from the parser.
Diffstat (limited to 'src/parser/lexer.cpp')
-rw-r--r-- | src/parser/lexer.cpp | 153 |
1 file changed, 150 insertions, 3 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp index 07931d69b..8c7542dd7 100644 --- a/src/parser/lexer.cpp +++ b/src/parser/lexer.cpp @@ -28,6 +28,8 @@ using namespace std::string_view_literals; namespace wasm::WATParser { +Name srcAnnotationKind("src"); + namespace { // ================ @@ -348,6 +350,47 @@ struct LexIdCtx : LexCtx { } }; +struct LexAnnotationResult : LexResult { + Annotation annotation; +}; + +struct LexAnnotationCtx : LexCtx { + std::string_view kind; + size_t kindSize = 0; + std::string_view contents; + size_t contentsSize = 0; + + explicit LexAnnotationCtx(std::string_view in) : LexCtx(in) {} + + void startKind() { kind = next(); } + + void takeKind(size_t size) { + kindSize += size; + take(size); + } + + void setKind(std::string_view kind) { + this->kind = kind; + kindSize = kind.size(); + } + + void startContents() { contents = next(); } + + void takeContents(size_t size) { + contentsSize += size; + take(size); + } + + std::optional<LexAnnotationResult> lexed() { + if (auto basic = LexCtx::lexed()) { + return LexAnnotationResult{ + *basic, + {Name(kind.substr(0, kindSize)), contents.substr(0, contentsSize)}}; + } + return std::nullopt; + } +}; + std::optional<LexResult> lparen(std::string_view in) { LexCtx ctx(in); ctx.takePrefix("("sv); @@ -360,6 +403,101 @@ std::optional<LexResult> rparen(std::string_view in) { return ctx.lexed(); } +std::optional<LexResult> idchar(std::string_view); +std::optional<LexResult> space(std::string_view); +std::optional<LexResult> keyword(std::string_view); +std::optional<LexIntResult> integer(std::string_view); +std::optional<LexFloatResult> float_(std::string_view); +std::optional<LexStrResult> str(std::string_view); +std::optional<LexIdResult> ident(std::string_view); + +// annotation ::= ';;@' [^\n]* | '(@'idchar+ annotelem* ')' +// annotelem ::= keyword | reserved | uN | sN | fN | string | id +// | '(' annotelem* ')' | '(@'idchar+ annotelem* ')' +std::optional<LexAnnotationResult> 
annotation(std::string_view in) { + LexAnnotationCtx ctx(in); + if (ctx.takePrefix(";;@"sv)) { + ctx.setKind(srcAnnotationKind.str); + ctx.startContents(); + if (auto size = ctx.next().find('\n'); size != ""sv.npos) { + ctx.takeContents(size); + } else { + ctx.takeContents(ctx.next().size()); + } + } else if (ctx.takePrefix("(@"sv)) { + ctx.startKind(); + bool hasIdchar = false; + while (auto lexed = idchar(ctx.next())) { + ctx.takeKind(1); + hasIdchar = true; + } + if (!hasIdchar) { + return std::nullopt; + } + ctx.startContents(); + size_t depth = 1; + while (true) { + if (ctx.empty()) { + return std::nullopt; + } + if (auto lexed = space(ctx.next())) { + ctx.takeContents(lexed->span.size()); + continue; + } + if (auto lexed = keyword(ctx.next())) { + ctx.takeContents(lexed->span.size()); + continue; + } + if (auto lexed = integer(ctx.next())) { + ctx.takeContents(lexed->span.size()); + continue; + } + if (auto lexed = float_(ctx.next())) { + ctx.takeContents(lexed->span.size()); + continue; + } + if (auto lexed = str(ctx.next())) { + ctx.takeContents(lexed->span.size()); + continue; + } + if (auto lexed = ident(ctx.next())) { + ctx.takeContents(lexed->span.size()); + continue; + } + if (ctx.startsWith("(@"sv)) { + ctx.takeContents(2); + bool hasIdchar = false; + while (auto lexed = idchar(ctx.next())) { + ctx.takeContents(1); + hasIdchar = true; + } + if (!hasIdchar) { + return std::nullopt; + } + ++depth; + continue; + } + if (ctx.startsWith("("sv)) { + ctx.takeContents(1); + ++depth; + continue; + } + if (ctx.startsWith(")"sv)) { + --depth; + if (depth == 0) { + ctx.take(1); + break; + } + ctx.takeContents(1); + continue; + } + // Unrecognized token. 
+ return std::nullopt; + } + } + return ctx.lexed(); +} + // comment ::= linecomment | blockcomment // linecomment ::= ';;' linechar* ('\n' | eof) // linechar ::= c:char (if c != '\n') @@ -375,7 +513,7 @@ std::optional<LexResult> comment(std::string_view in) { } // Line comment - if (ctx.takePrefix(";;"sv)) { + if (!ctx.startsWith(";;@"sv) && ctx.takePrefix(";;"sv)) { if (auto size = ctx.next().find('\n'); size != ""sv.npos) { ctx.take(size); } else { @@ -934,8 +1072,17 @@ std::optional<std::string_view> Token::getID() const { } void Lexer::skipSpace() { - if (auto ctx = space(next())) { - index += ctx->span.size(); + while (true) { + if (auto ctx = annotation(next())) { + index += ctx->span.size(); + annotations.push_back(ctx->annotation); + continue; + } + if (auto ctx = space(next())) { + index += ctx->span.size(); + continue; + } + break; } } |