diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2022-05-25 11:25:04 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-25 11:25:04 -0700 |
commit | a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e (patch) | |
tree | 02ba3d40d66e466e11a32f6f95fe4872cd7a62d9 /src | |
parent | 808be8a27f7adf00f2410219507b649cdab4aa99 (diff) | |
download | binaryen-a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e.tar.gz binaryen-a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e.tar.bz2 binaryen-a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e.zip |
[Parser] Lex idchar and identifiers (#4686)
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm/wat-parser-internal.h | 85 |
1 files changed, 82 insertions, 3 deletions
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h index ede89de66..2f7e10802 100644 --- a/src/wasm/wat-parser-internal.h +++ b/src/wasm/wat-parser-internal.h @@ -68,6 +68,9 @@ public: // The next input that has not already been lexed. std::string_view next() const { return input.substr(lexedSize); } + // Get the next character without consuming it. + uint8_t peek() const { return next()[0]; } + // The size of the unlexed input. size_t size() const { return input.size() - lexedSize; } @@ -179,7 +182,7 @@ public: bool takeDigit() { if (!empty()) { - if (auto d = getDigit(next()[0])) { + if (auto d = getDigit(peek())) { take(1); uint64_t newN = n * 10 + *d; if (newN < n) { @@ -194,7 +197,7 @@ public: bool takeHexdigit() { if (!empty()) { - if (auto h = getHexDigit(next()[0])) { + if (auto h = getHexDigit(peek())) { take(1); uint64_t newN = n * 16 + *h; if (newN < n) { @@ -372,6 +375,72 @@ std::optional<LexIntResult> integer(std::string_view in) { return {}; } +// idchar ::= '0' | ... | '9' +// | 'A' | ... | 'Z' +// | 'a' | ... | 'z' +// | '!' | '#' | '$' | '%' | '&' | ''' | '*' | '+' +// | '-' | '.' | '/' | ':' | '<' | '=' | '>' | '?' +// | '@' | '\' | '^' | '_' | '`' | '|' | '~' +std::optional<LexResult> idchar(std::string_view in) { + LexCtx ctx(in); + if (ctx.empty()) { + return {}; + } + uint8_t c = ctx.peek(); + if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z')) { + ctx.take(1); + } else { + switch (c) { + case '!': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '*': + case '+': + case '-': + case '.': + case '/': + case ':': + case '<': + case '=': + case '>': + case '?': + case '@': + case '\\': + case '^': + case '_': + case '`': + case '|': + case '~': + ctx.take(1); + } + } + return ctx.lexed(); +} + +// id ::= '$' idchar+ +std::optional<LexResult> ident(std::string_view in) { + LexCtx ctx(in); + if (!ctx.takePrefix("$"sv)) { + return {}; + } + if (auto lexed = idchar(ctx.next())) { + ctx.take(*lexed); + } else { + return {}; + } + while (auto lexed = idchar(ctx.next())) { + ctx.take(*lexed); + } + if (ctx.canFinish()) { + return ctx.lexed(); + } + return {}; +} + // ====== // Tokens // ====== @@ -405,8 +474,16 @@ struct IntTok { } }; +struct IdTok { + friend std::ostream& operator<<(std::ostream& os, const IdTok&) { + return os << "id"; + } + + friend bool operator==(const IdTok&, const IdTok&) { return true; } +}; + struct Token { - using Data = std::variant<LParenTok, RParenTok, IntTok>; + using Data = std::variant<LParenTok, RParenTok, IntTok, IdTok>; std::string_view span; Data data; @@ -490,6 +567,8 @@ struct Lexer { tok = Token{t->span, LParenTok{}}; } else if (auto t = rparen(next())) { tok = Token{t->span, RParenTok{}}; + } else if (auto t = ident(next())) { + tok = Token{t->span, IdTok{}}; } else if (auto t = integer(next())) { tok = Token{t->span, IntTok{t->n, t->signedness}}; } else { |