summaryrefslogtreecommitdiff
path: root/src/wasm/wat-parser-internal.h
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2022-05-25 11:25:04 -0700
committerGitHub <noreply@github.com>2022-05-25 11:25:04 -0700
commita5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e (patch)
tree02ba3d40d66e466e11a32f6f95fe4872cd7a62d9 /src/wasm/wat-parser-internal.h
parent808be8a27f7adf00f2410219507b649cdab4aa99 (diff)
downloadbinaryen-a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e.tar.gz
binaryen-a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e.tar.bz2
binaryen-a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e.zip
[Parser] Lex idchar and identifiers (#4686)
Diffstat (limited to 'src/wasm/wat-parser-internal.h')
-rw-r--r--src/wasm/wat-parser-internal.h85
1 files changed, 82 insertions, 3 deletions
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h
index ede89de66..2f7e10802 100644
--- a/src/wasm/wat-parser-internal.h
+++ b/src/wasm/wat-parser-internal.h
@@ -68,6 +68,9 @@ public:
// The next input that has not already been lexed.
std::string_view next() const { return input.substr(lexedSize); }
+ // Get the next character without consuming it.
+ uint8_t peek() const { return next()[0]; }
+
// The size of the unlexed input.
size_t size() const { return input.size() - lexedSize; }
@@ -179,7 +182,7 @@ public:
bool takeDigit() {
if (!empty()) {
- if (auto d = getDigit(next()[0])) {
+ if (auto d = getDigit(peek())) {
take(1);
uint64_t newN = n * 10 + *d;
if (newN < n) {
@@ -194,7 +197,7 @@ public:
bool takeHexdigit() {
if (!empty()) {
- if (auto h = getHexDigit(next()[0])) {
+ if (auto h = getHexDigit(peek())) {
take(1);
uint64_t newN = n * 16 + *h;
if (newN < n) {
@@ -372,6 +375,72 @@ std::optional<LexIntResult> integer(std::string_view in) {
return {};
}
+// idchar ::= '0' | ... | '9'
+// | 'A' | ... | 'Z'
+// | 'a' | ... | 'z'
+// | '!' | '#' | '$' | '%' | '&' | ''' | '*' | '+'
+// | '-' | '.' | '/' | ':' | '<' | '=' | '>' | '?'
+// | '@' | '\' | '^' | '_' | '`' | '|' | '~'
+std::optional<LexResult> idchar(std::string_view in) {
+ LexCtx ctx(in);
+ if (ctx.empty()) {
+ return {};
+ }
+ uint8_t c = ctx.peek();
+ if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
+ ('a' <= c && c <= 'z')) {
+ ctx.take(1);
+ } else {
+ switch (c) {
+ case '!':
+ case '#':
+ case '$':
+ case '%':
+ case '&':
+ case '\'':
+ case '*':
+ case '+':
+ case '-':
+ case '.':
+ case '/':
+ case ':':
+ case '<':
+ case '=':
+ case '>':
+ case '?':
+ case '@':
+ case '\\':
+ case '^':
+ case '_':
+ case '`':
+ case '|':
+ case '~':
+ ctx.take(1);
+ }
+ }
+ return ctx.lexed();
+}
+
+// id ::= '$' idchar+
+std::optional<LexResult> ident(std::string_view in) {
+ LexCtx ctx(in);
+ if (!ctx.takePrefix("$"sv)) {
+ return {};
+ }
+ if (auto lexed = idchar(ctx.next())) {
+ ctx.take(*lexed);
+ } else {
+ return {};
+ }
+ while (auto lexed = idchar(ctx.next())) {
+ ctx.take(*lexed);
+ }
+ if (ctx.canFinish()) {
+ return ctx.lexed();
+ }
+ return {};
+}
+
// ======
// Tokens
// ======
@@ -405,8 +474,16 @@ struct IntTok {
}
};
+struct IdTok {
+ friend std::ostream& operator<<(std::ostream& os, const IdTok&) {
+ return os << "id";
+ }
+
+ friend bool operator==(const IdTok&, const IdTok&) { return true; }
+};
+
struct Token {
- using Data = std::variant<LParenTok, RParenTok, IntTok>;
+ using Data = std::variant<LParenTok, RParenTok, IntTok, IdTok>;
std::string_view span;
Data data;
@@ -490,6 +567,8 @@ struct Lexer {
tok = Token{t->span, LParenTok{}};
} else if (auto t = rparen(next())) {
tok = Token{t->span, RParenTok{}};
+ } else if (auto t = ident(next())) {
+ tok = Token{t->span, IdTok{}};
} else if (auto t = integer(next())) {
tok = Token{t->span, IntTok{t->n, t->signedness}};
} else {