[Parser] Do not eagerly lex keywords (#6541)

Lex them on demand instead to avoid wasted work.
author: Thomas Lively <tlively@google.com> 2024-04-25 21:19:46 -0700
committer: GitHub <noreply@github.com> 2024-04-25 21:19:46 -0700
commit: eccf9f951262bf6909bf75e8865e09d0596dcc09 (patch)
tree: 873779152561a68fa30f2423508ae9a72d09e49f /src/parser/lexer.cpp
parent: 35560732b6a2c6960a6e72ea478bc0238a967c30 (diff)
download: binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.tar.gz
binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.tar.bz2
binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.zip
1 file changed, 51 insertions, 6 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
index 7c9bbb225..464cd34c4 100644
--- a/src/parser/lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -1069,6 +1069,57 @@ bool Lexer::takeRParen() {
   return false;
 }
 
+std::optional<std::string_view> Lexer::takeKeyword() {
+  if (curr) {
+    return std::nullopt;
+  }
+  if (auto result = keyword(next())) {
+    index += result->span.size();
+    advance();
+    return result->span;
+  }
+  return std::nullopt;
+}
+
+bool Lexer::takeKeyword(std::string_view expected) {
+  if (auto result = keyword(next()); result && result->span == expected) {
+    index += expected.size();
+    advance();
+    return true;
+  }
+  return false;
+}
+
+std::optional<uint64_t> Lexer::takeOffset() {
+  if (auto result = keyword(next())) {
+    if (result->span.substr(0, 7) != "offset="sv) {
+      return std::nullopt;
+    }
+    Lexer subLexer(result->span.substr(7));
+    if (auto o = subLexer.takeU64()) {
+      index += result->span.size();
+      advance();
+      return o;
+    }
+  }
+  return std::nullopt;
+}
+
+std::optional<uint32_t> Lexer::takeAlign() {
+  if (auto result = keyword(next())) {
+    if (result->span.substr(0, 6) != "align="sv) {
+      return std::nullopt;
+    }
+    Lexer subLexer(result->span.substr(6));
+    if (auto o = subLexer.takeU32()) {
+      index += result->span.size();
+      advance();
+      return o;
+    }
+  }
+  return std::nullopt;
+}
+
 void Lexer::lexToken() {
   // TODO: Ensure we're getting the longest possible match.
   Token tok;
@@ -1080,8 +1131,6 @@ void Lexer::lexToken() {
     tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
   } else if (auto t = str(next())) {
     tok = Token{t->span, StringTok{t->str}};
-  } else if (auto t = keyword(next())) {
-    tok = Token{t->span, KeywordTok{}};
   } else {
     // TODO: Do something about lexing errors.
     curr = std::nullopt;
@@ -1163,10 +1212,6 @@ std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
   return os;
 }
 
-std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
-  return os << "keyword";
-}
-
 std::ostream& operator<<(std::ostream& os, const Token& tok) {
   std::visit([&](const auto& t) { os << t; }, tok.data);
   return os << " \"" << tok.span << "\"";
author	Thomas Lively <tlively@google.com>	2024-04-25 21:19:46 -0700
committer	GitHub <noreply@github.com>	2024-04-25 21:19:46 -0700
commit	eccf9f951262bf6909bf75e8865e09d0596dcc09 (patch)
tree	873779152561a68fa30f2423508ae9a72d09e49f /src/parser/lexer.cpp
parent	35560732b6a2c6960a6e72ea478bc0238a967c30 (diff)
download	binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.tar.gz binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.tar.bz2 binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.zip