diff options
author | Thomas Lively <tlively@google.com> | 2024-04-25 21:19:46 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-25 21:19:46 -0700 |
commit | eccf9f951262bf6909bf75e8865e09d0596dcc09 (patch) | |
tree | 873779152561a68fa30f2423508ae9a72d09e49f | |
parent | 35560732b6a2c6960a6e72ea478bc0238a967c30 (diff) | |
download | binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.tar.gz binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.tar.bz2 binaryen-eccf9f951262bf6909bf75e8865e09d0596dcc09.zip |
[Parser] Do not eagerly lex keywords (#6541)
Lex them on demand instead to avoid wasted work.
-rw-r--r-- | src/parser/lexer.cpp | 57 | ||||
-rw-r--r-- | src/parser/lexer.h | 84 |
2 files changed, 56 insertions, 85 deletions
```diff
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
index 7c9bbb225..464cd34c4 100644
--- a/src/parser/lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -1069,6 +1069,57 @@ bool Lexer::takeRParen() {
   return false;
 }
 
+std::optional<std::string_view> Lexer::takeKeyword() {
+  if (curr) {
+    return std::nullopt;
+  }
+  if (auto result = keyword(next())) {
+    index += result->span.size();
+    advance();
+    return result->span;
+  }
+  return std::nullopt;
+}
+
+bool Lexer::takeKeyword(std::string_view expected) {
+  if (auto result = keyword(next()); result && result->span == expected) {
+    index += expected.size();
+    advance();
+    return true;
+  }
+  return false;
+}
+
+std::optional<uint64_t> Lexer::takeOffset() {
+  if (auto result = keyword(next())) {
+    if (result->span.substr(0, 7) != "offset="sv) {
+      return std::nullopt;
+    }
+    Lexer subLexer(result->span.substr(7));
+    if (auto o = subLexer.takeU64()) {
+      index += result->span.size();
+      advance();
+      return o;
+    }
+  }
+  return std::nullopt;
+}
+
+std::optional<uint32_t> Lexer::takeAlign() {
+  if (auto result = keyword(next())) {
+    if (result->span.substr(0, 6) != "align="sv) {
+      return std::nullopt;
+    }
+    Lexer subLexer(result->span.substr(6));
+    if (auto o = subLexer.takeU32()) {
+      index += result->span.size();
+      advance();
+      return o;
+    }
+  }
+  return std::nullopt;
+}
+
 void Lexer::lexToken() {
   // TODO: Ensure we're getting the longest possible match.
   Token tok;
@@ -1080,8 +1131,6 @@ void Lexer::lexToken() {
     tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
   } else if (auto t = str(next())) {
     tok = Token{t->span, StringTok{t->str}};
-  } else if (auto t = keyword(next())) {
-    tok = Token{t->span, KeywordTok{}};
   } else {
     // TODO: Do something about lexing errors.
     curr = std::nullopt;
@@ -1163,10 +1212,6 @@ std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
   return os;
 }
 
-std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
-  return os << "keyword";
-}
-
 std::ostream& operator<<(std::ostream& os, const Token& tok) {
   std::visit([&](const auto& t) { os << t; }, tok.data);
   return os << " \"" << tok.span << "\"";
diff --git a/src/parser/lexer.h b/src/parser/lexer.h
index 10ba7c25a..f8f7f8b57 100644
--- a/src/parser/lexer.h
+++ b/src/parser/lexer.h
@@ -87,13 +87,8 @@ struct StringTok {
   friend std::ostream& operator<<(std::ostream&, const StringTok&);
 };
 
-struct KeywordTok {
-  bool operator==(const KeywordTok&) const { return true; }
-  friend std::ostream& operator<<(std::ostream&, const KeywordTok&);
-};
-
 struct Token {
-  using Data = std::variant<IdTok, IntTok, FloatTok, StringTok, KeywordTok>;
+  using Data = std::variant<IdTok, IntTok, FloatTok, StringTok>;
   std::string_view span;
   Data data;
 
@@ -101,13 +96,6 @@ struct Token {
   // Token classification
   // ====================
 
-  std::optional<std::string_view> getKeyword() const {
-    if (std::get_if<KeywordTok>(&data)) {
-      return span;
-    }
-    return {};
-  }
-
   template<typename T> std::optional<T> getU() const;
   template<typename T> std::optional<T> getS() const;
   template<typename T> std::optional<T> getI() const;
@@ -187,77 +175,15 @@ public:
     return {};
   }
 
-  std::optional<std::string_view> takeKeyword() {
-    if (curr) {
-      if (auto keyword = curr->getKeyword()) {
-        advance();
-        return *keyword;
-      }
-    }
-    return {};
-  }
+  std::optional<std::string_view> takeKeyword();
+  bool takeKeyword(std::string_view expected);
 
   std::optional<std::string_view> peekKeyword() {
     return Lexer(*this).takeKeyword();
   }
 
-  bool takeKeyword(std::string_view expected) {
-    if (curr) {
-      if (auto keyword = curr->getKeyword()) {
-        if (*keyword == expected) {
-          advance();
-          return true;
-        }
-      }
-    }
-    return false;
-  }
-
-  std::optional<uint64_t> takeOffset() {
-    using namespace std::string_view_literals;
-    if (curr) {
-      if (auto keyword = curr->getKeyword()) {
-        if (keyword->substr(0, 7) != "offset="sv) {
-          return {};
-        }
-        Lexer subLexer(keyword->substr(7));
-        if (subLexer.empty()) {
-          return {};
-        }
-        if (auto o = subLexer.curr->getU<uint64_t>()) {
-          subLexer.advance();
-          if (subLexer.empty()) {
-            advance();
-            return o;
-          }
-        }
-      }
-    }
-    return std::nullopt;
-  }
-
-  std::optional<uint32_t> takeAlign() {
-    using namespace std::string_view_literals;
-    if (curr) {
-      if (auto keyword = curr->getKeyword()) {
-        if (keyword->substr(0, 6) != "align="sv) {
-          return {};
-        }
-        Lexer subLexer(keyword->substr(6));
-        if (subLexer.empty()) {
-          return {};
-        }
-        if (auto a = subLexer.curr->getU<uint32_t>()) {
-          subLexer.advance();
-          if (subLexer.empty()) {
-            advance();
-            return a;
-          }
-        }
-      }
-    }
-    return {};
-  }
+  std::optional<uint64_t> takeOffset();
+  std::optional<uint32_t> takeAlign();
 
   template<typename T> std::optional<T> takeU() {
     if (curr) {
```