diff options
author | Thomas Lively <tlively@google.com> | 2024-04-25 22:22:35 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-25 22:22:35 -0700 |
commit | df6878612a32d50408fa9dc71e84199bc823a250 (patch) | |
tree | af4b9115a3b5613f14ff1d6ecf6abb0f2e3caf99 /src/parser | |
parent | 003786cb6aede99cf3d188771a27de8e26b555cf (diff) | |
download | binaryen-df6878612a32d50408fa9dc71e84199bc823a250.tar.gz binaryen-df6878612a32d50408fa9dc71e84199bc823a250.tar.bz2 binaryen-df6878612a32d50408fa9dc71e84199bc823a250.zip |
[Parser] Do not eagerly lex strings (#6543)
Lex them on demand instead to avoid wasted work.
Diffstat (limited to 'src/parser')
-rw-r--r-- | src/parser/lexer.cpp | 38 | ||||
-rw-r--r-- | src/parser/lexer.h | 36 |
2 files changed, 25 insertions, 49 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp index 8384b5047..f3646c0be 100644 --- a/src/parser/lexer.cpp +++ b/src/parser/lexer.cpp @@ -1004,17 +1004,6 @@ std::optional<float> Token::getF32() const { return {}; } -std::optional<std::string_view> Token::getString() const { - if (auto* tok = std::get_if<StringTok>(&data)) { - if (tok->str) { - return std::string_view(*tok->str); - } - // Remove quotes. - return span.substr(1, span.size() - 2); - } - return {}; -} - void Lexer::skipSpace() { while (true) { if (auto ctx = annotation(next())) { @@ -1054,6 +1043,22 @@ bool Lexer::takeRParen() { return false; } +std::optional<std::string> Lexer::takeString() { + if (curr) { + return std::nullopt; + } + if (auto result = str(next())) { + index += result->span.size(); + advance(); + if (result->str) { + return result->str; + } + // Remove quotes. + return std::string(result->span.substr(1, result->span.size() - 2)); + } + return std::nullopt; +} + std::optional<Name> Lexer::takeID() { if (curr) { return std::nullopt; @@ -1132,8 +1137,6 @@ void Lexer::lexToken() { tok = Token{t->span, IntTok{t->n, t->sign}}; } else if (auto t = float_(next())) { tok = Token{t->span, FloatTok{t->nanPayload, t->d}}; - } else if (auto t = str(next())) { - tok = Token{t->span, StringTok{t->str}}; } else { // TODO: Do something about lexing errors. curr = std::nullopt; @@ -1204,15 +1207,6 @@ std::ostream& operator<<(std::ostream& os, const FloatTok& tok) { return os << tok.d; } -std::ostream& operator<<(std::ostream& os, const StringTok& tok) { - if (tok.str) { - os << '"' << *tok.str << '"'; - } else { - os << "(raw string)"; - } - return os; -} - std::ostream& operator<<(std::ostream& os, const Token& tok) { std::visit([&](const auto& t) { os << t; }, tok.data); return os << " \"" << tok.span << "\""; diff --git a/src/parser/lexer.h b/src/parser/lexer.h index 0fe0faa27..64d1fc6be 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -68,16 +68,8 @@ struct FloatTok { friend std::ostream& operator<<(std::ostream&, const FloatTok&); }; -struct StringTok { - // If the string contains escapes, this is its contents. - std::optional<std::string> str; - - bool operator==(const StringTok& other) const { return str == other.str; } - friend std::ostream& operator<<(std::ostream&, const StringTok&); -}; - struct Token { - using Data = std::variant<IntTok, FloatTok, StringTok>; + using Data = std::variant<IntTok, FloatTok>; std::string_view span; Data data; @@ -90,7 +82,6 @@ struct Token { template<typename T> std::optional<T> getI() const; std::optional<double> getF64() const; std::optional<float> getF32() const; - std::optional<std::string_view> getString() const; bool operator==(const Token&) const; friend std::ostream& operator<<(std::ostream& os, const Token&); @@ -145,6 +136,10 @@ public: if (peekLParen() || peekRParen()) { return true; } + // Do not count the parentheses in strings. + if (takeString()) { + continue; + } if (!curr) { ++index; } @@ -218,27 +213,14 @@ public: return std::nullopt; } - std::optional<std::string> takeString() { - if (curr) { - if (auto s = curr->getString()) { - std::string ret(*s); - advance(); - return ret; - } - } - return {}; - } + std::optional<std::string> takeString(); std::optional<Name> takeName() { - // TODO: Move this to lexer and validate UTF. + // TODO: Validate UTF. if (auto str = takeString()) { - // Copy to a std::string to make sure we have a null terminator, otherwise - // the `Name` constructor won't work correctly. - // TODO: Update `Name` to use string_view instead of char* and/or to take - // rvalue strings to avoid this extra copy. - return Name(std::string(*str)); + return Name(*str); } - return {}; + return std::nullopt; } bool takeSExprStart(std::string_view expected) { |