summary | refs | log | tree | commit | diff
path: root/src/parser
diff options
context:
space:
mode:
author: Thomas Lively <tlively@google.com>, 2024-04-25 22:22:35 -0700
committer: GitHub <noreply@github.com>, 2024-04-25 22:22:35 -0700
commit: df6878612a32d50408fa9dc71e84199bc823a250 (patch)
tree: af4b9115a3b5613f14ff1d6ecf6abb0f2e3caf99 /src/parser
parent: 003786cb6aede99cf3d188771a27de8e26b555cf (diff)
download: binaryen-df6878612a32d50408fa9dc71e84199bc823a250.tar.gz
binaryen-df6878612a32d50408fa9dc71e84199bc823a250.tar.bz2
binaryen-df6878612a32d50408fa9dc71e84199bc823a250.zip
[Parser] Do not eagerly lex strings (#6543)
Lex them on demand instead to avoid wasted work.
Diffstat (limited to 'src/parser')
-rw-r--r-- src/parser/lexer.cpp 38
-rw-r--r-- src/parser/lexer.h 36
2 files changed, 25 insertions, 49 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
index 8384b5047..f3646c0be 100644
--- a/src/parser/lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -1004,17 +1004,6 @@ std::optional<float> Token::getF32() const {
return {};
}
-std::optional<std::string_view> Token::getString() const {
- if (auto* tok = std::get_if<StringTok>(&data)) {
- if (tok->str) {
- return std::string_view(*tok->str);
- }
- // Remove quotes.
- return span.substr(1, span.size() - 2);
- }
- return {};
-}
-
void Lexer::skipSpace() {
while (true) {
if (auto ctx = annotation(next())) {
@@ -1054,6 +1043,22 @@ bool Lexer::takeRParen() {
return false;
}
+std::optional<std::string> Lexer::takeString() {
+ if (curr) {
+ return std::nullopt;
+ }
+ if (auto result = str(next())) {
+ index += result->span.size();
+ advance();
+ if (result->str) {
+ return result->str;
+ }
+ // Remove quotes.
+ return std::string(result->span.substr(1, result->span.size() - 2));
+ }
+ return std::nullopt;
+}
+
std::optional<Name> Lexer::takeID() {
if (curr) {
return std::nullopt;
@@ -1132,8 +1137,6 @@ void Lexer::lexToken() {
tok = Token{t->span, IntTok{t->n, t->sign}};
} else if (auto t = float_(next())) {
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
- } else if (auto t = str(next())) {
- tok = Token{t->span, StringTok{t->str}};
} else {
// TODO: Do something about lexing errors.
curr = std::nullopt;
@@ -1204,15 +1207,6 @@ std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
return os << tok.d;
}
-std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
- if (tok.str) {
- os << '"' << *tok.str << '"';
- } else {
- os << "(raw string)";
- }
- return os;
-}
-
std::ostream& operator<<(std::ostream& os, const Token& tok) {
std::visit([&](const auto& t) { os << t; }, tok.data);
return os << " \"" << tok.span << "\"";
diff --git a/src/parser/lexer.h b/src/parser/lexer.h
index 0fe0faa27..64d1fc6be 100644
--- a/src/parser/lexer.h
+++ b/src/parser/lexer.h
@@ -68,16 +68,8 @@ struct FloatTok {
friend std::ostream& operator<<(std::ostream&, const FloatTok&);
};
-struct StringTok {
- // If the string contains escapes, this is its contents.
- std::optional<std::string> str;
-
- bool operator==(const StringTok& other) const { return str == other.str; }
- friend std::ostream& operator<<(std::ostream&, const StringTok&);
-};
-
struct Token {
- using Data = std::variant<IntTok, FloatTok, StringTok>;
+ using Data = std::variant<IntTok, FloatTok>;
std::string_view span;
Data data;
@@ -90,7 +82,6 @@ struct Token {
template<typename T> std::optional<T> getI() const;
std::optional<double> getF64() const;
std::optional<float> getF32() const;
- std::optional<std::string_view> getString() const;
bool operator==(const Token&) const;
friend std::ostream& operator<<(std::ostream& os, const Token&);
@@ -145,6 +136,10 @@ public:
if (peekLParen() || peekRParen()) {
return true;
}
+ // Do not count the parentheses in strings.
+ if (takeString()) {
+ continue;
+ }
if (!curr) {
++index;
}
@@ -218,27 +213,14 @@ public:
return std::nullopt;
}
- std::optional<std::string> takeString() {
- if (curr) {
- if (auto s = curr->getString()) {
- std::string ret(*s);
- advance();
- return ret;
- }
- }
- return {};
- }
+ std::optional<std::string> takeString();
std::optional<Name> takeName() {
- // TODO: Move this to lexer and validate UTF.
+ // TODO: Validate UTF.
if (auto str = takeString()) {
- // Copy to a std::string to make sure we have a null terminator, otherwise
- // the `Name` constructor won't work correctly.
- // TODO: Update `Name` to use string_view instead of char* and/or to take
- // rvalue strings to avoid this extra copy.
- return Name(std::string(*str));
+ return Name(*str);
}
- return {};
+ return std::nullopt;
}
bool takeSExprStart(std::string_view expected) {