diff options
Diffstat (limited to 'src/parser/lexer.h')
-rw-r--r-- | src/parser/lexer.h | 246 |
1 files changed, 214 insertions, 32 deletions
diff --git a/src/parser/lexer.h b/src/parser/lexer.h index f0da151f9..8f9bd103a 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -23,6 +23,8 @@ #include <string_view> #include <variant> +#include "support/name.h" + #ifndef parser_lexer_h #define parser_lexer_h @@ -147,13 +149,6 @@ struct Token { // positions are computed on demand rather than eagerly because they are // typically only needed when there is an error to report. struct Lexer { - using iterator = Lexer; - using difference_type = std::ptrdiff_t; - using value_type = Token; - using pointer = const Token*; - using reference = const Token&; - using iterator_category = std::forward_iterator_tag; - private: std::string_view buffer; size_t index = 0; @@ -169,51 +164,238 @@ public: void setIndex(size_t i) { index = i; - skipSpace(); - lexToken(); + advance(); } - std::string_view next() const { return buffer.substr(index); } - Lexer& operator++() { - // Preincrement - skipSpace(); - lexToken(); - return *this; + std::optional<Token> peek() const { return curr; } + + bool takeLParen() { + auto t = peek(); + if (!t || !t->isLParen()) { + return false; + } + advance(); + return true; } - Lexer operator++(int) { - // Postincrement - Lexer ret = *this; - ++(*this); - return ret; + bool takeRParen() { + auto t = peek(); + if (!t || !t->isRParen()) { + return false; + } + advance(); + return true; + } + + bool takeUntilParen() { + while (true) { + auto t = peek(); + if (!t) { + return false; + } + if (t->isLParen() || t->isRParen()) { + return true; + } + advance(); + } + } + + std::optional<Name> takeID() { + if (auto t = peek()) { + if (auto id = t->getID()) { + advance(); + // See comment on takeName. + return Name(std::string(*id)); + } + } + return {}; + } + + std::optional<std::string_view> takeKeyword() { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + advance(); + return *keyword; + } + } + return {}; + } + + bool takeKeyword(std::string_view expected) { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (*keyword == expected) { + advance(); + return true; + } + } + } + return false; + } + + std::optional<uint64_t> takeOffset() { + using namespace std::string_view_literals; + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (keyword->substr(0, 7) != "offset="sv) { + return {}; + } + Lexer subLexer(keyword->substr(7)); + if (subLexer.empty()) { + return {}; + } + if (auto o = subLexer.peek()->getU<uint64_t>()) { + subLexer.advance(); + if (subLexer.empty()) { + advance(); + return o; + } + } + } + } + return std::nullopt; + } + + std::optional<uint32_t> takeAlign() { + using namespace std::string_view_literals; + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (keyword->substr(0, 6) != "align="sv) { + return {}; + } + Lexer subLexer(keyword->substr(6)); + if (subLexer.empty()) { + return {}; + } + if (auto a = subLexer.peek()->getU<uint32_t>()) { + subLexer.advance(); + if (subLexer.empty()) { + advance(); + return a; + } + } + } + } + return {}; + } + + template<typename T> std::optional<T> takeU() { + if (auto t = peek()) { + if (auto n = t->getU<T>()) { + advance(); + return n; + } + } + return std::nullopt; + } + + template<typename T> std::optional<T> takeI() { + if (auto t = peek()) { + if (auto n = t->getI<T>()) { + advance(); + return n; + } + } + return std::nullopt; + } + + std::optional<uint64_t> takeU64() { return takeU<uint64_t>(); } + + std::optional<uint64_t> takeI64() { return takeI<uint64_t>(); } + + std::optional<uint32_t> takeU32() { return takeU<uint32_t>(); } + + std::optional<uint32_t> takeI32() { return takeI<uint32_t>(); } + + std::optional<uint16_t> takeI16() { return takeI<uint16_t>(); } + + std::optional<uint8_t> takeU8() { return takeU<uint8_t>(); } + + std::optional<uint8_t> takeI8() { return takeI<uint8_t>(); } + + std::optional<double> takeF64() { + if (auto t = peek()) { + if (auto d = t->getF64()) { + advance(); + return d; + } + } + return std::nullopt; + } + + std::optional<float> takeF32() { + if (auto t = peek()) { + if (auto f = t->getF32()) { + advance(); + return f; + } + } + return std::nullopt; } - const Token& operator*() { return *curr; } - const Token* operator->() { return &*curr; } + std::optional<std::string> takeString() { + if (auto t = peek()) { + if (auto s = t->getString()) { + advance(); + return std::string(*s); + } + } + return {}; + } - bool operator==(const Lexer& other) const { - // The iterator is equal to the end sentinel when there is no current token. - if (!curr && !other.curr) { + std::optional<Name> takeName() { + // TODO: Move this to lexer and validate UTF. + if (auto str = takeString()) { + // Copy to a std::string to make sure we have a null terminator, otherwise + // the `Name` constructor won't work correctly. + // TODO: Update `Name` to use string_view instead of char* and/or to take + // rvalue strings to avoid this extra copy. + return Name(std::string(*str)); + } + return {}; + } + + bool takeSExprStart(std::string_view expected) { + auto original = *this; + if (takeLParen() && takeKeyword(expected)) { return true; } - // Otherwise they are equivalent when they are at the same position. - return index == other.index; + *this = original; + return false; } - bool operator!=(const Lexer& other) const { return !(*this == other); } + bool peekSExprStart(std::string_view expected) { + auto original = *this; + if (!takeLParen()) { + return false; + } + bool ret = takeKeyword(expected); + *this = original; + return ret; + } - Lexer begin() { return *this; } + std::string_view next() const { return buffer.substr(index); } - Lexer end() const { return Lexer(); } + void advance() { + skipSpace(); + lexToken(); + } - bool empty() const { return *this == end(); } + bool empty() const { return !curr; } TextPos position(const char* c) const; TextPos position(size_t i) const { return position(buffer.data() + i); } TextPos position(std::string_view span) const { return position(span.data()); } - TextPos position(Token tok) const { return position(tok.span); } + TextPos position() const { return position(getPos()); } + + size_t getPos() const { + if (auto t = peek()) { + return getIndex() - t->span.size(); + } + return getIndex(); + } private: void skipSpace(); |