summaryrefslogtreecommitdiff
path: root/src/parser/lexer.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser/lexer.h')
-rw-r--r--src/parser/lexer.h246
1 files changed, 214 insertions, 32 deletions
diff --git a/src/parser/lexer.h b/src/parser/lexer.h
index f0da151f9..8f9bd103a 100644
--- a/src/parser/lexer.h
+++ b/src/parser/lexer.h
@@ -23,6 +23,8 @@
#include <string_view>
#include <variant>
+#include "support/name.h"
+
#ifndef parser_lexer_h
#define parser_lexer_h
@@ -147,13 +149,6 @@ struct Token {
// positions are computed on demand rather than eagerly because they are
// typically only needed when there is an error to report.
struct Lexer {
- using iterator = Lexer;
- using difference_type = std::ptrdiff_t;
- using value_type = Token;
- using pointer = const Token*;
- using reference = const Token&;
- using iterator_category = std::forward_iterator_tag;
-
private:
std::string_view buffer;
size_t index = 0;
@@ -169,51 +164,238 @@ public:
 void setIndex(size_t i) {
 index = i;
- skipSpace();
- lexToken();
+ advance(); // skip space and lex the token found at the new index
 }
- std::string_view next() const { return buffer.substr(index); }
- Lexer& operator++() {
- // Preincrement
- skipSpace();
- lexToken();
- return *this;
+ // Returns a copy of the current token without consuming it. The result is
+ // empty when there is no current token.
+ std::optional<Token> peek() const {
+   return curr;
+ }
+
+ // Consumes the current token when it is a left parenthesis. Returns whether
+ // a token was consumed; when false, the lexer has not advanced.
+ bool takeLParen() {
+   if (auto tok = peek(); tok && tok->isLParen()) {
+     advance();
+     return true;
+   }
+   return false;
 }
- Lexer operator++(int) {
- // Postincrement
- Lexer ret = *this;
- ++(*this);
- return ret;
+ // Consumes the current token when it is a right parenthesis. Returns whether
+ // a token was consumed; when false, the lexer has not advanced.
+ bool takeRParen() {
+   auto tok = peek();
+   bool matched = tok && tok->isRParen();
+   if (matched) {
+     advance();
+   }
+   return matched;
+ }
+
+ // Discards tokens until the current token is a parenthesis of either kind.
+ // Returns true when stopped at a parenthesis (which is not consumed) and
+ // false when the tokens ran out first.
+ bool takeUntilParen() {
+   while (auto tok = peek()) {
+     if (tok->isLParen() || tok->isRParen()) {
+       return true;
+     }
+     advance();
+   }
+   return false;
+ }
+
+ // Consumes the current token when it is an identifier and returns it as a
+ // Name. Returns an empty optional, without advancing, otherwise.
+ std::optional<Name> takeID() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto ident = tok->getID();
+   if (!ident) {
+     return std::nullopt;
+   }
+   advance();
+   // The identifier is copied into a std::string before the Name is built;
+   // presumably Name needs null-terminated input (confirm against Name's
+   // constructors).
+   return Name(std::string(*ident));
+ }
+
+ // Consumes the current token when it is a keyword and returns its text.
+ // Returns an empty optional, without advancing, otherwise.
+ std::optional<std::string_view> takeKeyword() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto kw = tok->getKeyword();
+   if (!kw) {
+     return std::nullopt;
+   }
+   advance();
+   return *kw;
+ }
+
+ // Consumes the current token only when it is a keyword whose text equals
+ // `expected`. Returns whether a token was consumed.
+ bool takeKeyword(std::string_view expected) {
+   auto tok = peek();
+   if (!tok) {
+     return false;
+   }
+   auto kw = tok->getKeyword();
+   if (!kw || !(*kw == expected)) {
+     return false;
+   }
+   advance();
+   return true;
+ }
+
+ // Consumes a keyword of the form `offset=<u64>` and returns the value. The
+ // text after the prefix is lexed on its own and must be exactly one unsigned
+ // integer token; in every other case nothing is consumed and the result is
+ // empty.
+ std::optional<uint64_t> takeOffset() {
+   using namespace std::string_view_literals;
+   constexpr auto prefix = "offset="sv;
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto kw = tok->getKeyword();
+   if (!kw || kw->substr(0, prefix.size()) != prefix) {
+     return std::nullopt;
+   }
+   Lexer valueLexer(kw->substr(prefix.size()));
+   if (valueLexer.empty()) {
+     return std::nullopt;
+   }
+   auto value = valueLexer.peek()->getU<uint64_t>();
+   if (!value) {
+     return std::nullopt;
+   }
+   // Reject trailing tokens after the number.
+   valueLexer.advance();
+   if (!valueLexer.empty()) {
+     return std::nullopt;
+   }
+   advance();
+   return value;
+ }
+
+ // Consumes a keyword of the form `align=<u32>` and returns the value. The
+ // text after the prefix is lexed on its own and must be exactly one unsigned
+ // integer token; in every other case nothing is consumed and the result is
+ // empty.
+ std::optional<uint32_t> takeAlign() {
+   using namespace std::string_view_literals;
+   constexpr auto prefix = "align="sv;
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto kw = tok->getKeyword();
+   if (!kw || kw->substr(0, prefix.size()) != prefix) {
+     return std::nullopt;
+   }
+   Lexer valueLexer(kw->substr(prefix.size()));
+   if (valueLexer.empty()) {
+     return std::nullopt;
+   }
+   auto value = valueLexer.peek()->getU<uint32_t>();
+   if (!value) {
+     return std::nullopt;
+   }
+   // Reject trailing tokens after the number.
+   valueLexer.advance();
+   if (!valueLexer.empty()) {
+     return std::nullopt;
+   }
+   advance();
+   return value;
+ }
+
+ // Consumes the current token when Token::getU<T> yields a value for it and
+ // returns that value. Returns an empty optional, without advancing,
+ // otherwise.
+ template<typename T> std::optional<T> takeU() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto value = tok->getU<T>();
+   if (!value) {
+     return std::nullopt;
+   }
+   advance();
+   return value;
+ }
+
+ // Consumes the current token when Token::getI<T> yields a value for it and
+ // returns that value. Returns an empty optional, without advancing,
+ // otherwise.
+ template<typename T> std::optional<T> takeI() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto value = tok->getI<T>();
+   if (!value) {
+     return std::nullopt;
+   }
+   advance();
+   return value;
+ }
+
+ // Fixed-width conveniences that forward to takeU/takeI. Note that the takeI*
+ // variants also report their result through an unsigned type.
+ std::optional<uint64_t> takeU64() {
+   return takeU<uint64_t>();
+ }
+
+ std::optional<uint64_t> takeI64() {
+   return takeI<uint64_t>();
+ }
+
+ std::optional<uint32_t> takeU32() {
+   return takeU<uint32_t>();
+ }
+
+ std::optional<uint32_t> takeI32() {
+   return takeI<uint32_t>();
+ }
+
+ std::optional<uint16_t> takeI16() {
+   return takeI<uint16_t>();
+ }
+
+ std::optional<uint8_t> takeU8() {
+   return takeU<uint8_t>();
+ }
+
+ std::optional<uint8_t> takeI8() {
+   return takeI<uint8_t>();
+ }
+
+ // Consumes the current token when Token::getF64 yields a value for it and
+ // returns that value. Returns an empty optional, without advancing,
+ // otherwise.
+ std::optional<double> takeF64() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto value = tok->getF64();
+   if (!value) {
+     return std::nullopt;
+   }
+   advance();
+   return value;
+ }
+
+ // Consumes the current token when Token::getF32 yields a value for it and
+ // returns that value. Returns an empty optional, without advancing,
+ // otherwise.
+ std::optional<float> takeF32() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto value = tok->getF32();
+   if (!value) {
+     return std::nullopt;
+   }
+   advance();
+   return value;
 }
- const Token& operator*() { return *curr; }
- const Token* operator->() { return &*curr; }
+ // Consumes the current token when it is a string and returns its contents
+ // as an owning std::string. Returns an empty optional, without advancing,
+ // otherwise.
+ std::optional<std::string> takeString() {
+   auto tok = peek();
+   if (!tok) {
+     return std::nullopt;
+   }
+   auto contents = tok->getString();
+   if (!contents) {
+     return std::nullopt;
+   }
+   advance();
+   return std::string(*contents);
+ }
- bool operator==(const Lexer& other) const {
- // The iterator is equal to the end sentinel when there is no current token.
- if (!curr && !other.curr) {
+ // Consumes the current token when it is a string and returns it as a Name.
+ // Returns an empty optional, without advancing, otherwise.
+ std::optional<Name> takeName() {
+   // TODO: Move this to lexer and validate UTF.
+   if (auto str = takeString()) {
+     // The `Name` constructor needs null-terminated input, which is why
+     // callers holding only a string_view copy it into a std::string first.
+     // takeString() already returns an owning std::string, so it can be used
+     // directly without making a second copy.
+     // TODO: Update `Name` to use string_view instead of char* and/or to take
+     // rvalue strings so that string_view callers can avoid their copy too.
+     return Name(*str);
+   }
+   return std::nullopt;
+ }
+
+ bool takeSExprStart(std::string_view expected) { // consume "(" followed by keyword `expected`, or nothing at all
+ auto original = *this; // snapshot so that a partial match can be undone
+ if (takeLParen() && takeKeyword(expected)) {
 return true;
 }
- // Otherwise they are equivalent when they are at the same position.
- return index == other.index;
+ *this = original; // roll back, e.g. when "(" was consumed but the keyword did not match
+ return false;
 }
- bool operator!=(const Lexer& other) const { return !(*this == other); }
+ // Reports whether the upcoming tokens are "(" followed by the keyword
+ // `expected`, leaving the lexer where it started.
+ bool peekSExprStart(std::string_view expected) {
+   auto saved = *this;
+   bool matched = false;
+   if (takeLParen()) {
+     matched = takeKeyword(expected);
+     // Undo the lookahead regardless of whether the keyword matched.
+     *this = saved;
+   }
+   return matched;
+ }
- Lexer begin() { return *this; }
+ // Returns the part of the buffer that starts at the current index.
+ std::string_view next() const {
+   return buffer.substr(index);
+ }
- Lexer end() const { return Lexer(); }
+ void advance() { // move the lexer on to the next token
+ skipSpace(); // first step: skipSpace(), declared below
+ lexToken(); // then lex the next token (presumably refreshing `curr`; confirm in lexToken)
+ }
- bool empty() const { return *this == end(); }
+ // True when there is no current token.
+ bool empty() const {
+   return !curr;
+ }
TextPos position(const char* c) const;
TextPos position(size_t i) const { return position(buffer.data() + i); }
TextPos position(std::string_view span) const {
return position(span.data());
}
- TextPos position(Token tok) const { return position(tok.span); }
+ // Text position corresponding to getPos().
+ TextPos position() const {
+   const size_t pos = getPos();
+   return position(pos);
+ }
+
+ // Returns getIndex() minus the length of the current token's span, or
+ // getIndex() itself when there is no current token. Presumably this is the
+ // buffer offset where the current token starts (assumes the index sits just
+ // past that token; confirm against lexToken).
+ size_t getPos() const {
+   auto tok = peek();
+   const size_t width = tok ? tok->span.size() : 0;
+   return getIndex() - width;
+ }
private:
void skipSpace();