/*
 * Copyright 2022 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// NOTE(review): the header names were missing from these eight include
// directives. The list below was rebuilt from the standard-library names this
// header actually uses; confirm it against the upstream file.
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <ostream>
#include <string>
#include <string_view>
#include <variant>

#ifndef wasm_wat_lexer_h
#define wasm_wat_lexer_h

namespace wasm::WATParser {

// A line/column pair, as returned by Lexer::position().
struct TextPos {
  size_t line;
  size_t col;

  bool operator==(const TextPos& other) const;
  bool operator!=(const TextPos& other) const { return !(*this == other); }

  friend std::ostream& operator<<(std::ostream& os, const TextPos& pos);
};

// ======
// Tokens
// ======

// The token kinds below that carry no data members always compare equal to
// another token of the same kind; their text lives in Token::span.

struct LParenTok {
  bool operator==(const LParenTok&) const { return true; }
  friend std::ostream& operator<<(std::ostream&, const LParenTok&);
};

struct RParenTok {
  bool operator==(const RParenTok&) const { return true; }
  friend std::ostream& operator<<(std::ostream&, const RParenTok&);
};

// The identifier text is read from Token::span (see Token::getID).
struct IdTok {
  bool operator==(const IdTok&) const { return true; }
  friend std::ostream& operator<<(std::ostream&, const IdTok&);
};

// Sign recorded alongside an integer token (see IntTok::sign).
enum Sign { NoSign, Pos, Neg };

struct IntTok {
  uint64_t n;
  Sign sign;

  bool operator==(const IntTok&) const;
  friend std::ostream& operator<<(std::ostream&, const IntTok&);
};

struct FloatTok {
  // The payload if we lexed a nan with payload. We cannot store the payload
  // directly in `d` because we do not know at this point whether we are parsing
  // an f32 or f64 and therefore we do not know what the allowable payloads are.
  // No payload with NaN means to use the default payload for the expected float
  // width.
  // NOTE(review): the payload type was missing; uint64_t is assumed - confirm.
  std::optional<uint64_t> nanPayload;
  double d;

  bool operator==(const FloatTok&) const;
  friend std::ostream& operator<<(std::ostream&, const FloatTok&);
};

struct StringTok {
  // NOTE(review): the element type was missing; std::string is assumed -
  // confirm. When and why `str` is left empty is decided by the lexer
  // implementation, which is not part of this header.
  std::optional<std::string> str;

  bool operator==(const StringTok& other) const { return str == other.str; }
  friend std::ostream& operator<<(std::ostream&, const StringTok&);
};

// The keyword text is read from Token::span (see Token::getKeyword).
struct KeywordTok {
  bool operator==(const KeywordTok&) const { return true; }
  friend std::ostream& operator<<(std::ostream&, const KeywordTok&);
};

// A lexed token: the slice of source text it covers plus kind-specific data.
struct Token {
  // NOTE(review): the alternative list was missing. The seven token kinds
  // declared above are listed in declaration order; confirm the order, since
  // it determines `index()` and the default-constructed alternative.
  using Data = std::variant<LParenTok,
                            RParenTok,
                            IdTok,
                            IntTok,
                            FloatTok,
                            StringTok,
                            KeywordTok>;

  std::string_view span;
  Data data;

  // ====================
  // Token classification
  // ====================

  bool isLParen() const { return std::holds_alternative<LParenTok>(data); }

  bool isRParen() const { return std::holds_alternative<RParenTok>(data); }

  // Returns the identifier without its first character, or nullopt if this is
  // not an identifier token.
  std::optional<std::string_view> getID() const {
    if (std::holds_alternative<IdTok>(data)) {
      // Drop leading '$'.
      return span.substr(1);
    }
    return {};
  }

  // Returns the full span, or nullopt if this is not a keyword token.
  std::optional<std::string_view> getKeyword() const {
    if (std::holds_alternative<KeywordTok>(data)) {
      return span;
    }
    return {};
  }

  // Value accessors, defined out of line.
  // NOTE(review): the return element types were missing and are inferred from
  // the accessor names (getString is assumed to yield a string_view) - confirm
  // against the definitions.
  std::optional<uint64_t> getU64() const;
  std::optional<int64_t> getS64() const;
  std::optional<uint64_t> getI64() const;
  std::optional<uint32_t> getU32() const;
  std::optional<int32_t> getS32() const;
  std::optional<uint32_t> getI32() const;
  std::optional<double> getF64() const;
  std::optional<float> getF32() const;
  std::optional<std::string_view> getString() const;

  bool operator==(const Token&) const;
  friend std::ostream& operator<<(std::ostream& os, const Token&);
};

// =====
// Lexer
// =====

// Lexer's purpose is twofold. First, it wraps a buffer to provide a tokenizing
// iterator over it. Second, it implements that iterator itself. Also provides
// utilities for locating the text position of tokens within the buffer. Text
// positions are computed on demand rather than eagerly because they are
// typically only needed when there is an error to report.
struct Lexer {
  using iterator = Lexer;
  using difference_type = std::ptrdiff_t;
  using value_type = Token;
  using pointer = const Token*;
  using reference = const Token&;
  using iterator_category = std::forward_iterator_tag;

private:
  std::string_view buffer;
  size_t index = 0;
  // The current token; empty when there is none, which is what marks the end
  // sentinel (see operator==).
  std::optional<Token> curr;

public:
  // The end sentinel.
  Lexer() = default;

  // Wraps `buffer` (not copied - it is a view) and lexes from index 0.
  Lexer(std::string_view buffer) : buffer(buffer) { setIndex(0); }

  size_t getIndex() const { return index; }

  // Repositions the lexer at `i`, then runs skipSpace() and lexToken() to
  // refresh the current token.
  void setIndex(size_t i) {
    index = i;
    skipSpace();
    lexToken();
  }

  // The part of the buffer from the current index onward.
  std::string_view next() const { return buffer.substr(index); }

  Lexer& operator++() {
    // Preincrement
    skipSpace();
    lexToken();
    return *this;
  }

  Lexer operator++(int) {
    // Postincrement
    Lexer ret = *this;
    ++(*this);
    return ret;
  }

  // Dereferencing requires a current token, i.e. the lexer must not be empty().
  const Token& operator*() const { return *curr; }
  const Token* operator->() const { return &*curr; }

  bool operator==(const Lexer& other) const {
    // A lexer without a current token is an end sentinel. All end sentinels are
    // equal to each other and never equal to a lexer that still holds a token,
    // whatever the two indices happen to be.
    if (!curr || !other.curr) {
      return !curr && !other.curr;
    }
    // Otherwise they are equivalent when they are at the same position. Only
    // the index is compared, not the buffers.
    return index == other.index;
  }

  bool operator!=(const Lexer& other) const { return !(*this == other); }

  Lexer begin() const { return *this; }

  Lexer end() const { return Lexer(); }

  bool empty() const { return *this == end(); }

  // Text position lookups. The pointer overload is defined out of line; the
  // others forward to it.
  TextPos position(const char* c) const;
  TextPos position(size_t i) const { return position(buffer.data() + i); }
  TextPos position(std::string_view span) const {
    return position(span.data());
  }
  // Taken by reference so the token's variant payload is not copied.
  TextPos position(const Token& tok) const { return position(tok.span); }

private:
  void skipSpace();
  void lexToken();
};

} // namespace wasm::WATParser

#endif // wasm_wat_lexer_h