diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/input-impl.h | 255 | ||||
-rw-r--r-- | src/parser/input.h | 69 | ||||
-rw-r--r-- | src/parser/lexer.h | 246 |
3 files changed, 252 insertions, 318 deletions
diff --git a/src/parser/input-impl.h b/src/parser/input-impl.h deleted file mode 100644 index e3cf52015..000000000 --- a/src/parser/input-impl.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright 2023 WebAssembly Community Group participants - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "input.h" - -#ifndef parser_input_impl_h -#define parser_input_impl_h - -inline std::optional<Token> ParseInput::peek() { - if (!empty()) { - return *lexer; - } - return {}; -} - -inline bool ParseInput::takeLParen() { - auto t = peek(); - if (!t || !t->isLParen()) { - return false; - } - ++lexer; - return true; -} - -inline bool ParseInput::takeRParen() { - auto t = peek(); - if (!t || !t->isRParen()) { - return false; - } - ++lexer; - return true; -} - -inline bool ParseInput::takeUntilParen() { - while (true) { - auto t = peek(); - if (!t) { - return false; - } - if (t->isLParen() || t->isRParen()) { - return true; - } - ++lexer; - } -} - -inline std::optional<Name> ParseInput::takeID() { - if (auto t = peek()) { - if (auto id = t->getID()) { - ++lexer; - // See comment on takeName. - return Name(std::string(*id)); - } - } - return {}; -} - -inline std::optional<std::string_view> ParseInput::takeKeyword() { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - ++lexer; - return *keyword; - } - } - return {}; -} - -inline bool ParseInput::takeKeyword(std::string_view expected) { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - if (*keyword == expected) { - ++lexer; - return true; - } - } - } - return false; -} - -inline std::optional<uint64_t> ParseInput::takeOffset() { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - if (keyword->substr(0, 7) != "offset="sv) { - return {}; - } - Lexer subLexer(keyword->substr(7)); - if (subLexer == subLexer.end()) { - return {}; - } - if (auto o = subLexer->getU<uint64_t>()) { - ++subLexer; - if (subLexer == subLexer.end()) { - ++lexer; - return o; - } - } - } - } - return std::nullopt; -} - -inline std::optional<uint32_t> ParseInput::takeAlign() { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - if (keyword->substr(0, 6) != "align="sv) { - return {}; - } - Lexer subLexer(keyword->substr(6)); - if (subLexer == subLexer.end()) { - return {}; - } - if (auto a = subLexer->getU<uint32_t>()) { - ++subLexer; - if (subLexer == subLexer.end()) { - ++lexer; - return a; - } - } - } - } - return {}; -} - -template<typename T> inline std::optional<T> ParseInput::takeU() { - if (auto t = peek()) { - if (auto n = t->getU<T>()) { - ++lexer; - return n; - } - } - return std::nullopt; -} - -template<typename T> inline std::optional<T> ParseInput::takeI() { - if (auto t = peek()) { - if (auto n = t->getI<T>()) { - ++lexer; - return n; - } - } - return std::nullopt; -} - -inline std::optional<uint64_t> ParseInput::takeU64() { - return takeU<uint64_t>(); -} - -inline std::optional<uint64_t> ParseInput::takeI64() { - return takeI<uint64_t>(); -} - -inline std::optional<uint32_t> ParseInput::takeU32() { - return takeU<uint64_t>(); -} - -inline std::optional<uint32_t> ParseInput::takeI32() { - return takeI<uint32_t>(); -} - -inline std::optional<uint16_t> ParseInput::takeI16() { - return takeI<uint16_t>(); -} - -inline std::optional<uint8_t> ParseInput::takeU8() { return takeU<uint8_t>(); } - -inline std::optional<uint8_t> ParseInput::takeI8() { return takeI<uint8_t>(); } - -inline std::optional<double> ParseInput::takeF64() { - if (auto t = peek()) { - if (auto d = t->getF64()) { - ++lexer; - return d; - } - } - return std::nullopt; -} - -inline std::optional<float> ParseInput::takeF32() { - if (auto t = peek()) { - if (auto f = t->getF32()) { - ++lexer; - return f; - } - } - return std::nullopt; -} - -inline std::optional<std::string> ParseInput::takeString() { - if (auto t = peek()) { - if (auto s = t->getString()) { - ++lexer; - return std::string(*s); - } - } - return {}; -} - -inline std::optional<Name> ParseInput::takeName() { - // TODO: Move this to lexer and validate UTF. - if (auto str = takeString()) { - // Copy to a std::string to make sure we have a null terminator, otherwise - // the `Name` constructor won't work correctly. - // TODO: Update `Name` to use string_view instead of char* and/or to take - // rvalue strings to avoid this extra copy. - return Name(std::string(*str)); - } - return {}; -} - -inline bool ParseInput::takeSExprStart(std::string_view expected) { - auto original = lexer; - if (takeLParen() && takeKeyword(expected)) { - return true; - } - lexer = original; - return false; -} - -inline bool ParseInput::peekSExprStart(std::string_view expected) { - auto original = lexer; - if (!takeLParen()) { - return false; - } - bool ret = takeKeyword(expected); - lexer = original; - return ret; -} - -inline Index ParseInput::getPos() { - if (auto t = peek()) { - return lexer.getIndex() - t->span.size(); - } - return lexer.getIndex(); -} - -inline Err ParseInput::err(Index pos, std::string reason) { - std::stringstream msg; - msg << lexer.position(pos) << ": error: " << reason; - return Err{msg.str()}; -} - -#endif // parser_input_impl_h diff --git a/src/parser/input.h b/src/parser/input.h index 6086ed1a5..f83f5a40a 100644 --- a/src/parser/input.h +++ b/src/parser/input.h @@ -41,40 +41,47 @@ struct ParseInput { bool empty() { return lexer.empty(); } - std::optional<Token> peek(); - bool takeLParen(); - bool takeRParen(); - bool takeUntilParen(); - std::optional<Name> takeID(); - std::optional<std::string_view> takeKeyword(); - bool takeKeyword(std::string_view expected); - std::optional<uint64_t> takeOffset(); - std::optional<uint32_t> takeAlign(); - std::optional<uint64_t> takeU64(); - std::optional<uint64_t> takeI64(); - std::optional<uint32_t> takeU32(); - std::optional<uint32_t> takeI32(); - std::optional<uint16_t> takeI16(); - std::optional<uint8_t> takeU8(); - std::optional<uint8_t> takeI8(); - std::optional<double> takeF64(); - std::optional<float> takeF32(); - std::optional<std::string> takeString(); - std::optional<Name> takeName(); - bool takeSExprStart(std::string_view expected); - bool peekSExprStart(std::string_view expected); + // TODO: Remove this useless layer of abstraction between the Lexer and + // Parser. + std::optional<Token> peek() { return lexer.peek(); } + bool takeLParen() { return lexer.takeLParen(); } + bool takeRParen() { return lexer.takeRParen(); } + bool takeUntilParen() { return lexer.takeUntilParen(); } + std::optional<Name> takeID() { return lexer.takeID(); } + std::optional<std::string_view> takeKeyword() { return lexer.takeKeyword(); } + bool takeKeyword(std::string_view expected) { + return lexer.takeKeyword(expected); + } + std::optional<uint64_t> takeOffset() { return lexer.takeOffset(); } + std::optional<uint32_t> takeAlign() { return lexer.takeAlign(); } + std::optional<uint64_t> takeU64() { return lexer.takeU64(); } + std::optional<uint64_t> takeI64() { return lexer.takeI64(); } + std::optional<uint32_t> takeU32() { return lexer.takeU32(); } + std::optional<uint32_t> takeI32() { return lexer.takeI32(); } + std::optional<uint16_t> takeI16() { return lexer.takeI16(); } + std::optional<uint8_t> takeU8() { return lexer.takeU8(); } + std::optional<uint8_t> takeI8() { return lexer.takeI8(); } + std::optional<double> takeF64() { return lexer.takeF64(); } + std::optional<float> takeF32() { return lexer.takeF32(); } + std::optional<std::string> takeString() { return lexer.takeString(); } + std::optional<Name> takeName() { return lexer.takeName(); } + bool takeSExprStart(std::string_view expected) { + return lexer.takeSExprStart(expected); + } + bool peekSExprStart(std::string_view expected) { + return lexer.peekSExprStart(expected); + } - Index getPos(); - [[nodiscard]] Err err(Index pos, std::string reason); - [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); } + Index getPos() { return lexer.getPos(); } -private: - template<typename T> std::optional<T> takeU(); - template<typename T> std::optional<T> takeS(); - template<typename T> std::optional<T> takeI(); -}; + [[nodiscard]] Err err(Index pos, std::string reason) { + std::stringstream msg; + msg << lexer.position(pos) << ": error: " << reason; + return Err{msg.str()}; + } -#include "input-impl.h" + [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); } +}; } // namespace wasm::WATParser diff --git a/src/parser/lexer.h b/src/parser/lexer.h index f0da151f9..8f9bd103a 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -23,6 +23,8 @@ #include <string_view> #include <variant> +#include "support/name.h" + #ifndef parser_lexer_h #define parser_lexer_h @@ -147,13 +149,6 @@ struct Token { // positions are computed on demand rather than eagerly because they are // typically only needed when there is an error to report. struct Lexer { - using iterator = Lexer; - using difference_type = std::ptrdiff_t; - using value_type = Token; - using pointer = const Token*; - using reference = const Token&; - using iterator_category = std::forward_iterator_tag; - private: std::string_view buffer; size_t index = 0; @@ -169,51 +164,238 @@ public: void setIndex(size_t i) { index = i; - skipSpace(); - lexToken(); + advance(); } - std::string_view next() const { return buffer.substr(index); } - Lexer& operator++() { - // Preincrement - skipSpace(); - lexToken(); - return *this; + std::optional<Token> peek() const { return curr; } + + bool takeLParen() { + auto t = peek(); + if (!t || !t->isLParen()) { + return false; + } + advance(); + return true; } - Lexer operator++(int) { - // Postincrement - Lexer ret = *this; - ++(*this); - return ret; + bool takeRParen() { + auto t = peek(); + if (!t || !t->isRParen()) { + return false; + } + advance(); + return true; + } + + bool takeUntilParen() { + while (true) { + auto t = peek(); + if (!t) { + return false; + } + if (t->isLParen() || t->isRParen()) { + return true; + } + advance(); + } + } + + std::optional<Name> takeID() { + if (auto t = peek()) { + if (auto id = t->getID()) { + advance(); + // See comment on takeName. + return Name(std::string(*id)); + } + } + return {}; + } + + std::optional<std::string_view> takeKeyword() { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + advance(); + return *keyword; + } + } + return {}; + } + + bool takeKeyword(std::string_view expected) { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (*keyword == expected) { + advance(); + return true; + } + } + } + return false; + } + + std::optional<uint64_t> takeOffset() { + using namespace std::string_view_literals; + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (keyword->substr(0, 7) != "offset="sv) { + return {}; + } + Lexer subLexer(keyword->substr(7)); + if (subLexer.empty()) { + return {}; + } + if (auto o = subLexer.peek()->getU<uint64_t>()) { + subLexer.advance(); + if (subLexer.empty()) { + advance(); + return o; + } + } + } + } + return std::nullopt; + } + + std::optional<uint32_t> takeAlign() { + using namespace std::string_view_literals; + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (keyword->substr(0, 6) != "align="sv) { + return {}; + } + Lexer subLexer(keyword->substr(6)); + if (subLexer.empty()) { + return {}; + } + if (auto a = subLexer.peek()->getU<uint32_t>()) { + subLexer.advance(); + if (subLexer.empty()) { + advance(); + return a; + } + } + } + } + return {}; + } + + template<typename T> std::optional<T> takeU() { + if (auto t = peek()) { + if (auto n = t->getU<T>()) { + advance(); + return n; + } + } + return std::nullopt; + } + + template<typename T> std::optional<T> takeI() { + if (auto t = peek()) { + if (auto n = t->getI<T>()) { + advance(); + return n; + } + } + return std::nullopt; + } + + std::optional<uint64_t> takeU64() { return takeU<uint64_t>(); } + + std::optional<uint64_t> takeI64() { return takeI<uint64_t>(); } + + std::optional<uint32_t> takeU32() { return takeU<uint32_t>(); } + + std::optional<uint32_t> takeI32() { return takeI<uint32_t>(); } + + std::optional<uint16_t> takeI16() { return takeI<uint16_t>(); } + + std::optional<uint8_t> takeU8() { return takeU<uint8_t>(); } + + std::optional<uint8_t> takeI8() { return takeI<uint8_t>(); } + + std::optional<double> takeF64() { + if (auto t = peek()) { + if (auto d = t->getF64()) { + advance(); + return d; + } + } + return std::nullopt; + } + + std::optional<float> takeF32() { + if (auto t = peek()) { + if (auto f = t->getF32()) { + advance(); + return f; + } + } + return std::nullopt; } - const Token& operator*() { return *curr; } - const Token* operator->() { return &*curr; } + std::optional<std::string> takeString() { + if (auto t = peek()) { + if (auto s = t->getString()) { + advance(); + return std::string(*s); + } + } + return {}; + } - bool operator==(const Lexer& other) const { - // The iterator is equal to the end sentinel when there is no current token. - if (!curr && !other.curr) { + std::optional<Name> takeName() { + // TODO: Move this to lexer and validate UTF. + if (auto str = takeString()) { + // Copy to a std::string to make sure we have a null terminator, otherwise + // the `Name` constructor won't work correctly. + // TODO: Update `Name` to use string_view instead of char* and/or to take + // rvalue strings to avoid this extra copy. + return Name(std::string(*str)); + } + return {}; + } + + bool takeSExprStart(std::string_view expected) { + auto original = *this; + if (takeLParen() && takeKeyword(expected)) { return true; } - // Otherwise they are equivalent when they are at the same position. - return index == other.index; + *this = original; + return false; } - bool operator!=(const Lexer& other) const { return !(*this == other); } + bool peekSExprStart(std::string_view expected) { + auto original = *this; + if (!takeLParen()) { + return false; + } + bool ret = takeKeyword(expected); + *this = original; + return ret; + } - Lexer begin() { return *this; } + std::string_view next() const { return buffer.substr(index); } - Lexer end() const { return Lexer(); } + void advance() { + skipSpace(); + lexToken(); + } - bool empty() const { return *this == end(); } + bool empty() const { return !curr; } TextPos position(const char* c) const; TextPos position(size_t i) const { return position(buffer.data() + i); } TextPos position(std::string_view span) const { return position(span.data()); } - TextPos position(Token tok) const { return position(tok.span); } + TextPos position() const { return position(getPos()); } + + size_t getPos() const { + if (auto t = peek()) { + return getIndex() - t->span.size(); + } + return getIndex(); + } private: void skipSpace(); |