From ed15efeedd33bbdbadfafabc812d70a792a9a06c Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 5 Feb 2024 10:24:16 -0800 Subject: [Parser] Templatize lexing of integers (#6272) Have a single implementation for lexing each of unsigned, signed, and uninterpreted integers, each generic over the bit width of the integer. This reduces duplication in the existing code and it will make it much easier to support lexing more 8- and 16-bit integers. --- src/parser/input-impl.h | 66 +++++++++++---------------------------------- src/parser/input.h | 11 +++++--- src/parser/lexer.cpp | 71 ++++++++++++++++--------------------------------- src/parser/lexer.h | 10 +++---- 4 files changed, 50 insertions(+), 108 deletions(-) (limited to 'src') diff --git a/src/parser/input-impl.h b/src/parser/input-impl.h index 0f8fc2e86..3ffce07f8 100644 --- a/src/parser/input-impl.h +++ b/src/parser/input-impl.h @@ -100,7 +100,7 @@ inline std::optional ParseInput::takeOffset() { if (subLexer == subLexer.end()) { return {}; } - if (auto o = subLexer->getU64()) { + if (auto o = subLexer->getU()) { ++subLexer; if (subLexer == subLexer.end()) { ++lexer; @@ -122,7 +122,7 @@ inline std::optional ParseInput::takeAlign() { if (subLexer == subLexer.end()) { return {}; } - if (auto a = subLexer->getU32()) { + if (auto a = subLexer->getU()) { ++subLexer; if (subLexer == subLexer.end()) { ++lexer; @@ -134,9 +134,9 @@ inline std::optional ParseInput::takeAlign() { return {}; } -inline std::optional ParseInput::takeU64() { +template inline std::optional ParseInput::takeU() { if (auto t = peek()) { - if (auto n = t->getU64()) { + if (auto n = t->getU()) { ++lexer; return n; } @@ -144,67 +144,33 @@ inline std::optional ParseInput::takeU64() { return std::nullopt; } -inline std::optional ParseInput::takeS64() { +template inline std::optional ParseInput::takeI() { if (auto t = peek()) { - if (auto n = t->getS64()) { + if (auto n = t->getI()) { ++lexer; return n; } } - return {}; + return std::nullopt; } -inline std::optional ParseInput::takeI64() { - if (auto t = peek()) { - if (auto n = t->getI64()) { - ++lexer; - return n; - } - } - return {}; +inline std::optional ParseInput::takeU64() { + return takeU(); } -inline std::optional ParseInput::takeU32() { - if (auto t = peek()) { - if (auto n = t->getU32()) { - ++lexer; - return n; - } - } - return std::nullopt; +inline std::optional ParseInput::takeI64() { + return takeI(); } -inline std::optional ParseInput::takeS32() { - if (auto t = peek()) { - if (auto n = t->getS32()) { - ++lexer; - return n; - } - } - return {}; +inline std::optional ParseInput::takeU32() { + return takeU(); } -inline std::optional ParseInput::takeI32() { - if (auto t = peek()) { - if (auto n = t->getI32()) { - ++lexer; - return n; - } - } - return {}; +inline std::optional ParseInput::takeI32() { + return takeI(); } -inline std::optional ParseInput::takeU8() { - if (auto t = peek()) { - if (auto n = t->getU32()) { - if (n <= std::numeric_limits::max()) { - ++lexer; - return uint8_t(*n); - } - } - } - return {}; -} +inline std::optional ParseInput::takeU8() { return takeU(); } inline std::optional ParseInput::takeF64() { if (auto t = peek()) { diff --git a/src/parser/input.h b/src/parser/input.h index dbf3e4868..d4fdde1bd 100644 --- a/src/parser/input.h +++ b/src/parser/input.h @@ -51,11 +51,9 @@ struct ParseInput { std::optional takeOffset(); std::optional takeAlign(); std::optional takeU64(); - std::optional takeS64(); - std::optional takeI64(); + std::optional takeI64(); std::optional takeU32(); - std::optional takeS32(); - std::optional takeI32(); + std::optional takeI32(); std::optional takeU8(); std::optional takeF64(); std::optional takeF32(); @@ -67,6 +65,11 @@ struct ParseInput { Index getPos(); [[nodiscard]] Err err(Index pos, std::string reason); [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); } + +private: + template std::optional takeU(); + template std::optional takeS(); + template std::optional takeI(); }; #include "input-impl.h" diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp index 0796013fe..288660c76 100644 --- a/src/parser/lexer.cpp +++ b/src/parser/lexer.cpp @@ -767,77 +767,52 @@ std::optional keyword(std::string_view in) { } // anonymous namespace -std::optional Token::getU64() const { +template std::optional Token::getU() const { + static_assert(std::is_integral_v && std::is_unsigned_v); if (auto* tok = std::get_if(&data)) { - if (tok->sign == NoSign) { - return tok->n; - } - } - return {}; -} - -std::optional Token::getS64() const { - if (auto* tok = std::get_if(&data)) { - if (tok->sign == Neg) { - if (uint64_t(INT64_MIN) <= tok->n || tok->n == 0) { - return int64_t(tok->n); - } - // TODO: Add error production for signed underflow. - } else { - if (tok->n <= uint64_t(INT64_MAX)) { - return int64_t(tok->n); - } - // TODO: Add error production for signed overflow. - } - } - return {}; -} - -std::optional Token::getI64() const { - if (auto n = getU64()) { - return *n; - } - if (auto n = getS64()) { - return *n; - } - return {}; -} - -std::optional Token::getU32() const { - if (auto* tok = std::get_if(&data)) { - if (tok->sign == NoSign && tok->n <= UINT32_MAX) { - return int32_t(tok->n); + if (tok->sign == NoSign && tok->n <= std::numeric_limits::max()) { + return T(tok->n); } // TODO: Add error production for unsigned overflow. } return {}; } -std::optional Token::getS32() const { +template std::optional Token::getS() const { + static_assert(std::is_integral_v && std::is_signed_v); if (auto* tok = std::get_if(&data)) { if (tok->sign == Neg) { - if (uint64_t(INT32_MIN) <= tok->n || tok->n == 0) { - return int32_t(tok->n); + if (uint64_t(std::numeric_limits::min()) <= tok->n || tok->n == 0) { + return T(tok->n); } } else { - if (tok->n <= uint64_t(INT32_MAX)) { - return int32_t(tok->n); + if (tok->n <= uint64_t(std::numeric_limits::max())) { + return T(tok->n); } } } return {}; } -std::optional Token::getI32() const { - if (auto n = getU32()) { +template std::optional Token::getI() const { + static_assert(std::is_integral_v && std::is_unsigned_v); + if (auto n = getU()) { return *n; } - if (auto n = getS32()) { - return uint32_t(*n); + if (auto n = getS>()) { + return T(*n); } return {}; } +template std::optional Token::getU() const; +template std::optional Token::getS() const; +template std::optional Token::getI() const; +template std::optional Token::getU() const; +template std::optional Token::getS() const; +template std::optional Token::getI() const; +template std::optional Token::getU() const; + std::optional Token::getF64() const { constexpr int signif = 52; constexpr uint64_t payloadMask = (1ull << signif) - 1; diff --git a/src/parser/lexer.h b/src/parser/lexer.h index 67d29b002..42b18508e 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -125,12 +125,10 @@ struct Token { } return {}; } - std::optional getU64() const; - std::optional getS64() const; - std::optional getI64() const; - std::optional getU32() const; - std::optional getS32() const; - std::optional getI32() const; + + template std::optional getU() const; + template std::optional getS() const; + template std::optional getI() const; std::optional getF64() const; std::optional getF32() const; std::optional getString() const; -- cgit v1.2.3