-rw-r--r--   src/wasm/wat-parser-internal.h | 557
-rw-r--r--   test/gtest/CMakeLists.txt      |   2
-rw-r--r--   test/gtest/wat-parser.cpp      | 348
3 files changed, 907 insertions, 0 deletions
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h
new file mode 100644
index 000000000..ede89de66
--- /dev/null
+++ b/src/wasm/wat-parser-internal.h
@@ -0,0 +1,557 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Usage note
+// ----------
+//
+// This parser is a work in progress and this file should not yet be included
+// anywhere except for in its own tests. Once the parser is usable, we will add
+// wat-parser.h to declare the public parsing API and wat-parser.cpp to
+// implement the public parsing functions in terms of the private API in this
+// header. The private API will stay in this header rather than moving to
+// wat-parser.cpp so that we can continue to unit test it.
+
+#include <cassert>
+#include <cctype>
+#include <iostream>
+#include <optional>
+#include <variant>
+
+using namespace std::string_view_literals;
+
+namespace wasm::WATParser {
+
+namespace {
+
+// ================
+// Lexical Analysis
+// ================
+
+// The result of lexing a token fragment.
+struct LexResult {
+  std::string_view span;
+};
+
+// Lexing context that accumulates lexed input to produce a token fragment.
+struct LexCtx {
+private:
+  // The input we are lexing.
+  std::string_view input;
+
+  // How much of the input we have already lexed.
+  size_t lexedSize = 0;
+
+public:
+  explicit LexCtx(std::string_view in) : input(in) {}
+
+  // Return the fragment that has been lexed so far.
+  std::optional<LexResult> lexed() const {
+    if (lexedSize > 0) {
+      return {LexResult{input.substr(0, lexedSize)}};
+    }
+    return {};
+  }
+
+  // The next input that has not already been lexed.
+  std::string_view next() const { return input.substr(lexedSize); }
+
+  // The size of the unlexed input.
+  size_t size() const { return input.size() - lexedSize; }
+
+  // Whether there is no more input.
+  bool empty() const { return size() == 0; }
+
+  // Tokens must be separated by spaces or parentheses.
+  bool canFinish() const;
+
+  // Whether the unlexed input starts with prefix `sv`.
+  size_t startsWith(std::string_view sv) const {
+    return next().substr(0, sv.size()) == sv;
+  }
+
+  // Consume the next `n` characters.
+  void take(size_t n) { lexedSize += n; }
+
+  // Consume an additional lexed fragment.
+  void take(const LexResult& res) { lexedSize += res.span.size(); }
+
+  // Consume the prefix and return true if possible.
+  bool takePrefix(std::string_view sv) {
+    if (startsWith(sv)) {
+      take(sv.size());
+      return true;
+    }
+    return false;
+  }
+
+  // Consume the rest of the input.
+  void takeAll() { lexedSize = input.size(); }
+};
+
+enum Signedness { Unsigned, Signed };
+
+// The result of lexing an integer token fragment.
+struct LexIntResult : LexResult {
+  uint64_t n;
+  Signedness signedness;
+};
+
+// Lexing context that accumulates lexed input to produce an integer token
+// fragment.
+struct LexIntCtx : LexCtx {
+  using LexCtx::take;
+
+private:
+  uint64_t n = 0;
+  Signedness signedness = Unsigned;
+  bool negative = false;
+  bool overflow = false;
+
+  std::optional<int> getDigit(char c) {
+    if ('0' <= c && c <= '9') {
+      return {c - '0'};
+    }
+    return std::nullopt;
+  }
+
+  std::optional<int> getHexDigit(char c) {
+    if ('0' <= c && c <= '9') {
+      return {c - '0'};
+    }
+    if ('A' <= c && c <= 'F') {
+      return {10 + c - 'A'};
+    }
+    if ('a' <= c && c <= 'f') {
+      return {10 + c - 'a'};
+    }
+    return std::nullopt;
+  }
+
+public:
+  explicit LexIntCtx(std::string_view in) : LexCtx(in) {}
+
+  std::optional<LexIntResult> lexed() {
+    // Check most significant bit for overflow of signed numbers.
+    if (overflow) {
+      return {};
+    }
+    auto basic = LexCtx::lexed();
+    if (!basic) {
+      return {};
+    }
+    if (signedness == Signed) {
+      if (negative) {
+        if (n > (1ull << 63)) {
+          // TODO: Add error production for signed underflow.
+          return {};
+        }
+      } else {
+        if (n > (1ull << 63) - 1) {
+          // TODO: Add error production for signed overflow.
+          return {};
+        }
+      }
+    }
+    return {LexIntResult{*basic, negative ? -n : n, signedness}};
+  }
+
+  void takeSign() {
+    if (takePrefix("+"sv)) {
+      signedness = Signed;
+    } else if (takePrefix("-"sv)) {
+      signedness = Signed;
+      negative = true;
+    }
+  }
+
+  bool takeDigit() {
+    if (!empty()) {
+      if (auto d = getDigit(next()[0])) {
+        take(1);
+        uint64_t newN = n * 10 + *d;
+        if (newN < n) {
+          overflow = true;
+        }
+        n = newN;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  bool takeHexdigit() {
+    if (!empty()) {
+      if (auto h = getHexDigit(next()[0])) {
+        take(1);
+        uint64_t newN = n * 16 + *h;
+        if (newN < n) {
+          overflow = true;
+        }
+        n = newN;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  void take(const LexIntResult& res) {
+    LexCtx::take(res);
+    n = res.n;
+  }
+};
+
+std::optional<LexResult> lparen(std::string_view in) {
+  LexCtx ctx(in);
+  ctx.takePrefix("("sv);
+  return ctx.lexed();
+}
+
+std::optional<LexResult> rparen(std::string_view in) {
+  LexCtx ctx(in);
+  ctx.takePrefix(")"sv);
+  return ctx.lexed();
+}
+
+// comment      ::= linecomment | blockcomment
+// linecomment  ::= ';;' linechar* ('\n' | eof)
+// linechar     ::= c:char (if c != '\n')
+// blockcomment ::= '(;' blockchar* ';)'
+// blockchar    ::= c:char (if c != ';' and c != '(')
+//                | ';' (if the next char is not ')')
+//                | '(' (if the next char is not ';')
+//                | blockcomment
+std::optional<LexResult> comment(std::string_view in) {
+  LexCtx ctx(in);
+  if (ctx.size() < 2) {
+    return {};
+  }
+
+  // Line comment
+  if (ctx.takePrefix(";;"sv)) {
+    if (auto size = ctx.next().find('\n'); size != ""sv.npos) {
+      ctx.take(size);
+    } else {
+      ctx.takeAll();
+    }
+    return ctx.lexed();
+  }
+
+  // Block comment (possibly nested!)
+  if (ctx.takePrefix("(;"sv)) {
+    size_t depth = 1;
+    while (depth > 0 && ctx.size() >= 2) {
+      if (ctx.takePrefix("(;"sv)) {
+        ++depth;
+      } else if (ctx.takePrefix(";)"sv)) {
+        --depth;
+      } else {
+        ctx.take(1);
+      }
+    }
+    if (depth > 0) {
+      // TODO: Add error production for non-terminated block comment.
+      return {};
+    }
+    return ctx.lexed();
+  }
+
+  return {};
+}
+
+std::optional<LexResult> spacechar(std::string_view in) {
+  LexCtx ctx(in);
+  ctx.takePrefix(" "sv) || ctx.takePrefix("\n"sv) || ctx.takePrefix("\r"sv) ||
+    ctx.takePrefix("\t"sv);
+  return ctx.lexed();
+}
+
+// space  ::= (' ' | format | comment)*
+// format ::= '\t' | '\n' | '\r'
+std::optional<LexResult> space(std::string_view in) {
+  LexCtx ctx(in);
+  while (ctx.size()) {
+    if (auto lexed = spacechar(ctx.next())) {
+      ctx.take(*lexed);
+    } else if (auto lexed = comment(ctx.next())) {
+      ctx.take(*lexed);
+    } else {
+      break;
+    }
+  }
+  return ctx.lexed();
+}
+
+bool LexCtx::canFinish() const {
+  // Logically we want to check for eof, parens, and space. But we don't
+  // actually want to parse more than a couple characters of space, so check
+  // for individual space chars or comment starts instead.
+  return empty() || lparen(next()) || rparen(next()) || spacechar(next()) ||
+         startsWith(";;"sv);
+}
+
+// num   ::= d:digit => d
+//         | n:num '_'? d:digit => 10*n + d
+// digit ::= '0' => 0 | ... | '9' => 9
+std::optional<LexIntResult> num(std::string_view in) {
+  LexIntCtx ctx(in);
+  if (!ctx.takeDigit()) {
+    return {};
+  }
+  while (true) {
+    bool under = ctx.takePrefix("_"sv);
+    if (!ctx.takeDigit()) {
+      if (!under) {
+        return ctx.lexed();
+      }
+      return {};
+    }
+  }
+}
+
+// hexnum   ::= h:hexdigit => h
+//            | n:hexnum '_'? h:hexdigit => 16*n + h
+// hexdigit ::= d:digit => d
+//            | 'A' => 10 | ... | 'F' => 15
+//            | 'a' => 10 | ... | 'f' => 15
+std::optional<LexIntResult> hexnum(std::string_view in) {
+  LexIntCtx ctx(in);
+  if (!ctx.takeHexdigit()) {
+    return {};
+  }
+  while (true) {
+    bool under = ctx.takePrefix("_"sv);
+    if (!ctx.takeHexdigit()) {
+      if (!under) {
+        return ctx.lexed();
+      }
+      return {};
+    }
+  }
+}
+
+// uN ::= n:num         => n (if n < 2^N)
+//      | '0x' n:hexnum => n (if n < 2^N)
+// sN ::= s:sign n:num         => [s]n (if -2^(N-1) <= [s]n < 2^(N-1))
+//      | s:sign '0x' n:hexnum => [s]n (if -2^(N-1) <= [s]n < 2^(N-1))
+// sign ::= {} => + | '+' => + | '-' => -
+//
+// Note: Defer bounds and sign checking until we know what kind of integer we
+// expect.
+std::optional<LexIntResult> integer(std::string_view in) {
+  LexIntCtx ctx(in);
+  ctx.takeSign();
+  if (ctx.takePrefix("0x"sv)) {
+    if (auto lexed = hexnum(ctx.next())) {
+      ctx.take(*lexed);
+      if (ctx.canFinish()) {
+        return ctx.lexed();
+      }
+    }
+    // TODO: Add error production for unrecognized hexnum.
+    return {};
+  }
+  if (auto lexed = num(ctx.next())) {
+    ctx.take(*lexed);
+    if (ctx.canFinish()) {
+      return ctx.lexed();
+    }
+  }
+  return {};
+}
+
+// ======
+// Tokens
+// ======
+
+struct LParenTok {
+  friend std::ostream& operator<<(std::ostream& os, const LParenTok&) {
+    return os << "'('";
+  }
+
+  friend bool operator==(const LParenTok&, const LParenTok&) { return true; }
+};
+
+struct RParenTok {
+  friend std::ostream& operator<<(std::ostream& os, const RParenTok&) {
+    return os << "')'";
+  }
+
+  friend bool operator==(const RParenTok&, const RParenTok&) { return true; }
+};
+
+struct IntTok {
+  uint64_t n;
+  Signedness signedness;
+
+  friend std::ostream& operator<<(std::ostream& os, const IntTok& tok) {
+    return os << tok.n << (tok.signedness == Signed ? " signed" : " unsigned");
" signed" : " unsigned"); + } + + friend bool operator==(const IntTok& t1, const IntTok& t2) { + return t1.n == t2.n && t1.signedness == t2.signedness; + } +}; + +struct Token { + using Data = std::variant<LParenTok, RParenTok, IntTok>; + + std::string_view span; + Data data; + + // Suppress clang-tidy false positive about unused functions. + [[maybe_unused]] friend std::ostream& operator<<(std::ostream& os, + const Token& tok) { + std::visit([&](const auto& t) { os << t; }, tok.data); + return os << " \"" << tok.span << "\""; + } + + [[maybe_unused]] friend bool operator==(const Token& t1, const Token& t2) { + return t1.span == t2.span && + std::visit( + [](auto& d1, auto& d2) { + if constexpr (std::is_same_v<decltype(d1), decltype(d2)>) { + return d1 == d2; + } else { + return false; + } + }, + t1.data, + t2.data); + } +}; + +struct TextPos { + size_t line; + size_t col; + + bool operator==(const TextPos& other) const { + return line == other.line && col == other.col; + } + bool operator!=(const TextPos& other) const { return !(*this == other); } + + // Suppress clang-tidy false positive about unused functions. + [[maybe_unused]] friend std::ostream& operator<<(std::ostream& os, + const TextPos& pos) { + return os << pos.line << ":" << pos.col; + } +}; + +// Lexer's purpose is twofold. First, it wraps a buffer to provide a tokenizing +// iterator over it. Second, it implements that iterator itself. Also provides +// utilities for locating the text position of tokens within the buffer. Text +// positions are computed on demand rather than eagerly because they are +// typically only needed when there is an error to report. +struct Lexer { + using iterator = Lexer; + using difference_type = std::ptrdiff_t; + using value_type = Token; + using pointer = const Token*; + using reference = const Token&; + using iterator_category = std::forward_iterator_tag; + + std::string_view buffer; + size_t index = 0; + std::optional<Token> curr; + + // The end sentinel. + Lexer() = default; + + Lexer(std::string_view buffer) : buffer(buffer) { + skipSpace(); + lexToken(); + skipSpace(); + } + + std::string_view next() const { return buffer.substr(index); } + + void skipSpace() { + if (auto ctx = space(next())) { + index += ctx->span.size(); + } + } + + void lexToken() { + // TODO: Ensure we're getting the longest possible match. + Token tok; + if (auto t = lparen(next())) { + tok = Token{t->span, LParenTok{}}; + } else if (auto t = rparen(next())) { + tok = Token{t->span, RParenTok{}}; + } else if (auto t = integer(next())) { + tok = Token{t->span, IntTok{t->n, t->signedness}}; + } else { + // TODO: Do something about lexing errors. + curr = std::nullopt; + return; + } + index += tok.span.size(); + curr = {tok}; + } + + Lexer& operator++() { + // Preincrement + lexToken(); + skipSpace(); + return *this; + } + + Lexer operator++(int) { + // Postincrement + Lexer ret = *this; + ++(*this); + return ret; + } + + const Token& operator*() { return *curr; } + const Token* operator->() { return &*curr; } + + bool operator==(const Lexer& other) const { + // The iterator is equal to the end sentinel when there is no current token. + if (!curr && !other.curr) { + return true; + } + // Otherwise they are equivalent when they are at the same position. 
+    return index == other.index;
+  }
+
+  bool operator!=(const Lexer& other) const { return !(*this == other); }
+
+  Lexer begin() { return *this; }
+
+  Lexer end() { return Lexer(); }
+
+  TextPos position(const char* c) {
+    assert(size_t(c - buffer.data()) < buffer.size());
+    TextPos pos{1, 0};
+    for (const char* p = buffer.data(); p != c; ++p) {
+      if (*p == '\n') {
+        pos.line++;
+        pos.col = 0;
+      } else {
+        pos.col++;
+      }
+    }
+    return pos;
+  }
+
+  TextPos position(std::string_view span) { return position(span.data()); }
+
+  TextPos position(Token tok) { return position(tok.span); }
+};
+
+} // anonymous namespace
+
+} // namespace wasm::WATParser
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index c58827a21..6a58debbf 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -1,7 +1,9 @@
 include_directories(../../third_party/googletest/googletest/include)
+include_directories(../../src/wasm)
 
 set(unittest_SOURCES
   type-builder.cpp
+  wat-parser.cpp
 )
 
 binaryen_add_executable(binaryen-unittests "${unittest_SOURCES}")
diff --git a/test/gtest/wat-parser.cpp b/test/gtest/wat-parser.cpp
new file mode 100644
index 000000000..1eba25869
--- /dev/null
+++ b/test/gtest/wat-parser.cpp
@@ -0,0 +1,348 @@
+#include "gtest/gtest.h"
+
+#include "wat-parser-internal.h"
+
+using namespace wasm::WATParser;
+
+TEST(ParserTest, LexWhitespace) {
+  Token one{"1"sv, IntTok{1, Unsigned}};
+  Token two{"2"sv, IntTok{2, Unsigned}};
+  Token three{"3"sv, IntTok{3, Unsigned}};
+  Token four{"4"sv, IntTok{4, Unsigned}};
+  Token five{"5"sv, IntTok{5, Unsigned}};
+
+  Lexer lexer(" 1\t2\n3\r4 \n\n\t 5 "sv);
+
+  auto it = lexer.begin();
+  ASSERT_NE(it, lexer.end());
+  Token t1 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t2 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t3 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t4 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t5 = *it++;
+  EXPECT_EQ(it, lexer.end());
+
+  EXPECT_EQ(t1, one);
+  EXPECT_EQ(t2, two);
+  EXPECT_EQ(t3, three);
+  EXPECT_EQ(t4, four);
+  EXPECT_EQ(t5, five);
+
+  EXPECT_EQ(lexer.position(t1), (TextPos{1, 1}));
+  EXPECT_EQ(lexer.position(t2), (TextPos{1, 3}));
+  EXPECT_EQ(lexer.position(t3), (TextPos{2, 0}));
+  EXPECT_EQ(lexer.position(t4), (TextPos{2, 2}));
+  EXPECT_EQ(lexer.position(t5), (TextPos{4, 2}));
+}
+
+TEST(ParserTest, LexLineComment) {
+  Token one{"1"sv, IntTok{1, Unsigned}};
+  Token six{"6"sv, IntTok{6, Unsigned}};
+
+  Lexer lexer("1;; whee! 2 3\t4\r5\n6"sv);
+
+  auto it = lexer.begin();
+  Token t1 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t2 = *it++;
+  EXPECT_EQ(it, lexer.end());
+
+  EXPECT_EQ(t1, one);
+  EXPECT_EQ(t2, six);
+
+  EXPECT_EQ(lexer.position(t1), (TextPos{1, 0}));
+  EXPECT_EQ(lexer.position(t2), (TextPos{2, 0}));
+}
+
+TEST(ParserTest, LexBlockComment) {
+  Token one{"1"sv, IntTok{1, Unsigned}};
+  Token six{"6"sv, IntTok{6, Unsigned}};
+
+  Lexer lexer("1(; whoo! 2\n (; \n3\n ;) 4 (;) 5 ;) \n;)6"sv);
+
+  auto it = lexer.begin();
+  Token t1 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t2 = *it++;
+  EXPECT_EQ(it, lexer.end());
+
+  EXPECT_EQ(t1, one);
+  EXPECT_EQ(t2, six);
+
+  EXPECT_EQ(lexer.position(t1), (TextPos{1, 0}));
+  EXPECT_EQ(lexer.position(t2), (TextPos{5, 2}));
+}
+
+TEST(ParserTest, LexParens) {
+  Token left{"("sv, LParenTok{}};
+  Token right{")"sv, RParenTok{}};
+
+  Lexer lexer("(())"sv);
+
+  auto it = lexer.begin();
+  ASSERT_NE(it, lexer.end());
+  Token t1 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t2 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t3 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t4 = *it++;
+  EXPECT_EQ(it, lexer.end());
+
+  EXPECT_EQ(t1, left);
+  EXPECT_EQ(t2, left);
+  EXPECT_EQ(t3, right);
+  EXPECT_EQ(t4, right);
+}
+
+TEST(ParserTest, LexInt) {
+  {
+    Lexer lexer("0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0"sv, IntTok{0, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+0"sv, IntTok{0, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-0"sv, IntTok{0, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"1"sv, IntTok{1, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+1"sv, IntTok{1, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-1"sv, IntTok{-1ull, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0010"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0010"sv, IntTok{10, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+0010"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+0010"sv, IntTok{10, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-0010"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-0010"sv, IntTok{-10ull, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("9999"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"9999"sv, IntTok{9999, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+9999"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+9999"sv, IntTok{9999, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-9999"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-9999"sv, IntTok{-9999ull, Signed}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("12_34"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"12_34"sv, IntTok{1234, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("1_2_3_4"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"1_2_3_4"sv, IntTok{1234, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("_1234"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("1234_"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("12__34"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("12cd56"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("18446744073709551615"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"18446744073709551615"sv, IntTok{-1ull, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // 64-bit unsigned overflow!
+ Lexer lexer("18446744073709551616"); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("+9223372036854775807"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+9223372036854775807"sv, IntTok{~(1ull << 63), Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + // 64-bit signed overflow! + Lexer lexer("+9223372036854775808"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("-9223372036854775808"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-9223372036854775808"sv, IntTok{1ull << 63, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + // 64-bit signed underflow! + Lexer lexer("-9223372036854775809"sv); + EXPECT_EQ(lexer, lexer.end()); + } +} + +TEST(ParserTest, LexHexInt) { + { + Lexer lexer("0x0"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"0x0"sv, IntTok{0, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("+0x0"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+0x0"sv, IntTok{0, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("-0x0"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-0x0"sv, IntTok{0, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("0x1"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"0x1"sv, IntTok{1, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("+0x1"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+0x1"sv, IntTok{1, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("-0x1"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-0x1"sv, IntTok{-1ull, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("0x0010"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"0x0010"sv, IntTok{16, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("+0x0010"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+0x0010"sv, IntTok{16, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("-0x0010"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-0x0010"sv, IntTok{-16ull, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("0xabcdef"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"0xabcdef"sv, IntTok{0xabcdef, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("+0xABCDEF"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+0xABCDEF"sv, IntTok{0xabcdef, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("-0xAbCdEf"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-0xAbCdEf"sv, IntTok{-0xabcdefull, Signed}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("0x12_34"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"0x12_34"sv, IntTok{0x1234, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("0x1_2_3_4"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"0x1_2_3_4"sv, IntTok{0x1234, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("_0x1234"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("0x_1234"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("0x1234_"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("0x12__34"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("0xg"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("0x120x34"sv); + EXPECT_EQ(lexer, lexer.end()); + } +} |