diff options
-rw-r--r-- | src/wasm/wat-lexer.cpp | 173 | ||||
-rw-r--r-- | src/wat-lexer.h | 32 | ||||
-rw-r--r-- | test/gtest/wat-lexer.cpp | 529 |
3 files changed, 704 insertions, 30 deletions
diff --git a/src/wasm/wat-lexer.cpp b/src/wasm/wat-lexer.cpp
index 4bea32b59..0d1dc2794 100644
--- a/src/wasm/wat-lexer.cpp
+++ b/src/wasm/wat-lexer.cpp
@@ -151,23 +151,10 @@ public:
     if (overflow) {
       return {};
     }
-    auto basic = LexCtx::lexed();
-    if (!basic) {
-      return {};
-    }
-    // Check most significant bit for overflow of signed numbers.
-    if (sign == Neg) {
-      if (n > (1ull << 63)) {
-        // TODO: Add error production for signed underflow.
-        return {};
-      }
-    } else if (sign == Pos) {
-      if (n > (1ull << 63) - 1) {
-        // TODO: Add error production for signed overflow.
-        return {};
-      }
+    if (auto basic = LexCtx::lexed()) {
+      return LexIntResult{*basic, sign == Neg ? -n : n, sign};
     }
-    return LexIntResult{*basic, sign == Neg ? -n : n, sign};
+    return {};
   }
 
   void takeSign() {
@@ -592,12 +579,7 @@ std::optional<LexFloatResult> float_(std::string_view in) {
     if (ctx.takePrefix(":0x"sv)) {
       if (auto lexed = hexnum(ctx.next())) {
         ctx.take(*lexed);
-        if (1 <= lexed->n && lexed->n < (1ull << 52)) {
-          ctx.nanPayload = lexed->n;
-        } else {
-          // TODO: Add error production for invalid NaN payload.
-          return {};
-        }
+        ctx.nanPayload = lexed->n;
       } else {
         // TODO: Add error production for malformed NaN payload.
         return {};
@@ -781,6 +763,179 @@ std::optional<LexResult> keyword(std::string_view in) {
 
 } // anonymous namespace
 
+// Returns the value of an integer literal written without a sign, or nullopt
+// if the token is not such a literal.
+std::optional<uint64_t> Token::getU64() const {
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == NoSign) {
+      return tok->n;
+    }
+  }
+  return {};
+}
+
+// Returns the value of an integer literal that fits in int64_t, or nullopt.
+// The lexer stores a negative literal as the two's complement negation of its
+// magnitude, so a negative value is in range exactly when the stored bits are
+// zero or have the top bit set.
+std::optional<int64_t> Token::getS64() const {
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      if (uint64_t(INT64_MIN) <= tok->n || tok->n == 0) {
+        return int64_t(tok->n);
+      }
+      // TODO: Add error production for signed underflow.
+    } else {
+      if (tok->n <= uint64_t(INT64_MAX)) {
+        return int64_t(tok->n);
+      }
+      // TODO: Add error production for signed overflow.
+    }
+  }
+  return {};
+}
+
+// Returns the bits of an integer literal that is valid as either an unsigned
+// or a signed 64-bit integer, or nullopt.
+std::optional<uint64_t> Token::getI64() const {
+  if (auto n = getU64()) {
+    return *n;
+  }
+  if (auto n = getS64()) {
+    return uint64_t(*n);
+  }
+  return {};
+}
+
+// Returns the value of an integer literal written without a sign that fits in
+// uint32_t, or nullopt.
+std::optional<uint32_t> Token::getU32() const {
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == NoSign && tok->n <= UINT32_MAX) {
+      // Values above INT32_MAX are valid here, so narrow to the unsigned type.
+      return uint32_t(tok->n);
+    }
+    // TODO: Add error production for unsigned overflow.
+  }
+  return {};
+}
+
+// Returns the value of an integer literal that fits in int32_t, or nullopt.
+// Uses the same stored-negation range check as getS64.
+std::optional<int32_t> Token::getS32() const {
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      if (uint64_t(INT32_MIN) <= tok->n || tok->n == 0) {
+        return int32_t(tok->n);
+      }
+    } else {
+      if (tok->n <= uint64_t(INT32_MAX)) {
+        return int32_t(tok->n);
+      }
+    }
+  }
+  return {};
+}
+
+// Returns the bits of an integer literal that is valid as either an unsigned
+// or a signed 32-bit integer, or nullopt.
+std::optional<uint32_t> Token::getI32() const {
+  if (auto n = getU32()) {
+    return *n;
+  }
+  if (auto n = getS32()) {
+    return uint32_t(*n);
+  }
+  return {};
+}
+
+// Returns the token as a double if it is a float or integer literal, or
+// nullopt otherwise. For NaN literals the payload (explicit, or by default the
+// top significand bit) is written into the significand; a payload that is
+// zero or does not fit in the 52-bit significand is rejected.
+std::optional<double> Token::getF64() const {
+  constexpr int signif = 52;
+  constexpr uint64_t payloadMask = (1ull << signif) - 1;
+  constexpr uint64_t nanDefault = 1ull << (signif - 1);
+  if (auto* tok = std::get_if<FloatTok>(&data)) {
+    double d = tok->d;
+    if (std::isnan(d)) {
+      // Validate and inject payload.
+      uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
+      if (payload == 0 || payload > payloadMask) {
+        // TODO: Add error production for out-of-bounds payload.
+        return {};
+      }
+      uint64_t bits;
+      static_assert(sizeof(bits) == sizeof(d));
+      memcpy(&bits, &d, sizeof(bits));
+      bits = (bits & ~payloadMask) | payload;
+      memcpy(&d, &bits, sizeof(bits));
+    }
+    return d;
+  }
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      // The lexer stores negative literals as the two's complement negation
+      // of their magnitude. Recover the magnitude and negate in floating
+      // point: casting the stored bits to int64_t would flip the sign of
+      // magnitudes above 2^63, and negating 0.0 yields the required -0.0.
+      return -double(uint64_t(0) - tok->n);
+    }
+    return double(tok->n);
+  }
+  return {};
+}
+
+// Returns the token as a float if it is a float or integer literal, or
+// nullopt otherwise. NaN payloads are handled as in getF64, but must fit in
+// the 23-bit significand of a float.
+std::optional<float> Token::getF32() const {
+  constexpr int signif = 23;
+  constexpr uint32_t payloadMask = (1u << signif) - 1;
+  constexpr uint64_t nanDefault = 1ull << (signif - 1);
+  if (auto* tok = std::get_if<FloatTok>(&data)) {
+    float f = tok->d;
+    if (std::isnan(f)) {
+      // Validate and inject payload.
+      uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
+      if (payload == 0 || payload > payloadMask) {
+        // TODO: Add error production for out-of-bounds payload.
+        return {};
+      }
+      uint32_t bits;
+      static_assert(sizeof(bits) == sizeof(f));
+      memcpy(&bits, &f, sizeof(bits));
+      bits = (bits & ~payloadMask) | uint32_t(payload);
+      memcpy(&f, &bits, sizeof(bits));
+    }
+    return f;
+  }
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      // Recover the magnitude from the stored negation and negate in
+      // floating point, as in getF64; this also maps "-0" to -0.0f.
+      return -float(uint64_t(0) - tok->n);
+    }
+    return float(tok->n);
+  }
+  return {};
+}
+
+// Returns the contents of a string token, or nullopt if the token is not a
+// string. Uses the stored string when the token carries one; otherwise the
+// contents are the source span with its first and last characters
+// (presumably the surrounding quotes) removed.
+std::optional<std::string_view> Token::getString() const {
+  if (auto* tok = std::get_if<StringTok>(&data)) {
+    if (tok->str) {
+      return std::string_view(*tok->str);
+    }
+    return span.substr(1, span.size() - 2);
+  }
+  return {};
+}
+
 void Lexer::skipSpace() {
   if (auto ctx = space(next())) {
     index += ctx->span.size();
diff --git a/src/wat-lexer.h b/src/wat-lexer.h
index 5a955f5c0..e4ba2efa8 100644
--- a/src/wat-lexer.h
+++ b/src/wat-lexer.h
@@ -15,6 +15,7 @@
  */
 
 #include <cstddef>
+#include <cstring>
 #include <iterator>
 #include <optional>
 #include <ostream>
@@ -101,6 +102,37 @@ struct Token {
   std::string_view span;
   Data data;
 
+  // ====================
+  // Token classification
+  // ====================
+
+  bool isLParen() const { return std::get_if<LParenTok>(&data); } // true iff data holds an LParenTok
+
+  bool isRParen() const { return std::get_if<RParenTok>(&data); } // true iff data holds an RParenTok
+
+  std::optional<std::string_view> getID() const { // span if data holds an IdTok, else nullopt
+    if (std::get_if<IdTok>(&data)) {
+      return span;
+    }
+    return {};
+  }
+
+  std::optional<std::string_view> getKeyword() const { // span if data holds a KeywordTok, else nullopt
+    if (std::get_if<KeywordTok>(&data)) {
+      return span;
+    }
+    return {};
+  }
+  std::optional<uint64_t> getU64() const; // IntTok written without a sign
+  std::optional<int64_t> getS64() const; // IntTok whose value fits in int64_t
+  std::optional<uint64_t> getI64() const; // bits of a valid getU64 or getS64 result
+  std::optional<uint32_t> getU32() const; // IntTok without a sign, at most UINT32_MAX
+  std::optional<int32_t> getS32() const; // IntTok whose value fits in int32_t
+  std::optional<uint32_t> getI32() const; // bits of a valid getU32 or getS32 result
+  std::optional<double> getF64() const; // FloatTok or IntTok as a double
+  std::optional<float> getF32() const; // FloatTok or IntTok as a float
+  std::optional<std::string_view> getString() const; // contents of a StringTok
+
   bool operator==(const Token&) const;
   friend std::ostream& operator<<(std::ostream& os, const Token&);
 };
diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp
index 75ddca6ed..a1c60a706 100644
--- a/test/gtest/wat-lexer.cpp
+++ b/test/gtest/wat-lexer.cpp
@@ -116,6 +116,8 @@ TEST(LexerTest, LexParens) {
   EXPECT_EQ(t2, left);
   EXPECT_EQ(t3, right);
   EXPECT_EQ(t4, right);
+  EXPECT_TRUE(left.isLParen());
+  EXPECT_TRUE(right.isRParen());
 }
 
 TEST(LexerTest, LexInt) {
@@ -236,29 +238,28 @@ TEST(LexerTest, LexInt) {
   {
     Lexer lexer("+9223372036854775807"sv);
     ASSERT_NE(lexer, lexer.end());
-    Token expected{"+9223372036854775807"sv, IntTok{~(1ull << 63), Pos}};
+    Token expected{"+9223372036854775807"sv, IntTok{INT64_MAX, Pos}};
     EXPECT_EQ(*lexer, expected);
   }
   {
-    // 64-bit signed overflow!
     Lexer lexer("+9223372036854775808"sv);
     ASSERT_NE(lexer, lexer.end());
     Token expected{"+9223372036854775808"sv,
-                   FloatTok{{}, 9223372036854775808.}};
+                   IntTok{uint64_t(INT64_MAX) + 1, Pos}};
+    ; // NOTE(review): stray empty statement; it has no effect and could be dropped
     EXPECT_EQ(*lexer, expected);
   }
   {
     Lexer lexer("-9223372036854775808"sv);
     ASSERT_NE(lexer, lexer.end());
-    Token expected{"-9223372036854775808"sv, IntTok{1ull << 63, Neg}};
+    Token expected{"-9223372036854775808"sv, IntTok{uint64_t(INT64_MIN), Neg}};
     EXPECT_EQ(*lexer, expected);
   }
   {
-    // 64-bit signed underflow!
     Lexer lexer("-9223372036854775809"sv);
     ASSERT_NE(lexer, lexer.end());
     Token expected{"-9223372036854775809"sv,
-                   FloatTok{{}, -9223372036854775809.}};
+                   IntTok{uint64_t(INT64_MIN) - 1, Neg}};
     EXPECT_EQ(*lexer, expected);
   }
 }
@@ -374,6 +375,361 @@ TEST(LexerTest, LexHexInt) {
   }
 }
 
+TEST(LexerTest, ClassifyInt) { // checks which Token accessors accept each integer literal
+  {
+    Lexer lexer("0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // An unsigned zero is accepted by every integer and float accessor.
+    ASSERT_TRUE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    ASSERT_TRUE(lexer->getU32());
+    ASSERT_TRUE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0ull);
+    EXPECT_EQ(*lexer->getS64(), 0ll);
+    EXPECT_EQ(*lexer->getI64(), 0ull);
+    EXPECT_EQ(*lexer->getU32(), 0u);
+    EXPECT_EQ(*lexer->getS32(), 0);
+    EXPECT_EQ(*lexer->getI32(), 0u);
+    EXPECT_EQ(*lexer->getF64(), 0.0);
+    EXPECT_EQ(*lexer->getF32(), 0.0);
+    EXPECT_FALSE(std::signbit(*lexer->getF64()));
+    EXPECT_FALSE(std::signbit(*lexer->getF32()));
+  }
+  {
+    Lexer lexer("+0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // An explicit sign means the unsigned accessors reject the literal.
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    ASSERT_TRUE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0ll);
+    EXPECT_EQ(*lexer->getI64(), 0ull);
+    EXPECT_EQ(*lexer->getS32(), 0);
+    EXPECT_EQ(*lexer->getI32(), 0u);
+    EXPECT_EQ(*lexer->getF64(), 0.0);
+    EXPECT_EQ(*lexer->getF32(), 0.0);
+    EXPECT_FALSE(std::signbit(*lexer->getF64()));
+    EXPECT_FALSE(std::signbit(*lexer->getF32()));
+  }
+  {
+    Lexer lexer("-0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // Negative zero is integer zero, but the float accessors must return -0.0.
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    ASSERT_TRUE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0ll);
+    EXPECT_EQ(*lexer->getI64(), 0ull);
+    EXPECT_EQ(*lexer->getS32(), 0);
+    EXPECT_EQ(*lexer->getI32(), 0u);
+    EXPECT_EQ(*lexer->getF64(), -0.0);
+    EXPECT_EQ(*lexer->getF32(), -0.0);
+    ASSERT_TRUE(std::signbit(*lexer->getF64()));
+    ASSERT_TRUE(std::signbit(*lexer->getF32()));
+  }
+  {
+    Lexer lexer("0x7fff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // INT32_MAX without a sign fits every accessor.
+    ASSERT_TRUE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    ASSERT_TRUE(lexer->getU32());
+    ASSERT_TRUE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0x7fffffffull);
+    EXPECT_EQ(*lexer->getS64(), 0x7fffffffll);
+    EXPECT_EQ(*lexer->getI64(), 0x7fffffffull);
+    EXPECT_EQ(*lexer->getU32(), 0x7fffffffu);
+    EXPECT_EQ(*lexer->getS32(), 0x7fffffff);
+    EXPECT_EQ(*lexer->getI32(), 0x7fffffffu);
+    EXPECT_EQ(*lexer->getF64(), 0x7fffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x7fffffff.p0f);
+  }
+  {
+    Lexer lexer("0x8000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // One past INT32_MAX: getS32 fails, but getI32 still succeeds through getU32.
+    ASSERT_TRUE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    ASSERT_TRUE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0x80000000ull);
+    EXPECT_EQ(*lexer->getS64(), 0x80000000ll);
+    EXPECT_EQ(*lexer->getI64(), 0x80000000ull);
+    EXPECT_EQ(*lexer->getU32(), 0x80000000u);
+    EXPECT_EQ(*lexer->getI32(), 0x80000000u);
+    EXPECT_EQ(*lexer->getF64(), 0x80000000.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x80000000.p0f);
+  }
+  {
+    Lexer lexer("+0x7fff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    ASSERT_TRUE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0x7fffffffll);
+    EXPECT_EQ(*lexer->getI64(), 0x7fffffffull);
+    EXPECT_EQ(*lexer->getS32(), 0x7fffffff);
+    EXPECT_EQ(*lexer->getI32(), 0x7fffffffu);
+    EXPECT_EQ(*lexer->getF64(), 0x7fffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x7fffffff.p0f);
+  }
+  {
+    Lexer lexer("+0x8000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // Signed and above INT32_MAX: no 32-bit accessor accepts it.
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0x80000000ll);
+    EXPECT_EQ(*lexer->getI64(), 0x80000000ull);
+    EXPECT_EQ(*lexer->getF64(), 0x80000000.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x80000000.p0f);
+  }
+  {
+    Lexer lexer("-0x8000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // INT32_MIN is the most negative literal the 32-bit signed accessors accept.
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    ASSERT_TRUE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), -0x80000000ll);
+    EXPECT_EQ(*lexer->getI64(), -0x80000000ull);
+    EXPECT_EQ(*lexer->getS32(), -0x7fffffffll - 1);
+    EXPECT_EQ(*lexer->getI32(), -0x80000000u);
+    EXPECT_EQ(*lexer->getF64(), -0x80000000.p0);
+    EXPECT_EQ(*lexer->getF32(), -0x80000000.p0f);
+  }
+  {
+    Lexer lexer("-0x8000_0001"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), -0x80000001ll);
+    EXPECT_EQ(*lexer->getI64(), -0x80000001ull);
+    EXPECT_EQ(*lexer->getF64(), -0x80000001.p0);
+    EXPECT_EQ(*lexer->getF32(), -0x80000001.p0f);
+  }
+  {
+    Lexer lexer("0xffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    ASSERT_TRUE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    ASSERT_TRUE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    ASSERT_TRUE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0xffffffffull);
+    EXPECT_EQ(*lexer->getS64(), 0xffffffffll);
+    EXPECT_EQ(*lexer->getI64(), 0xffffffffull);
+    EXPECT_EQ(*lexer->getU32(), 0xffffffffu);
+    EXPECT_EQ(*lexer->getI32(), 0xffffffffu);
+    EXPECT_EQ(*lexer->getF64(), 0xffffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0xffffffff.p0f);
+  }
+  {
+    Lexer lexer("0x1_0000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    ASSERT_TRUE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0x100000000ull);
+    EXPECT_EQ(*lexer->getS64(), 0x100000000ll);
+    EXPECT_EQ(*lexer->getI64(), 0x100000000ull);
+    EXPECT_EQ(*lexer->getF64(), 0x100000000.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x100000000.p0f);
+  }
+  {
+    Lexer lexer("+0xffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0xffffffffll);
+    EXPECT_EQ(*lexer->getI64(), 0xffffffffull);
+    EXPECT_EQ(*lexer->getF64(), 0xffffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0xffffffff.p0f);
+  }
+  {
+    Lexer lexer("+0x1_0000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0x100000000ll);
+    EXPECT_EQ(*lexer->getI64(), 0x100000000ull);
+    EXPECT_EQ(*lexer->getF64(), 0x100000000.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x100000000.p0f);
+  }
+  {
+    Lexer lexer("0x7fff_ffff_ffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    ASSERT_TRUE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0x7fffffffffffffffull);
+    EXPECT_EQ(*lexer->getS64(), 0x7fffffffffffffffll);
+    EXPECT_EQ(*lexer->getI64(), 0x7fffffffffffffffull);
+    EXPECT_EQ(*lexer->getF64(), 0x7fffffffffffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x7fffffffffffffff.p0f);
+  }
+  {
+    Lexer lexer("+0x7fff_ffff_ffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), 0x7fffffffffffffffll);
+    EXPECT_EQ(*lexer->getI64(), 0x7fffffffffffffffull);
+    EXPECT_EQ(*lexer->getF64(), 0x7fffffffffffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0x7fffffffffffffff.p0f);
+  }
+  {
+    Lexer lexer("-0x8000_0000_0000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // INT64_MIN is the most negative literal getS64 accepts.
+    EXPECT_FALSE(lexer->getU64());
+    ASSERT_TRUE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getS64(), -0x7fffffffffffffffll - 1);
+    EXPECT_EQ(*lexer->getI64(), -0x8000000000000000ull);
+    EXPECT_EQ(*lexer->getF64(), -0x8000000000000000.p0);
+    EXPECT_EQ(*lexer->getF32(), -0x8000000000000000.p0f);
+  }
+  {
+    Lexer lexer("0xffff_ffff_ffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // UINT64_MAX: too large for getS64, but getI64 succeeds through getU64.
+    ASSERT_TRUE(lexer->getU64());
+    EXPECT_FALSE(lexer->getS64());
+    ASSERT_TRUE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getU64(), 0xffffffffffffffffull);
+    EXPECT_EQ(*lexer->getI64(), 0xffffffffffffffffull);
+    EXPECT_EQ(*lexer->getF64(), 0xffffffffffffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0xffffffffffffffff.p0f);
+  }
+  {
+    Lexer lexer("+0xffff_ffff_ffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+    // Signed and above INT64_MAX: only the float accessors accept it.
+    EXPECT_FALSE(lexer->getU64());
+    EXPECT_FALSE(lexer->getS64());
+    EXPECT_FALSE(lexer->getI64());
+    EXPECT_FALSE(lexer->getU32());
+    EXPECT_FALSE(lexer->getS32());
+    EXPECT_FALSE(lexer->getI32());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+
+    EXPECT_EQ(*lexer->getF64(), 0xffffffffffffffff.p0);
+    EXPECT_EQ(*lexer->getF32(), 0xffffffffffffffff.p0f);
+  }
+}
+
 TEST(LexerTest, LexFloat) {
   {
     Lexer lexer("42"sv);
@@ -826,13 +1182,14 @@ TEST(LexerTest, LexNan) {
   {
     Lexer lexer("nan:0x0"sv);
     ASSERT_NE(lexer, lexer.end());
-    Token expected{"nan:0x0"sv, KeywordTok{}};
+    Token expected{"nan:0x0"sv, FloatTok{{0}, NAN}};
     EXPECT_EQ(*lexer, expected);
   }
   {
     Lexer lexer("nan:0x10_0000_0000_0000"sv);
     ASSERT_NE(lexer, lexer.end());
-    Token expected{"nan:0x10_0000_0000_0000"sv, KeywordTok{}};
+    Token expected{"nan:0x10_0000_0000_0000"sv,
+                   FloatTok{{0x10000000000000}, NAN}};
     EXPECT_EQ(*lexer, expected);
   }
   {
@@ -851,12 +1208,160 @@ TEST(LexerTest, LexNan) {
   }
 }
 
+TEST(LexerTest, ClassifyFloat) { // checks getF64/getF32 on float and NaN literals
+  constexpr int signif64 = 52;
+  constexpr int signif32 = 23;
+  constexpr uint64_t payloadMask64 = (1ull << signif64) - 1;
+  constexpr uint32_t payloadMask32 = (1u << signif32) - 1;
+  constexpr uint64_t dnanDefault = 1ull << (signif64 - 1);
+  constexpr uint32_t fnanDefault = 1u << (signif32 - 1);
+  {
+    Lexer lexer("340282346638528859811704183484516925440."sv);
+    ASSERT_NE(lexer, lexer.end());
+    ASSERT_TRUE(lexer->getF64());
+    EXPECT_TRUE(lexer->getF32());
+    EXPECT_EQ(*lexer->getF64(), FLT_MAX);
+    EXPECT_EQ(*lexer->getF32(), FLT_MAX);
+  }
+  {
+    Lexer lexer("17976931348623157081452742373170435679807056752584499659891747"
+                "68031572607800285387605895586327668781715404589535143824642343"
+                "21326889464182768467546703537516986049910576551282076245490090"
+                "38932894407586850845513394230458323690322294816580855933212334"
+                "8274797826204144723168738177180919299881250404026184124858368"
+                "."sv);
+    ASSERT_NE(lexer, lexer.end());
+    ASSERT_TRUE(lexer->getF64());
+    ASSERT_TRUE(lexer->getF32());
+    EXPECT_EQ(*lexer->getF64(), DBL_MAX);
+    EXPECT_EQ(*lexer->getF32(), INFINITY); // DBL_MAX is expected to become infinity as a float
+  }
+  {
+    Lexer lexer("nan");
+    ASSERT_NE(lexer, lexer.end());
+    // A NaN without an explicit payload gets the default payload in both widths.
+    ASSERT_TRUE(lexer->getF64());
+    double d = *lexer->getF64();
+    EXPECT_TRUE(std::isnan(d));
+    EXPECT_FALSE(std::signbit(d));
+    uint64_t dbits;
+    memcpy(&dbits, &d, sizeof(dbits));
+    EXPECT_EQ(dbits & payloadMask64, dnanDefault);
+
+    ASSERT_TRUE(lexer->getF32());
+    float f = *lexer->getF32();
+    EXPECT_TRUE(std::isnan(f));
+    EXPECT_FALSE(std::signbit(f));
+    uint32_t fbits;
+    memcpy(&fbits, &f, sizeof(fbits));
+    EXPECT_EQ(fbits & payloadMask32, fnanDefault);
+  }
+  {
+    Lexer lexer("-nan");
+    ASSERT_NE(lexer, lexer.end());
+    // The sign of a negative NaN is expected to be preserved.
+    ASSERT_TRUE(lexer->getF64());
+    double d = *lexer->getF64();
+    EXPECT_TRUE(std::isnan(d));
+    EXPECT_TRUE(std::signbit(d));
+    uint64_t dbits;
+    memcpy(&dbits, &d, sizeof(dbits));
+    EXPECT_EQ(dbits & payloadMask64, dnanDefault);
+
+    ASSERT_TRUE(lexer->getF32());
+    float f = *lexer->getF32();
+    EXPECT_TRUE(std::isnan(f));
+    EXPECT_TRUE(std::signbit(f));
+    uint32_t fbits;
+    memcpy(&fbits, &f, sizeof(fbits));
+    EXPECT_EQ(fbits & payloadMask32, fnanDefault);
+  }
+  {
+    Lexer lexer("+nan");
+    ASSERT_NE(lexer, lexer.end());
+
+    ASSERT_TRUE(lexer->getF64());
+    double d = *lexer->getF64();
+    EXPECT_TRUE(std::isnan(d));
+    EXPECT_FALSE(std::signbit(d));
+    uint64_t dbits;
+    memcpy(&dbits, &d, sizeof(dbits));
+    EXPECT_EQ(dbits & payloadMask64, dnanDefault);
+
+    ASSERT_TRUE(lexer->getF32());
+    float f = *lexer->getF32();
+    EXPECT_TRUE(std::isnan(f));
+    EXPECT_FALSE(std::signbit(f));
+    uint32_t fbits;
+    memcpy(&fbits, &f, sizeof(fbits));
+    EXPECT_EQ(fbits & payloadMask32, fnanDefault);
+  }
+  {
+    Lexer lexer("nan:0x1234");
+    ASSERT_NE(lexer, lexer.end());
+    // An explicit payload that fits both widths is injected unchanged.
+    ASSERT_TRUE(lexer->getF64());
+    double d = *lexer->getF64();
+    EXPECT_TRUE(std::isnan(d));
+    uint64_t dbits;
+    memcpy(&dbits, &d, sizeof(dbits));
+    EXPECT_EQ(dbits & payloadMask64, 0x1234ull);
+
+    ASSERT_TRUE(lexer->getF32());
+    float f = *lexer->getF32();
+    EXPECT_TRUE(std::isnan(f));
+    uint32_t fbits;
+    memcpy(&fbits, &f, sizeof(fbits));
+    EXPECT_EQ(fbits & payloadMask32, 0x1234u);
+  }
+  {
+    Lexer lexer("nan:0x7FFFFF");
+    ASSERT_NE(lexer, lexer.end());
+    // 0x7FFFFF equals payloadMask32, the largest payload getF32 accepts.
+    ASSERT_TRUE(lexer->getF64());
+    double d = *lexer->getF64();
+    EXPECT_TRUE(std::isnan(d));
+    uint64_t dbits;
+    memcpy(&dbits, &d, sizeof(dbits));
+    EXPECT_EQ(dbits & payloadMask64, 0x7fffffull);
+
+    ASSERT_TRUE(lexer->getF32());
+    float f = *lexer->getF32();
+    EXPECT_TRUE(std::isnan(f));
+    uint32_t fbits;
+    memcpy(&fbits, &f, sizeof(fbits));
+    EXPECT_EQ(fbits & payloadMask32, 0x7fffffu);
+  }
+  {
+    Lexer lexer("nan:0x800000");
+    ASSERT_NE(lexer, lexer.end());
+    // One past payloadMask32: valid for getF64 but rejected by getF32.
+    ASSERT_TRUE(lexer->getF64());
+    double d = *lexer->getF64();
+    EXPECT_TRUE(std::isnan(d));
+    uint64_t dbits;
+    memcpy(&dbits, &d, sizeof(dbits));
+    EXPECT_EQ(dbits & payloadMask64, 0x800000ull);
+
+    ASSERT_FALSE(lexer->getF32());
+  }
+  {
+    Lexer lexer("nan:0x0");
+    ASSERT_NE(lexer, lexer.end());
+    // A zero payload still lexes as a token, but both accessors reject it.
+    ASSERT_FALSE(lexer->getF64());
+    ASSERT_FALSE(lexer->getF32());
+  }
+}
+
 TEST(LexerTest, LexIdent) {
   {
     Lexer lexer("$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);
     ASSERT_NE(lexer, lexer.end());
     Token expected{"$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv, IdTok{}};
     EXPECT_EQ(*lexer, expected);
+    EXPECT_TRUE(lexer->getID());
+    EXPECT_EQ(*lexer->getID(), "$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);
   }
   {
     Lexer lexer("$[]{}"sv);
@@ -879,6 +1384,9 @@ TEST(LexerTest, LexString) {
     ASSERT_NE(lexer, lexer.end());
     Token expected{pangram, StringTok{{}}};
     EXPECT_EQ(*lexer, expected);
+    EXPECT_TRUE(lexer->getString());
+    EXPECT_EQ(*lexer->getString(),
+              "The quick brown fox jumps over the lazy dog"sv);
   }
   {
     auto chars = "\"`~!@#$%^&*()_-+0123456789|,.<>/?;:'\""sv;
@@ -893,6 +1401,8 @@ TEST(LexerTest, LexString) {
     ASSERT_NE(lexer, lexer.end());
     Token expected{escapes, StringTok{{"_\t_\n_\r_\\_\"_'_"}}};
     EXPECT_EQ(*lexer, expected);
+    EXPECT_TRUE(lexer->getString());
+    EXPECT_EQ(*lexer->getString(), "_\t_\n_\r_\\_\"_'_"sv);
   }
   {
     auto escapes = "\"_\\00_\\07_\\20_\\5A_\\7F_\\ff_\\ffff_\""sv;
@@ -1001,4 +1511,7 @@ TEST(LexerTest, LexKeywords) {
   EXPECT_EQ(t3, func);
   EXPECT_EQ(t4, import);
   EXPECT_EQ(t5, reserved);
+
+  EXPECT_TRUE(t1.getKeyword());
+  EXPECT_EQ(*t1.getKeyword(), "module"sv);
 }
|