summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2022-06-01 13:00:54 -0700
committerGitHub <noreply@github.com>2022-06-01 20:00:54 +0000
commite74f66e92affd637cb19af2ad5f3d015ba86aa1c (patch)
tree75bd835941c2f9cf72f0b29814f114fe0159272d
parent623e08e88db3ebc913fe76e7f60e89fa030f884d (diff)
downloadbinaryen-e74f66e92affd637cb19af2ad5f3d015ba86aa1c.tar.gz
binaryen-e74f66e92affd637cb19af2ad5f3d015ba86aa1c.tar.bz2
binaryen-e74f66e92affd637cb19af2ad5f3d015ba86aa1c.zip
[Parser] Token classification (#4699)
Add methods to `Token` for determining whether the token can be interpreted as a particular token type, returning the interpreted value as appropriate. These methods perform additional bounds checks for integers and NaN payloads that could not be done during the initial lexing because the lexer did not know what the intended token type was. The float methods also reinterpret integer tokens as floating-point tokens, since the float grammar is a superset of the integer grammar, and inject NaN payloads into parsed NaN values. All bounds checking is moved into these new classifier functions so that it lives in one place.
-rw-r--r--src/wasm/wat-lexer.cpp173
-rw-r--r--src/wat-lexer.h32
-rw-r--r--test/gtest/wat-lexer.cpp529
3 files changed, 704 insertions, 30 deletions
diff --git a/src/wasm/wat-lexer.cpp b/src/wasm/wat-lexer.cpp
index 4bea32b59..0d1dc2794 100644
--- a/src/wasm/wat-lexer.cpp
+++ b/src/wasm/wat-lexer.cpp
@@ -151,23 +151,10 @@ public:
if (overflow) {
return {};
}
- auto basic = LexCtx::lexed();
- if (!basic) {
- return {};
- }
- // Check most significant bit for overflow of signed numbers.
- if (sign == Neg) {
- if (n > (1ull << 63)) {
- // TODO: Add error production for signed underflow.
- return {};
- }
- } else if (sign == Pos) {
- if (n > (1ull << 63) - 1) {
- // TODO: Add error production for signed overflow.
- return {};
- }
+ if (auto basic = LexCtx::lexed()) {
+ return LexIntResult{*basic, sign == Neg ? -n : n, sign};
}
- return LexIntResult{*basic, sign == Neg ? -n : n, sign};
+ return {};
}
void takeSign() {
@@ -592,12 +579,7 @@ std::optional<LexFloatResult> float_(std::string_view in) {
if (ctx.takePrefix(":0x"sv)) {
if (auto lexed = hexnum(ctx.next())) {
ctx.take(*lexed);
- if (1 <= lexed->n && lexed->n < (1ull << 52)) {
- ctx.nanPayload = lexed->n;
- } else {
- // TODO: Add error production for invalid NaN payload.
- return {};
- }
+ ctx.nanPayload = lexed->n;
} else {
// TODO: Add error production for malformed NaN payload.
return {};
@@ -781,6 +763,153 @@ std::optional<LexResult> keyword(std::string_view in) {
} // anonymous namespace
+// Interpret this token as an unsigned 64-bit integer. Only integer tokens that
+// were written without an explicit sign qualify; everything else yields an
+// empty optional.
+std::optional<uint64_t> Token::getU64() const {
+  auto* intTok = std::get_if<IntTok>(&data);
+  if (!intTok || intTok->sign != NoSign) {
+    return std::nullopt;
+  }
+  return intTok->n;
+}
+
+// Interpret this token as a signed 64-bit integer. Integer tokens with or
+// without an explicit sign are accepted as long as the value fits in int64_t;
+// non-integer tokens and out-of-range values yield an empty optional.
+std::optional<int64_t> Token::getS64() const {
+  auto* intTok = std::get_if<IntTok>(&data);
+  if (!intTok) {
+    return std::nullopt;
+  }
+  const auto bits = intTok->n;
+  // A negative token is in range when its stored bits are zero or, read as an
+  // unsigned number, at least the bit pattern of INT64_MIN. Any other token is
+  // in range when its bits do not exceed INT64_MAX.
+  const bool inRange = intTok->sign == Neg
+                         ? (bits == 0 || bits >= uint64_t(INT64_MIN))
+                         : bits <= uint64_t(INT64_MAX);
+  if (!inRange) {
+    // TODO: Add error productions for signed underflow and overflow.
+    return std::nullopt;
+  }
+  return int64_t(bits);
+}
+
+// Interpret this token as a 64-bit integer of either signedness, returned as
+// raw unsigned bits. The unsigned interpretation is tried first and the signed
+// interpretation is used only when that fails.
+std::optional<uint64_t> Token::getI64() const {
+  const auto asUnsigned = getU64();
+  if (asUnsigned) {
+    return asUnsigned;
+  }
+  const auto asSigned = getS64();
+  if (asSigned) {
+    return uint64_t(*asSigned);
+  }
+  return std::nullopt;
+}
+
+// Interpret this token as an unsigned 32-bit integer. Only integer tokens
+// written without an explicit sign and whose value is at most UINT32_MAX
+// qualify; everything else yields an empty optional.
+std::optional<uint32_t> Token::getU32() const {
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == NoSign && tok->n <= UINT32_MAX) {
+      // The value was just range-checked, so narrowing straight to the
+      // unsigned result type is lossless. (Going through int32_t, as this code
+      // once did, is an out-of-range signed conversion for values >= 2^31.)
+      return uint32_t(tok->n);
+    }
+    // TODO: Add error production for unsigned overflow.
+  }
+  return std::nullopt;
+}
+
+// Interpret this token as a signed 32-bit integer. Integer tokens with or
+// without an explicit sign are accepted as long as the value fits in int32_t;
+// non-integer tokens and out-of-range values yield an empty optional.
+std::optional<int32_t> Token::getS32() const {
+  auto* intTok = std::get_if<IntTok>(&data);
+  if (!intTok) {
+    return std::nullopt;
+  }
+  const auto bits = intTok->n;
+  bool fits;
+  if (intTok->sign == Neg) {
+    // Zero, or anything at or above the sign-extended bit pattern of
+    // INT32_MIN when compared as unsigned 64-bit numbers.
+    fits = bits == 0 || bits >= uint64_t(INT32_MIN);
+  } else {
+    fits = bits <= uint64_t(INT32_MAX);
+  }
+  if (!fits) {
+    return std::nullopt;
+  }
+  return int32_t(bits);
+}
+
+// Interpret this token as a 32-bit integer of either signedness, returned as
+// raw unsigned bits. The unsigned interpretation is tried first and the signed
+// interpretation is used only when that fails.
+std::optional<uint32_t> Token::getI32() const {
+  const auto asUnsigned = getU32();
+  if (asUnsigned) {
+    return asUnsigned;
+  }
+  const auto asSigned = getS32();
+  if (asSigned) {
+    return uint32_t(*asSigned);
+  }
+  return std::nullopt;
+}
+
+// Interpret this token as a 64-bit float.
+//
+// Float tokens are returned as lexed, except that NaNs have their payload bits
+// replaced by the token's explicit payload or, when none was given, by the
+// default payload (only the most significant payload bit set). A payload of
+// zero or one wider than 52 bits is rejected. Integer tokens are also accepted
+// and converted to the nearest double. Any other token yields an empty
+// optional.
+std::optional<double> Token::getF64() const {
+  constexpr int signif = 52;
+  constexpr uint64_t payloadMask = (1ull << signif) - 1;
+  constexpr uint64_t nanDefault = 1ull << (signif - 1);
+  if (auto* tok = std::get_if<FloatTok>(&data)) {
+    double d = tok->d;
+    if (std::isnan(d)) {
+      // Validate and inject payload.
+      uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
+      if (payload == 0 || payload > payloadMask) {
+        // TODO: Add error production for out-of-bounds payload.
+        return {};
+      }
+      uint64_t bits;
+      static_assert(sizeof(bits) == sizeof(d));
+      // Go through memcpy to edit the bit pattern without aliasing issues.
+      memcpy(&bits, &d, sizeof(bits));
+      bits = (bits & ~payloadMask) | payload;
+      memcpy(&d, &bits, sizeof(bits));
+    }
+    return d;
+  }
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      // Negative tokens store the two's complement of their magnitude, and the
+      // magnitude may exceed 2^63, so reinterpreting the bits as int64_t could
+      // flip the sign. Recover the magnitude with unsigned negation and negate
+      // in floating point instead. A magnitude of zero yields -0.0.
+      const uint64_t magnitude = uint64_t(0) - uint64_t(tok->n);
+      return -double(magnitude);
+    }
+    return double(tok->n);
+  }
+  return {};
+}
+
+// Interpret this token as a 32-bit float.
+//
+// Float tokens are narrowed from the lexed double, except that NaNs have their
+// payload bits replaced by the token's explicit payload or, when none was
+// given, by the default payload (only the most significant payload bit set).
+// A payload of zero or one wider than 23 bits is rejected. Integer tokens are
+// also accepted and converted to the nearest float. Any other token yields an
+// empty optional.
+std::optional<float> Token::getF32() const {
+  constexpr int signif = 23;
+  constexpr uint32_t payloadMask = (1u << signif) - 1;
+  constexpr uint64_t nanDefault = 1ull << (signif - 1);
+  if (auto* tok = std::get_if<FloatTok>(&data)) {
+    float f = tok->d;
+    if (std::isnan(f)) {
+      // Validate and inject payload.
+      uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
+      if (payload == 0 || payload > payloadMask) {
+        // TODO: Add error production for out-of-bounds payload.
+        return {};
+      }
+      uint32_t bits;
+      static_assert(sizeof(bits) == sizeof(f));
+      // Go through memcpy to edit the bit pattern without aliasing issues.
+      memcpy(&bits, &f, sizeof(bits));
+      // The payload was checked against the 23-bit mask above, so narrowing it
+      // to 32 bits loses nothing.
+      bits = (bits & ~payloadMask) | uint32_t(payload);
+      memcpy(&f, &bits, sizeof(bits));
+    }
+    return f;
+  }
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      // Negative tokens store the two's complement of their magnitude, and the
+      // magnitude may exceed 2^63, so reinterpreting the bits as int64_t could
+      // flip the sign. Recover the magnitude with unsigned negation and negate
+      // in floating point instead. A magnitude of zero yields -0.0f.
+      const uint64_t magnitude = uint64_t(0) - uint64_t(tok->n);
+      return -float(magnitude);
+    }
+    return float(tok->n);
+  }
+  return {};
+}
+
+// Interpret this token as a string literal and return a view of its contents.
+// Non-string tokens yield an empty optional.
+std::optional<std::string_view> Token::getString() const {
+  auto* strTok = std::get_if<StringTok>(&data);
+  if (!strTok) {
+    return std::nullopt;
+  }
+  if (!strTok->str) {
+    // No separately stored contents: view the source span directly, dropping
+    // its first and last characters (presumably the surrounding quotes).
+    return span.substr(1, span.size() - 2);
+  }
+  return std::string_view(*strTok->str);
+}
+
void Lexer::skipSpace() {
if (auto ctx = space(next())) {
index += ctx->span.size();
diff --git a/src/wat-lexer.h b/src/wat-lexer.h
index 5a955f5c0..e4ba2efa8 100644
--- a/src/wat-lexer.h
+++ b/src/wat-lexer.h
@@ -15,6 +15,7 @@
*/
#include <cstddef>
+#include <cstring>
#include <iterator>
#include <optional>
#include <ostream>
@@ -101,6 +102,37 @@ struct Token {
std::string_view span;
Data data;
+ // ====================
+ // Token classification
+ // ====================
+
+  // True when this token holds the left-parenthesis alternative.
+  bool isLParen() const {
+    return std::get_if<LParenTok>(&data) != nullptr;
+  }
+
+  // True when this token holds the right-parenthesis alternative.
+  bool isRParen() const {
+    return std::get_if<RParenTok>(&data) != nullptr;
+  }
+
+  // Returns the token's full source span when it is an identifier token, and
+  // an empty optional otherwise.
+  std::optional<std::string_view> getID() const {
+    const bool isId = std::get_if<IdTok>(&data) != nullptr;
+    if (!isId) {
+      return std::nullopt;
+    }
+    return span;
+  }
+
+  // Returns the token's full source span when it is a keyword token, and an
+  // empty optional otherwise.
+  std::optional<std::string_view> getKeyword() const {
+    const bool isKeyword = std::get_if<KeywordTok>(&data) != nullptr;
+    if (!isKeyword) {
+      return std::nullopt;
+    }
+    return span;
+  }
+ std::optional<uint64_t> getU64() const;
+ std::optional<int64_t> getS64() const;
+ std::optional<uint64_t> getI64() const;
+ std::optional<uint32_t> getU32() const;
+ std::optional<int32_t> getS32() const;
+ std::optional<uint32_t> getI32() const;
+ std::optional<double> getF64() const;
+ std::optional<float> getF32() const;
+ std::optional<std::string_view> getString() const;
+
bool operator==(const Token&) const;
friend std::ostream& operator<<(std::ostream& os, const Token&);
};
diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp
index 75ddca6ed..a1c60a706 100644
--- a/test/gtest/wat-lexer.cpp
+++ b/test/gtest/wat-lexer.cpp
@@ -116,6 +116,8 @@ TEST(LexerTest, LexParens) {
EXPECT_EQ(t2, left);
EXPECT_EQ(t3, right);
EXPECT_EQ(t4, right);
+ EXPECT_TRUE(left.isLParen());
+ EXPECT_TRUE(right.isRParen());
}
TEST(LexerTest, LexInt) {
@@ -236,29 +238,28 @@ TEST(LexerTest, LexInt) {
{
Lexer lexer("+9223372036854775807"sv);
ASSERT_NE(lexer, lexer.end());
- Token expected{"+9223372036854775807"sv, IntTok{~(1ull << 63), Pos}};
+ Token expected{"+9223372036854775807"sv, IntTok{INT64_MAX, Pos}};
EXPECT_EQ(*lexer, expected);
}
{
- // 64-bit signed overflow!
Lexer lexer("+9223372036854775808"sv);
ASSERT_NE(lexer, lexer.end());
Token expected{"+9223372036854775808"sv,
- FloatTok{{}, 9223372036854775808.}};
+ IntTok{uint64_t(INT64_MAX) + 1, Pos}};
+ ;
EXPECT_EQ(*lexer, expected);
}
{
Lexer lexer("-9223372036854775808"sv);
ASSERT_NE(lexer, lexer.end());
- Token expected{"-9223372036854775808"sv, IntTok{1ull << 63, Neg}};
+ Token expected{"-9223372036854775808"sv, IntTok{uint64_t(INT64_MIN), Neg}};
EXPECT_EQ(*lexer, expected);
}
{
- // 64-bit signed underflow!
Lexer lexer("-9223372036854775809"sv);
ASSERT_NE(lexer, lexer.end());
Token expected{"-9223372036854775809"sv,
- FloatTok{{}, -9223372036854775809.}};
+ IntTok{uint64_t(INT64_MIN) - 1, Neg}};
EXPECT_EQ(*lexer, expected);
}
}
@@ -374,6 +375,361 @@ TEST(LexerTest, LexHexInt) {
}
}
+TEST(LexerTest, ClassifyInt) { // Checks every integer/float classifier on integer tokens at width and sign boundaries.
+ { // "0": unsigned zero is accepted by every classifier; floats are +0.
+ Lexer lexer("0"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ ASSERT_TRUE(lexer->getU32());
+ ASSERT_TRUE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0ull);
+ EXPECT_EQ(*lexer->getS64(), 0ll);
+ EXPECT_EQ(*lexer->getI64(), 0ull);
+ EXPECT_EQ(*lexer->getU32(), 0u);
+ EXPECT_EQ(*lexer->getS32(), 0);
+ EXPECT_EQ(*lexer->getI32(), 0u);
+ EXPECT_EQ(*lexer->getF64(), 0.0);
+ EXPECT_EQ(*lexer->getF32(), 0.0);
+ EXPECT_FALSE(std::signbit(*lexer->getF64()));
+ EXPECT_FALSE(std::signbit(*lexer->getF32()));
+ }
+ { // "+0": the explicit sign rules out U64/U32; floats are still +0.
+ Lexer lexer("+0"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ ASSERT_TRUE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0ll);
+ EXPECT_EQ(*lexer->getI64(), 0ull);
+ EXPECT_EQ(*lexer->getS32(), 0);
+ EXPECT_EQ(*lexer->getI32(), 0u);
+ EXPECT_EQ(*lexer->getF64(), 0.0);
+ EXPECT_EQ(*lexer->getF32(), 0.0);
+ EXPECT_FALSE(std::signbit(*lexer->getF64()));
+ EXPECT_FALSE(std::signbit(*lexer->getF32()));
+ }
+ { // "-0": the integer value is 0, but the float results must carry the sign bit.
+ Lexer lexer("-0"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ ASSERT_TRUE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0ll);
+ EXPECT_EQ(*lexer->getI64(), 0ull);
+ EXPECT_EQ(*lexer->getS32(), 0);
+ EXPECT_EQ(*lexer->getI32(), 0u);
+ EXPECT_EQ(*lexer->getF64(), -0.0);
+ EXPECT_EQ(*lexer->getF32(), -0.0);
+ ASSERT_TRUE(std::signbit(*lexer->getF64()));
+ ASSERT_TRUE(std::signbit(*lexer->getF32()));
+ }
+ { // INT32_MAX without a sign: fits every integer width and signedness.
+ Lexer lexer("0x7fff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ ASSERT_TRUE(lexer->getU32());
+ ASSERT_TRUE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0x7fffffffull);
+ EXPECT_EQ(*lexer->getS64(), 0x7fffffffll);
+ EXPECT_EQ(*lexer->getI64(), 0x7fffffffull);
+ EXPECT_EQ(*lexer->getU32(), 0x7fffffffu);
+ EXPECT_EQ(*lexer->getS32(), 0x7fffffff);
+ EXPECT_EQ(*lexer->getI32(), 0x7fffffffu);
+ EXPECT_EQ(*lexer->getF64(), 0x7fffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x7fffffff.p0f);
+ }
+ { // INT32_MAX + 1 without a sign: too large for S32 but still valid as U32/I32.
+ Lexer lexer("0x8000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ ASSERT_TRUE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0x80000000ull);
+ EXPECT_EQ(*lexer->getS64(), 0x80000000ll);
+ EXPECT_EQ(*lexer->getI64(), 0x80000000ull);
+ EXPECT_EQ(*lexer->getU32(), 0x80000000u);
+ EXPECT_EQ(*lexer->getI32(), 0x80000000u);
+ EXPECT_EQ(*lexer->getF64(), 0x80000000.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x80000000.p0f);
+ }
+ { // +INT32_MAX: the sign rules out U64/U32, but the value is within S32 range.
+ Lexer lexer("+0x7fff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ ASSERT_TRUE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0x7fffffffll);
+ EXPECT_EQ(*lexer->getI64(), 0x7fffffffull);
+ EXPECT_EQ(*lexer->getS32(), 0x7fffffff);
+ EXPECT_EQ(*lexer->getI32(), 0x7fffffffu);
+ EXPECT_EQ(*lexer->getF64(), 0x7fffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x7fffffff.p0f);
+ }
+ { // +(INT32_MAX + 1): signed and out of S32 range, so no 32-bit classifier accepts it.
+ Lexer lexer("+0x8000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0x80000000ll);
+ EXPECT_EQ(*lexer->getI64(), 0x80000000ull);
+ EXPECT_EQ(*lexer->getF64(), 0x80000000.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x80000000.p0f);
+ }
+ { // INT32_MIN: the most negative value accepted as S32/I32.
+ Lexer lexer("-0x8000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ ASSERT_TRUE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), -0x80000000ll);
+ EXPECT_EQ(*lexer->getI64(), -0x80000000ull);
+ EXPECT_EQ(*lexer->getS32(), -0x7fffffffll - 1);
+ EXPECT_EQ(*lexer->getI32(), -0x80000000u);
+ EXPECT_EQ(*lexer->getF64(), -0x80000000.p0);
+ EXPECT_EQ(*lexer->getF32(), -0x80000000.p0f);
+ }
+ { // INT32_MIN - 1: rejected by all 32-bit classifiers, still valid as S64/I64.
+ Lexer lexer("-0x8000_0001"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), -0x80000001ll);
+ EXPECT_EQ(*lexer->getI64(), -0x80000001ull);
+ EXPECT_EQ(*lexer->getF64(), -0x80000001.p0);
+ EXPECT_EQ(*lexer->getF32(), -0x80000001.p0f);
+ }
+ { // UINT32_MAX without a sign: valid as U32/I32 but not as S32.
+ Lexer lexer("0xffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ ASSERT_TRUE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ ASSERT_TRUE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0xffffffffull);
+ EXPECT_EQ(*lexer->getS64(), 0xffffffffll);
+ EXPECT_EQ(*lexer->getI64(), 0xffffffffull);
+ EXPECT_EQ(*lexer->getU32(), 0xffffffffu);
+ EXPECT_EQ(*lexer->getI32(), 0xffffffffu);
+ EXPECT_EQ(*lexer->getF64(), 0xffffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0xffffffff.p0f);
+ }
+ { // UINT32_MAX + 1 without a sign: no 32-bit classifier accepts it.
+ Lexer lexer("0x1_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0x100000000ull);
+ EXPECT_EQ(*lexer->getS64(), 0x100000000ll);
+ EXPECT_EQ(*lexer->getI64(), 0x100000000ull);
+ EXPECT_EQ(*lexer->getF64(), 0x100000000.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x100000000.p0f);
+ }
+ { // +UINT32_MAX: signed and beyond S32 range, so only the 64-bit and float forms apply.
+ Lexer lexer("+0xffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0xffffffffll);
+ EXPECT_EQ(*lexer->getI64(), 0xffffffffull);
+ EXPECT_EQ(*lexer->getF64(), 0xffffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0xffffffff.p0f);
+ }
+ { // +(UINT32_MAX + 1): signed 33-bit value, valid only as S64/I64 and as floats.
+ Lexer lexer("+0x1_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0x100000000ll);
+ EXPECT_EQ(*lexer->getI64(), 0x100000000ull);
+ EXPECT_EQ(*lexer->getF64(), 0x100000000.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x100000000.p0f);
+ }
+ { // INT64_MAX without a sign: valid for every 64-bit classifier, too wide for 32 bits.
+ Lexer lexer("0x7fff_ffff_ffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0x7fffffffffffffffull);
+ EXPECT_EQ(*lexer->getS64(), 0x7fffffffffffffffll);
+ EXPECT_EQ(*lexer->getI64(), 0x7fffffffffffffffull);
+ EXPECT_EQ(*lexer->getF64(), 0x7fffffffffffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x7fffffffffffffff.p0f);
+ }
+ { // +INT64_MAX: the largest explicitly signed value accepted as S64/I64.
+ Lexer lexer("+0x7fff_ffff_ffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), 0x7fffffffffffffffll);
+ EXPECT_EQ(*lexer->getI64(), 0x7fffffffffffffffull);
+ EXPECT_EQ(*lexer->getF64(), 0x7fffffffffffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0x7fffffffffffffff.p0f);
+ }
+ { // INT64_MIN: the most negative value accepted as S64/I64.
+ Lexer lexer("-0x8000_0000_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ ASSERT_TRUE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getS64(), -0x7fffffffffffffffll - 1);
+ EXPECT_EQ(*lexer->getI64(), -0x8000000000000000ull);
+ EXPECT_EQ(*lexer->getF64(), -0x8000000000000000.p0);
+ EXPECT_EQ(*lexer->getF32(), -0x8000000000000000.p0f);
+ }
+ { // UINT64_MAX without a sign: valid as U64/I64 only; S64 overflows.
+ Lexer lexer("0xffff_ffff_ffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getU64());
+ EXPECT_FALSE(lexer->getS64());
+ ASSERT_TRUE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getU64(), 0xffffffffffffffffull);
+ EXPECT_EQ(*lexer->getI64(), 0xffffffffffffffffull);
+ EXPECT_EQ(*lexer->getF64(), 0xffffffffffffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0xffffffffffffffff.p0f);
+ }
+ { // +UINT64_MAX: signed and beyond S64 range, so only the float classifiers accept it.
+ Lexer lexer("+0xffff_ffff_ffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+
+ EXPECT_FALSE(lexer->getU64());
+ EXPECT_FALSE(lexer->getS64());
+ EXPECT_FALSE(lexer->getI64());
+ EXPECT_FALSE(lexer->getU32());
+ EXPECT_FALSE(lexer->getS32());
+ EXPECT_FALSE(lexer->getI32());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+
+ EXPECT_EQ(*lexer->getF64(), 0xffffffffffffffff.p0);
+ EXPECT_EQ(*lexer->getF32(), 0xffffffffffffffff.p0f);
+ }
+}
+
TEST(LexerTest, LexFloat) {
{
Lexer lexer("42"sv);
@@ -826,13 +1182,14 @@ TEST(LexerTest, LexNan) {
{
Lexer lexer("nan:0x0"sv);
ASSERT_NE(lexer, lexer.end());
- Token expected{"nan:0x0"sv, KeywordTok{}};
+ Token expected{"nan:0x0"sv, FloatTok{{0}, NAN}};
EXPECT_EQ(*lexer, expected);
}
{
Lexer lexer("nan:0x10_0000_0000_0000"sv);
ASSERT_NE(lexer, lexer.end());
- Token expected{"nan:0x10_0000_0000_0000"sv, KeywordTok{}};
+ Token expected{"nan:0x10_0000_0000_0000"sv,
+ FloatTok{{0x10000000000000}, NAN}};
EXPECT_EQ(*lexer, expected);
}
{
@@ -851,12 +1208,160 @@ TEST(LexerTest, LexNan) {
}
}
+TEST(LexerTest, ClassifyFloat) { // Checks getF64/getF32 on float tokens: range limits and NaN sign/payload handling.
+ constexpr int signif64 = 52;
+ constexpr int signif32 = 23;
+ constexpr uint64_t payloadMask64 = (1ull << signif64) - 1;
+ constexpr uint32_t payloadMask32 = (1u << signif32) - 1;
+ constexpr uint64_t dnanDefault = 1ull << (signif64 - 1);
+ constexpr uint32_t fnanDefault = 1u << (signif32 - 1);
+ { // FLT_MAX written as a decimal: expected to come back as FLT_MAX at both widths.
+ Lexer lexer("340282346638528859811704183484516925440."sv);
+ ASSERT_NE(lexer, lexer.end());
+ ASSERT_TRUE(lexer->getF64());
+ EXPECT_TRUE(lexer->getF32());
+ EXPECT_EQ(*lexer->getF64(), FLT_MAX);
+ EXPECT_EQ(*lexer->getF32(), FLT_MAX);
+ }
+ { // DBL_MAX written as a decimal: the f32 classification is expected to be infinity.
+ Lexer lexer("17976931348623157081452742373170435679807056752584499659891747"
+ "68031572607800285387605895586327668781715404589535143824642343"
+ "21326889464182768467546703537516986049910576551282076245490090"
+ "38932894407586850845513394230458323690322294816580855933212334"
+ "8274797826204144723168738177180919299881250404026184124858368"
+ "."sv);
+ ASSERT_NE(lexer, lexer.end());
+ ASSERT_TRUE(lexer->getF64());
+ ASSERT_TRUE(lexer->getF32());
+ EXPECT_EQ(*lexer->getF64(), DBL_MAX);
+ EXPECT_EQ(*lexer->getF32(), INFINITY);
+ }
+ { // Plain nan: sign bit clear, default payload (only the top payload bit set).
+ Lexer lexer("nan");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getF64());
+ double d = *lexer->getF64();
+ EXPECT_TRUE(std::isnan(d));
+ EXPECT_FALSE(std::signbit(d));
+ uint64_t dbits;
+ memcpy(&dbits, &d, sizeof(dbits));
+ EXPECT_EQ(dbits & payloadMask64, dnanDefault);
+
+ ASSERT_TRUE(lexer->getF32());
+ float f = *lexer->getF32();
+ EXPECT_TRUE(std::isnan(f));
+ EXPECT_FALSE(std::signbit(f));
+ uint32_t fbits;
+ memcpy(&fbits, &f, sizeof(fbits));
+ EXPECT_EQ(fbits & payloadMask32, fnanDefault);
+ }
+ { // -nan: sign bit set, default payload.
+ Lexer lexer("-nan");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getF64());
+ double d = *lexer->getF64();
+ EXPECT_TRUE(std::isnan(d));
+ EXPECT_TRUE(std::signbit(d));
+ uint64_t dbits;
+ memcpy(&dbits, &d, sizeof(dbits));
+ EXPECT_EQ(dbits & payloadMask64, dnanDefault);
+
+ ASSERT_TRUE(lexer->getF32());
+ float f = *lexer->getF32();
+ EXPECT_TRUE(std::isnan(f));
+ EXPECT_TRUE(std::signbit(f));
+ uint32_t fbits;
+ memcpy(&fbits, &f, sizeof(fbits));
+ EXPECT_EQ(fbits & payloadMask32, fnanDefault);
+ }
+ { // +nan: an explicit plus sign behaves like plain nan.
+ Lexer lexer("+nan");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getF64());
+ double d = *lexer->getF64();
+ EXPECT_TRUE(std::isnan(d));
+ EXPECT_FALSE(std::signbit(d));
+ uint64_t dbits;
+ memcpy(&dbits, &d, sizeof(dbits));
+ EXPECT_EQ(dbits & payloadMask64, dnanDefault);
+
+ ASSERT_TRUE(lexer->getF32());
+ float f = *lexer->getF32();
+ EXPECT_TRUE(std::isnan(f));
+ EXPECT_FALSE(std::signbit(f));
+ uint32_t fbits;
+ memcpy(&fbits, &f, sizeof(fbits));
+ EXPECT_EQ(fbits & payloadMask32, fnanDefault);
+ }
+ { // Explicit small payload: injected verbatim at both widths.
+ Lexer lexer("nan:0x1234");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getF64());
+ double d = *lexer->getF64();
+ EXPECT_TRUE(std::isnan(d));
+ uint64_t dbits;
+ memcpy(&dbits, &d, sizeof(dbits));
+ EXPECT_EQ(dbits & payloadMask64, 0x1234ull);
+
+ ASSERT_TRUE(lexer->getF32());
+ float f = *lexer->getF32();
+ EXPECT_TRUE(std::isnan(f));
+ uint32_t fbits;
+ memcpy(&fbits, &f, sizeof(fbits));
+ EXPECT_EQ(fbits & payloadMask32, 0x1234u);
+ }
+ { // Largest payload that fits in the 23-bit f32 payload field.
+ Lexer lexer("nan:0x7FFFFF");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getF64());
+ double d = *lexer->getF64();
+ EXPECT_TRUE(std::isnan(d));
+ uint64_t dbits;
+ memcpy(&dbits, &d, sizeof(dbits));
+ EXPECT_EQ(dbits & payloadMask64, 0x7fffffull);
+
+ ASSERT_TRUE(lexer->getF32());
+ float f = *lexer->getF32();
+ EXPECT_TRUE(std::isnan(f));
+ uint32_t fbits;
+ memcpy(&fbits, &f, sizeof(fbits));
+ EXPECT_EQ(fbits & payloadMask32, 0x7fffffu);
+ }
+ { // One past the f32 payload mask: accepted as f64, rejected as f32.
+ Lexer lexer("nan:0x800000");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_TRUE(lexer->getF64());
+ double d = *lexer->getF64();
+ EXPECT_TRUE(std::isnan(d));
+ uint64_t dbits;
+ memcpy(&dbits, &d, sizeof(dbits));
+ EXPECT_EQ(dbits & payloadMask64, 0x800000ull);
+
+ ASSERT_FALSE(lexer->getF32());
+ }
+ { // A zero payload is rejected at both widths.
+ Lexer lexer("nan:0x0");
+ ASSERT_NE(lexer, lexer.end());
+
+ ASSERT_FALSE(lexer->getF64());
+ ASSERT_FALSE(lexer->getF32());
+ }
+}
+
TEST(LexerTest, LexIdent) {
{
Lexer lexer("$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);
ASSERT_NE(lexer, lexer.end());
Token expected{"$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv, IdTok{}};
EXPECT_EQ(*lexer, expected);
+ EXPECT_TRUE(lexer->getID());
+ EXPECT_EQ(*lexer->getID(), "$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);
}
{
Lexer lexer("$[]{}"sv);
@@ -879,6 +1384,9 @@ TEST(LexerTest, LexString) {
ASSERT_NE(lexer, lexer.end());
Token expected{pangram, StringTok{{}}};
EXPECT_EQ(*lexer, expected);
+ EXPECT_TRUE(lexer->getString());
+ EXPECT_EQ(*lexer->getString(),
+ "The quick brown fox jumps over the lazy dog"sv);
}
{
auto chars = "\"`~!@#$%^&*()_-+0123456789|,.<>/?;:'\""sv;
@@ -893,6 +1401,8 @@ TEST(LexerTest, LexString) {
ASSERT_NE(lexer, lexer.end());
Token expected{escapes, StringTok{{"_\t_\n_\r_\\_\"_'_"}}};
EXPECT_EQ(*lexer, expected);
+ EXPECT_TRUE(lexer->getString());
+ EXPECT_EQ(*lexer->getString(), "_\t_\n_\r_\\_\"_'_"sv);
}
{
auto escapes = "\"_\\00_\\07_\\20_\\5A_\\7F_\\ff_\\ffff_\""sv;
@@ -1001,4 +1511,7 @@ TEST(LexerTest, LexKeywords) {
EXPECT_EQ(t3, func);
EXPECT_EQ(t4, import);
EXPECT_EQ(t5, reserved);
+
+ EXPECT_TRUE(t1.getKeyword());
+ EXPECT_EQ(*t1.getKeyword(), "module"sv);
}