summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  src/wasm/wat-parser-internal.h  256
-rw-r--r--  test/gtest/wat-parser.cpp       494
2 files changed, 721 insertions(+), 29 deletions(-)
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h
index c398acabc..448431d8b 100644
--- a/src/wasm/wat-parser-internal.h
+++ b/src/wasm/wat-parser-internal.h
@@ -26,6 +26,8 @@
#include <cassert>
#include <cctype>
+#include <cmath>
+#include <cstdlib>
#include <iostream>
#include <optional>
#include <sstream>
@@ -107,6 +108,28 @@ public:
enum Signedness { Unsigned, Signed };
+enum OverflowBehavior { DisallowOverflow, IgnoreOverflow };
+
+std::optional<int> getDigit(char c) {
+ if ('0' <= c && c <= '9') {
+ return {c - '0'};
+ }
+ return std::nullopt;
+}
+
+std::optional<int> getHexDigit(char c) {
+ if ('0' <= c && c <= '9') {
+ return {c - '0'};
+ }
+ if ('A' <= c && c <= 'F') {
+ return {10 + c - 'A'};
+ }
+ if ('a' <= c && c <= 'f') {
+ return {10 + c - 'a'};
+ }
+ return std::nullopt;
+}
+
// The result of lexing an integer token fragment.
struct LexIntResult : LexResult {
uint64_t n;
@@ -124,29 +147,17 @@ private:
bool negative = false;
bool overflow = false;
- std::optional<int> getDigit(char c) {
- if ('0' <= c && c <= '9') {
- return {c - '0'};
- }
- return std::nullopt;
- }
+public:
+ explicit LexIntCtx(std::string_view in) : LexCtx(in) {}
- std::optional<int> getHexDigit(char c) {
- if ('0' <= c && c <= '9') {
- return {c - '0'};
- }
- if ('A' <= c && c <= 'F') {
- return {10 + c - 'A'};
- }
- if ('a' <= c && c <= 'f') {
- return {10 + c - 'a'};
+ // Lex only the underlying span, ignoring the overflow and value.
+ std::optional<LexIntResult> lexedRaw() {
+ if (auto basic = LexCtx::lexed()) {
+ return LexIntResult{*basic, 0, Unsigned};
}
- return std::nullopt;
+ return {};
}
-public:
- explicit LexIntCtx(std::string_view in) : LexCtx(in) {}
-
std::optional<LexIntResult> lexed() {
// Check most significant bit for overflow of signed numbers.
if (overflow) {
@@ -217,6 +228,54 @@ public:
}
};
+struct LexFloatResult : LexResult {
+ // The payload if we lexed a nan with payload. We cannot store the payload
+ // directly in `d` because we do not know at this point whether we are parsing
+ // an f32 or f64 and therefore we do not know what the allowable payloads are.
+ std::optional<uint64_t> nanPayload;
+ double d;
+};
+
+struct LexFloatCtx : LexCtx {
+ std::optional<uint64_t> nanPayload;
+
+ explicit LexFloatCtx(std::string_view in) : LexCtx(in) {}
+
+ std::optional<LexFloatResult> lexed() {
+ assert(!std::signbit(NAN) && "Expected NAN to be positive");
+ auto basic = LexCtx::lexed();
+ if (!basic) {
+ return {};
+ }
+ if (nanPayload) {
+ double nan = basic->span[0] == '-' ? -NAN : NAN;
+ return LexFloatResult{*basic, nanPayload, nan};
+ }
+ // strtod does not return -NAN for "-nan" on all platforms.
+ if (basic->span == "-nan"sv) {
+ return LexFloatResult{*basic, nanPayload, -NAN};
+ }
+ // Do not try to implement fully general and precise float parsing
+ // ourselves. Instead, call out to std::strtod to do our parsing. This means
+ // we need to strip any underscores since `std::strtod` does not understand
+ // them.
+ std::stringstream ss;
+ for (const char *curr = basic->span.data(),
+ *end = curr + basic->span.size();
+ curr != end;
+ ++curr) {
+ if (*curr != '_') {
+ ss << *curr;
+ }
+ }
+ std::string str = ss.str();
+ char* last;
+ double d = std::strtod(str.data(), &last);
+ assert(last == str.data() + str.size() && "could not parse float");
+ return LexFloatResult{*basic, {}, d};
+ }
+};
+
struct LexStrResult : LexResult {
// Allocate a string only if there are escape sequences, otherwise just use
// the original string_view.
@@ -378,8 +437,12 @@ bool LexCtx::canFinish() const {
// num ::= d:digit => d
// | n:num '_'? d:digit => 10*n + d
// digit ::= '0' => 0 | ... | '9' => 9
-std::optional<LexIntResult> num(std::string_view in) {
+std::optional<LexIntResult> num(std::string_view in,
+ OverflowBehavior overflow = DisallowOverflow) {
LexIntCtx ctx(in);
+ if (ctx.empty()) {
+ return {};
+ }
if (!ctx.takeDigit()) {
return {};
}
@@ -387,8 +450,9 @@ std::optional<LexIntResult> num(std::string_view in) {
bool under = ctx.takePrefix("_"sv);
if (!ctx.takeDigit()) {
if (!under) {
- return ctx.lexed();
+ return overflow == DisallowOverflow ? ctx.lexed() : ctx.lexedRaw();
}
+ // TODO: Add error production for trailing underscore.
return {};
}
}
@@ -399,7 +463,8 @@ std::optional<LexIntResult> num(std::string_view in) {
// hexdigit ::= d:digit => d
// | 'A' => 10 | ... | 'F' => 15
// | 'a' => 10 | ... | 'f' => 15
-std::optional<LexIntResult> hexnum(std::string_view in) {
+std::optional<LexIntResult>
+hexnum(std::string_view in, OverflowBehavior overflow = DisallowOverflow) {
LexIntCtx ctx(in);
if (!ctx.takeHexdigit()) {
return {};
@@ -408,8 +473,9 @@ std::optional<LexIntResult> hexnum(std::string_view in) {
bool under = ctx.takePrefix("_"sv);
if (!ctx.takeHexdigit()) {
if (!under) {
- return ctx.lexed();
+ return overflow == DisallowOverflow ? ctx.lexed() : ctx.lexedRaw();
}
+ // TODO: Add error production for trailing underscore.
return {};
}
}
@@ -445,6 +511,114 @@ std::optional<LexIntResult> integer(std::string_view in) {
return {};
}
+// float ::= p:num '.'? => p
+// | p:num '.' q:frac => p + q
+// | p:num '.'? ('E'|'e') s:sign e:num => p * 10^([s]e)
+// | p:num '.' q:frac ('E'|'e') s:sign e:num => (p + q) * 10^([s]e)
+// frac ::= d:digit => d/10
+// | d:digit '_'? p:frac => (d + p/10) / 10
+std::optional<LexResult> decfloat(std::string_view in) {
+ LexCtx ctx(in);
+ if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+ ctx.take(*lexed);
+ } else {
+ return {};
+ }
+ // Optional '.' followed by optional frac
+ if (ctx.takePrefix("."sv)) {
+ if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+ ctx.take(*lexed);
+ }
+ }
+ if (ctx.takePrefix("E"sv) || ctx.takePrefix("e"sv)) {
+ // Optional sign
+ ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
+ if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+ ctx.take(*lexed);
+ } else {
+ // TODO: Add error production for missing exponent.
+ return {};
+ }
+ }
+ return ctx.lexed();
+}
+
+// hexfloat ::= '0x' p:hexnum '.'? => p
+// | '0x' p:hexnum '.' q:hexfrac => p + q
+// | '0x' p:hexnum '.'? ('P'|'p') s:sign e:num => p * 2^([s]e)
+// | '0x' p:hexnum '.' q:hexfrac ('P'|'p') s:sign e:num
+// => (p + q) * 2^([s]e)
+// hexfrac ::= h:hexdigit => h/16
+// | h:hexdigit '_'? p:hexfrac => (h + p/16) / 16
+std::optional<LexResult> hexfloat(std::string_view in) {
+ LexCtx ctx(in);
+ if (!ctx.takePrefix("0x"sv)) {
+ return {};
+ }
+ if (auto lexed = hexnum(ctx.next(), IgnoreOverflow)) {
+ ctx.take(*lexed);
+ } else {
+ return {};
+ }
+ // Optional '.' followed by optional hexfrac
+ if (ctx.takePrefix("."sv)) {
+ if (auto lexed = hexnum(ctx.next(), IgnoreOverflow)) {
+ ctx.take(*lexed);
+ }
+ }
+ if (ctx.takePrefix("P"sv) || ctx.takePrefix("p"sv)) {
+ // Optional sign
+ ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
+ if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+ ctx.take(*lexed);
+ } else {
+ // TODO: Add error production for missing exponent.
+ return {};
+ }
+ }
+ return ctx.lexed();
+}
+
+// fN ::= s:sign z:fNmag => [s]z
+// fNmag ::= z:float => float_N(z) (if float_N(z) != +/-infinity)
+// | z:hexfloat => float_N(z) (if float_N(z) != +/-infinity)
+// | 'inf' => infinity
+// | 'nan' => nan(2^(signif(N)-1))
+// | 'nan:0x' n:hexnum => nan(n) (if 1 <= n < 2^signif(N))
+std::optional<LexFloatResult> float_(std::string_view in) {
+ LexFloatCtx ctx(in);
+ // Optional sign
+ ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
+ if (auto lexed = hexfloat(ctx.next())) {
+ ctx.take(*lexed);
+ } else if (auto lexed = decfloat(ctx.next())) {
+ ctx.take(*lexed);
+ } else if (ctx.takePrefix("inf"sv)) {
+ // nop
+ } else if (ctx.takePrefix("nan"sv)) {
+ if (ctx.takePrefix(":0x"sv)) {
+ if (auto lexed = hexnum(ctx.next())) {
+ ctx.take(*lexed);
+ if (1 <= lexed->n && lexed->n < (1ull << 52)) {
+ ctx.nanPayload = lexed->n;
+ } else {
+ // TODO: Add error production for invalid NaN payload.
+ return {};
+ }
+ } else {
+ // TODO: Add error production for malformed NaN payload.
+ return {};
+ }
+ }
+ } else {
+ return {};
+ }
+ if (ctx.canFinish()) {
+ return ctx.lexed();
+ }
+ return {};
+}
+
// idchar ::= '0' | ... | '9'
// | 'A' | ... | 'Z'
// | 'a' | ... | 'z'
@@ -642,6 +816,31 @@ struct IntTok {
}
};
+struct FloatTok {
+ // The payload if we lexed a nan with payload. We cannot store the payload
+ // directly in `d` because we do not know at this point whether we are parsing
+ // an f32 or f64 and therefore we do not know what the allowable payloads are.
+ std::optional<uint64_t> nanPayload;
+ double d;
+
+ friend std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
+ if (std::isnan(tok.d)) {
+ os << (std::signbit(tok.d) ? "-" : "+");
+ if (tok.nanPayload) {
+ return os << "nan:0x" << std::hex << *tok.nanPayload << std::dec;
+ }
+ return os << "nan";
+ }
+ return os << tok.d;
+ }
+
+ friend bool operator==(const FloatTok& t1, const FloatTok& t2) {
+ return std::signbit(t1.d) == std::signbit(t2.d) &&
+ (t1.d == t2.d || (std::isnan(t1.d) && std::isnan(t2.d) &&
+ t1.nanPayload == t2.nanPayload));
+ }
+};
+
struct IdTok {
friend std::ostream& operator<<(std::ostream& os, const IdTok&) {
return os << "id";
@@ -676,8 +875,13 @@ struct KeywordTok {
};
struct Token {
- using Data =
- std::variant<LParenTok, RParenTok, IntTok, IdTok, StringTok, KeywordTok>;
+ using Data = std::variant<LParenTok,
+ RParenTok,
+ IntTok,
+ FloatTok,
+ IdTok,
+ StringTok,
+ KeywordTok>;
std::string_view span;
Data data;
@@ -765,6 +969,8 @@ struct Lexer {
tok = Token{t->span, IdTok{}};
} else if (auto t = integer(next())) {
tok = Token{t->span, IntTok{t->n, t->signedness}};
+ } else if (auto t = float_(next())) {
+ tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
} else if (auto t = str(next())) {
tok = Token{t->span, StringTok{t->str}};
} else if (auto t = keyword(next())) {
diff --git a/test/gtest/wat-parser.cpp b/test/gtest/wat-parser.cpp
index a48efd894..c0d2bb113 100644
--- a/test/gtest/wat-parser.cpp
+++ b/test/gtest/wat-parser.cpp
@@ -209,8 +209,11 @@ TEST(ParserTest, LexInt) {
}
{
// 64-bit unsigned overflow!
- Lexer lexer("18446744073709551616");
- EXPECT_EQ(lexer, lexer.end());
+ Lexer lexer("18446744073709551616"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"18446744073709551616"sv,
+ FloatTok{{}, 18446744073709551616.}};
+ EXPECT_EQ(*lexer, expected);
}
{
Lexer lexer("+9223372036854775807"sv);
@@ -221,7 +224,10 @@ TEST(ParserTest, LexInt) {
{
// 64-bit signed overflow!
Lexer lexer("+9223372036854775808"sv);
- EXPECT_EQ(lexer, lexer.end());
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+9223372036854775808"sv,
+ FloatTok{{}, 9223372036854775808.}};
+ EXPECT_EQ(*lexer, expected);
}
{
Lexer lexer("-9223372036854775808"sv);
@@ -232,7 +238,10 @@ TEST(ParserTest, LexInt) {
{
// 64-bit signed underflow!
Lexer lexer("-9223372036854775809"sv);
- EXPECT_EQ(lexer, lexer.end());
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-9223372036854775809"sv,
+ FloatTok{{}, -9223372036854775809.}};
+ EXPECT_EQ(*lexer, expected);
}
}
@@ -347,6 +356,483 @@ TEST(ParserTest, LexHexInt) {
}
}
+TEST(ParserTest, LexFloat) {
+ {
+ Lexer lexer("42"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42"sv, IntTok{42, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42."sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42."sv, FloatTok{{}, 42.}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.5"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.5"sv, FloatTok{{}, 42.5}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42e0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42e0"sv, FloatTok{{}, 42e0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.e1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.e1"sv, FloatTok{{}, 42.e1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42E1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42E1"sv, FloatTok{{}, 42E1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42e+2"sv, FloatTok{{}, 42e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.E-02"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.E-02"sv, FloatTok{{}, 42.E-02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0e0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0e0"sv, FloatTok{{}, 42.0e0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0E1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0E1"sv, FloatTok{{}, 42.0E1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0e+2"sv, FloatTok{{}, 42.0e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0E-2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0E-2"sv, FloatTok{{}, 42.0E-2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+42.0e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+42.0e+2"sv, FloatTok{{}, +42.0e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-42.0e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-42.0e+2"sv, FloatTok{{}, -42.0e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("4_2.0_0e+0_2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"4_2.0_0e+0_2"sv, FloatTok{{}, 42.00e+02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.0junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.Ejunk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.e-junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.e-10junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42eABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e0xABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+-42"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("-+42"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e+-0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e-+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42p0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42P0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(ParserTest, LexHexFloat) {
+ {
+ Lexer lexer("0x4B"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B"sv, IntTok{0x4B, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B."sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B."sv, FloatTok{{}, 0x4Bp0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.5"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.5"sv, FloatTok{{}, 0x4B.5p0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4Bp0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4Bp0"sv, FloatTok{{}, 0x4Bp0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.p1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.p1"sv, FloatTok{{}, 0x4B.p1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4BP1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4BP1"sv, FloatTok{{}, 0x4BP1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4Bp+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4Bp+2"sv, FloatTok{{}, 0x4Bp+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.P-02"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.P-02"sv, FloatTok{{}, 0x4B.P-02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0p0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0p0"sv, FloatTok{{}, 0x4B.0p0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0P1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0P1"sv, FloatTok{{}, 0x4B.0P1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0p+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0p+2"sv, FloatTok{{}, 0x4B.0p+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0P-2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0P-2"sv, FloatTok{{}, 0x4B.0P-2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0x4B.0p+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0x4B.0p+2"sv, FloatTok{{}, +0x4B.0p+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0x4B.0p+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0x4B.0p+2"sv, FloatTok{{}, -0x4B.0p+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4_2.0_0p+0_2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4_2.0_0p+0_2"sv, FloatTok{{}, 0x42.00p+02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4Bjunk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.0junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.Pjunk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.p-junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.p-10junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+0x"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4BpABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp0xABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+-0x4B"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("-+0x4B"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp+-0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp-+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.e+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.E-0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(ParserTest, LexInfinity) {
+ {
+ Lexer lexer("inf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"inf"sv, FloatTok{{}, INFINITY}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+inf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+inf"sv, FloatTok{{}, INFINITY}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-inf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-inf"sv, FloatTok{{}, -INFINITY}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("infjunk"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"infjunk"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("Inf"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("INF"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("infinity"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"infinity"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+}
+
+TEST(ParserTest, LexNan) {
+ {
+ Lexer lexer("nan"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan"sv, FloatTok{{}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+nan"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+nan"sv, FloatTok{{}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-nan"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-nan"sv, FloatTok{{}, -NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x01"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x01"sv, FloatTok{{1}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+nan:0x01"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+nan:0x01"sv, FloatTok{{1}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-nan:0x01"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-nan:0x01"sv, FloatTok{{1}, -NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x1234"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x1234"sv, FloatTok{{0x1234}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0xf_ffff_ffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0xf_ffff_ffff_ffff"sv,
+ FloatTok{{0xfffffffffffff}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nanjunk"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nanjunk"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0xjunk"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0xjunk"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:-0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:-0x1"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:+0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:+0x1"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x0"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x10_0000_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x10_0000_0000_0000"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x1_0000_0000_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x1_0000_0000_0000_0000"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("NAN"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("NaN"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
TEST(ParserTest, LexIdent) {
{
Lexer lexer("$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);