-rw-r--r-- | src/wasm/wat-parser-internal.h | 256 |
-rw-r--r-- | test/gtest/wat-parser.cpp      | 494 |
2 files changed, 721 insertions, 29 deletions
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h
index c398acabc..448431d8b 100644
--- a/src/wasm/wat-parser-internal.h
+++ b/src/wasm/wat-parser-internal.h
@@ -26,6 +26,7 @@
 #include <cassert>
 #include <cctype>
+#include <cmath>
 #include <iostream>
 #include <optional>
 #include <sstream>
@@ -107,6 +108,28 @@ public:
 enum Signedness { Unsigned, Signed };
 
+enum OverflowBehavior { DisallowOverflow, IgnoreOverflow };
+
+std::optional<int> getDigit(char c) {
+  if ('0' <= c && c <= '9') {
+    return {c - '0'};
+  }
+  return std::nullopt;
+}
+
+std::optional<int> getHexDigit(char c) {
+  if ('0' <= c && c <= '9') {
+    return {c - '0'};
+  }
+  if ('A' <= c && c <= 'F') {
+    return {10 + c - 'A'};
+  }
+  if ('a' <= c && c <= 'f') {
+    return {10 + c - 'a'};
+  }
+  return std::nullopt;
+}
+
 // The result of lexing an integer token fragment.
 struct LexIntResult : LexResult {
   uint64_t n;
@@ -124,29 +147,17 @@ private:
   bool negative = false;
   bool overflow = false;
 
-  std::optional<int> getDigit(char c) {
-    if ('0' <= c && c <= '9') {
-      return {c - '0'};
-    }
-    return std::nullopt;
-  }
+public:
+  explicit LexIntCtx(std::string_view in) : LexCtx(in) {}
 
-  std::optional<int> getHexDigit(char c) {
-    if ('0' <= c && c <= '9') {
-      return {c - '0'};
-    }
-    if ('A' <= c && c <= 'F') {
-      return {10 + c - 'A'};
-    }
-    if ('a' <= c && c <= 'f') {
-      return {10 + c - 'a'};
+  // Lex only the underlying span, ignoring the overflow and value.
+  std::optional<LexIntResult> lexedRaw() {
+    if (auto basic = LexCtx::lexed()) {
+      return LexIntResult{*basic, 0, Unsigned};
     }
-    return std::nullopt;
+    return {};
   }
 
-public:
-  explicit LexIntCtx(std::string_view in) : LexCtx(in) {}
-
   std::optional<LexIntResult> lexed() {
     // Check most significant bit for overflow of signed numbers.
     if (overflow) {
@@ -217,6 +228,54 @@ public:
   }
 };
 
+struct LexFloatResult : LexResult {
+  // The payload if we lexed a nan with payload. We cannot store the payload
+  // directly in `d` because we do not know at this point whether we are parsing
+  // an f32 or f64 and therefore we do not know what the allowable payloads are.
+  std::optional<uint64_t> nanPayload;
+  double d;
+};
+
+struct LexFloatCtx : LexCtx {
+  std::optional<uint64_t> nanPayload;
+
+  LexFloatCtx(std::string_view in) : LexCtx(in) {}
+
+  std::optional<LexFloatResult> lexed() {
+    assert(!std::signbit(NAN) && "Expected NAN to be positive");
+    auto basic = LexCtx::lexed();
+    if (!basic) {
+      return {};
+    }
+    if (nanPayload) {
+      double nan = basic->span[0] == '-' ? -NAN : NAN;
+      return LexFloatResult{*basic, nanPayload, nan};
+    }
+    // strtod does not return -NAN for "-nan" on all platforms.
+    if (basic->span == "-nan"sv) {
+      return LexFloatResult{*basic, nanPayload, -NAN};
+    }
+    // Do not try to implement fully general and precise float parsing
+    // ourselves. Instead, call out to std::strtod to do our parsing. This means
+    // we need to strip any underscores since `std::strtod` does not understand
+    // them.
+    std::stringstream ss;
+    for (const char *curr = basic->span.data(),
+                    *end = curr + basic->span.size();
+         curr != end;
+         ++curr) {
+      if (*curr != '_') {
+        ss << *curr;
+      }
+    }
+    std::string str = ss.str();
+    char* last;
+    double d = std::strtod(str.data(), &last);
+    assert(last == str.data() + str.size() && "could not parse float");
+    return LexFloatResult{*basic, {}, d};
+  }
+};
+
 struct LexStrResult : LexResult {
   // Allocate a string only if there are escape sequences, otherwise just use
   // the original string_view.
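The `nanPayload` field added above is kept separate from `d` because, as the comment notes, the set of legal payloads is only known once the token is interpreted as an f32 or an f64. As a purely illustrative aside (not something this patch defines), the sketch below shows one way a lexed payload and sign could later be folded into an f64 bit pattern; the helper name `makeF64Nan` is hypothetical.

// Illustrative sketch only (not part of the patch): build an f64 NaN that
// carries a given payload. Assumes 1 <= payload < 2^52, the same range the
// `float_` lexer below accepts for `nan:0x...` tokens.
#include <cstdint>
#include <cstring>

double makeF64Nan(uint64_t payload, bool negative) {
  uint64_t bits = (negative ? 1ull << 63 : 0) // sign bit
                  | (0x7ffull << 52)          // all-ones exponent => NaN or inf
                  | payload;                  // nonzero mantissa => NaN, not inf
  double d;
  std::memcpy(&d, &bits, sizeof(d)); // reinterpret the bit pattern safely
  return d;
}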
@@ -378,8 +437,12 @@ bool LexCtx::canFinish() const {
 // num ::= d:digit => d
 //       | n:num '_'? d:digit => 10*n + d
 // digit ::= '0' => 0 | ... | '9' => 9
-std::optional<LexIntResult> num(std::string_view in) {
+std::optional<LexIntResult> num(std::string_view in,
+                                OverflowBehavior overflow = DisallowOverflow) {
   LexIntCtx ctx(in);
+  if (ctx.empty()) {
+    return {};
+  }
   if (!ctx.takeDigit()) {
     return {};
   }
@@ -387,8 +450,9 @@ std::optional<LexIntResult> num(std::string_view in) {
     bool under = ctx.takePrefix("_"sv);
     if (!ctx.takeDigit()) {
       if (!under) {
-        return ctx.lexed();
+        return overflow == DisallowOverflow ? ctx.lexed() : ctx.lexedRaw();
       }
+      // TODO: Add error production for trailing underscore.
       return {};
     }
   }
@@ -399,7 +463,8 @@
 // hexdigit ::= d:digit => d
 //            | 'A' => 10 | ... | 'F' => 15
 //            | 'a' => 10 | ... | 'f' => 15
-std::optional<LexIntResult> hexnum(std::string_view in) {
+std::optional<LexIntResult>
+hexnum(std::string_view in, OverflowBehavior overflow = DisallowOverflow) {
   LexIntCtx ctx(in);
   if (!ctx.takeHexdigit()) {
     return {};
   }
@@ -408,8 +473,9 @@
     bool under = ctx.takePrefix("_"sv);
     if (!ctx.takeHexdigit()) {
       if (!under) {
-        return ctx.lexed();
+        return overflow == DisallowOverflow ? ctx.lexed() : ctx.lexedRaw();
       }
+      // TODO: Add error production for trailing underscore.
       return {};
     }
   }
@@ -445,6 +511,114 @@ std::optional<LexIntResult> integer(std::string_view in) {
   return {};
 }
 
+// float ::= p:num '.'? => p
+//         | p:num '.' q:frac => p + q
+//         | p:num '.'? ('E'|'e') s:sign e:num => p * 10^([s]e)
+//         | p:num '.' q:frac ('E'|'e') s:sign e:num => (p + q) * 10^([s]e)
+// frac ::= d:digit => d/10
+//        | d:digit '_'? p:frac => (d + p/10) / 10
+std::optional<LexResult> decfloat(std::string_view in) {
+  LexCtx ctx(in);
+  if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+    ctx.take(*lexed);
+  } else {
+    return {};
+  }
+  // Optional '.' followed by optional frac
+  if (ctx.takePrefix("."sv)) {
+    if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+      ctx.take(*lexed);
+    }
+  }
+  if (ctx.takePrefix("E"sv) || ctx.takePrefix("e"sv)) {
+    // Optional sign
+    ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
+    if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+      ctx.take(*lexed);
+    } else {
+      // TODO: Add error production for missing exponent.
+      return {};
+    }
+  }
+  return ctx.lexed();
+}
+
+// hexfloat ::= '0x' p:hexnum '.'? => p
+//            | '0x' p:hexnum '.' q:hexfrac => p + q
+//            | '0x' p:hexnum '.'? ('P'|'p') s:sign e:num => p * 2^([s]e)
+//            | '0x' p:hexnum '.' q:hexfrac ('P'|'p') s:sign e:num
+//              => (p + q) * 2^([s]e)
+// hexfrac ::= h:hexdigit => h/16
+//           | h:hexdigit '_'? p:hexfrac => (h + p/16) / 16
+std::optional<LexResult> hexfloat(std::string_view in) {
+  LexCtx ctx(in);
+  if (!ctx.takePrefix("0x"sv)) {
+    return {};
+  }
+  if (auto lexed = hexnum(ctx.next(), IgnoreOverflow)) {
+    ctx.take(*lexed);
+  } else {
+    return {};
+  }
+  // Optional '.' followed by optional hexfrac
+  if (ctx.takePrefix("."sv)) {
+    if (auto lexed = hexnum(ctx.next(), IgnoreOverflow)) {
+      ctx.take(*lexed);
+    }
+  }
+  if (ctx.takePrefix("P"sv) || ctx.takePrefix("p"sv)) {
+    // Optional sign
+    ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
+    if (auto lexed = num(ctx.next(), IgnoreOverflow)) {
+      ctx.take(*lexed);
+    } else {
+      // TODO: Add error production for missing exponent.
+      return {};
+    }
+  }
+  return ctx.lexed();
+}
+
+// fN ::= s:sign z:fNmag => [s]z
+// fNmag ::= z:float => float_N(z) (if float_N(z) != +/-infinity)
+//         | z:hexfloat => float_N(z) (if float_N(z) != +/-infinity)
+//         | 'inf' => infinity
+//         | 'nan' => nan(2^(signif(N)-1))
+//         | 'nan:0x' n:hexnum => nan(n) (if 1 <= n < 2^signif(N))
+std::optional<LexFloatResult> float_(std::string_view in) {
+  LexFloatCtx ctx(in);
+  // Optional sign
+  ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
+  if (auto lexed = hexfloat(ctx.next())) {
+    ctx.take(*lexed);
+  } else if (auto lexed = decfloat(ctx.next())) {
+    ctx.take(*lexed);
+  } else if (ctx.takePrefix("inf"sv)) {
+    // nop
+  } else if (ctx.takePrefix("nan"sv)) {
+    if (ctx.takePrefix(":0x"sv)) {
+      if (auto lexed = hexnum(ctx.next())) {
+        ctx.take(*lexed);
+        if (1 <= lexed->n && lexed->n < (1ull << 52)) {
+          ctx.nanPayload = lexed->n;
+        } else {
+          // TODO: Add error production for invalid NaN payload.
+          return {};
+        }
+      } else {
+        // TODO: Add error production for malformed NaN payload.
+        return {};
+      }
+    }
+  } else {
+    return {};
+  }
+  if (ctx.canFinish()) {
+    return ctx.lexed();
+  }
+  return {};
+}
+
 // idchar ::= '0' | ... | '9'
 //          | 'A' | ... | 'Z'
 //          | 'a' | ... | 'z'
@@ -642,6 +816,31 @@ struct IntTok {
   }
 };
 
+struct FloatTok {
+  // The payload if we lexed a nan with payload. We cannot store the payload
+  // directly in `d` because we do not know at this point whether we are parsing
+  // an f32 or f64 and therefore we do not know what the allowable payloads are.
+  std::optional<uint64_t> nanPayload;
+  double d;
+
+  friend std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
+    if (std::isnan(tok.d)) {
+      os << (std::signbit(tok.d) ? "-" : "+");
+      if (tok.nanPayload) {
+        return os << "nan:0x" << std::hex << *tok.nanPayload << std::dec;
+      }
+      return os << "nan";
+    }
+    return os << tok.d;
+  }
+
+  friend bool operator==(const FloatTok& t1, const FloatTok& t2) {
+    return std::signbit(t1.d) == std::signbit(t2.d) &&
+           (t1.d == t2.d || (std::isnan(t1.d) && std::isnan(t2.d) &&
+                             t1.nanPayload == t2.nanPayload));
+  }
+};
+
 struct IdTok {
   friend std::ostream& operator<<(std::ostream& os, const IdTok&) {
     return os << "id";
   }
@@ -676,8 +875,13 @@ struct KeywordTok {
 };
 
 struct Token {
-  using Data =
-    std::variant<LParenTok, RParenTok, IntTok, IdTok, StringTok, KeywordTok>;
+  using Data = std::variant<LParenTok,
+                            RParenTok,
+                            IntTok,
+                            FloatTok,
+                            IdTok,
+                            StringTok,
+                            KeywordTok>;
   std::string_view span;
   Data data;
 
@@ -765,6 +969,8 @@ struct Lexer {
       tok = Token{t->span, IdTok{}};
     } else if (auto t = integer(next())) {
      tok = Token{t->span, IntTok{t->n, t->signedness}};
+    } else if (auto t = float_(next())) {
+      tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
     } else if (auto t = str(next())) {
      tok = Token{t->span, StringTok{t->str}};
     } else if (auto t = keyword(next())) {
diff --git a/test/gtest/wat-parser.cpp b/test/gtest/wat-parser.cpp
index a48efd894..c0d2bb113 100644
--- a/test/gtest/wat-parser.cpp
+++ b/test/gtest/wat-parser.cpp
@@ -209,8 +209,11 @@ TEST(ParserTest, LexInt) {
   }
   {
     // 64-bit unsigned overflow!
-    Lexer lexer("18446744073709551616");
-    EXPECT_EQ(lexer, lexer.end());
+    Lexer lexer("18446744073709551616"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"18446744073709551616"sv,
+                   FloatTok{{}, 18446744073709551616.}};
+    EXPECT_EQ(*lexer, expected);
   }
   {
     Lexer lexer("+9223372036854775807"sv);
@@ -221,7 +224,10 @@
   {
     // 64-bit signed overflow!
Lexer lexer("+9223372036854775808"sv); - EXPECT_EQ(lexer, lexer.end()); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+9223372036854775808"sv, + FloatTok{{}, 9223372036854775808.}}; + EXPECT_EQ(*lexer, expected); } { Lexer lexer("-9223372036854775808"sv); @@ -232,7 +238,10 @@ TEST(ParserTest, LexInt) { { // 64-bit signed underflow! Lexer lexer("-9223372036854775809"sv); - EXPECT_EQ(lexer, lexer.end()); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-9223372036854775809"sv, + FloatTok{{}, -9223372036854775809.}}; + EXPECT_EQ(*lexer, expected); } } @@ -347,6 +356,483 @@ TEST(ParserTest, LexHexInt) { } } +TEST(ParserTest, LexFloat) { + { + Lexer lexer("42"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42"sv, IntTok{42, Unsigned}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42."sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42."sv, FloatTok{{}, 42.}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.5"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.5"sv, FloatTok{{}, 42.5}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42e0"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42e0"sv, FloatTok{{}, 42e0}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.e1"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.e1"sv, FloatTok{{}, 42.e1}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42E1"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42E1"sv, FloatTok{{}, 42E1}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42e+2"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42e+2"sv, FloatTok{{}, 42e+2}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.E-02"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.E-02"sv, FloatTok{{}, 42.E-02}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.0e0"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.0e0"sv, FloatTok{{}, 42.0e0}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.0E1"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.0E1"sv, FloatTok{{}, 42.0E1}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.0e+2"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.0e+2"sv, FloatTok{{}, 42.0e+2}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("42.0E-2"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"42.0E-2"sv, FloatTok{{}, 42.0E-2}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("+42.0e+2"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"+42.0e+2"sv, FloatTok{{}, +42.0e+2}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("-42.0e+2"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"-42.0e+2"sv, FloatTok{{}, -42.0e+2}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("4_2.0_0e+0_2"sv); + ASSERT_NE(lexer, lexer.end()); + Token expected{"4_2.0_0e+0_2"sv, FloatTok{{}, 42.00e+02}}; + EXPECT_EQ(*lexer, expected); + } + { + Lexer lexer("+junk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42junk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42.junk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42.0junk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42.Ejunk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42.e-junk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42.e-10junk"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("+"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42e"sv); + EXPECT_EQ(lexer, lexer.end()); + } + { + Lexer lexer("42eABC"sv); 
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("42e0xABC"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("+-42"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("-+42"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("42e+-0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("42e-+0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("42p0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("42P0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+}
+
+TEST(ParserTest, LexHexFloat) {
+  {
+    Lexer lexer("0x4B"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B"sv, IntTok{0x4B, Unsigned}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B."sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B."sv, FloatTok{{}, 0x4Bp0}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.5"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.5"sv, FloatTok{{}, 0x4B.5p0}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4Bp0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4Bp0"sv, FloatTok{{}, 0x4Bp0}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.p1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.p1"sv, FloatTok{{}, 0x4B.p1}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4BP1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4BP1"sv, FloatTok{{}, 0x4BP1}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4Bp+2"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4Bp+2"sv, FloatTok{{}, 0x4Bp+2}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.P-02"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.P-02"sv, FloatTok{{}, 0x4B.P-02}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.0p0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.0p0"sv, FloatTok{{}, 0x4B.0p0}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.0P1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.0P1"sv, FloatTok{{}, 0x4B.0P1}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.0p+2"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.0p+2"sv, FloatTok{{}, 0x4B.0p+2}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4B.0P-2"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4B.0P-2"sv, FloatTok{{}, 0x4B.0P-2}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+0x4B.0p+2"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+0x4B.0p+2"sv, FloatTok{{}, +0x4B.0p+2}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-0x4B.0p+2"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-0x4B.0p+2"sv, FloatTok{{}, -0x4B.0p+2}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4_2.0_0p+0_2"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"0x4_2.0_0p+0_2"sv, FloatTok{{}, 0x42.00p+02}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("0x4Bjunk"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.junk"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.0junk"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.Pjunk"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.p-junk"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.p-10junk"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("+0x"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4Bp"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4BpABC"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4Bp0xABC"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x+0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("+-0x4B"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("-+0x4B"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4Bp+-0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4Bp-+0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.e+0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("0x4B.E-0"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+}
+
+TEST(ParserTest, LexInfinity) {
+  {
+    Lexer lexer("inf"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"inf"sv, FloatTok{{}, INFINITY}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+inf"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+inf"sv, FloatTok{{}, INFINITY}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-inf"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-inf"sv, FloatTok{{}, -INFINITY}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("infjunk"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"infjunk"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("Inf"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("INF"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("infinity"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"infinity"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+}
+
+TEST(ParserTest, LexNan) {
+  {
+    Lexer lexer("nan"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan"sv, FloatTok{{}, NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+nan"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+nan"sv, FloatTok{{}, NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-nan"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-nan"sv, FloatTok{{}, -NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0x01"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0x01"sv, FloatTok{{1}, NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("+nan:0x01"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"+nan:0x01"sv, FloatTok{{1}, NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("-nan:0x01"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"-nan:0x01"sv, FloatTok{{1}, -NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0x1234"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0x1234"sv, FloatTok{{0x1234}, NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0xf_ffff_ffff_ffff"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0xf_ffff_ffff_ffff"sv,
+                   FloatTok{{0xfffffffffffff}, NAN}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nanjunk"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nanjunk"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0x"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0x"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0xjunk"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0xjunk"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:-0x1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:-0x1"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:+0x1"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:+0x1"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0x0"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0x0"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0x10_0000_0000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0x10_0000_0000_0000"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("nan:0x1_0000_0000_0000_0000"sv);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{"nan:0x1_0000_0000_0000_0000"sv, KeywordTok{}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    Lexer lexer("NAN"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+  {
+    Lexer lexer("NaN"sv);
+    EXPECT_EQ(lexer, lexer.end());
+  }
+}
+
 TEST(ParserTest, LexIdent) {
   {
     Lexer lexer("$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);
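For readers following the new tests, the sketch below shows how calling code might inspect a lexed float token using only the operations the tests themselves exercise (construction from a string_view, comparison against `end()`, dereference, and the public `span`/`data` members of `Token`). The `dumpFloat` helper is hypothetical and not part of this patch.

// Sketch only: assumes the Lexer, Token, and FloatTok declarations added by
// this patch are visible, along with <iostream>, <string_view>, and <variant>.
using namespace std::string_view_literals;

void dumpFloat(std::string_view in) {
  Lexer lexer(in);
  if (lexer == lexer.end()) {
    std::cout << "no token lexed\n";
    return;
  }
  Token tok = *lexer;
  if (auto* f = std::get_if<FloatTok>(&tok.data)) {
    // FloatTok's operator<< prints either the double or a nan[:0x...] form.
    std::cout << tok.span << " => " << *f << "\n";
  } else {
    std::cout << tok.span << " is not a float token\n";
  }
}

// dumpFloat("-0x4B.0p+2"sv); // a hex float
// dumpFloat("nan:0x1234"sv); // a NaN whose payload is kept in nanPayload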