summaryrefslogtreecommitdiff
path: root/test/gtest/wat-lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test/gtest/wat-lexer.cpp')
-rw-r--r--test/gtest/wat-lexer.cpp1004
1 files changed, 1004 insertions, 0 deletions
diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp
new file mode 100644
index 000000000..f77e73cdc
--- /dev/null
+++ b/test/gtest/wat-lexer.cpp
@@ -0,0 +1,1004 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cmath>
+
+#include "wat-lexer.h"
+#include "gtest/gtest.h"
+
+using namespace wasm::WATParser;
+using namespace std::string_view_literals;
+
+TEST(LexerTest, LexWhitespace) {
+ Token one{"1"sv, IntTok{1, Unsigned}};
+ Token two{"2"sv, IntTok{2, Unsigned}};
+ Token three{"3"sv, IntTok{3, Unsigned}};
+ Token four{"4"sv, IntTok{4, Unsigned}};
+ Token five{"5"sv, IntTok{5, Unsigned}};
+
+ Lexer lexer(" 1\t2\n3\r4 \n\n\t 5 "sv);
+
+ auto it = lexer.begin();
+ ASSERT_NE(it, lexer.end());
+ Token t1 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t2 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t3 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t4 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t5 = *it++;
+ EXPECT_EQ(it, lexer.end());
+
+ EXPECT_EQ(t1, one);
+ EXPECT_EQ(t2, two);
+ EXPECT_EQ(t3, three);
+ EXPECT_EQ(t4, four);
+ EXPECT_EQ(t5, five);
+
+ EXPECT_EQ(lexer.position(t1), (TextPos{1, 1}));
+ EXPECT_EQ(lexer.position(t2), (TextPos{1, 3}));
+ EXPECT_EQ(lexer.position(t3), (TextPos{2, 0}));
+ EXPECT_EQ(lexer.position(t4), (TextPos{2, 2}));
+ EXPECT_EQ(lexer.position(t5), (TextPos{4, 2}));
+}
+
+TEST(LexerTest, LexLineComment) {
+ Token one{"1"sv, IntTok{1, Unsigned}};
+ Token six{"6"sv, IntTok{6, Unsigned}};
+
+ Lexer lexer("1;; whee! 2 3\t4\r5\n6"sv);
+
+ auto it = lexer.begin();
+ Token t1 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t2 = *it++;
+ EXPECT_EQ(it, lexer.end());
+
+ EXPECT_EQ(t1, one);
+ EXPECT_EQ(t2, six);
+
+ EXPECT_EQ(lexer.position(t1), (TextPos{1, 0}));
+ EXPECT_EQ(lexer.position(t2), (TextPos{2, 0}));
+}
+
+TEST(LexerTest, LexBlockComment) {
+ Token one{"1"sv, IntTok{1, Unsigned}};
+ Token six{"6"sv, IntTok{6, Unsigned}};
+
+ Lexer lexer("1(; whoo! 2\n (; \n3\n ;) 4 (;) 5 ;) \n;)6"sv);
+
+ auto it = lexer.begin();
+ Token t1 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t2 = *it++;
+ EXPECT_EQ(it, lexer.end());
+
+ EXPECT_EQ(t1, one);
+ EXPECT_EQ(t2, six);
+
+ EXPECT_EQ(lexer.position(t1), (TextPos{1, 0}));
+ EXPECT_EQ(lexer.position(t2), (TextPos{5, 2}));
+}
+
+TEST(LexerTest, LexParens) {
+ Token left{"("sv, LParenTok{}};
+ Token right{")"sv, RParenTok{}};
+
+ Lexer lexer("(())"sv);
+
+ auto it = lexer.begin();
+ ASSERT_NE(it, lexer.end());
+ Token t1 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t2 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t3 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t4 = *it++;
+ EXPECT_EQ(it, lexer.end());
+
+ EXPECT_EQ(t1, left);
+ EXPECT_EQ(t2, left);
+ EXPECT_EQ(t3, right);
+ EXPECT_EQ(t4, right);
+}
+
+TEST(LexerTest, LexInt) {
+ {
+ Lexer lexer("0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0"sv, IntTok{0, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0"sv, IntTok{0, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0"sv, IntTok{0, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"1"sv, IntTok{1, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+1"sv, IntTok{1, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-1"sv, IntTok{-1ull, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0010"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0010"sv, IntTok{10, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0010"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0010"sv, IntTok{10, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0010"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0010"sv, IntTok{-10ull, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("9999"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"9999"sv, IntTok{9999, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+9999"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+9999"sv, IntTok{9999, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-9999"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-9999"sv, IntTok{-9999ull, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("12_34"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"12_34"sv, IntTok{1234, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("1_2_3_4"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"1_2_3_4"sv, IntTok{1234, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("_1234"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("1234_"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("12__34"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("12cd56"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("18446744073709551615"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"18446744073709551615"sv, IntTok{-1ull, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ // 64-bit unsigned overflow!
+ Lexer lexer("18446744073709551616"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"18446744073709551616"sv,
+ FloatTok{{}, 18446744073709551616.}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+9223372036854775807"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+9223372036854775807"sv, IntTok{~(1ull << 63), Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ // 64-bit signed overflow!
+ Lexer lexer("+9223372036854775808"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+9223372036854775808"sv,
+ FloatTok{{}, 9223372036854775808.}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-9223372036854775808"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-9223372036854775808"sv, IntTok{1ull << 63, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ // 64-bit signed underflow!
+ Lexer lexer("-9223372036854775809"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-9223372036854775809"sv,
+ FloatTok{{}, -9223372036854775809.}};
+ EXPECT_EQ(*lexer, expected);
+ }
+}
+
+TEST(LexerTest, LexHexInt) {
+ {
+ Lexer lexer("0x0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x0"sv, IntTok{0, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0x0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0x0"sv, IntTok{0, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0x0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0x0"sv, IntTok{0, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x1"sv, IntTok{1, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0x1"sv, IntTok{1, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0x1"sv, IntTok{-1ull, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x0010"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x0010"sv, IntTok{16, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0x0010"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0x0010"sv, IntTok{16, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0x0010"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0x0010"sv, IntTok{-16ull, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0xabcdef"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0xabcdef"sv, IntTok{0xabcdef, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0xABCDEF"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0xABCDEF"sv, IntTok{0xabcdef, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0xAbCdEf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0xAbCdEf"sv, IntTok{-0xabcdefull, Signed}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x12_34"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x12_34"sv, IntTok{0x1234, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x1_2_3_4"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x1_2_3_4"sv, IntTok{0x1234, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("_0x1234"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x_1234"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x1234_"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x12__34"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0xg"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x120x34"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(LexerTest, LexFloat) {
+ {
+ Lexer lexer("42"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42"sv, IntTok{42, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42."sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42."sv, FloatTok{{}, 42.}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.5"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.5"sv, FloatTok{{}, 42.5}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42e0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42e0"sv, FloatTok{{}, 42e0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.e1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.e1"sv, FloatTok{{}, 42.e1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42E1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42E1"sv, FloatTok{{}, 42E1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42e+2"sv, FloatTok{{}, 42e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.E-02"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.E-02"sv, FloatTok{{}, 42.E-02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0e0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0e0"sv, FloatTok{{}, 42.0e0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0E1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0E1"sv, FloatTok{{}, 42.0E1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0e+2"sv, FloatTok{{}, 42.0e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("42.0E-2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"42.0E-2"sv, FloatTok{{}, 42.0E-2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+42.0e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+42.0e+2"sv, FloatTok{{}, +42.0e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-42.0e+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-42.0e+2"sv, FloatTok{{}, -42.0e+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("4_2.0_0e+0_2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"4_2.0_0e+0_2"sv, FloatTok{{}, 42.00e+02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.0junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.Ejunk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.e-junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42.e-10junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42eABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e0xABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+-42"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("-+42"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e+-0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42e-+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42p0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("42P0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(LexerTest, LexHexFloat) {
+ {
+ Lexer lexer("0x4B"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B"sv, IntTok{0x4B, Unsigned}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B."sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B."sv, FloatTok{{}, 0x4Bp0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.5"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.5"sv, FloatTok{{}, 0x4B.5p0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4Bp0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4Bp0"sv, FloatTok{{}, 0x4Bp0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.p1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.p1"sv, FloatTok{{}, 0x4B.p1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4BP1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4BP1"sv, FloatTok{{}, 0x4BP1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4Bp+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4Bp+2"sv, FloatTok{{}, 0x4Bp+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.P-02"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.P-02"sv, FloatTok{{}, 0x4B.P-02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0p0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0p0"sv, FloatTok{{}, 0x4B.0p0}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0P1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0P1"sv, FloatTok{{}, 0x4B.0P1}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0p+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0p+2"sv, FloatTok{{}, 0x4B.0p+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4B.0P-2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4B.0P-2"sv, FloatTok{{}, 0x4B.0P-2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+0x4B.0p+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+0x4B.0p+2"sv, FloatTok{{}, +0x4B.0p+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-0x4B.0p+2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-0x4B.0p+2"sv, FloatTok{{}, -0x4B.0p+2}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4_2.0_0p+0_2"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"0x4_2.0_0p+0_2"sv, FloatTok{{}, 0x42.00p+02}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("0x4Bjunk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.0junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.Pjunk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.p-junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.p-10junk"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+0x"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4BpABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp0xABC"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("+-0x4B"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("-+0x4B"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp+-0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4Bp-+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.e+0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("0x4B.E-0"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(LexerTest, LexInfinity) {
+ {
+ Lexer lexer("inf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"inf"sv, FloatTok{{}, INFINITY}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+inf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+inf"sv, FloatTok{{}, INFINITY}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-inf"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-inf"sv, FloatTok{{}, -INFINITY}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("infjunk"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"infjunk"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("Inf"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("INF"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("infinity"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"infinity"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+}
+
+TEST(LexerTest, LexNan) {
+ {
+ Lexer lexer("nan"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan"sv, FloatTok{{}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+nan"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+nan"sv, FloatTok{{}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-nan"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-nan"sv, FloatTok{{}, -NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x01"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x01"sv, FloatTok{{1}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("+nan:0x01"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"+nan:0x01"sv, FloatTok{{1}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("-nan:0x01"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"-nan:0x01"sv, FloatTok{{1}, -NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x1234"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x1234"sv, FloatTok{{0x1234}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0xf_ffff_ffff_ffff"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0xf_ffff_ffff_ffff"sv,
+ FloatTok{{0xfffffffffffff}, NAN}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nanjunk"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nanjunk", KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0xjunk"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0xjunk"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:-0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:-0x1"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:+0x1"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:+0x1"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x0"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x0"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x10_0000_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x10_0000_0000_0000"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("nan:0x1_0000_0000_0000_0000"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"nan:0x1_0000_0000_0000_0000"sv, KeywordTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("NAN"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("NaN"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(LexerTest, LexIdent) {
+ {
+ Lexer lexer("$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{"$09azAZ!#$%&'*+-./:<=>?@\\^_`|~"sv, IdTok{}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("$[]{}"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("$abc[]"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("$"sv);
+ EXPECT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(LexerTest, LexString) {
+ {
+ auto pangram = "\"The quick brown fox jumps over the lazy dog\""sv;
+ Lexer lexer(pangram);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{pangram, StringTok{{}}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ auto chars = "\"`~!@#$%^&*()_-+0123456789|,.<>/?;:'\""sv;
+ Lexer lexer(chars);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{chars, StringTok{{}}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ auto escapes = "\"_\\t_\\n_\\r_\\\\_\\\"_\\'_\""sv;
+ Lexer lexer(escapes);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{escapes, StringTok{{"_\t_\n_\r_\\_\"_'_"}}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ auto escapes = "\"_\\00_\\07_\\20_\\5A_\\7F_\\ff_\\ffff_\""sv;
+ Lexer lexer(escapes);
+ ASSERT_NE(lexer, lexer.end());
+ std::string escaped{"_\0_\7_ _Z_\x7f_\xff_\xff"
+ "ff_"sv};
+ Token expected{escapes, StringTok{{escaped}}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ // _$_£_€_𐍈_
+ auto unicode = "\"_\\u{24}_\\u{00a3}_\\u{20AC}_\\u{10348}_\""sv;
+ Lexer lexer(unicode);
+ ASSERT_NE(lexer, lexer.end());
+ std::string escaped{"_$_\xC2\xA3_\xE2\x82\xAC_\xF0\x90\x8D\x88_"};
+ Token expected{unicode, StringTok{{escaped}}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ // _$_£_€_𐍈_
+ auto unicode = "\"_$_\xC2\xA3_\xE2\x82\xAC_\xF0\x90\x8D\x88_\""sv;
+ Lexer lexer(unicode);
+ ASSERT_NE(lexer, lexer.end());
+ Token expected{unicode, StringTok{{}}};
+ EXPECT_EQ(*lexer, expected);
+ }
+ {
+ Lexer lexer("\"unterminated"sv);
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"unescaped nul\0\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"unescaped U+19\x19\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"unescaped U+7f\x7f\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"\\ stray backslash\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"short \\f hex escape\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"bad hex \\gg\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"empty unicode \\u{}\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"not unicode \\u{abcdefg}\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"extra chars \\u{123(}\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"unpaired surrogate unicode crimes \\u{d800}\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"more surrogate unicode crimes \\u{dfff}\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+ {
+ Lexer lexer("\"too big \\u{110000}\"");
+ ASSERT_EQ(lexer, lexer.end());
+ }
+}
+
+TEST(LexerTest, LexKeywords) {
+ Token module{"module"sv, KeywordTok{}};
+ Token type{"type"sv, KeywordTok{}};
+ Token func{"func"sv, KeywordTok{}};
+ Token import{"import"sv, KeywordTok{}};
+ Token reserved{"rEsErVeD"sv, KeywordTok{}};
+
+ Lexer lexer("module type func import rEsErVeD");
+
+ auto it = lexer.begin();
+ ASSERT_NE(it, lexer.end());
+ Token t1 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t2 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t3 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t4 = *it++;
+ ASSERT_NE(it, lexer.end());
+ Token t5 = *it++;
+ EXPECT_EQ(it, lexer.end());
+
+ EXPECT_EQ(t1, module);
+ EXPECT_EQ(t2, type);
+ EXPECT_EQ(t3, func);
+ EXPECT_EQ(t4, import);
+ EXPECT_EQ(t5, reserved);
+}