summary | refs | log | tree | commit | diff
path: root/test/gtest/wat-parser.cpp
diff options
context:
space:
mode:
author Thomas Lively <7121787+tlively@users.noreply.github.com> 2022-05-25 12:04:04 -0700
committer GitHub <noreply@github.com> 2022-05-25 12:04:04 -0700
commit 65301697b3603ec8d21615e50f0cf5b9a2a2b02f (patch)
tree bdb90c1e60f900a1a725dae470271730ba0c931d /test/gtest/wat-parser.cpp
parent a5a889c4cb2c7b5b84b7dd18292b55b1ca4ca47e (diff)
download binaryen-65301697b3603ec8d21615e50f0cf5b9a2a2b02f.tar.gz
binaryen-65301697b3603ec8d21615e50f0cf5b9a2a2b02f.tar.bz2
binaryen-65301697b3603ec8d21615e50f0cf5b9a2a2b02f.zip
[Parser] Lex strings (#4687)
Diffstat (limited to 'test/gtest/wat-parser.cpp')
-rw-r--r-- test/gtest/wat-parser.cpp 102
1 files changed, 102 insertions, 0 deletions
diff --git a/test/gtest/wat-parser.cpp b/test/gtest/wat-parser.cpp
index 2ddb781a2..be6d76eac 100644
--- a/test/gtest/wat-parser.cpp
+++ b/test/gtest/wat-parser.cpp
@@ -367,3 +367,105 @@ TEST(ParserTest, LexIdent) {
EXPECT_EQ(lexer, lexer.end());
}
}
+
+// Checks how the lexer handles string literals.
+//
+// The first group of cases feeds a well-formed quoted string and expects a
+// single token whose span is the whole input. When the input contains escape
+// sequences, the expected StringTok carries the decoded bytes; otherwise it is
+// built as `StringTok{{}}` (presumably "no separate decoded value needed" --
+// confirm against the StringTok definition).
+//
+// The second group feeds malformed strings and asserts that the lexer compares
+// equal to `lexer.end()` immediately, i.e. that no token is produced.
+TEST(ParserTest, LexString) {
+  {
+    // Plain ASCII letters and spaces, no escapes.
+    auto pangram = "\"The quick brown fox jumps over the lazy dog\""sv;
+    Lexer lexer(pangram);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{pangram, StringTok{{}}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // Punctuation and digits that need no escaping inside a string.
+    auto chars = "\"`~!@#$%^&*()_-+0123456789|,.<>/?;:'\""sv;
+    Lexer lexer(chars);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{chars, StringTok{{}}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // Single-character escapes: \t \n \r \\ \" \'
+    auto escapes = "\"_\\t_\\n_\\r_\\\\_\\\"_\\'_\""sv;
+    Lexer lexer(escapes);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{escapes, StringTok{{"_\t_\n_\r_\\_\"_'_"}}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // Two-digit hex escapes. The trailing "\\ffff" is expected to decode as
+    // the byte 0xff followed by the literal characters "ff".
+    auto escapes = "\"_\\00_\\07_\\20_\\5A_\\7F_\\ff_\\ffff_\""sv;
+    Lexer lexer(escapes);
+    ASSERT_NE(lexer, lexer.end());
+    // `sv` keeps the embedded NUL in the expected value, and the literal is
+    // split so the C++ compiler does not read "\xff" "ff" as one long hex
+    // escape.
+    std::string escaped{"_\0_\7_ _Z_\x7f_\xff_\xff"
+                        "ff_"sv};
+    Token expected{escapes, StringTok{{escaped}}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // \u{...} escapes, expected to decode to the UTF-8 encoding of:
+    // _$_£_€_𐍈_
+    auto unicode = "\"_\\u{24}_\\u{00a3}_\\u{20AC}_\\u{10348}_\""sv;
+    Lexer lexer(unicode);
+    ASSERT_NE(lexer, lexer.end());
+    std::string escaped{"_$_\xC2\xA3_\xE2\x82\xAC_\xF0\x90\x8D\x88_"};
+    Token expected{unicode, StringTok{{escaped}}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // The same characters supplied directly as raw UTF-8 bytes, no escapes:
+    // _$_£_€_𐍈_
+    auto unicode = "\"_$_\xC2\xA3_\xE2\x82\xAC_\xF0\x90\x8D\x88_\""sv;
+    Lexer lexer(unicode);
+    ASSERT_NE(lexer, lexer.end());
+    Token expected{unicode, StringTok{{}}};
+    EXPECT_EQ(*lexer, expected);
+  }
+  {
+    // Missing closing quote.
+    Lexer lexer("\"unterminated"sv);
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Raw NUL byte inside the string. The `sv` suffix is required here: a
+    // string_view built from a plain `const char*` stops at the first NUL, so
+    // without the suffix the lexer would only ever see `"unescaped nul` and
+    // this case would silently repeat the "unterminated" check above.
+    Lexer lexer("\"unescaped nul\0\""sv);
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Raw control character U+19.
+    Lexer lexer("\"unescaped U+19\x19\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Raw DEL (U+7F).
+    Lexer lexer("\"unescaped U+7f\x7f\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Backslash followed by a character that starts no known escape.
+    Lexer lexer("\"\\ stray backslash\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Hex escape with only one hex digit.
+    Lexer lexer("\"short \\f hex escape\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Hex escape with non-hex digits.
+    Lexer lexer("\"bad hex \\gg\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // \u{} with no digits.
+    Lexer lexer("\"empty unicode \\u{}\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // \u{...} containing a non-hex digit ('g').
+    Lexer lexer("\"not unicode \\u{abcdefg}\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // \u{...} with a stray character before the closing brace.
+    Lexer lexer("\"extra chars \\u{123(}\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Surrogate code point at the low end of the range (U+D800).
+    Lexer lexer("\"unpaired surrogate unicode crimes \\u{d800}\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Surrogate code point at the high end of the range (U+DFFF).
+    Lexer lexer("\"more surrogate unicode crimes \\u{dfff}\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+  {
+    // Code point one past the Unicode maximum (U+10FFFF).
+    Lexer lexer("\"too big \\u{110000}\"");
+    ASSERT_EQ(lexer, lexer.end());
+  }
+}