diff options
author | Thomas Lively <tlively@google.com> | 2024-02-06 13:35:29 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-06 13:35:29 -0800 |
commit | 8cce4d103a2ee54e7f09e81fc25b982b060d0e41 (patch) | |
tree | bfea5d74173a307b5e802cbbaeb4284f12894dc1 /test/gtest | |
parent | 41b365e99ffd68f427b561121c364028f2c2d2f9 (diff) | |
download | binaryen-8cce4d103a2ee54e7f09e81fc25b982b060d0e41.tar.gz binaryen-8cce4d103a2ee54e7f09e81fc25b982b060d0e41.tar.bz2 binaryen-8cce4d103a2ee54e7f09e81fc25b982b060d0e41.zip |
[Parser] Support string-style identifiers (#6278)

In addition to normal identifiers, support parsing identifiers of the format
`$"..."`. This format is not yet allowed by the standard, but it is a popular
proposed extension (see https://github.com/WebAssembly/spec/issues/617 and
https://github.com/WebAssembly/annotations/issues/21).

Binaryen has historically allowed a similar format and has supported arbitrary
non-standard identifier characters, so it's much easier to support this extended
syntax than to fix everything to use the restricted standard syntax.

Diffstat (limited to 'test/gtest')
-rw-r--r-- | test/gtest/wat-lexer.cpp | 27 |
1 file changed, 27 insertions, 0 deletions
diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp
index b46f9927f..b62644682 100644
--- a/test/gtest/wat-lexer.cpp
+++ b/test/gtest/wat-lexer.cpp
@@ -1377,6 +1377,33 @@ TEST(LexerTest, LexIdent) {
     Lexer lexer("$"sv);
     EXPECT_TRUE(lexer.empty());
   }
+
+  // String IDs
+  {
+    Lexer lexer("$\"\"");
+    ASSERT_FALSE(lexer.empty());
+    Token expected{"$\"\""sv, IdTok{true, std::nullopt}};
+    EXPECT_EQ(*lexer, expected);
+    EXPECT_TRUE(lexer->getID());
+    EXPECT_EQ(*lexer->getID(), ""sv);
+  }
+  {
+    Lexer lexer("$\"hello\"");
+    ASSERT_FALSE(lexer.empty());
+    Token expected{"$\"hello\""sv, IdTok{true, std::nullopt}};
+    EXPECT_EQ(*lexer, expected);
+    EXPECT_TRUE(lexer->getID());
+    EXPECT_EQ(*lexer->getID(), "hello"sv);
+  }
+  {
+    // _$_£_€_𐍈_
+    auto unicode = "$\"_\\u{24}_\\u{00a3}_\\u{20AC}_\\u{10348}_\""sv;
+    Lexer lexer(unicode);
+    ASSERT_FALSE(lexer.empty());
+    std::string escaped{"_$_\xC2\xA3_\xE2\x82\xAC_\xF0\x90\x8D\x88_"};
+    Token expected{unicode, IdTok{true, {escaped}}};
+    EXPECT_EQ(*lexer, expected);
+  }
 }
 
 TEST(LexerTest, LexString) {