diff options
-rw-r--r-- | src/wasm/wat-parser-internal.h | 37 | ||||
-rw-r--r-- | test/gtest/wat-parser.cpp | 29 |
2 files changed, 65 insertions, 1 deletions
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h
index 787913910..c398acabc 100644
--- a/src/wasm/wat-parser-internal.h
+++ b/src/wasm/wat-parser-internal.h
@@ -585,6 +585,30 @@ std::optional<LexStrResult> str(std::string_view in) {
   return ctx.lexed();
 }
 
+// keyword ::= ( 'a' | ... | 'z' ) idchar* (if literal terminal in grammar)
+// reserved ::= idchar+
+//
+// Lexes one lowercase ASCII letter followed by any number of idchars; yields
+// an empty optional if `in` is empty or starts with anything else. This also
+// matches reserved tokens shaped like keywords, so the valid keywords need not
+// be enumerated here; invalid ones still error at a higher level of the parser.
+std::optional<LexResult> keyword(std::string_view in) {
+  LexCtx ctx(in);
+  if (ctx.empty()) {
+    return {};
+  }
+  uint8_t start = ctx.peek();
+  if ('a' <= start && start <= 'z') {
+    ctx.take(1);
+  } else {
+    return {};
+  }
+  while (auto lexed = idchar(ctx.next())) {
+    ctx.take(*lexed);
+  }
+  return ctx.lexed();
+}
+
 // ======
 // Tokens
 // ======
@@ -643,8 +667,17 @@ struct StringTok {
   }
 };
 
+struct KeywordTok { // No payload: the lexed text is kept in Token::span.
+  friend std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
+    return os << "keyword";
+  }
+  // KeywordTok has no state, so any two instances compare equal.
+  friend bool operator==(const KeywordTok&, const KeywordTok&) { return true; }
+};
+
 struct Token {
-  using Data = std::variant<LParenTok, RParenTok, IntTok, IdTok, StringTok>;
+  using Data =
+    std::variant<LParenTok, RParenTok, IntTok, IdTok, StringTok, KeywordTok>;
   std::string_view span;
   Data data;
 
@@ -734,6 +767,8 @@ struct Lexer {
       tok = Token{t->span, IntTok{t->n, t->signedness}};
     } else if (auto t = str(next())) {
       tok = Token{t->span, StringTok{t->str}};
+    } else if (auto t = keyword(next())) {
+      tok = Token{t->span, KeywordTok{}};
     } else {
       // TODO: Do something about lexing errors.
curr = std::nullopt;
diff --git a/test/gtest/wat-parser.cpp b/test/gtest/wat-parser.cpp
index be6d76eac..a48efd894 100644
--- a/test/gtest/wat-parser.cpp
+++ b/test/gtest/wat-parser.cpp
@@ -469,3 +469,32 @@ TEST(ParserTest, LexString) {
     ASSERT_EQ(lexer, lexer.end());
   }
 }
+
+TEST(ParserTest, LexKeywords) {
+  Token module{"module"sv, KeywordTok{}};
+  Token type{"type"sv, KeywordTok{}};
+  Token func{"func"sv, KeywordTok{}};
+  Token import{"import"sv, KeywordTok{}};
+  Token reserved{"rEsErVeD"sv, KeywordTok{}}; // mixed case after first letter
+
+  Lexer lexer("module type func import rEsErVeD");
+  // Pull five tokens, asserting the lexer is not exhausted before each one.
+  auto it = lexer.begin();
+  ASSERT_NE(it, lexer.end());
+  Token t1 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t2 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t3 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t4 = *it++;
+  ASSERT_NE(it, lexer.end());
+  Token t5 = *it++;
+  EXPECT_EQ(it, lexer.end());
+  // Compare the lexed tokens against the expected ones, in input order.
+  EXPECT_EQ(t1, module);
+  EXPECT_EQ(t2, type);
+  EXPECT_EQ(t3, func);
+  EXPECT_EQ(t4, import);
+  EXPECT_EQ(t5, reserved);
+}
|