2 files changed, 65 insertions, 1 deletions
diff --git a/src/wasm/wat-parser-internal.h b/src/wasm/wat-parser-internal.h
index 787913910..c398acabc 100644
--- a/src/wasm/wat-parser-internal.h
+++ b/src/wasm/wat-parser-internal.h
@@ -585,6 +585,30 @@ std::optional<LexStrResult> str(std::string_view in) {
   return ctx.lexed();
 }
 
+// keyword ::= ( 'a' | ... | 'z' ) idchar* (if literal terminal in grammar)
+// reserved ::= idchar+
+//
+// The "keyword" token we lex here covers both real keywords and any reserved
+// tokens that match the keyword format. This saves us from having to enumerate
+// all the valid keywords here. Such invalid keywords will still produce
+// errors, just at a higher level of the parser.
+std::optional<LexResult> keyword(std::string_view in) {
+  LexCtx ctx(in);
+  if (ctx.empty()) {
+    return std::nullopt;
+  }
+  // Only a lowercase ASCII letter may open a keyword; reject anything else.
+  const uint8_t first = ctx.peek();
+  if (first < 'a' || 'z' < first) {
+    return std::nullopt;
+  }
+  ctx.take(1);
+  while (auto len = idchar(ctx.next())) {
+    ctx.take(*len);
+  }
+  return ctx.lexed();
+}
+
 // ======
 // Tokens
 // ======
@@ -643,8 +667,17 @@ struct StringTok {
   }
 };
 
+struct KeywordTok {
+  // The struct has no members, so any two keyword tokens compare equal.
+  friend bool operator==(const KeywordTok&, const KeywordTok&) { return true; }
+  friend std::ostream& operator<<(std::ostream& out, const KeywordTok&) {
+    return out << "keyword";
+  }
+};
+
 struct Token {
-  using Data = std::variant<LParenTok, RParenTok, IntTok, IdTok, StringTok>;
+  using Data =
+    std::variant<LParenTok, RParenTok, IntTok, IdTok, StringTok, KeywordTok>;
 
   std::string_view span;
   Data data;
@@ -734,6 +767,8 @@ struct Lexer {
       tok = Token{t->span, IntTok{t->n, t->signedness}};
     } else if (auto t = str(next())) {
       tok = Token{t->span, StringTok{t->str}};
+    } else if (auto t = keyword(next())) {
+      tok = Token{t->span, KeywordTok{}};
     } else {
       // TODO: Do something about lexing errors.
       curr = std::nullopt;
diff --git a/test/gtest/wat-parser.cpp b/test/gtest/wat-parser.cpp
index be6d76eac..a48efd894 100644
--- a/test/gtest/wat-parser.cpp
+++ b/test/gtest/wat-parser.cpp
@@ -469,3 +469,32 @@ TEST(ParserTest, LexString) {
     ASSERT_EQ(lexer, lexer.end());
   }
 }
+
+TEST(ParserTest, LexKeywords) {
+  // Five space-separated words; the last one is mixed-case and stands in for
+  // a reserved word that merely has the keyword shape.
+  Lexer lexer("module type func import rEsErVeD");
+
+  // Read five tokens, confirming before each read that the lexer is not yet
+  // exhausted, and confirming afterwards that nothing is left over.
+  auto pos = lexer.begin();
+  ASSERT_NE(pos, lexer.end());
+  Token first = *pos++;
+  ASSERT_NE(pos, lexer.end());
+  Token second = *pos++;
+  ASSERT_NE(pos, lexer.end());
+  Token third = *pos++;
+  ASSERT_NE(pos, lexer.end());
+  Token fourth = *pos++;
+  ASSERT_NE(pos, lexer.end());
+  Token fifth = *pos++;
+  EXPECT_EQ(pos, lexer.end());
+
+  // Every word, the reserved one included, is expected to come back as a
+  // KeywordTok whose span is exactly the source word.
+  EXPECT_EQ(first, (Token{"module"sv, KeywordTok{}}));
+  EXPECT_EQ(second, (Token{"type"sv, KeywordTok{}}));
+  EXPECT_EQ(third, (Token{"func"sv, KeywordTok{}}));
+  EXPECT_EQ(fourth, (Token{"import"sv, KeywordTok{}}));
+  EXPECT_EQ(fifth, (Token{"rEsErVeD"sv, KeywordTok{}}));
+}