summary refs log tree commit diff
path: root/src/parser/lexer.cpp
diff options
context:
space:
mode:
author Thomas Lively <tlively@google.com> 2024-04-19 16:37:28 -0700
committer GitHub <noreply@github.com> 2024-04-19 16:37:28 -0700
commit c60fe154ac09deb9227165d388c5f950bab6e052 (patch)
tree d1fc06da94be6c380004e3b42fd0995308bab7b9 /src/parser/lexer.cpp
parent 4a907b0a5ec1c25eef259ba54b387865edbe0451 (diff)
download binaryen-c60fe154ac09deb9227165d388c5f950bab6e052.tar.gz
binaryen-c60fe154ac09deb9227165d388c5f950bab6e052.tar.bz2
binaryen-c60fe154ac09deb9227165d388c5f950bab6e052.zip
[Parser][NFC] Improve performance of idchar lexing (#6515)
The parsing of idchars was hot enough to show up while profiling the parsing of a very large module. Optimize it to speed up the overall parse by about 16% in a very unscientific measurement.
Diffstat (limited to 'src/parser/lexer.cpp')
-rw-r--r-- src/parser/lexer.cpp 48
1 files changed, 18 insertions, 30 deletions
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
index 48da163e1..ef25b6302 100644
--- a/src/parser/lexer.cpp
+++ b/src/parser/lexer.cpp
@@ -753,37 +753,25 @@ std::optional<LexResult> idchar(std::string_view in) {
return {};
}
uint8_t c = ctx.peek();
- if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
- ('a' <= c && c <= 'z')) {
- ctx.take(1);
- } else {
- switch (c) {
- case '!':
- case '#':
- case '$':
- case '%':
- case '&':
- case '\'':
- case '*':
- case '+':
- case '-':
- case '.':
- case '/':
- case ':':
- case '<':
- case '=':
- case '>':
- case '?':
- case '@':
- case '\\':
- case '^':
- case '_':
- case '`':
- case '|':
- case '~':
- ctx.take(1);
- }
+ // All the allowed characters lie in the range '!' to '~', and within that
+ // range the vast majority of characters are allowed, so it is significantly
+ // faster to check for the disallowed characters instead.
+ if (c < '!' || c > '~') {
+ return ctx.lexed();
+ }
+ switch (c) {
+ case '"':
+ case '(':
+ case ')':
+ case ',':
+ case ';':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ return ctx.lexed();
}
+ ctx.take(1);
return ctx.lexed();
}