/* * Copyright 2023 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "lexer.h" #include "support/bits.h" #include "support/string.h" using namespace std::string_view_literals; namespace wasm::WATParser { Name srcAnnotationKind("src"); namespace { // ================ // Lexical Analysis // ================ // The result of lexing a token fragment. struct LexResult { std::string_view span; }; // Lexing context that accumulates lexed input to produce a token fragment. struct LexCtx { private: // The input we are lexing. std::string_view input; // How much of the input we have already lexed. size_t lexedSize = 0; public: explicit LexCtx(std::string_view in) : input(in) {} // Return the fragment that has been lexed so far. std::optional lexed() const { if (lexedSize > 0) { return {LexResult{input.substr(0, lexedSize)}}; } return {}; } // The next input that has not already been lexed. std::string_view next() const { return input.substr(lexedSize); } // Get the next character without consuming it. uint8_t peek() const { return next()[0]; } // The size of the unlexed input. size_t size() const { return input.size() - lexedSize; } // Whether there is no more input. bool empty() const { return size() == 0; } // Tokens must be separated by spaces or parentheses. bool canFinish() const; // Whether the unlexed input starts with prefix `sv`. size_t startsWith(std::string_view sv) const { return next().substr(0, sv.size()) == sv; } // Consume the next `n` characters. void take(size_t n) { lexedSize += n; } // Consume an additional lexed fragment. void take(const LexResult& res) { lexedSize += res.span.size(); } // Consume the prefix and return true if possible. bool takePrefix(std::string_view sv) { if (startsWith(sv)) { take(sv.size()); return true; } return false; } // Consume the rest of the input. void takeAll() { lexedSize = input.size(); } }; enum OverflowBehavior { DisallowOverflow, IgnoreOverflow }; std::optional getDigit(char c) { if ('0' <= c && c <= '9') { return c - '0'; } return {}; } std::optional getHexDigit(char c) { if ('0' <= c && c <= '9') { return c - '0'; } if ('A' <= c && c <= 'F') { return 10 + c - 'A'; } if ('a' <= c && c <= 'f') { return 10 + c - 'a'; } return {}; } enum Sign { NoSign, Pos, Neg }; // The result of lexing an integer token fragment. struct LexIntResult : LexResult { uint64_t n; Sign sign; template bool isUnsigned() { static_assert(std::is_integral_v && std::is_unsigned_v); return sign == NoSign && n <= std::numeric_limits::max(); } template bool isSigned() { static_assert(std::is_integral_v && std::is_signed_v); if (sign == Neg) { return uint64_t(std::numeric_limits::min()) <= n || n == 0; } return n <= uint64_t(std::numeric_limits::max()); } }; // Lexing context that accumulates lexed input to produce an integer token // fragment. struct LexIntCtx : LexCtx { using LexCtx::take; private: uint64_t n = 0; Sign sign = NoSign; bool overflow = false; public: explicit LexIntCtx(std::string_view in) : LexCtx(in) {} // Lex only the underlying span, ignoring the overflow and value. std::optional lexedRaw() { if (auto basic = LexCtx::lexed()) { return LexIntResult{*basic, 0, NoSign}; } return {}; } std::optional lexed() { if (overflow) { return {}; } if (auto basic = LexCtx::lexed()) { return LexIntResult{*basic, sign == Neg ? -n : n, sign}; } return {}; } void takeSign() { if (takePrefix("+"sv)) { sign = Pos; } else if (takePrefix("-"sv)) { sign = Neg; } else { sign = NoSign; } } bool takeDigit() { if (!empty()) { if (auto d = getDigit(peek())) { take(1); uint64_t newN = n * 10 + *d; if (newN < n) { overflow = true; } n = newN; return true; } } return false; } bool takeHexdigit() { if (!empty()) { if (auto h = getHexDigit(peek())) { take(1); uint64_t newN = n * 16 + *h; if (newN < n) { overflow = true; } n = newN; return true; } } return false; } void take(const LexIntResult& res) { LexCtx::take(res); n = res.n; } }; struct LexFloatResult : LexResult { // The payload if we lexed a nan with payload. We cannot store the payload // directly in `d` because we do not know at this point whether we are parsing // an f32 or f64 and therefore we do not know what the allowable payloads are. // No payload with NaN means to use the default payload for the expected float // width. std::optional nanPayload; double d; }; struct LexFloatCtx : LexCtx { std::optional nanPayload; LexFloatCtx(std::string_view in) : LexCtx(in) {} std::optional lexed() { const double posNan = std::copysign(NAN, 1.0); const double negNan = std::copysign(NAN, -1.0); assert(!std::signbit(posNan) && "expected positive NaN to be positive"); assert(std::signbit(negNan) && "expected negative NaN to be negative"); auto basic = LexCtx::lexed(); if (!basic) { return {}; } // strtod does not return NaNs with the expected signs on all platforms. // TODO: use starts_with once we have C++20. if (basic->span.substr(0, 3) == "nan"sv || basic->span.substr(0, 4) == "+nan"sv) { return LexFloatResult{*basic, nanPayload, posNan}; } if (basic->span.substr(0, 4) == "-nan"sv) { return LexFloatResult{*basic, nanPayload, negNan}; } // Do not try to implement fully general and precise float parsing // ourselves. Instead, call out to std::strtod to do our parsing. This means // we need to strip any underscores since `std::strtod` does not understand // them. std::stringstream ss; for (const char *curr = basic->span.data(), *end = curr + basic->span.size(); curr != end; ++curr) { if (*curr != '_') { ss << *curr; } } std::string str = ss.str(); char* last; double d = std::strtod(str.data(), &last); assert(last == str.data() + str.size() && "could not parse float"); return LexFloatResult{*basic, {}, d}; } }; struct LexStrResult : LexResult { // Allocate a string only if there are escape sequences, otherwise just use // the original string_view. std::optional str; std::string_view getStr() { if (str) { return *str; } return span; } }; struct LexStrCtx : LexCtx { private: // Used to build a string with resolved escape sequences. Only used when the // parsed string contains escape sequences, otherwise we can just use the // parsed string directly. std::optional escapeBuilder; public: LexStrCtx(std::string_view in) : LexCtx(in) {} std::optional lexed() { if (auto basic = LexCtx::lexed()) { if (escapeBuilder) { return LexStrResult{*basic, {escapeBuilder->str()}}; } else { return LexStrResult{*basic, {}}; } } return {}; } void takeChar() { if (escapeBuilder) { *escapeBuilder << peek(); } LexCtx::take(1); } void ensureBuildingEscaped() { if (escapeBuilder) { return; } // Drop the opening '"'. escapeBuilder = std::stringstream{}; *escapeBuilder << LexCtx::lexed()->span.substr(1); } void appendEscaped(char c) { *escapeBuilder << c; } bool appendUnicode(uint64_t u) { if ((0xd800 <= u && u < 0xe000) || 0x110000 <= u) { return false; } String::writeWTF8CodePoint(*escapeBuilder, u); return true; } }; struct LexIdResult : LexResult { bool isStr = false; std::optional str; }; struct LexIdCtx : LexCtx { bool isStr = false; std::optional str; LexIdCtx(std::string_view in) : LexCtx(in) {} std::optional lexed() { if (auto basic = LexCtx::lexed()) { return LexIdResult{*basic, isStr, str}; } return {}; } }; struct LexAnnotationResult : LexResult { Annotation annotation; }; struct LexAnnotationCtx : LexCtx { std::string_view kind; size_t kindSize = 0; std::string_view contents; size_t contentsSize = 0; explicit LexAnnotationCtx(std::string_view in) : LexCtx(in) {} void startKind() { kind = next(); } void takeKind(size_t size) { kindSize += size; take(size); } void setKind(std::string_view kind) { this->kind = kind; kindSize = kind.size(); } void startContents() { contents = next(); } void takeContents(size_t size) { contentsSize += size; take(size); } std::optional lexed() { if (auto basic = LexCtx::lexed()) { return LexAnnotationResult{ *basic, {Name(kind.substr(0, kindSize)), contents.substr(0, contentsSize)}}; } return std::nullopt; } }; std::optional idchar(std::string_view); std::optional space(std::string_view); std::optional keyword(std::string_view); std::optional integer(std::string_view); std::optional float_(std::string_view); std::optional str(std::string_view); std::optional ident(std::string_view); // annotation ::= ';;@' [^\n]* | '(@'idchar+ annotelem* ')' // annotelem ::= keyword | reserved | uN | sN | fN | string | id // | '(' annotelem* ')' | '(@'idchar+ annotelem* ')' std::optional annotation(std::string_view in) { LexAnnotationCtx ctx(in); if (ctx.takePrefix(";;@"sv)) { ctx.setKind(srcAnnotationKind.str); ctx.startContents(); if (auto size = ctx.next().find('\n'); size != ""sv.npos) { ctx.takeContents(size); } else { ctx.takeContents(ctx.next().size()); } } else if (ctx.takePrefix("(@"sv)) { ctx.startKind(); bool hasIdchar = false; while (auto lexed = idchar(ctx.next())) { ctx.takeKind(1); hasIdchar = true; } if (!hasIdchar) { return std::nullopt; } ctx.startContents(); size_t depth = 1; while (true) { if (ctx.empty()) { return std::nullopt; } if (auto lexed = space(ctx.next())) { ctx.takeContents(lexed->span.size()); continue; } if (auto lexed = keyword(ctx.next())) { ctx.takeContents(lexed->span.size()); continue; } if (auto lexed = integer(ctx.next())) { ctx.takeContents(lexed->span.size()); continue; } if (auto lexed = float_(ctx.next())) { ctx.takeContents(lexed->span.size()); continue; } if (auto lexed = str(ctx.next())) { ctx.takeContents(lexed->span.size()); continue; } if (auto lexed = ident(ctx.next())) { ctx.takeContents(lexed->span.size()); continue; } if (ctx.startsWith("(@"sv)) { ctx.takeContents(2); bool hasIdchar = false; while (auto lexed = idchar(ctx.next())) { ctx.takeContents(1); hasIdchar = true; } if (!hasIdchar) { return std::nullopt; } ++depth; continue; } if (ctx.startsWith("("sv)) { ctx.takeContents(1); ++depth; continue; } if (ctx.startsWith(")"sv)) { --depth; if (depth == 0) { ctx.take(1); break; } ctx.takeContents(1); continue; } // Unrecognized token. return std::nullopt; } } return ctx.lexed(); } // comment ::= linecomment | blockcomment // linecomment ::= ';;' linechar* ('\n' | eof) // linechar ::= c:char (if c != '\n') // blockcomment ::= '(;' blockchar* ';)' // blockchar ::= c:char (if c != ';' and c != '(') // | ';' (if the next char is not ')') // | '(' (if the next char is not ';') // | blockcomment std::optional comment(std::string_view in) { LexCtx ctx(in); if (ctx.size() < 2) { return {}; } // Line comment if (!ctx.startsWith(";;@"sv) && ctx.takePrefix(";;"sv)) { if (auto size = ctx.next().find('\n'); size != ""sv.npos) { ctx.take(size); } else { ctx.takeAll(); } return ctx.lexed(); } // Block comment (possibly nested!) if (ctx.takePrefix("(;"sv)) { size_t depth = 1; while (depth > 0 && ctx.size() >= 2) { if (ctx.takePrefix("(;"sv)) { ++depth; } else if (ctx.takePrefix(";)"sv)) { --depth; } else { ctx.take(1); } } if (depth > 0) { // TODO: Add error production for non-terminated block comment. return {}; } return ctx.lexed(); } return {}; } std::optional spacechar(std::string_view in) { LexCtx ctx(in); ctx.takePrefix(" "sv) || ctx.takePrefix("\n"sv) || ctx.takePrefix("\r"sv) || ctx.takePrefix("\t"sv); return ctx.lexed(); } // space ::= (' ' | format | comment)* // format ::= '\t' | '\n' | '\r' std::optional space(std::string_view in) { LexCtx ctx(in); while (ctx.size()) { if (auto lexed = spacechar(ctx.next())) { ctx.take(*lexed); } else if (auto lexed = comment(ctx.next())) { ctx.take(*lexed); } else { break; } } return ctx.lexed(); } bool LexCtx::canFinish() const { // Logically we want to check for eof, parens, and space. But we don't // actually want to parse more than a couple characters of space, so check for // individual space chars or comment starts instead. return empty() || startsWith("("sv) || startsWith(")"sv) || spacechar(next()) || startsWith(";;"sv); } // num ::= d:digit => d // | n:num '_'? d:digit => 10*n + d // digit ::= '0' => 0 | ... | '9' => 9 std::optional num(std::string_view in, OverflowBehavior overflow = DisallowOverflow) { LexIntCtx ctx(in); if (ctx.empty()) { return {}; } if (!ctx.takeDigit()) { return {}; } while (true) { bool under = ctx.takePrefix("_"sv); if (!ctx.takeDigit()) { if (!under) { return overflow == DisallowOverflow ? ctx.lexed() : ctx.lexedRaw(); } // TODO: Add error production for trailing underscore. return {}; } } } // hexnum ::= h:hexdigit => h // | n:hexnum '_'? h:hexdigit => 16*n + h // hexdigit ::= d:digit => d // | 'A' => 10 | ... | 'F' => 15 // | 'a' => 10 | ... | 'f' => 15 std::optional hexnum(std::string_view in, OverflowBehavior overflow = DisallowOverflow) { LexIntCtx ctx(in); if (!ctx.takeHexdigit()) { return {}; } while (true) { bool under = ctx.takePrefix("_"sv); if (!ctx.takeHexdigit()) { if (!under) { return overflow == DisallowOverflow ? ctx.lexed() : ctx.lexedRaw(); } // TODO: Add error production for trailing underscore. return {}; } } } // uN ::= n:num => n (if n < 2^N) // | '0x' n:hexnum => n (if n < 2^N) // sN ::= s:sign n:num => [s]n (if -2^(N-1) <= [s]n < 2^(N-1)) // | s:sign '0x' n:hexnum => [s]n (if -2^(N-1) <= [s]n < 2^(N-1)) // sign ::= {} => + | '+' => + | '-' => - // // Note: Defer bounds and sign checking until we know what kind of integer we // expect. std::optional integer(std::string_view in) { LexIntCtx ctx(in); ctx.takeSign(); if (ctx.takePrefix("0x"sv)) { if (auto lexed = hexnum(ctx.next())) { ctx.take(*lexed); if (ctx.canFinish()) { return ctx.lexed(); } } // TODO: Add error production for unrecognized hexnum. return {}; } if (auto lexed = num(ctx.next())) { ctx.take(*lexed); if (ctx.canFinish()) { return ctx.lexed(); } } return {}; } // float ::= p:num '.'? => p // | p:num '.' q:frac => p + q // | p:num '.'? ('E'|'e') s:sign e:num => p * 10^([s]e) // | p:num '.' q:frac ('E'|'e') s:sign e:num => (p + q) * 10^([s]e) // frac ::= d:digit => d/10 // | d:digit '_'? p:frac => (d + p/10) / 10 std::optional decfloat(std::string_view in) { LexCtx ctx(in); if (auto lexed = num(ctx.next(), IgnoreOverflow)) { ctx.take(*lexed); } else { return {}; } // Optional '.' followed by optional frac if (ctx.takePrefix("."sv)) { if (auto lexed = num(ctx.next(), IgnoreOverflow)) { ctx.take(*lexed); } } if (ctx.takePrefix("E"sv) || ctx.takePrefix("e"sv)) { // Optional sign ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv); if (auto lexed = num(ctx.next(), IgnoreOverflow)) { ctx.take(*lexed); } else { // TODO: Add error production for missing exponent. return {}; } } return ctx.lexed(); } // hexfloat ::= '0x' p:hexnum '.'? => p // | '0x' p:hexnum '.' q:hexfrac => p + q // | '0x' p:hexnum '.'? ('P'|'p') s:sign e:num => p * 2^([s]e) // | '0x' p:hexnum '.' q:hexfrac ('P'|'p') s:sign e:num // => (p + q) * 2^([s]e) // hexfrac ::= h:hexdigit => h/16 // | h:hexdigit '_'? p:hexfrac => (h + p/16) / 16 std::optional hexfloat(std::string_view in) { LexCtx ctx(in); if (!ctx.takePrefix("0x"sv)) { return {}; } if (auto lexed = hexnum(ctx.next(), IgnoreOverflow)) { ctx.take(*lexed); } else { return {}; } // Optional '.' followed by optional hexfrac if (ctx.takePrefix("."sv)) { if (auto lexed = hexnum(ctx.next(), IgnoreOverflow)) { ctx.take(*lexed); } } if (ctx.takePrefix("P"sv) || ctx.takePrefix("p"sv)) { // Optional sign ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv); if (auto lexed = num(ctx.next(), IgnoreOverflow)) { ctx.take(*lexed); } else { // TODO: Add error production for missing exponent. return {}; } } return ctx.lexed(); } // fN ::= s:sign z:fNmag => [s]z // fNmag ::= z:float => float_N(z) (if float_N(z) != +/-infinity) // | z:hexfloat => float_N(z) (if float_N(z) != +/-infinity) // | 'inf' => infinity // | 'nan' => nan(2^(signif(N)-1)) // | 'nan:0x' n:hexnum => nan(n) (if 1 <= n < 2^signif(N)) std::optional float_(std::string_view in) { LexFloatCtx ctx(in); // Optional sign ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv); if (auto lexed = hexfloat(ctx.next())) { ctx.take(*lexed); } else if (auto lexed = decfloat(ctx.next())) { ctx.take(*lexed); } else if (ctx.takePrefix("inf"sv)) { // nop } else if (ctx.takePrefix("nan"sv)) { if (ctx.takePrefix(":0x"sv)) { if (auto lexed = hexnum(ctx.next())) { ctx.take(*lexed); ctx.nanPayload = lexed->n; } else { // TODO: Add error production for malformed NaN payload. return {}; } } else { // No explicit payload necessary; we will inject the default payload // later. } } else { return {}; } if (ctx.canFinish()) { return ctx.lexed(); } return {}; } // idchar ::= '0' | ... | '9' // | 'A' | ... | 'Z' // | 'a' | ... | 'z' // | '!' | '#' | '$' | '%' | '&' | ''' | '*' | '+' // | '-' | '.' | '/' | ':' | '<' | '=' | '>' | '?' // | '@' | '\' | '^' | '_' | '`' | '|' | '~' std::optional idchar(std::string_view in) { LexCtx ctx(in); if (ctx.empty()) { return {}; } uint8_t c = ctx.peek(); // All the allowed characters lie in the range '!' to '~', and within that // range the vast majority of characters are allowed, so it is significantly // faster to check for the disallowed characters instead. if (c < '!' || c > '~') { return ctx.lexed(); } switch (c) { case '"': case '(': case ')': case ',': case ';': case '[': case ']': case '{': case '}': return ctx.lexed(); } ctx.take(1); return ctx.lexed(); } // string ::= '"' (b*:stringelem)* '"' => concat((b*)*) // (if |concat((b*)*)| < 2^32) // stringelem ::= c:stringchar => utf8(c) // | '\' n:hexdigit m:hexdigit => 16*n + m // stringchar ::= c:char => c // (if c >= U+20 && c != U+7f && c != '"' && c != '\') // | '\t' => \t | '\n' => \n | '\r' => \r // | '\\' => \ | '\"' => " | '\'' => ' // | '\u{' n:hexnum '}' => U+(n) // (if n < 0xD800 and 0xE000 <= n <= 0x110000) std::optional str(std::string_view in) { LexStrCtx ctx(in); if (!ctx.takePrefix("\""sv)) { return {}; } while (!ctx.takePrefix("\""sv)) { if (ctx.empty()) { // TODO: Add error production for unterminated string. return {}; } if (ctx.startsWith("\\"sv)) { // Escape sequences ctx.ensureBuildingEscaped(); ctx.take(1); if (ctx.takePrefix("t"sv)) { ctx.appendEscaped('\t'); } else if (ctx.takePrefix("n"sv)) { ctx.appendEscaped('\n'); } else if (ctx.takePrefix("r"sv)) { ctx.appendEscaped('\r'); } else if (ctx.takePrefix("\\"sv)) { ctx.appendEscaped('\\'); } else if (ctx.takePrefix("\""sv)) { ctx.appendEscaped('"'); } else if (ctx.takePrefix("'"sv)) { ctx.appendEscaped('\''); } else if (ctx.takePrefix("u{"sv)) { auto lexed = hexnum(ctx.next()); if (!lexed) { // TODO: Add error production for malformed unicode escapes. return {}; } ctx.take(*lexed); if (!ctx.takePrefix("}"sv)) { // TODO: Add error production for malformed unicode escapes. return {}; } if (!ctx.appendUnicode(lexed->n)) { // TODO: Add error production for invalid unicode values. return {}; } } else { LexIntCtx ictx(ctx.next()); if (!ictx.takeHexdigit() || !ictx.takeHexdigit()) { // TODO: Add error production for unrecognized escape sequence. return {}; } auto lexed = *ictx.lexed(); ctx.take(lexed); ctx.appendEscaped(char(lexed.n)); } } else { // Normal characters if (uint8_t c = ctx.peek(); c >= 0x20 && c != 0x7F) { ctx.takeChar(); } else { // TODO: Add error production for unescaped control characters. return {}; } } } return ctx.lexed(); } // id ::= '$' idchar+ | '$' str std::optional ident(std::string_view in) { LexIdCtx ctx(in); if (!ctx.takePrefix("$"sv)) { return {}; } if (auto s = str(ctx.next())) { if (!String::isUTF8(s->getStr())) { return {}; } ctx.isStr = true; ctx.str = s->str; ctx.take(*s); } else if (auto lexed = idchar(ctx.next())) { ctx.take(*lexed); while (auto lexed = idchar(ctx.next())) { ctx.take(*lexed); } } else { return {}; } if (ctx.canFinish()) { return ctx.lexed(); } return {}; } // keyword ::= ( 'a' | ... | 'z' ) idchar* (if literal terminal in grammar) // reserved ::= idchar+ // // The "keyword" token we lex here covers both keywords as well as any reserved // tokens that match the keyword format. This saves us from having to enumerate // all the valid keywords here. These invalid keywords will still produce // errors, just at a higher level of the parser. std::optional keyword(std::string_view in) { LexCtx ctx(in); if (ctx.empty()) { return {}; } uint8_t start = ctx.peek(); if ('a' <= start && start <= 'z') { ctx.take(1); } else { return {}; } while (auto lexed = idchar(ctx.next())) { ctx.take(*lexed); } return ctx.lexed(); } } // anonymous namespace void Lexer::skipSpace() { while (true) { if (auto ctx = annotation(next())) { pos += ctx->span.size(); annotations.push_back(ctx->annotation); continue; } if (auto ctx = space(next())) { pos += ctx->span.size(); continue; } break; } } bool Lexer::takeLParen() { if (LexCtx(next()).startsWith("("sv)) { ++pos; advance(); return true; } return false; } bool Lexer::takeRParen() { if (LexCtx(next()).startsWith(")"sv)) { ++pos; advance(); return true; } return false; } std::optional Lexer::takeString() { if (auto result = str(next())) { pos += result->span.size(); advance(); if (result->str) { return result->str; } // Remove quotes. return std::string(result->span.substr(1, result->span.size() - 2)); } return std::nullopt; } std::optional Lexer::takeID() { if (auto result = ident(next())) { pos += result->span.size(); advance(); if (result->str) { return Name(*result->str); } if (result->isStr) { // Remove '$' and quotes. return Name(result->span.substr(2, result->span.size() - 3)); } // Remove '$'. return Name(result->span.substr(1)); } return std::nullopt; } std::optional Lexer::takeKeyword() { if (auto result = keyword(next())) { pos += result->span.size(); advance(); return result->span; } return std::nullopt; } bool Lexer::takeKeyword(std::string_view expected) { if (auto result = keyword(next()); result && result->span == expected) { pos += expected.size(); advance(); return true; } return false; } std::optional Lexer::takeOffset() { if (auto result = keyword(next())) { if (result->span.substr(0, 7) != "offset="sv) { return std::nullopt; } Lexer subLexer(result->span.substr(7)); if (auto o = subLexer.takeU64()) { pos += result->span.size(); advance(); return o; } } return std::nullopt; } std::optional Lexer::takeAlign() { if (auto result = keyword(next())) { if (result->span.substr(0, 6) != "align="sv) { return std::nullopt; } Lexer subLexer(result->span.substr(6)); if (auto o = subLexer.takeU32()) { if (Bits::popCount(*o) != 1) { return std::nullopt; } pos += result->span.size(); advance(); return o; } } return std::nullopt; } template std::optional Lexer::takeU() { static_assert(std::is_integral_v && std::is_unsigned_v); if (auto result = integer(next()); result && result->isUnsigned()) { pos += result->span.size(); advance(); return T(result->n); } // TODO: Add error production for unsigned overflow. return std::nullopt; } template std::optional Lexer::takeS() { static_assert(std::is_integral_v && std::is_signed_v); if (auto result = integer(next()); result && result->isSigned()) { pos += result->span.size(); advance(); return T(result->n); } return std::nullopt; } template std::optional Lexer::takeI() { static_assert(std::is_integral_v && std::is_unsigned_v); if (auto result = integer(next())) { if (result->isUnsigned() || result->isSigned>()) { pos += result->span.size(); advance(); return T(result->n); } } return std::nullopt; } template std::optional Lexer::takeU(); template std::optional Lexer::takeS(); template std::optional Lexer::takeI(); template std::optional Lexer::takeU(); template std::optional Lexer::takeS(); template std::optional Lexer::takeI(); template std::optional Lexer::takeU(); template std::optional Lexer::takeS(); template std::optional Lexer::takeI(); template std::optional Lexer::takeU(); template std::optional Lexer::takeS(); template std::optional Lexer::takeI(); std::optional Lexer::takeF64() { constexpr int signif = 52; constexpr uint64_t payloadMask = (1ull << signif) - 1; constexpr uint64_t nanDefault = 1ull << (signif - 1); if (auto result = float_(next())) { double d = result->d; if (std::isnan(d)) { // Inject payload. uint64_t payload = result->nanPayload ? *result->nanPayload : nanDefault; if (payload == 0 || payload > payloadMask) { // TODO: Add error production for out-of-bounds payload. return std::nullopt; } uint64_t bits; static_assert(sizeof(bits) == sizeof(d)); memcpy(&bits, &d, sizeof(bits)); bits = (bits & ~payloadMask) | payload; memcpy(&d, &bits, sizeof(bits)); } pos += result->span.size(); advance(); return d; } if (auto result = integer(next())) { pos += result->span.size(); advance(); if (result->sign == Neg) { if (result->n == 0) { return -0.0; } return double(int64_t(result->n)); } return double(result->n); } return std::nullopt; } std::optional Lexer::takeF32() { constexpr int signif = 23; constexpr uint32_t payloadMask = (1u << signif) - 1; constexpr uint64_t nanDefault = 1ull << (signif - 1); if (auto result = float_(next())) { float f = result->d; if (std::isnan(f)) { // Validate and inject payload. uint64_t payload = result->nanPayload ? *result->nanPayload : nanDefault; if (payload == 0 || payload > payloadMask) { // TODO: Add error production for out-of-bounds payload. return std::nullopt; } uint32_t bits; static_assert(sizeof(bits) == sizeof(f)); memcpy(&bits, &f, sizeof(bits)); bits = (bits & ~payloadMask) | payload; memcpy(&f, &bits, sizeof(bits)); } pos += result->span.size(); advance(); return f; } if (auto result = integer(next())) { pos += result->span.size(); advance(); if (result->sign == Neg) { if (result->n == 0) { return -0.0f; } return float(int64_t(result->n)); } return float(result->n); } return std::nullopt; } TextPos Lexer::position(const char* c) const { assert(size_t(c - buffer.data()) <= buffer.size()); TextPos pos{1, 0}; for (const char* p = buffer.data(); p != c; ++p) { if (*p == '\n') { pos.line++; pos.col = 0; } else { pos.col++; } } return pos; } bool TextPos::operator==(const TextPos& other) const { return line == other.line && col == other.col; } std::ostream& operator<<(std::ostream& os, const TextPos& pos) { return os << pos.line << ":" << pos.col; } } // namespace wasm::WATParser