summaryrefslogtreecommitdiff
path: root/src/wasm
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2022-06-01 13:00:54 -0700
committerGitHub <noreply@github.com>2022-06-01 20:00:54 +0000
commite74f66e92affd637cb19af2ad5f3d015ba86aa1c (patch)
tree75bd835941c2f9cf72f0b29814f114fe0159272d /src/wasm
parent623e08e88db3ebc913fe76e7f60e89fa030f884d (diff)
downloadbinaryen-e74f66e92affd637cb19af2ad5f3d015ba86aa1c.tar.gz
binaryen-e74f66e92affd637cb19af2ad5f3d015ba86aa1c.tar.bz2
binaryen-e74f66e92affd637cb19af2ad5f3d015ba86aa1c.zip
[Parser] Token classification (#4699)
Add methods to `Token` for determining whether the token can be interpreted as a particular token type, returning the interpreted value where applicable. These methods perform the additional bounds checks for integers and NaN payloads that could not be done during the initial lexing, because the lexer did not know what the intended token type was. The float methods also reinterpret integer tokens as floating-point tokens, since the float grammar is a superset of the integer grammar, and they inject NaN payloads into parsed NaN values. Move all bounds checking into these new classifier functions so that it lives in one place.
Diffstat (limited to 'src/wasm')
-rw-r--r--src/wasm/wat-lexer.cpp173
1 files changed, 151 insertions, 22 deletions
diff --git a/src/wasm/wat-lexer.cpp b/src/wasm/wat-lexer.cpp
index 4bea32b59..0d1dc2794 100644
--- a/src/wasm/wat-lexer.cpp
+++ b/src/wasm/wat-lexer.cpp
@@ -151,23 +151,10 @@ public:
if (overflow) {
return {};
}
- auto basic = LexCtx::lexed();
- if (!basic) {
- return {};
- }
- // Check most significant bit for overflow of signed numbers.
- if (sign == Neg) {
- if (n > (1ull << 63)) {
- // TODO: Add error production for signed underflow.
- return {};
- }
- } else if (sign == Pos) {
- if (n > (1ull << 63) - 1) {
- // TODO: Add error production for signed overflow.
- return {};
- }
+ if (auto basic = LexCtx::lexed()) {
+ return LexIntResult{*basic, sign == Neg ? -n : n, sign};
}
- return LexIntResult{*basic, sign == Neg ? -n : n, sign};
+ return {};
}
void takeSign() {
@@ -592,12 +579,7 @@ std::optional<LexFloatResult> float_(std::string_view in) {
if (ctx.takePrefix(":0x"sv)) {
if (auto lexed = hexnum(ctx.next())) {
ctx.take(*lexed);
- if (1 <= lexed->n && lexed->n < (1ull << 52)) {
- ctx.nanPayload = lexed->n;
- } else {
- // TODO: Add error production for invalid NaN payload.
- return {};
- }
+ ctx.nanPayload = lexed->n;
} else {
// TODO: Add error production for malformed NaN payload.
return {};
@@ -781,6 +763,153 @@ std::optional<LexResult> keyword(std::string_view in) {
} // anonymous namespace
+// Interpret this token as an unsigned 64-bit integer. Only integer tokens
+// whose sign is `NoSign` qualify; every other token yields an empty optional.
+std::optional<uint64_t> Token::getU64() const {
+  auto* intTok = std::get_if<IntTok>(&data);
+  if (!intTok || intTok->sign != NoSign) {
+    return std::nullopt;
+  }
+  return intTok->n;
+}
+
+// Interpret this token as a signed 64-bit integer, or return an empty optional
+// if it is not an integer token or its value does not fit.
+//
+// The range checks operate on the raw 64-bit pattern in `n`. For `Neg` tokens
+// they accept zero and every pattern at or above the bit pattern of INT64_MIN
+// (presumably `n` holds the wrapped negation of the literal's magnitude, as
+// the lexer's integer result does). For all other signs they accept values up
+// to INT64_MAX.
+std::optional<int64_t> Token::getS64() const {
+  auto* intTok = std::get_if<IntTok>(&data);
+  if (!intTok) {
+    return std::nullopt;
+  }
+  const uint64_t bits = intTok->n;
+  bool fits;
+  if (intTok->sign == Neg) {
+    // TODO: Add error production for signed underflow.
+    fits = bits == 0 || bits >= uint64_t(INT64_MIN);
+  } else {
+    // TODO: Add error production for signed overflow.
+    fits = bits <= uint64_t(INT64_MAX);
+  }
+  if (!fits) {
+    return std::nullopt;
+  }
+  return int64_t(bits);
+}
+
+// Interpret this token as a 64-bit integer of either signedness, returned as
+// its raw unsigned bit pattern. The unsigned reading is tried first, then the
+// signed one; an empty optional means neither applies.
+std::optional<uint64_t> Token::getI64() const {
+  if (auto asUnsigned = getU64()) {
+    return asUnsigned;
+  }
+  if (auto asSigned = getS64()) {
+    return uint64_t(*asSigned);
+  }
+  return std::nullopt;
+}
+
+// Interpret this token as an unsigned 32-bit integer. Only integer tokens
+// whose sign is `NoSign` and whose value is at most UINT32_MAX qualify; every
+// other token yields an empty optional.
+std::optional<uint32_t> Token::getU32() const {
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == NoSign && tok->n <= UINT32_MAX) {
+      // Narrow directly to the unsigned result type. Going through int32_t
+      // would make values above INT32_MAX take a needless signed conversion.
+      return uint32_t(tok->n);
+    }
+    // TODO: Add error production for unsigned overflow.
+  }
+  return {};
+}
+
+// Interpret this token as a signed 32-bit integer, or return an empty optional
+// if it is not an integer token or its value does not fit.
+//
+// The range checks operate on the raw 64-bit pattern in `n`. For `Neg` tokens
+// they accept zero and every pattern at or above the 64-bit pattern of
+// INT32_MIN (presumably `n` holds the wrapped negation of the literal's
+// magnitude). For all other signs they accept values up to INT32_MAX.
+std::optional<int32_t> Token::getS32() const {
+  auto* intTok = std::get_if<IntTok>(&data);
+  if (!intTok) {
+    return std::nullopt;
+  }
+  const uint64_t bits = intTok->n;
+  const bool fits = intTok->sign == Neg
+                      ? (bits == 0 || bits >= uint64_t(INT32_MIN))
+                      : bits <= uint64_t(INT32_MAX);
+  if (!fits) {
+    return std::nullopt;
+  }
+  return int32_t(bits);
+}
+
+// Interpret this token as a 32-bit integer of either signedness, returned as
+// its raw unsigned bit pattern. The unsigned reading is tried first, then the
+// signed one; an empty optional means neither applies.
+std::optional<uint32_t> Token::getI32() const {
+  if (auto asUnsigned = getU32()) {
+    return asUnsigned;
+  }
+  if (auto asSigned = getS32()) {
+    return uint32_t(*asSigned);
+  }
+  return std::nullopt;
+}
+
+// Interpret this token as a 64-bit float.
+//
+// Float tokens are returned as-is, except that NaN values get their payload
+// bits replaced: either the payload recorded on the token or, if none was
+// given, the default payload with only the top significand bit set. A payload
+// of zero or one that does not fit in the 52-bit significand is rejected.
+//
+// Integer tokens are also accepted and converted to the nearest double.
+// Returns an empty optional for any other token kind.
+std::optional<double> Token::getF64() const {
+  constexpr int signif = 52;
+  constexpr uint64_t payloadMask = (1ull << signif) - 1;
+  constexpr uint64_t nanDefault = 1ull << (signif - 1);
+  if (auto* tok = std::get_if<FloatTok>(&data)) {
+    double d = tok->d;
+    if (std::isnan(d)) {
+      // Validate and inject payload.
+      uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
+      if (payload == 0 || payload > payloadMask) {
+        // TODO: Add error production for out-of-bounds payload.
+        return {};
+      }
+      // Swap in the payload bits while keeping the sign and exponent bits.
+      uint64_t bits;
+      static_assert(sizeof(bits) == sizeof(d));
+      memcpy(&bits, &d, sizeof(bits));
+      bits = (bits & ~payloadMask) | payload;
+      memcpy(&d, &bits, sizeof(bits));
+    }
+    return d;
+  }
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      if (tok->n == 0) {
+        return -0.0;
+      }
+      // For negative tokens `n` holds the wrapped (two's complement) negation
+      // of the literal's magnitude. Recover the magnitude in unsigned
+      // arithmetic and negate the converted value; casting through int64_t
+      // would turn magnitudes above 2^63 into positive numbers.
+      return -double(uint64_t(0) - tok->n);
+    }
+    return double(tok->n);
+  }
+  return {};
+}
+
+// Interpret this token as a 32-bit float.
+//
+// Float tokens are narrowed from the stored double. NaN values get their
+// payload bits replaced: either the payload recorded on the token or, if none
+// was given, the default payload with only the top significand bit set. A
+// payload of zero or one that does not fit in the 23-bit significand is
+// rejected.
+//
+// Integer tokens are also accepted and converted to the nearest float.
+// Returns an empty optional for any other token kind.
+std::optional<float> Token::getF32() const {
+  constexpr int signif = 23;
+  constexpr uint32_t payloadMask = (1u << signif) - 1;
+  constexpr uint64_t nanDefault = 1ull << (signif - 1);
+  if (auto* tok = std::get_if<FloatTok>(&data)) {
+    float f = tok->d;
+    if (std::isnan(f)) {
+      // Validate and inject payload.
+      uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
+      if (payload == 0 || payload > payloadMask) {
+        // TODO: Add error production for out-of-bounds payload.
+        return {};
+      }
+      // Swap in the payload bits while keeping the sign and exponent bits.
+      uint32_t bits;
+      static_assert(sizeof(bits) == sizeof(f));
+      memcpy(&bits, &f, sizeof(bits));
+      bits = (bits & ~payloadMask) | payload;
+      memcpy(&f, &bits, sizeof(bits));
+    }
+    return f;
+  }
+  if (auto* tok = std::get_if<IntTok>(&data)) {
+    if (tok->sign == Neg) {
+      if (tok->n == 0) {
+        return -0.0f;
+      }
+      // For negative tokens `n` holds the wrapped (two's complement) negation
+      // of the literal's magnitude. Recover the magnitude in unsigned
+      // arithmetic and negate the converted value; casting through int64_t
+      // would turn magnitudes above 2^63 into positive numbers.
+      return -float(uint64_t(0) - tok->n);
+    }
+    return float(tok->n);
+  }
+  return {};
+}
+
+// Interpret this token as a string and return a view of its contents, or an
+// empty optional if it is not a string token.
+std::optional<std::string_view> Token::getString() const {
+  auto* strTok = std::get_if<StringTok>(&data);
+  if (!strTok) {
+    return std::nullopt;
+  }
+  if (strTok->str) {
+    // The token carries its own string value; view that.
+    return std::string_view(*strTok->str);
+  }
+  // Otherwise use the token's source span with its first and last characters
+  // dropped (presumably the surrounding quotes).
+  return span.substr(1, span.size() - 2);
+}
+
void Lexer::skipSpace() {
if (auto ctx = space(next())) {
index += ctx->span.size();