diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2022-05-27 16:58:24 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-27 16:58:24 -0700 |
commit | 838de5c4f374396a15d5700ebb51c3e0a89b5840 (patch) | |
tree | 1969623c389959f8341637e58130d5dae8c50be6 | |
parent | 410b7c92ea2e36b6a938fc34b4fd11e2eeea9fb3 (diff) | |
download | binaryen-838de5c4f374396a15d5700ebb51c3e0a89b5840.tar.gz binaryen-838de5c4f374396a15d5700ebb51c3e0a89b5840.tar.bz2 binaryen-838de5c4f374396a15d5700ebb51c3e0a89b5840.zip |
[Parser] Replace Signedness with ternary Sign (#4698)
Previously we were tracking whether integer tokens were signed but we did not
differentiate between positive and negative signs. Unfortunately, without
differentiating them, there's no way to tell the difference between an in-bounds
negative integer and a wildly out-of-bounds positive integer when trying to
perform bounds checks for s32 tokens. Fix the problem by tracking not only
whether there is a sign on an integer token, but also what the sign is.
-rw-r--r-- | src/wasm/wat-lexer.cpp | 44 | ||||
-rw-r--r-- | src/wat-lexer.h | 4 | ||||
-rw-r--r-- | test/gtest/wat-lexer.cpp | 84 |
3 files changed, 65 insertions, 67 deletions
diff --git a/src/wasm/wat-lexer.cpp b/src/wasm/wat-lexer.cpp index 1992e9bab..4bea32b59 100644 --- a/src/wasm/wat-lexer.cpp +++ b/src/wasm/wat-lexer.cpp @@ -123,7 +123,7 @@ std::optional<int> getHexDigit(char c) { // The result of lexing an integer token fragment. struct LexIntResult : LexResult { uint64_t n; - Signedness signedness; + Sign sign; }; // Lexing context that accumulates lexed input to produce an integer token @@ -133,8 +133,7 @@ struct LexIntCtx : LexCtx { private: uint64_t n = 0; - Signedness signedness = Unsigned; - bool negative = false; + Sign sign = NoSign; bool overflow = false; public: @@ -143,13 +142,12 @@ public: // Lex only the underlying span, ignoring the overflow and value. std::optional<LexIntResult> lexedRaw() { if (auto basic = LexCtx::lexed()) { - return LexIntResult{*basic, 0, Unsigned}; + return LexIntResult{*basic, 0, NoSign}; } return {}; } std::optional<LexIntResult> lexed() { - // Check most significant bit for overflow of signed numbers. if (overflow) { return {}; } @@ -157,28 +155,28 @@ public: if (!basic) { return {}; } - if (signedness == Signed) { - if (negative) { - if (n > (1ull << 63)) { - // TODO: Add error production for signed underflow. - return {}; - } - } else { - if (n > (1ull << 63) - 1) { - // TODO: Add error production for signed overflow. - return {}; - } + // Check most significant bit for overflow of signed numbers. + if (sign == Neg) { + if (n > (1ull << 63)) { + // TODO: Add error production for signed underflow. + return {}; + } + } else if (sign == Pos) { + if (n > (1ull << 63) - 1) { + // TODO: Add error production for signed overflow. + return {}; } } - return LexIntResult{*basic, negative ? -n : n, signedness}; + return LexIntResult{*basic, sign == Neg ? -n : n, sign}; } void takeSign() { if (takePrefix("+"sv)) { - signedness = Signed; + sign = Pos; } else if (takePrefix("-"sv)) { - signedness = Signed; - negative = true; + sign = Neg; + } else { + sign = NoSign; } } @@ -799,7 +797,7 @@ void Lexer::lexToken() { } else if (auto t = ident(next())) { tok = Token{t->span, IdTok{}}; } else if (auto t = integer(next())) { - tok = Token{t->span, IntTok{t->n, t->signedness}}; + tok = Token{t->span, IntTok{t->n, t->sign}}; } else if (auto t = float_(next())) { tok = Token{t->span, FloatTok{t->nanPayload, t->d}}; } else if (auto t = str(next())) { @@ -834,7 +832,7 @@ bool TextPos::operator==(const TextPos& other) const { } bool IntTok::operator==(const IntTok& other) const { - return n == other.n && signedness == other.signedness; + return n == other.n && sign == other.sign; } bool FloatTok::operator==(const FloatTok& other) const { @@ -872,7 +870,7 @@ std::ostream& operator<<(std::ostream& os, const RParenTok&) { std::ostream& operator<<(std::ostream& os, const IdTok&) { return os << "id"; } std::ostream& operator<<(std::ostream& os, const IntTok& tok) { - return os << tok.n << (tok.signedness == Signed ? " signed" : " unsigned"); + return os << (tok.sign == Pos ? "+" : tok.sign == Neg ? "-" : "") << tok.n; } std::ostream& operator<<(std::ostream& os, const FloatTok& tok) { diff --git a/src/wat-lexer.h b/src/wat-lexer.h index 057d7eed7..5a955f5c0 100644 --- a/src/wat-lexer.h +++ b/src/wat-lexer.h @@ -55,11 +55,11 @@ struct IdTok { friend std::ostream& operator<<(std::ostream&, const IdTok&); }; -enum Signedness { Unsigned, Signed }; +enum Sign { NoSign, Pos, Neg }; struct IntTok { uint64_t n; - Signedness signedness; + Sign sign; bool operator==(const IntTok&) const; friend std::ostream& operator<<(std::ostream&, const IntTok&); diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp index f77e73cdc..75ddca6ed 100644 --- a/test/gtest/wat-lexer.cpp +++ b/test/gtest/wat-lexer.cpp @@ -23,11 +23,11 @@ using namespace wasm::WATParser; using namespace std::string_view_literals; TEST(LexerTest, LexWhitespace) { - Token one{"1"sv, IntTok{1, Unsigned}}; - Token two{"2"sv, IntTok{2, Unsigned}}; - Token three{"3"sv, IntTok{3, Unsigned}}; - Token four{"4"sv, IntTok{4, Unsigned}}; - Token five{"5"sv, IntTok{5, Unsigned}}; + Token one{"1"sv, IntTok{1, NoSign}}; + Token two{"2"sv, IntTok{2, NoSign}}; + Token three{"3"sv, IntTok{3, NoSign}}; + Token four{"4"sv, IntTok{4, NoSign}}; + Token five{"5"sv, IntTok{5, NoSign}}; Lexer lexer(" 1\t2\n3\r4 \n\n\t 5 "sv); @@ -58,8 +58,8 @@ TEST(LexerTest, LexWhitespace) { } TEST(LexerTest, LexLineComment) { - Token one{"1"sv, IntTok{1, Unsigned}}; - Token six{"6"sv, IntTok{6, Unsigned}}; + Token one{"1"sv, IntTok{1, NoSign}}; + Token six{"6"sv, IntTok{6, NoSign}}; Lexer lexer("1;; whee! 2 3\t4\r5\n6"sv); @@ -77,8 +77,8 @@ TEST(LexerTest, LexLineComment) { } TEST(LexerTest, LexBlockComment) { - Token one{"1"sv, IntTok{1, Unsigned}}; - Token six{"6"sv, IntTok{6, Unsigned}}; + Token one{"1"sv, IntTok{1, NoSign}}; + Token six{"6"sv, IntTok{6, NoSign}}; Lexer lexer("1(; whoo! 2\n (; \n3\n ;) 4 (;) 5 ;) \n;)6"sv); @@ -122,85 +122,85 @@ TEST(LexerTest, LexInt) { { Lexer lexer("0"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0"sv, IntTok{0, Unsigned}}; + Token expected{"0"sv, IntTok{0, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+0"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+0"sv, IntTok{0, Signed}}; + Token expected{"+0"sv, IntTok{0, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-0"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-0"sv, IntTok{0, Signed}}; + Token expected{"-0"sv, IntTok{0, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("1"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"1"sv, IntTok{1, Unsigned}}; + Token expected{"1"sv, IntTok{1, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+1"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+1"sv, IntTok{1, Signed}}; + Token expected{"+1"sv, IntTok{1, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-1"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-1"sv, IntTok{-1ull, Signed}}; + Token expected{"-1"sv, IntTok{-1ull, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("0010"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0010"sv, IntTok{10, Unsigned}}; + Token expected{"0010"sv, IntTok{10, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+0010"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+0010"sv, IntTok{10, Signed}}; + Token expected{"+0010"sv, IntTok{10, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-0010"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-0010"sv, IntTok{-10ull, Signed}}; + Token expected{"-0010"sv, IntTok{-10ull, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("9999"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"9999"sv, IntTok{9999, Unsigned}}; + Token expected{"9999"sv, IntTok{9999, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+9999"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+9999"sv, IntTok{9999, Signed}}; + Token expected{"+9999"sv, IntTok{9999, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-9999"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-9999"sv, IntTok{-9999ull, Signed}}; + Token expected{"-9999"sv, IntTok{-9999ull, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("12_34"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"12_34"sv, IntTok{1234, Unsigned}}; + Token expected{"12_34"sv, IntTok{1234, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("1_2_3_4"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"1_2_3_4"sv, IntTok{1234, Unsigned}}; + Token expected{"1_2_3_4"sv, IntTok{1234, NoSign}}; EXPECT_EQ(*lexer, expected); } { @@ -222,7 +222,7 @@ TEST(LexerTest, LexInt) { { Lexer lexer("18446744073709551615"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"18446744073709551615"sv, IntTok{-1ull, Unsigned}}; + Token expected{"18446744073709551615"sv, IntTok{-1ull, NoSign}}; EXPECT_EQ(*lexer, expected); } { @@ -236,7 +236,7 @@ TEST(LexerTest, LexInt) { { Lexer lexer("+9223372036854775807"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+9223372036854775807"sv, IntTok{~(1ull << 63), Signed}}; + Token expected{"+9223372036854775807"sv, IntTok{~(1ull << 63), Pos}}; EXPECT_EQ(*lexer, expected); } { @@ -250,7 +250,7 @@ TEST(LexerTest, LexInt) { { Lexer lexer("-9223372036854775808"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-9223372036854775808"sv, IntTok{1ull << 63, Signed}}; + Token expected{"-9223372036854775808"sv, IntTok{1ull << 63, Neg}}; EXPECT_EQ(*lexer, expected); } { @@ -267,85 +267,85 @@ TEST(LexerTest, LexHexInt) { { Lexer lexer("0x0"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0x0"sv, IntTok{0, Unsigned}}; + Token expected{"0x0"sv, IntTok{0, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+0x0"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+0x0"sv, IntTok{0, Signed}}; + Token expected{"+0x0"sv, IntTok{0, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-0x0"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-0x0"sv, IntTok{0, Signed}}; + Token expected{"-0x0"sv, IntTok{0, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("0x1"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0x1"sv, IntTok{1, Unsigned}}; + Token expected{"0x1"sv, IntTok{1, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+0x1"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+0x1"sv, IntTok{1, Signed}}; + Token expected{"+0x1"sv, IntTok{1, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-0x1"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-0x1"sv, IntTok{-1ull, Signed}}; + Token expected{"-0x1"sv, IntTok{-1ull, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("0x0010"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0x0010"sv, IntTok{16, Unsigned}}; + Token expected{"0x0010"sv, IntTok{16, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+0x0010"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+0x0010"sv, IntTok{16, Signed}}; + Token expected{"+0x0010"sv, IntTok{16, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-0x0010"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-0x0010"sv, IntTok{-16ull, Signed}}; + Token expected{"-0x0010"sv, IntTok{-16ull, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("0xabcdef"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0xabcdef"sv, IntTok{0xabcdef, Unsigned}}; + Token expected{"0xabcdef"sv, IntTok{0xabcdef, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("+0xABCDEF"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"+0xABCDEF"sv, IntTok{0xabcdef, Signed}}; + Token expected{"+0xABCDEF"sv, IntTok{0xabcdef, Pos}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("-0xAbCdEf"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"-0xAbCdEf"sv, IntTok{-0xabcdefull, Signed}}; + Token expected{"-0xAbCdEf"sv, IntTok{-0xabcdefull, Neg}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("0x12_34"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0x12_34"sv, IntTok{0x1234, Unsigned}}; + Token expected{"0x12_34"sv, IntTok{0x1234, NoSign}}; EXPECT_EQ(*lexer, expected); } { Lexer lexer("0x1_2_3_4"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0x1_2_3_4"sv, IntTok{0x1234, Unsigned}}; + Token expected{"0x1_2_3_4"sv, IntTok{0x1234, NoSign}}; EXPECT_EQ(*lexer, expected); } { @@ -378,7 +378,7 @@ TEST(LexerTest, LexFloat) { { Lexer lexer("42"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"42"sv, IntTok{42, Unsigned}}; + Token expected{"42"sv, IntTok{42, NoSign}}; EXPECT_EQ(*lexer, expected); } { @@ -539,7 +539,7 @@ TEST(LexerTest, LexHexFloat) { { Lexer lexer("0x4B"sv); ASSERT_NE(lexer, lexer.end()); - Token expected{"0x4B"sv, IntTok{0x4B, Unsigned}}; + Token expected{"0x4B"sv, IntTok{0x4B, NoSign}}; EXPECT_EQ(*lexer, expected); } { |