diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/lexer.h | 9 | ||||
-rw-r--r-- | src/support/string.cpp | 22 | ||||
-rw-r--r-- | src/support/string.h | 3 | ||||
-rw-r--r-- | src/wasm-binary.h | 2 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 8 |
5 files changed, 36 insertions, 8 deletions
diff --git a/src/parser/lexer.h b/src/parser/lexer.h
index 83cbcfc53..37c3fe04a 100644
--- a/src/parser/lexer.h
+++ b/src/parser/lexer.h
@@ -25,6 +25,7 @@
 
 #include "support/name.h"
 #include "support/result.h"
+#include "support/string.h"
 
 #ifndef parser_lexer_h
 #define parser_lexer_h
@@ -124,11 +125,11 @@ public:
   std::optional<std::string> takeString();
 
   std::optional<Name> takeName() {
-    // TODO: Validate UTF.
-    if (auto str = takeString()) {
-      return Name(*str);
+    auto str = takeString();
+    if (!str || !String::isUTF8(*str)) {
+      return std::nullopt;
     }
-    return std::nullopt;
+    return Name(*str);
   }
 
   bool takeSExprStart(std::string_view expected) {
diff --git a/src/support/string.cpp b/src/support/string.cpp
index 31d0e9170..01fe4e522 100644
--- a/src/support/string.cpp
+++ b/src/support/string.cpp
@@ -195,9 +195,21 @@ std::optional<uint32_t> takeWTF8CodePoint(std::string_view& str) {
   }
 
   str = str.substr(1 + trailingBytes);
+
   if (!valid) {
     return std::nullopt;
   }
+
+  size_t expectedTrailing = u < 0x80       ? 0
+                            : u < 0x800    ? 1
+                            : u < 0x10000  ? 2
+                            : u < 0x110000 ? 3
+                                           : -1;
+  if (trailingBytes != expectedTrailing) {
+    // Overlong encoding or overlarge code point.
+    return std::nullopt;
+  }
+
   return u;
 }
 
@@ -404,4 +416,14 @@ std::ostream& printEscapedJSON(std::ostream& os, std::string_view str) {
   return os << '"';
 }
 
+bool isUTF8(std::string_view str) {
+  while (str.size()) {
+    auto u = takeWTF8CodePoint(str);
+    if (!u || (0xD800 <= *u && *u < 0xE000)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 } // namespace wasm::String
diff --git a/src/support/string.h b/src/support/string.h
index af120ab4e..24eb570c2 100644
--- a/src/support/string.h
+++ b/src/support/string.h
@@ -99,6 +99,9 @@ bool convertWTF16ToWTF8(std::ostream& os, std::string_view str);
 // unit. Returns `true` if the input was valid UTF-16.
 bool convertUTF16ToUTF8(std::ostream& os, std::string_view str);
 
+// Whether the string is valid UTF-8.
+bool isUTF8(std::string_view str);
+
 } // namespace wasm::String
 
 #endif // wasm_support_string_h
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 46d81064e..9ce9bf181 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1505,7 +1505,7 @@ public:
 
   HeapType getIndexedHeapType();
   Type getConcreteType();
-  Name getInlineString();
+  Name getInlineString(bool requireValid = true);
   void verifyInt8(int8_t x);
   void verifyInt16(int16_t x);
   void verifyInt32(int32_t x);
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index cd1ca7dfd..dc97dbbc3 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -2201,11 +2201,13 @@ Type WasmBinaryReader::getConcreteType() {
   return type;
 }
 
-Name WasmBinaryReader::getInlineString() {
+Name WasmBinaryReader::getInlineString(bool requireValid) {
   BYN_TRACE("<==\n");
   auto len = getU32LEB();
   auto data = getByteView(len);
-
+  if (requireValid && !String::isUTF8(data)) {
+    throwError("invalid UTF-8 string");
+  }
   BYN_TRACE("getInlineString: " << data << " ==>\n");
   return Name(data);
 }
@@ -3027,7 +3029,7 @@ void WasmBinaryReader::readStrings() {
   }
   size_t num = getU32LEB();
   for (size_t i = 0; i < num; i++) {
-    auto string = getInlineString();
+    auto string = getInlineString(false);
     // Re-encode from WTF-8 to WTF-16.
     std::stringstream wtf16;
     if (!String::convertWTF8ToWTF16(wtf16, string.str)) {