summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/parser/lexer.h 9
-rw-r--r-- src/support/string.cpp 22
-rw-r--r-- src/support/string.h 3
-rw-r--r-- src/wasm-binary.h 2
-rw-r--r-- src/wasm/wasm-binary.cpp 8
5 files changed, 36 insertions, 8 deletions
diff --git a/src/parser/lexer.h b/src/parser/lexer.h
index 83cbcfc53..37c3fe04a 100644
--- a/src/parser/lexer.h
+++ b/src/parser/lexer.h
@@ -25,6 +25,7 @@
#include "support/name.h"
#include "support/result.h"
+#include "support/string.h"
#ifndef parser_lexer_h
#define parser_lexer_h
@@ -124,11 +125,11 @@ public:
std::optional<std::string> takeString();
std::optional<Name> takeName() { // Takes a string and wraps it in a Name; std::nullopt on failure.
- // TODO: Validate UTF.
- if (auto str = takeString()) {
- return Name(*str);
+ auto str = takeString();
+ if (!str || !String::isUTF8(*str)) { // No string was taken, or it is not valid UTF-8.
+ return std::nullopt;
}
- return std::nullopt;
+ return Name(*str);
}
bool takeSExprStart(std::string_view expected) {
diff --git a/src/support/string.cpp b/src/support/string.cpp
index 31d0e9170..01fe4e522 100644
--- a/src/support/string.cpp
+++ b/src/support/string.cpp
@@ -195,9 +195,21 @@ std::optional<uint32_t> takeWTF8CodePoint(std::string_view& str) {
}
str = str.substr(1 + trailingBytes);
+
if (!valid) {
return std::nullopt;
}
+
+ size_t expectedTrailing = u < 0x80 ? 0
+ : u < 0x800 ? 1
+ : u < 0x10000 ? 2
+ : u < 0x110000 ? 3
+ : -1;
+ if (trailingBytes != expectedTrailing) {
+ // Overlong encoding or overlarge code point.
+ return std::nullopt;
+ }
+
return u;
}
@@ -404,4 +416,14 @@ std::ostream& printEscapedJSON(std::ostream& os, std::string_view str) {
return os << '"';
}
+bool isUTF8(std::string_view str) { // True iff every code point decodes and none is a surrogate.
+ while (str.size()) {
+ auto u = takeWTF8CodePoint(str); // Consumes the leading code point from `str`.
+ if (!u || (0xD800 <= *u && *u < 0xE000)) { // Decode failure, or a surrogate (U+D800..U+DFFF).
+ return false;
+ }
+ }
+ return true; // Also reached immediately for an empty string.
+}
+
} // namespace wasm::String
diff --git a/src/support/string.h b/src/support/string.h
index af120ab4e..24eb570c2 100644
--- a/src/support/string.h
+++ b/src/support/string.h
@@ -99,6 +99,9 @@ bool convertWTF16ToWTF8(std::ostream& os, std::string_view str);
// unit. Returns `true` if the input was valid UTF-16.
bool convertUTF16ToUTF8(std::ostream& os, std::string_view str);
+// Whether the string is valid UTF-8; surrogate code points (U+D800..U+DFFF) are not accepted.
+bool isUTF8(std::string_view str);
+
} // namespace wasm::String
#endif // wasm_support_string_h
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 46d81064e..9ce9bf181 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1505,7 +1505,7 @@ public:
HeapType getIndexedHeapType();
Type getConcreteType();
- Name getInlineString();
+ Name getInlineString(bool requireValid = true);
void verifyInt8(int8_t x);
void verifyInt16(int16_t x);
void verifyInt32(int32_t x);
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index cd1ca7dfd..dc97dbbc3 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -2201,11 +2201,13 @@ Type WasmBinaryReader::getConcreteType() {
return type;
}
-Name WasmBinaryReader::getInlineString() {
+Name WasmBinaryReader::getInlineString(bool requireValid) { // Reads a length-prefixed string and returns it as a Name.
BYN_TRACE("<==\n");
auto len = getU32LEB(); // U32 LEB value used as the length of the byte view below.
auto data = getByteView(len);
-
+ if (requireValid && !String::isUTF8(data)) { // The UTF-8 check is skipped when requireValid is false.
+ throwError("invalid UTF-8 string");
+ }
BYN_TRACE("getInlineString: " << data << " ==>\n");
return Name(data);
}
@@ -3027,7 +3029,7 @@ void WasmBinaryReader::readStrings() {
}
size_t num = getU32LEB();
for (size_t i = 0; i < num; i++) {
- auto string = getInlineString();
+ auto string = getInlineString(false);
// Re-encode from WTF-8 to WTF-16.
std::stringstream wtf16;
if (!String::convertWTF8ToWTF16(wtf16, string.str)) {