summaryrefslogtreecommitdiff
path: root/src/support
diff options
context:
space:
mode:
Diffstat (limited to 'src/support')
-rw-r--r-- src/support/string.cpp | 22
-rw-r--r-- src/support/string.h | 3
2 files changed, 25 insertions, 0 deletions
diff --git a/src/support/string.cpp b/src/support/string.cpp
index 31d0e9170..01fe4e522 100644
--- a/src/support/string.cpp
+++ b/src/support/string.cpp
@@ -195,9 +195,21 @@ std::optional<uint32_t> takeWTF8CodePoint(std::string_view& str) {
}
str = str.substr(1 + trailingBytes);
+
if (!valid) {
return std::nullopt;
}
+
+ size_t expectedTrailing = u < 0x80 ? 0
+ : u < 0x800 ? 1
+ : u < 0x10000 ? 2
+ : u < 0x110000 ? 3
+ : -1;
+ if (trailingBytes != expectedTrailing) {
+ // Overlong encoding or overlarge code point.
+ return std::nullopt;
+ }
+
return u;
}
@@ -404,4 +416,14 @@ std::ostream& printEscapedJSON(std::ostream& os, std::string_view str) {
return os << '"';
}
+// Returns true when `str` is valid UTF-8. Code points are decoded one at a
+// time with takeWTF8CodePoint; the string is rejected if any decode fails or
+// yields a surrogate code point (U+D800..U+DFFF). An empty string is valid.
+bool isUTF8(std::string_view str) {
+  for (std::string_view rest = str; !rest.empty();) {
+    // takeWTF8CodePoint advances `rest` past the bytes it examined.
+    const auto codePoint = takeWTF8CodePoint(rest);
+    if (!codePoint.has_value()) {
+      return false;
+    }
+    // Surrogates pass the WTF-8 decoder but are not allowed in UTF-8.
+    const bool isSurrogate = *codePoint >= 0xD800 && *codePoint <= 0xDFFF;
+    if (isSurrogate) {
+      return false;
+    }
+  }
+  return true;
+}
+
} // namespace wasm::String
diff --git a/src/support/string.h b/src/support/string.h
index af120ab4e..24eb570c2 100644
--- a/src/support/string.h
+++ b/src/support/string.h
@@ -99,6 +99,9 @@ bool convertWTF16ToWTF8(std::ostream& os, std::string_view str);
// unit. Returns `true` if the input was valid UTF-16.
bool convertUTF16ToUTF8(std::ostream& os, std::string_view str);
+// Returns `true` if `str` is valid UTF-8 (decodable, no surrogate code points).
+bool isUTF8(std::string_view str);
+
} // namespace wasm::String
#endif // wasm_support_string_h