summaryrefslogtreecommitdiff
path: root/src/support/string.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/support/string.cpp')
-rw-r--r--src/support/string.cpp88
1 files changed, 87 insertions, 1 deletions
diff --git a/src/support/string.cpp b/src/support/string.cpp
index 09924849b..75dd1716c 100644
--- a/src/support/string.cpp
+++ b/src/support/string.cpp
@@ -106,7 +106,7 @@ std::string trim(const std::string& input) {
return input.substr(0, size);
}
-std::ostream& printEscaped(std::ostream& os, std::string_view str) {
+std::ostream& printEscaped(std::ostream& os, const std::string_view str) {
os << '"';
for (unsigned char c : str) {
switch (c) {
@@ -140,4 +140,90 @@ std::ostream& printEscaped(std::ostream& os, std::string_view str) {
return os << '"';
}
+std::ostream& printEscapedJSON(std::ostream& os, const std::string_view str) {
+ os << '"';
+ for (size_t i = 0; i < str.size(); i++) {
+ int u0 = str[i];
+ switch (u0) {
+ case '\t':
+ os << "\\t";
+ continue;
+ case '\n':
+ os << "\\n";
+ continue;
+ case '\r':
+ os << "\\r";
+ continue;
+ case '"':
+ os << "\\\"";
+ continue;
+ case '\'':
+ os << "'";
+ continue;
+ case '\\':
+ os << "\\\\";
+ continue;
+ default: {
+ // Emit something like \u006e, the JSON escaping of a 16-bit number.
+ auto uEscape = [&](uint32_t v) {
+ if (v > 0xffff) {
+ std::cerr << "warning: Bad 16-bit escapee " << int(u0) << '\n';
+ }
+ os << std::hex;
+ os << "\\u";
+ os << ((v >> 12) & 0xf);
+ os << ((v >> 8) & 0xf);
+ os << ((v >> 4) & 0xf);
+ os << (v & 0xf);
+ os << std::dec;
+ };
+
+ // Based off of
+ // https://github.com/emscripten-core/emscripten/blob/59e6b8f1262d75585d8416b728e8cbb3db176fe2/src/library_strings.js#L72-L91
+ if (!(u0 & 0x80)) {
+ if (u0 >= 32 && u0 < 127) {
+ // This requires no escaping at all.
+ os << char(u0);
+ } else {
+ uEscape(u0);
+ }
+ continue;
+ }
+
+ // This uses 2 bytes.
+ i++;
+ int u1 = str[i] & 63;
+ if ((u0 & 0xE0) == 0xC0) {
+ uEscape((((u0 & 31) << 6) | u1));
+ continue;
+ }
+
+ // This uses 3 bytes.
+ i++;
+ int u2 = str[i] & 63;
+ if ((u0 & 0xF0) == 0xE0) {
+ u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
+ } else {
+ // This uses 4 bytes.
+ if ((u0 & 0xF8) != 0xF0) {
+ std::cerr << "warning: Bad UTF-8 leading byte " << int(u0) << '\n';
+ }
+ i++;
+ u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (str[i] & 63);
+ }
+
+ if (u0 < 0x10000) {
+ uEscape(u0);
+ } else {
+ // There are two separate code points here.
+ auto ch = u0 - 0x10000;
+ uEscape(0xD800 | (ch >> 10));
+ uEscape(0xDC00 | (ch & 0x3FF));
+ }
+ }
+ }
+ }
+ return os << '"';
+}
+
} // namespace wasm::String