diff options
-rw-r--r-- | scripts/fuzz_shell.js | 55 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 11 | ||||
-rw-r--r-- | test/lit/exec/strings.wast | 38 |
3 files changed, 92 insertions, 12 deletions
diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index be65ce31c..1e4068dc8 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -48,8 +48,58 @@ function printed(x, y) { // 'object', below. return 'null'; } else if (typeof x === 'string') { - // Emit a string in the same format as the binaryen interpreter. - return 'string("' + x + '")'; + // Emit a string in the same format as the binaryen interpreter. This + // escaping routine must be kept in sync with String::printEscapedJSON. + var escaped = ''; + for (u of x) { + switch (u) { + case '"': + escaped += '\\"'; + continue; + case '\\': + escaped += '\\\\'; + continue; + case '\b': + escaped += '\\b'; + continue; + case '\f': + escaped += '\\f'; + continue; + case '\n': + escaped += '\\n'; + continue; + case '\r': + escaped += '\\r'; + continue; + case '\t': + escaped += '\\t'; + continue; + default: + break; + } + + var codePoint = u.codePointAt(0); + if (32 <= codePoint && codePoint < 127) { + escaped += u; + continue + } + + var printEscape = (codePoint) => { + escaped += '\\u' + escaped += ((codePoint & 0xF000) >> 12).toString(16); + escaped += ((codePoint & 0x0F00) >> 8).toString(16); + escaped += ((codePoint & 0x00F0) >> 4).toString(16); + escaped += (codePoint & 0x000F).toString(16); + }; + + if (codePoint < 0x10000) { + printEscape(codePoint); + } else { + printEscape(0xD800 + ((codePoint - 0x10000) >> 10)); + printEscape(0xDC00 + ((codePoint - 0x10000) & 0x3FF)); + } + } + return 'string("' + escaped + '")'; } else if (typeof x === 'bigint') { // Print bigints in legalized form, which is two 32-bit numbers of the low // and high bits. @@ -146,4 +196,3 @@ for (var e in exports) { console.log('exception thrown: ' + e); } } - diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index afdc14c72..887c777ec 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -639,7 +639,7 @@ std::ostream& operator<<(std::ostream& o, Literal literal) { if (!data) { o << "nullstring"; } else { - o << "string(\""; + o << "string("; // Convert WTF-16 literals to WTF-16 string. std::stringstream wtf16; for (auto c : data->values) { @@ -648,12 +648,11 @@ std::ostream& operator<<(std::ostream& o, Literal literal) { wtf16 << uint8_t(u & 0xFF); wtf16 << uint8_t(u >> 8); } - // Convert to WTF-8 for printing. + // Escape to ensure we have valid unicode output and to make + // unprintable characters visible. // TODO: Use wtf16.view() once we have C++20. - [[maybe_unused]] bool valid = - String::convertWTF16ToWTF8(o, wtf16.str()); - assert(valid); - o << "\")"; + String::printEscapedJSON(o, wtf16.str()); + o << ")"; } break; } diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast index 106e1e214..4fb17a9e3 100644 --- a/test/lit/exec/strings.wast +++ b/test/lit/exec/strings.wast @@ -7,7 +7,7 @@ (memory 1 1) - (import "fuzzing-support" "log" (func $log (param i32))) + (import "fuzzing-support" "log-i32" (func $log (param i32))) ;; CHECK: [fuzz-exec] calling new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") @@ -280,7 +280,9 @@ (func $slice (export "slice") (result (ref string)) ;; Slicing [3:6] here should definitely output "def". (stringview_wtf16.slice - (string.const "abcdefgh") + (string.as_wtf16 + (string.const "abcdefgh") + ) (i32.const 3) (i32.const 6) ) @@ -291,7 +293,9 @@ (func $slice-big (export "slice-big") (result (ref string)) ;; Slicing [3:huge unsigned value] leads to slicing til the end: "defgh". (stringview_wtf16.slice - (string.const "abcdefgh") + (string.as_wtf16 + (string.const "abcdefgh") + ) (i32.const 3) (i32.const -1) ) @@ -337,6 +341,26 @@ (i32.const 1) ) ) + + ;; CHECK: [fuzz-exec] calling slice-unicode + ;; CHECK-NEXT: [fuzz-exec] note result: slice-unicode => string("d\u00a3f") + (func $slice-unicode (export "slice-unicode") (result (ref string)) + (stringview_wtf16.slice + ;; abcd£fgh + (string.as_wtf16 + (string.const "abcd\C2\A3fgh") + ) + (i32.const 3) + (i32.const 6) + ) + ) + + ;; CHECK: [fuzz-exec] calling concat-surrogates + ;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48") + (func $concat-surrogates (export "concat-surrogates") (result (ref string)) + ;; Concatenating these surrogates creates '𐍈'. + (string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88")) + ) ) ;; CHECK: [fuzz-exec] calling new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") @@ -423,6 +447,12 @@ ;; CHECK: [fuzz-exec] calling new_empty_oob_2 ;; CHECK-NEXT: [trap array oob] + +;; CHECK: [fuzz-exec] calling slice-unicode +;; CHECK-NEXT: [fuzz-exec] note result: slice-unicode => string("d\u00a3f") + +;; CHECK: [fuzz-exec] calling concat-surrogates +;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48") ;; CHECK-NEXT: [fuzz-exec] comparing compare.1 ;; CHECK-NEXT: [fuzz-exec] comparing compare.10 ;; CHECK-NEXT: [fuzz-exec] comparing compare.2 @@ -433,6 +463,7 @@ ;; CHECK-NEXT: [fuzz-exec] comparing compare.7 ;; CHECK-NEXT: [fuzz-exec] comparing compare.8 ;; CHECK-NEXT: [fuzz-exec] comparing compare.9 +;; CHECK-NEXT: [fuzz-exec] comparing concat-surrogates ;; CHECK-NEXT: [fuzz-exec] comparing const ;; CHECK-NEXT: [fuzz-exec] comparing encode ;; CHECK-NEXT: [fuzz-exec] comparing encode-overflow @@ -450,3 +481,4 @@ ;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] comparing slice ;; CHECK-NEXT: [fuzz-exec] comparing slice-big +;; CHECK-NEXT: [fuzz-exec] comparing slice-unicode |