diff options
author | Thomas Lively <tlively@google.com> | 2024-03-26 10:44:37 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-26 10:44:37 -0700 |
commit | 431e858c4f4ac0343914eb42196f8bb64ac99023 (patch) | |
tree | 7071e42b72b2cb49c9a15845c5fe1675d3ebd4bf | |
parent | c9a5da466df084da5c0bbcb03b56aa1bd9585dcd (diff) | |
download | binaryen-431e858c4f4ac0343914eb42196f8bb64ac99023.tar.gz binaryen-431e858c4f4ac0343914eb42196f8bb64ac99023.tar.bz2 binaryen-431e858c4f4ac0343914eb42196f8bb64ac99023.zip |
[Strings] Escape strings printed by fuzz-exec (#6441)
Previously we printed strings as WTF-8 in the output of fuzz-exec, but this
could produce invalid Unicode output and did not make unprintable characters
visible. Fix both of these problems by escaping the output, using the JSON string
escape procedure since the string to be escaped is WTF-16. Reimplement the same
escaping procedure in fuzz_shell.js so that the way we print strings when
running on a real JS engine matches the way we print them in our own fuzz-exec
interpreter.
Fixes #6435.
-rw-r--r-- | scripts/fuzz_shell.js | 55 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 11 | ||||
-rw-r--r-- | test/lit/exec/strings.wast | 38 |
3 files changed, 92 insertions, 12 deletions
diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index be65ce31c..1e4068dc8 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -48,8 +48,58 @@ function printed(x, y) { // 'object', below. return 'null'; } else if (typeof x === 'string') { - // Emit a string in the same format as the binaryen interpreter. - return 'string("' + x + '")'; + // Emit a string in the same format as the binaryen interpreter. This + // escaping routine must be kept in sync with String::printEscapedJSON. + var escaped = ''; + for (u of x) { + switch (u) { + case '"': + escaped += '\\"'; + continue; + case '\\': + escaped += '\\\\'; + continue; + case '\b': + escaped += '\\b'; + continue; + case '\f': + escaped += '\\f'; + continue; + case '\n': + escaped += '\\n'; + continue; + case '\r': + escaped += '\\r'; + continue; + case '\t': + escaped += '\\t'; + continue; + default: + break; + } + + var codePoint = u.codePointAt(0); + if (32 <= codePoint && codePoint < 127) { + escaped += u; + continue + } + + var printEscape = (codePoint) => { + escaped += '\\u' + escaped += ((codePoint & 0xF000) >> 12).toString(16); + escaped += ((codePoint & 0x0F00) >> 8).toString(16); + escaped += ((codePoint & 0x00F0) >> 4).toString(16); + escaped += (codePoint & 0x000F).toString(16); + }; + + if (codePoint < 0x10000) { + printEscape(codePoint); + } else { + printEscape(0xD800 + ((codePoint - 0x10000) >> 10)); + printEscape(0xDC00 + ((codePoint - 0x10000) & 0x3FF)); + } + } + return 'string("' + escaped + '")'; } else if (typeof x === 'bigint') { // Print bigints in legalized form, which is two 32-bit numbers of the low // and high bits. 
@@ -146,4 +196,3 @@ for (var e in exports) { console.log('exception thrown: ' + e); } } - diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index afdc14c72..887c777ec 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -639,7 +639,7 @@ std::ostream& operator<<(std::ostream& o, Literal literal) { if (!data) { o << "nullstring"; } else { - o << "string(\""; + o << "string("; // Convert WTF-16 literals to WTF-16 string. std::stringstream wtf16; for (auto c : data->values) { @@ -648,12 +648,11 @@ std::ostream& operator<<(std::ostream& o, Literal literal) { wtf16 << uint8_t(u & 0xFF); wtf16 << uint8_t(u >> 8); } - // Convert to WTF-8 for printing. + // Escape to ensure we have valid unicode output and to make + // unprintable characters visible. // TODO: Use wtf16.view() once we have C++20. - [[maybe_unused]] bool valid = - String::convertWTF16ToWTF8(o, wtf16.str()); - assert(valid); - o << "\")"; + String::printEscapedJSON(o, wtf16.str()); + o << ")"; } break; } diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast index 106e1e214..4fb17a9e3 100644 --- a/test/lit/exec/strings.wast +++ b/test/lit/exec/strings.wast @@ -7,7 +7,7 @@ (memory 1 1) - (import "fuzzing-support" "log" (func $log (param i32))) + (import "fuzzing-support" "log-i32" (func $log (param i32))) ;; CHECK: [fuzz-exec] calling new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") @@ -280,7 +280,9 @@ (func $slice (export "slice") (result (ref string)) ;; Slicing [3:6] here should definitely output "def". (stringview_wtf16.slice - (string.const "abcdefgh") + (string.as_wtf16 + (string.const "abcdefgh") + ) (i32.const 3) (i32.const 6) ) @@ -291,7 +293,9 @@ (func $slice-big (export "slice-big") (result (ref string)) ;; Slicing [3:huge unsigned value] leads to slicing til the end: "defgh". 
(stringview_wtf16.slice - (string.const "abcdefgh") + (string.as_wtf16 + (string.const "abcdefgh") + ) (i32.const 3) (i32.const -1) ) @@ -337,6 +341,26 @@ (i32.const 1) ) ) + + ;; CHECK: [fuzz-exec] calling slice-unicode + ;; CHECK-NEXT: [fuzz-exec] note result: slice-unicode => string("d\u00a3f") + (func $slice-unicode (export "slice-unicode") (result (ref string)) + (stringview_wtf16.slice + ;; abcd£fgh + (string.as_wtf16 + (string.const "abcd\C2\A3fgh") + ) + (i32.const 3) + (i32.const 6) + ) + ) + + ;; CHECK: [fuzz-exec] calling concat-surrogates + ;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48") + (func $concat-surrogates (export "concat-surrogates") (result (ref string)) + ;; Concatenating these surrogates creates '𐍈'. + (string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88")) + ) ) ;; CHECK: [fuzz-exec] calling new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") @@ -423,6 +447,12 @@ ;; CHECK: [fuzz-exec] calling new_empty_oob_2 ;; CHECK-NEXT: [trap array oob] + +;; CHECK: [fuzz-exec] calling slice-unicode +;; CHECK-NEXT: [fuzz-exec] note result: slice-unicode => string("d\u00a3f") + +;; CHECK: [fuzz-exec] calling concat-surrogates +;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48") ;; CHECK-NEXT: [fuzz-exec] comparing compare.1 ;; CHECK-NEXT: [fuzz-exec] comparing compare.10 ;; CHECK-NEXT: [fuzz-exec] comparing compare.2 @@ -433,6 +463,7 @@ ;; CHECK-NEXT: [fuzz-exec] comparing compare.7 ;; CHECK-NEXT: [fuzz-exec] comparing compare.8 ;; CHECK-NEXT: [fuzz-exec] comparing compare.9 +;; CHECK-NEXT: [fuzz-exec] comparing concat-surrogates ;; CHECK-NEXT: [fuzz-exec] comparing const ;; CHECK-NEXT: [fuzz-exec] comparing encode ;; CHECK-NEXT: [fuzz-exec] comparing encode-overflow @@ -450,3 +481,4 @@ ;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] comparing slice ;; CHECK-NEXT: [fuzz-exec] comparing slice-big +;; 
CHECK-NEXT: [fuzz-exec] comparing slice-unicode |