diff options
author | Thomas Lively <tlively@google.com> | 2024-03-22 17:40:49 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-23 00:40:49 +0000 |
commit | 47bcca6783b64b64560b1c1196a1f5e1a98e108a (patch) | |
tree | 505060ed6927a798f8eb9ea8235a22636262789f /src | |
parent | 81b8497164cff31af3bbd66f8b2aee55503b74cb (diff) | |
download | binaryen-47bcca6783b64b64560b1c1196a1f5e1a98e108a.tar.gz binaryen-47bcca6783b64b64560b1c1196a1f5e1a98e108a.tar.bz2 binaryen-47bcca6783b64b64560b1c1196a1f5e1a98e108a.zip |
Generate interesting strings in fuzzer (#6430)
Instead of generating exclusively ASCII strings, generate empty strings and
strings containing various Unicode characters and unpaired surrogates as well.
Diffstat (limited to 'src')
-rw-r--r-- | src/tools/fuzzing/fuzzing.cpp | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index c62114c3f..980427d81 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2467,10 +2467,46 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) { return null; } case HeapType::string: { - auto wtf8 = std::to_string(upTo(1024)); + // Construct an interesting WTF-8 string from parts. + std::stringstream wtf8; + bool lastWasLeadingSurrogate = false; + for (size_t i = 0, end = upTo(4); i < end; ++i) { + switch (upTo(6)) { + case 0: + // A simple ascii string. + wtf8 << std::to_string(upTo(1024)); + break; + case 1: + // '£' + wtf8 << "\xC2\xA3"; + break; + case 2: + // '€' + wtf8 << "\xE2\x82\xAC"; + break; + case 3: + // '𐍈' + wtf8 << "\xF0\x90\x8D\x88"; + break; + case 4: + // The leading surrogate in '𐍈' + wtf8 << "\xED\xA0\x80"; + lastWasLeadingSurrogate = true; + continue; + case 5: + if (lastWasLeadingSurrogate) { + // Avoid invalid WTF-8. + continue; + } + // The trailing surrogate in '𐍈' + wtf8 << "\xED\xBD\x88"; + break; + } + lastWasLeadingSurrogate = false; + } std::stringstream wtf16; - String::convertWTF8ToWTF16(wtf16, wtf8); // TODO: Use wtf16.view() once we have C++20. + String::convertWTF8ToWTF16(wtf16, wtf8.str()); return builder.makeStringConst(wtf16.str()); } case HeapType::stringview_wtf8: |