From b3fea30f84fef3ff7aa77775e00b83ba62d997cc Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 22 Mar 2024 16:56:33 -0700 Subject: [Strings] Represent string values as WTF-16 internally (#6418) WTF-16, i.e. arbitrary sequences of 16-bit values, is the encoding of Java and JavaScript strings, and using the same encoding makes the interpretation of string operations trivial, even when accounting for non-ASCII characters. Specifically, use little-endian WTF-16. Re-encode string constants from WTF-8 to WTF-16 in the parsers, then back to WTF-8 in the writers. Update the constructor for string `Literal`s to interpret the string as WTF-16 and store a sequence of WTF-16 code units, i.e. 16-bit integers. Update `Builder::makeConstantExpression` accordingly to convert from the new `Literal` string representation back to a WTF-16 string. Update the interpreter to remove the logic for detecting non-ASCII characters and bailing out. The naive implementations of all the string operations are correct now that our string encoding matches the JS string encoding. --- src/tools/fuzzing/fuzzing.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/tools/fuzzing/fuzzing.cpp') diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 1c4ee4cc5..c62114c3f 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -20,6 +20,7 @@ #include "ir/module-utils.h" #include "ir/subtypes.h" #include "ir/type-updating.h" +#include "support/string.h" #include "tools/fuzzing/heap-types.h" #include "tools/fuzzing/parameters.h" @@ -2465,8 +2466,13 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) { } return null; } - case HeapType::string: - return builder.makeStringConst(std::to_string(upTo(1024))); + case HeapType::string: { + auto wtf8 = std::to_string(upTo(1024)); + std::stringstream wtf16; + String::convertWTF8ToWTF16(wtf16, wtf8); + // TODO: Use wtf16.view() once we have C++20. 
+ return builder.makeStringConst(wtf16.str()); + } case HeapType::stringview_wtf8: return builder.makeStringAs( StringAsWTF8, makeBasicRef(Type(HeapType::string, NonNullable))); -- cgit v1.2.3