diff options
-rw-r--r-- | src/tools/fuzzing.h | 3 | ||||
-rw-r--r-- | src/tools/fuzzing/fuzzing.cpp | 121 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 11 | ||||
-rw-r--r-- | test/lit/exec/strings.wast | 86 |
4 files changed, 180 insertions, 41 deletions
diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h
index 76c0e13c8..5b3c0037b 100644
--- a/src/tools/fuzzing.h
+++ b/src/tools/fuzzing.h
@@ -315,6 +315,8 @@ private:
   Expression* makeBasicRef(Type type);
   Expression* makeCompoundRef(Type type);
 
+  Expression* makeString();
+
   // Similar to makeBasic/CompoundRef, but indicates that this value will be
   // used in a place that will trap on null. For example, the reference of a
   // struct.get or array.set would use this.
@@ -378,6 +380,7 @@ private:
   Type getLoggableType();
   bool isLoggableType(Type type);
   Nullability getNullability();
+  Mutability getMutability();
   Nullability getSubType(Nullability nullability);
   HeapType getSubType(HeapType type);
   Type getSubType(Type type);
diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp
index 5a3e9002a..9698c06eb 100644
--- a/src/tools/fuzzing/fuzzing.cpp
+++ b/src/tools/fuzzing/fuzzing.cpp
@@ -2605,47 +2605,7 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) {
       return null;
     }
     case HeapType::string: {
-      // Construct an interesting WTF-8 string from parts.
-      std::stringstream wtf8;
-      bool lastWasLeadingSurrogate = false;
-      for (size_t i = 0, end = upTo(4); i < end; ++i) {
-        switch (upTo(6)) {
-          case 0:
-            // A simple ascii string.
-            wtf8 << std::to_string(upTo(1024));
-            break;
-          case 1:
-            // '£'
-            wtf8 << "\xC2\xA3";
-            break;
-          case 2:
-            // '€'
-            wtf8 << "\xE2\x82\xAC";
-            break;
-          case 3:
-            // '𐍈'
-            wtf8 << "\xF0\x90\x8D\x88";
-            break;
-          case 4:
-            // The leading surrogate in '𐍈'
-            wtf8 << "\xED\xA0\x80";
-            lastWasLeadingSurrogate = true;
-            continue;
-          case 5:
-            if (lastWasLeadingSurrogate) {
-              // Avoid invalid WTF-8.
-              continue;
-            }
-            // The trailing surrogate in '𐍈'
-            wtf8 << "\xED\xBD\x88";
-            break;
-        }
-        lastWasLeadingSurrogate = false;
-      }
-      std::stringstream wtf16;
-      // TODO: Use wtf16.view() once we have C++20.
-      String::convertWTF8ToWTF16(wtf16, wtf8.str());
-      return builder.makeStringConst(wtf16.str());
+      return makeString();
     }
     case HeapType::stringview_wtf16:
       // We fully support wtf16 strings.
@@ -2760,6 +2720,81 @@ Expression* TranslateToFuzzReader::makeCompoundRef(Type type) {
   }
 }
 
+Expression* TranslateToFuzzReader::makeString() {
+  // Fuzz with JS-style strings.
+  auto mutability = getMutability();
+  auto arrayHeapType =
+    HeapType(Array(Field(Field::PackedType::i16, mutability)));
+  auto nullability = getNullability();
+  auto arrayType = Type(arrayHeapType, nullability);
+  switch (upTo(3)) {
+    case 0: {
+      // Make a string from an array. We can only do this in functions.
+      if (funcContext) {
+        auto array = make(arrayType);
+        auto* start = make(Type::i32);
+        auto* end = make(Type::i32);
+        return builder.makeStringNew(
+          StringNewWTF16Array, array, start, end, false);
+      }
+      [[fallthrough]];
+    }
+    case 1: {
+      // Make a string from a code point. We can only do this in functions.
+      if (funcContext) {
+        auto codePoint = make(Type::i32);
+        return builder.makeStringNew(
+          StringNewFromCodePoint, codePoint, nullptr, false);
+      }
+      [[fallthrough]];
+    }
+    case 2: {
+      // Construct an interesting WTF-8 string from parts and use string.const.
+      std::stringstream wtf8;
+      bool lastWasLeadingSurrogate = false;
+      for (size_t i = 0, end = upTo(4); i < end; ++i) {
+        switch (upTo(6)) {
+          case 0:
+            // A simple ascii string.
+            wtf8 << std::to_string(upTo(1024));
+            break;
+          case 1:
+            // '£'
+            wtf8 << "\xC2\xA3";
+            break;
+          case 2:
+            // '€'
+            wtf8 << "\xE2\x82\xAC";
+            break;
+          case 3:
+            // '𐍈'
+            wtf8 << "\xF0\x90\x8D\x88";
+            break;
+          case 4:
+            // The leading surrogate in '𐍈'
+            wtf8 << "\xED\xA0\x80";
+            lastWasLeadingSurrogate = true;
+            continue;
+          case 5:
+            if (lastWasLeadingSurrogate) {
+              // Avoid invalid WTF-8.
+              continue;
+            }
+            // The trailing surrogate in '𐍈'
+            wtf8 << "\xED\xBD\x88";
+            break;
+        }
+        lastWasLeadingSurrogate = false;
+      }
+      std::stringstream wtf16;
+      // TODO: Use wtf16.view() once we have C++20.
+      String::convertWTF8ToWTF16(wtf16, wtf8.str());
+      return builder.makeStringConst(wtf16.str());
+    }
+  }
+  WASM_UNREACHABLE("bad switch");
+}
+
 Expression* TranslateToFuzzReader::makeTrappingRefUse(HeapType type) {
   auto percent = upTo(100);
   // Only give a low probability to emit a nullable reference.
@@ -4071,6 +4106,10 @@ Nullability TranslateToFuzzReader::getNullability() {
   return Nullable;
 }
 
+Mutability TranslateToFuzzReader::getMutability() {
+  return oneIn(2) ? Mutable : Immutable;
+}
+
 Nullability TranslateToFuzzReader::getSubType(Nullability nullability) {
   if (nullability == NonNullable) {
     return NonNullable;
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index 64c0bfb2d..0afbaba94 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -33,6 +33,7 @@
 #include "support/bits.h"
 #include "support/safe_integer.h"
 #include "support/stdckdint.h"
+#include "support/string.h"
 #include "wasm-builder.h"
 #include "wasm-traversal.h"
 #include "wasm.h"
@@ -1898,6 +1899,16 @@ public:
         }
         return makeGCData(contents, curr->type);
       }
+      case StringNewFromCodePoint: {
+        uint32_t codePoint = ptr.getSingleValue().getUnsigned();
+        if (codePoint > 0x10FFFF) {
+          trap("invalid code point");
+        }
+        std::stringstream wtf16;
+        String::writeWTF16CodePoint(wtf16, codePoint);
+        std::string str = wtf16.str();
+        return Literal(str);
+      }
       default:
         // TODO: others
         return Flow(NONCONSTANT_FLOW);
diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast
index c67436c98..0f3183ac6 100644
--- a/test/lit/exec/strings.wast
+++ b/test/lit/exec/strings.wast
@@ -414,6 +414,64 @@
     ;; Concatenating these surrogates creates '𐍈'.
     (string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88"))
   )
+
+  ;; CHECK:      [fuzz-exec] calling string.from_code_point
+  ;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
+  (func $string.from_code_point (export "string.from_code_point") (result stringref)
+    (string.from_code_point
+      (i32.const 65)
+    )
+  )
+
+  ;; CHECK:      [fuzz-exec] calling unsigned_code_point
+  ;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
+  (func $unsigned_code_point (export "unsigned_code_point") (result stringref)
+    (string.from_code_point
+      ;; This must be interpreted as unsigned, that is, in the escaped output
+      ;; the top byte is 0.
+      (i32.const 147)
+    )
+  )
+
+  ;; CHECK:      [fuzz-exec] calling weird_code_point
+  ;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
+  (func $weird_code_point (export "weird_code_point") (result stringref)
+    (string.from_code_point
+      (i32.const 0x3e8)
+    )
+  )
+
+  ;; CHECK:      [fuzz-exec] calling isolated_high_code_point
+  ;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
+  (func $isolated_high_code_point (export "isolated_high_code_point") (result stringref)
+    (string.from_code_point
+      (i32.const 0xD800)
+    )
+  )
+
+  ;; CHECK:      [fuzz-exec] calling isolated_low_code_point
+  ;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
+  (func $isolated_low_code_point (export "isolated_low_code_point") (result stringref)
+    (string.from_code_point
+      (i32.const 0xDC00)
+    )
+  )
+
+  ;; CHECK:      [fuzz-exec] calling surrogate_pair_code_point
+  ;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\ud800\udf48")
+  (func $surrogate_pair_code_point (export "surrogate_pair_code_point") (result stringref)
+    (string.from_code_point
+      (i32.const 0x10348) ;; '𐍈' (U+10348): outside the BMP, so a surrogate pair
+    )
+  )
+
+  ;; CHECK:      [fuzz-exec] calling invalid_code_point
+  ;; CHECK-NEXT: [trap invalid code point]
+  (func $invalid_code_point (export "invalid_code_point") (result stringref)
+    (string.from_code_point
+      (i32.const -83)
+    )
+  )
 )
 ;; CHECK:      [fuzz-exec] calling new_wtf16_array
 ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
@@ -518,6 +576,27 @@
 
 ;; CHECK:      [fuzz-exec] calling concat-surrogates
 ;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48")
+
+;; CHECK:      [fuzz-exec] calling string.from_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
+
+;; CHECK:      [fuzz-exec] calling unsigned_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
+
+;; CHECK:      [fuzz-exec] calling weird_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
+
+;; CHECK:      [fuzz-exec] calling isolated_high_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
+
+;; CHECK:      [fuzz-exec] calling isolated_low_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
+
+;; CHECK:      [fuzz-exec] calling surrogate_pair_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\ud800\udf48")
+
+;; CHECK:      [fuzz-exec] calling invalid_code_point
+;; CHECK-NEXT: [trap invalid code point]
 ;; CHECK-NEXT: [fuzz-exec] comparing compare.1
 ;; CHECK-NEXT: [fuzz-exec] comparing compare.10
 ;; CHECK-NEXT: [fuzz-exec] comparing compare.2
@@ -540,6 +619,9 @@
 ;; CHECK-NEXT: [fuzz-exec] comparing eq.5
 ;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit
 ;; CHECK-NEXT: [fuzz-exec] comparing get_length
+;; CHECK-NEXT: [fuzz-exec] comparing invalid_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing isolated_high_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing isolated_low_code_point
 ;; CHECK-NEXT: [fuzz-exec] comparing new_2
 ;; CHECK-NEXT: [fuzz-exec] comparing new_4
 ;; CHECK-NEXT: [fuzz-exec] comparing new_empty
@@ -551,3 +633,7 @@
 ;; CHECK-NEXT: [fuzz-exec] comparing slice
 ;; CHECK-NEXT: [fuzz-exec] comparing slice-big
 ;; CHECK-NEXT: [fuzz-exec] comparing slice-unicode
+;; CHECK-NEXT: [fuzz-exec] comparing string.from_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing surrogate_pair_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing unsigned_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing weird_code_point