summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/tools/fuzzing.h3
-rw-r--r--src/tools/fuzzing/fuzzing.cpp121
-rw-r--r--src/wasm-interpreter.h11
-rw-r--r--test/lit/exec/strings.wast86
4 files changed, 180 insertions, 41 deletions
diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h
index 76c0e13c8..5b3c0037b 100644
--- a/src/tools/fuzzing.h
+++ b/src/tools/fuzzing.h
@@ -315,6 +315,8 @@ private:
Expression* makeBasicRef(Type type);
Expression* makeCompoundRef(Type type);
+ Expression* makeString();
+
// Similar to makeBasic/CompoundRef, but indicates that this value will be
// used in a place that will trap on null. For example, the reference of a
// struct.get or array.set would use this.
@@ -378,6 +380,7 @@ private:
Type getLoggableType();
bool isLoggableType(Type type);
Nullability getNullability();
+ Mutability getMutability();
Nullability getSubType(Nullability nullability);
HeapType getSubType(HeapType type);
Type getSubType(Type type);
diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp
index 5a3e9002a..9698c06eb 100644
--- a/src/tools/fuzzing/fuzzing.cpp
+++ b/src/tools/fuzzing/fuzzing.cpp
@@ -2605,47 +2605,7 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) {
return null;
}
case HeapType::string: {
- // Construct an interesting WTF-8 string from parts.
- std::stringstream wtf8;
- bool lastWasLeadingSurrogate = false;
- for (size_t i = 0, end = upTo(4); i < end; ++i) {
- switch (upTo(6)) {
- case 0:
- // A simple ascii string.
- wtf8 << std::to_string(upTo(1024));
- break;
- case 1:
- // '£'
- wtf8 << "\xC2\xA3";
- break;
- case 2:
- // '€'
- wtf8 << "\xE2\x82\xAC";
- break;
- case 3:
- // '𐍈'
- wtf8 << "\xF0\x90\x8D\x88";
- break;
- case 4:
- // The leading surrogate in '𐍈'
- wtf8 << "\xED\xA0\x80";
- lastWasLeadingSurrogate = true;
- continue;
- case 5:
- if (lastWasLeadingSurrogate) {
- // Avoid invalid WTF-8.
- continue;
- }
- // The trailing surrogate in '𐍈'
- wtf8 << "\xED\xBD\x88";
- break;
- }
- lastWasLeadingSurrogate = false;
- }
- std::stringstream wtf16;
- // TODO: Use wtf16.view() once we have C++20.
- String::convertWTF8ToWTF16(wtf16, wtf8.str());
- return builder.makeStringConst(wtf16.str());
+ return makeString();
}
case HeapType::stringview_wtf16:
// We fully support wtf16 strings.
@@ -2760,6 +2720,81 @@ Expression* TranslateToFuzzReader::makeCompoundRef(Type type) {
}
}
+Expression* TranslateToFuzzReader::makeString() {
+ // Build a random expression that produces a string. Fuzz with JS-style strings: the array form below uses packed i16 elements with a random mutability and nullability.
+ auto mutability = getMutability();
+ auto arrayHeapType =
+ HeapType(Array(Field(Field::PackedType::i16, mutability)));
+ auto nullability = getNullability();
+ auto arrayType = Type(arrayHeapType, nullability);
+ switch (upTo(3)) { // Three strategies; cases 0 and 1 fall through to the next one when there is no function context.
+ case 0: {
+ // Make a string from an array. We can only do this in functions.
+ if (funcContext) {
+ auto array = make(arrayType);
+ auto* start = make(Type::i32);
+ auto* end = make(Type::i32);
+ return builder.makeStringNew(
+ StringNewWTF16Array, array, start, end, false);
+ }
+ [[fallthrough]]; // No function context: try the next strategy.
+ }
+ case 1: {
+ // Make a string from a code point. We can only do this in functions.
+ if (funcContext) {
+ auto codePoint = make(Type::i32);
+ return builder.makeStringNew(
+ StringNewFromCodePoint, codePoint, nullptr, false);
+ }
+ [[fallthrough]]; // No function context: fall back to a constant string.
+ }
+ case 2: {
+ // Construct an interesting WTF-8 string from parts and use string.const.
+ std::stringstream wtf8;
+ bool lastWasLeadingSurrogate = false; // True right after a lone leading surrogate was emitted.
+ for (size_t i = 0, end = upTo(4); i < end; ++i) {
+ switch (upTo(6)) {
+ case 0:
+ // A simple ascii string.
+ wtf8 << std::to_string(upTo(1024));
+ break;
+ case 1:
+ // '£'
+ wtf8 << "\xC2\xA3";
+ break;
+ case 2:
+ // '€'
+ wtf8 << "\xE2\x82\xAC";
+ break;
+ case 3:
+ // '𐍈'
+ wtf8 << "\xF0\x90\x8D\x88";
+ break;
+ case 4:
+ // The leading surrogate in '𐍈'
+ wtf8 << "\xED\xA0\x80";
+ lastWasLeadingSurrogate = true;
+ continue; // Skips the flag reset at the end of the loop body.
+ case 5:
+ if (lastWasLeadingSurrogate) {
+ // Avoid invalid WTF-8.
+ continue; // Emits nothing and leaves the flag set.
+ }
+ // The trailing surrogate in '𐍈'
+ wtf8 << "\xED\xBD\x88";
+ break;
+ }
+ lastWasLeadingSurrogate = false; // Reached by every case that ended with break.
+ }
+ std::stringstream wtf16;
+ // TODO: Use wtf16.view() once we have C++20.
+ String::convertWTF8ToWTF16(wtf16, wtf8.str());
+ return builder.makeStringConst(wtf16.str());
+ }
+ }
+ WASM_UNREACHABLE("bad switch"); // Only reached if upTo(3) yields a value that has no case above.
+}
+
Expression* TranslateToFuzzReader::makeTrappingRefUse(HeapType type) {
auto percent = upTo(100);
// Only give a low probability to emit a nullable reference.
@@ -4071,6 +4106,10 @@ Nullability TranslateToFuzzReader::getNullability() {
return Nullable;
}
+Mutability TranslateToFuzzReader::getMutability() { // Randomly picks a field mutability.
+ return oneIn(2) ? Mutable : Immutable; // NOTE(review): presumably oneIn(2) is true about half the time - confirm against its definition.
+}
+
Nullability TranslateToFuzzReader::getSubType(Nullability nullability) {
if (nullability == NonNullable) {
return NonNullable;
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index 64c0bfb2d..0afbaba94 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -33,6 +33,7 @@
#include "support/bits.h"
#include "support/safe_integer.h"
#include "support/stdckdint.h"
+#include "support/string.h"
#include "wasm-builder.h"
#include "wasm-traversal.h"
#include "wasm.h"
@@ -1898,6 +1899,16 @@ public:
}
return makeGCData(contents, curr->type);
}
+ case StringNewFromCodePoint: {
+ uint32_t codePoint = ptr.getSingleValue().getUnsigned();
+ if (codePoint > 0x10FFFF) {
+ trap("invalid code point");
+ }
+ std::stringstream wtf16;
+ String::writeWTF16CodePoint(wtf16, codePoint);
+ std::string str = wtf16.str();
+ return Literal(str);
+ }
default:
// TODO: others
return Flow(NONCONSTANT_FLOW);
diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast
index c67436c98..0f3183ac6 100644
--- a/test/lit/exec/strings.wast
+++ b/test/lit/exec/strings.wast
@@ -414,6 +414,64 @@
;; Concatenating these surrogates creates '𐍈'.
(string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88"))
)
+
+ ;; CHECK: [fuzz-exec] calling string.from_code_point
+ ;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
+ (func $string.from_code_point (export "string.from_code_point") (result stringref)
+ (string.from_code_point
+ (i32.const 65)
+ )
+ )
+
+ ;; CHECK: [fuzz-exec] calling unsigned_code_point
+ ;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
+ (func $unsigned_code_point (export "unsigned_code_point") (result stringref)
+ (string.from_code_point
+ ;; This must be interpreted as unsigned, that is, in the escaped output
+ ;; the top byte is 0.
+ (i32.const 147)
+ )
+ )
+
+ ;; CHECK: [fuzz-exec] calling weird_code_point
+ ;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
+ (func $weird_code_point (export "weird_code_point") (result stringref)
+ (string.from_code_point
+ (i32.const 0x3e8)
+ )
+ )
+
+ ;; CHECK: [fuzz-exec] calling isolated_high_code_point
+ ;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
+ (func $isolated_high_code_point (export "isolated_high_code_point") (result stringref)
+ (string.from_code_point
+ (i32.const 0xD800)
+ )
+ )
+
+ ;; CHECK: [fuzz-exec] calling isolated_low_code_point
+ ;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
+ (func $isolated_low_code_point (export "isolated_low_code_point") (result stringref)
+ (string.from_code_point
+ (i32.const 0xDC00)
+ )
+ )
+
+ ;; CHECK: [fuzz-exec] calling surrogate_pair_code_point
+ ;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\u286c")
+ (func $surrogate_pair_code_point (export "surrogate_pair_code_point") (result stringref)
+ (string.from_code_point
+ (i32.const 0x286c) ;; U+286C is in the BMP, so the result is the single code unit "\u286c" rather than a surrogate pair; note that '𐍈' itself is U+10348.
+ )
+ )
+
+ ;; CHECK: [fuzz-exec] calling invalid_code_point
+ ;; CHECK-NEXT: [trap invalid code point]
+ (func $invalid_code_point (export "invalid_code_point") (result stringref)
+ (string.from_code_point
+ (i32.const -83) ;; Read as unsigned, this value is above 0x10FFFF, so the interpreter traps.
+ )
+ )
)
;; CHECK: [fuzz-exec] calling new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
@@ -518,6 +576,27 @@
;; CHECK: [fuzz-exec] calling concat-surrogates
;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48")
+
+;; CHECK: [fuzz-exec] calling string.from_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
+
+;; CHECK: [fuzz-exec] calling unsigned_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
+
+;; CHECK: [fuzz-exec] calling weird_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
+
+;; CHECK: [fuzz-exec] calling isolated_high_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
+
+;; CHECK: [fuzz-exec] calling isolated_low_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
+
+;; CHECK: [fuzz-exec] calling surrogate_pair_code_point
+;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\u286c")
+
+;; CHECK: [fuzz-exec] calling invalid_code_point
+;; CHECK-NEXT: [trap invalid code point]
;; CHECK-NEXT: [fuzz-exec] comparing compare.1
;; CHECK-NEXT: [fuzz-exec] comparing compare.10
;; CHECK-NEXT: [fuzz-exec] comparing compare.2
@@ -540,6 +619,9 @@
;; CHECK-NEXT: [fuzz-exec] comparing eq.5
;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit
;; CHECK-NEXT: [fuzz-exec] comparing get_length
+;; CHECK-NEXT: [fuzz-exec] comparing invalid_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing isolated_high_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing isolated_low_code_point
;; CHECK-NEXT: [fuzz-exec] comparing new_2
;; CHECK-NEXT: [fuzz-exec] comparing new_4
;; CHECK-NEXT: [fuzz-exec] comparing new_empty
@@ -551,3 +633,7 @@
;; CHECK-NEXT: [fuzz-exec] comparing slice
;; CHECK-NEXT: [fuzz-exec] comparing slice-big
;; CHECK-NEXT: [fuzz-exec] comparing slice-unicode
+;; CHECK-NEXT: [fuzz-exec] comparing string.from_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing surrogate_pair_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing unsigned_code_point
+;; CHECK-NEXT: [fuzz-exec] comparing weird_code_point