diff options
author | Alon Zakai <azakai@google.com> | 2024-04-29 11:29:57 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-29 11:29:57 -0700 |
commit | 63d308f4f2ce6daae3dd3c6ec0b3808134d8791a (patch) | |
tree | e2ee30fcbf46dabe5fa70a62606dc27f48a33206 | |
parent | 4e4cb620d52de7b605eee7dc29cea3be1714f856 (diff) | |
download | binaryen-63d308f4f2ce6daae3dd3c6ec0b3808134d8791a.tar.gz binaryen-63d308f4f2ce6daae3dd3c6ec0b3808134d8791a.tar.bz2 binaryen-63d308f4f2ce6daae3dd3c6ec0b3808134d8791a.zip |
[Strings] Work around ref.cast not working on string views, and add fuzzing (#6549)
As suggested in #6434 (comment) , lower ref.cast of string views
to ref.as_non_null in binary writing. It is a simple hack that avoids the
problem of V8 not allowing them to be cast.
Add fuzzing support for the last three core string operations, after which
that problem becomes very frequent.
Also add yet another makeTrappingRefUse that was missing in that
fuzzer code.
-rw-r--r-- | README.md | 8 | ||||
-rw-r--r-- | src/tools/fuzzing.h | 3 | ||||
-rw-r--r-- | src/tools/fuzzing/fuzzing.cpp | 35 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 18 | ||||
-rw-r--r-- | test/lit/passes/roundtrip.wast | 66 | ||||
-rw-r--r-- | test/passes/translate-to-fuzz_all-features_metrics_noprint.txt | 72 |
6 files changed, 163 insertions, 39 deletions
@@ -147,6 +147,14 @@ There are a few differences between Binaryen IR and the WebAssembly language: much about this when writing Binaryen passes. For more details see the `requiresNonNullableLocalFixups()` hook in `pass.h` and the `LocalStructuralDominance` class. + * Strings + * Binaryen allows string views (`stringview_wtf16` etc.) to be cast using + `ref.cast`. This simplifies the IR, as it allows `ref.cast` to always be + used in all places (and it is lowered to `ref.as_non_null` where possible + in the optimizer). The stringref spec does not seem to allow this though, + and to fix that the binary writer will replace `ref.cast` that casts a + string view to a non-nullable type to `ref.as_non_null`. A `ref.cast` of a + string view that is a no-op is skipped entirely. As a result, you might notice that round-trip conversions (wasm => Binaryen IR => wasm) change code a little in some corner cases. diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index c60744dea..786527236 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -327,7 +327,10 @@ private: Expression* makeStringNewArray(); Expression* makeStringNewCodePoint(); Expression* makeStringConcat(); + Expression* makeStringSlice(); Expression* makeStringEq(Type type); + Expression* makeStringMeasure(Type type); + Expression* makeStringGet(Type type); Expression* makeStringEncode(Type type); // Similar to makeBasic/CompoundRef, but indicates that this value will be diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index add0494ea..6c62e5191 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -1369,7 +1369,9 @@ Expression* TranslateToFuzzReader::_makeConcrete(Type type) { options.add(FeatureSet::ReferenceTypes | FeatureSet::GC | FeatureSet::Strings, &Self::makeStringEncode, - &Self::makeStringEq); + &Self::makeStringEq, + &Self::makeStringMeasure, + &Self::makeStringGet); } if (type.isTuple()) { options.add(FeatureSet::Multivalue, &Self::makeTupleMake); @@ -2620,7 +2622,7 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) { if (!funcContext) { return makeStringConst(); } - switch (upTo(9)) { + switch (upTo(11)) { case 0: case 1: case 2: @@ -2639,13 +2641,16 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) { // generate two string children, i.e., it can lead to exponential // growth. return makeStringConcat(); + case 9: + case 10: + return makeStringSlice(); } WASM_UNREACHABLE("bad switch"); } case HeapType::stringview_wtf16: // We fully support wtf16 strings. - return builder.makeStringAs( - StringAsWTF16, makeBasicRef(Type(HeapType::string, NonNullable))); + return builder.makeStringAs(StringAsWTF16, + makeTrappingRefUse(HeapType::string)); case HeapType::stringview_wtf8: case HeapType::stringview_iter: // We do not have interpreter support for wtf8 and iter, so emit something @@ -2818,6 +2823,13 @@ Expression* TranslateToFuzzReader::makeStringConcat() { return builder.makeStringConcat(left, right); } +Expression* TranslateToFuzzReader::makeStringSlice() { + auto* ref = makeTrappingRefUse(HeapType::stringview_wtf16); + auto* start = make(Type::i32); + auto* end = make(Type::i32); + return builder.makeStringSliceWTF(StringSliceWTF16, ref, start, end); +} + Expression* TranslateToFuzzReader::makeStringEq(Type type) { assert(type == Type::i32); @@ -2833,6 +2845,21 @@ Expression* TranslateToFuzzReader::makeStringEq(Type type) { return builder.makeStringEq(StringEqCompare, left, right); } +Expression* TranslateToFuzzReader::makeStringMeasure(Type type) { + assert(type == Type::i32); + + auto* ref = makeTrappingRefUse(HeapType::string); + return builder.makeStringMeasure(StringMeasureWTF16, ref); +} + +Expression* TranslateToFuzzReader::makeStringGet(Type type) { + assert(type == Type::i32); + + auto* ref = makeTrappingRefUse(HeapType::stringview_wtf16); + auto* pos = make(Type::i32); + return builder.makeStringWTF16Get(ref, pos); +} + Expression* TranslateToFuzzReader::makeTrappingRefUse(HeapType type) { auto percent = upTo(100); // Only give a low probability to emit a nullable reference. diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 4e3194e84..f1413847f 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2089,6 +2089,24 @@ void BinaryInstWriter::visitRefTest(RefTest* curr) { } void BinaryInstWriter::visitRefCast(RefCast* curr) { + // We allow ref.cast of string views, but V8 does not. Work around that by + // emitting a ref.as_non_null (or nothing). + auto type = curr->type; + if (type.isRef()) { + auto heapType = type.getHeapType(); + if (heapType == HeapType::stringview_wtf8 || + heapType == HeapType::stringview_wtf16 || + heapType == HeapType::stringview_iter) { + // We cannot cast string views to/from anything, so the input must also + // be a view. + assert(curr->ref->type.getHeapType() == heapType); + if (type.isNonNullable() && curr->ref->type.isNullable()) { + o << int8_t(BinaryConsts::RefAsNonNull); + } + return; + } + } + o << int8_t(BinaryConsts::GCPrefix); if (curr->type.isNullable()) { o << U32LEB(BinaryConsts::RefCastNull); diff --git a/test/lit/passes/roundtrip.wast b/test/lit/passes/roundtrip.wast index 59e303eaf..8fb69cb7d 100644 --- a/test/lit/passes/roundtrip.wast +++ b/test/lit/passes/roundtrip.wast @@ -42,4 +42,70 @@ ) ) ) + + ;; CHECK: (func $string_view_casts (type $2) (param $x stringview_wtf8) (param $y stringview_wtf16) (param $z stringview_iter) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $z) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $z) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string_view_casts + ;; ref.cast of string views is not allowed in binaries: replace with + ;; ref.as_non_null, or remove if it is a no-op. + (param $x (ref null stringview_wtf8)) + (param $y (ref null stringview_wtf16)) + (param $z (ref null stringview_iter)) + ;; Here we still need a cast to non-null. + (drop + (ref.cast (ref stringview_wtf8) + (local.get $x) + ) + ) + (drop + (ref.cast (ref stringview_wtf16) + (local.get $y) + ) + ) + (drop + (ref.cast (ref stringview_iter) + (local.get $z) + ) + ) + ;; Here we do not need the cast. + (drop + (ref.cast (ref null stringview_wtf8) + (local.get $x) + ) + ) + (drop + (ref.cast (ref null stringview_wtf16) + (local.get $y) + ) + ) + (drop + (ref.cast (ref null stringview_iter) + (local.get $z) + ) + ) + ) ) diff --git a/test/passes/translate-to-fuzz_all-features_metrics_noprint.txt b/test/passes/translate-to-fuzz_all-features_metrics_noprint.txt index 20b145a95..4ead62655 100644 --- a/test/passes/translate-to-fuzz_all-features_metrics_noprint.txt +++ b/test/passes/translate-to-fuzz_all-features_metrics_noprint.txt @@ -1,45 +1,47 @@ total - [exports] : 4 - [funcs] : 6 + [exports] : 5 + [funcs] : 7 [globals] : 14 [imports] : 5 [memories] : 1 [memory-data] : 20 - [table-data] : 1 + [table-data] : 2 [tables] : 1 [tags] : 1 - [total] : 533 - [vars] : 24 - ArrayNew : 11 - ArrayNewFixed : 2 - AtomicFence : 1 - Binary : 79 - Block : 57 - Break : 6 - Call : 8 - Const : 137 - Drop : 1 - GlobalGet : 28 - GlobalSet : 28 - If : 15 + [total] : 467 + [vars] : 40 + ArrayGet : 1 + ArrayLen : 1 + ArrayNew : 6 + ArrayNewFixed : 1 + Binary : 67 + Block : 44 + Break : 5 + Call : 21 + Const : 106 + Drop : 7 + GlobalGet : 20 + GlobalSet : 18 + If : 14 Load : 17 - LocalGet : 40 - LocalSet : 23 - Loop : 9 - Nop : 4 + LocalGet : 45 + LocalSet : 28 + Loop : 3 + MemoryFill : 1 + Nop : 5 RefAs : 1 - RefFunc : 2 - RefI31 : 2 - RefIsNull : 1 - RefNull : 5 - Return : 2 - SIMDExtract : 2 - Select : 1 - Store : 2 - StringConst : 4 + RefCast : 1 + RefEq : 1 + RefFunc : 3 + RefI31 : 4 + RefNull : 3 + Return : 3 + Select : 2 + Store : 1 + StringConst : 3 + StringEncode : 1 StringEq : 1 - StructGet : 1 - StructNew : 9 - TupleMake : 4 - Unary : 15 - Unreachable : 15 + StructNew : 5 + TupleMake : 5 + Unary : 13 + Unreachable : 10 |