diff options
-rw-r--r-- | src/ir/properties.h | 3 | ||||
-rw-r--r-- | src/literal.h | 13 | ||||
-rw-r--r-- | src/passes/Precompute.cpp | 4 | ||||
-rw-r--r-- | src/tools/execution-results.h | 5 | ||||
-rw-r--r-- | src/wasm-builder.h | 8 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 52 | ||||
-rw-r--r-- | src/wasm-type.h | 3 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 18 | ||||
-rw-r--r-- | src/wasm/wasm-type.cpp | 13 | ||||
-rw-r--r-- | test/lit/exec/strings.wast | 36 | ||||
-rw-r--r-- | test/lit/passes/precompute-gc.wast | 40 | ||||
-rw-r--r-- | test/lit/strings.wast | 23 |
12 files changed, 193 insertions, 25 deletions
diff --git a/src/ir/properties.h b/src/ir/properties.h index e7e96507c..d47ee55a8 100644 --- a/src/ir/properties.h +++ b/src/ir/properties.h @@ -82,7 +82,8 @@ inline bool isNamedControlFlow(Expression* curr) { // runtime will be equal as well. TODO: combine this with // isValidInConstantExpression or find better names(#4845) inline bool isSingleConstantExpression(const Expression* curr) { - return curr->is<Const>() || curr->is<RefNull>() || curr->is<RefFunc>(); + return curr->is<Const>() || curr->is<RefNull>() || curr->is<RefFunc>() || + curr->is<StringConst>(); } inline bool isConstantExpression(const Expression* curr) { diff --git a/src/literal.h b/src/literal.h index 213713a1f..5e449c576 100644 --- a/src/literal.h +++ b/src/literal.h @@ -48,7 +48,9 @@ class Literal { // A reference to GC data, either a Struct or an Array. For both of those // we store the referred data as a Literals object (which is natural for an // Array, and for a Struct, is just the fields in order). The type is used - // to indicate whether this is a Struct or an Array, and of what type. + // to indicate whether this is a Struct or an Array, and of what type. We + // also use this to store String data, as it is similarly stored on the + // heap. std::shared_ptr<GCData> gcData; // TODO: Literals of type `anyref` can only be `null` currently but we // will need to represent external values eventually, to @@ -90,7 +92,10 @@ public: bool isConcrete() const { return type.isConcrete(); } bool isNone() const { return type == Type::none; } bool isFunction() const { return type.isFunction(); } + // Whether this is GC data, that is, something stored on the heap (aside from + // a null or i31). This includes structs, arrays, and also strings. bool isData() const { return type.isData(); } + bool isString() const { return type.isString(); } bool isNull() const { return type.isNull(); } @@ -709,10 +714,10 @@ public: std::ostream& operator<<(std::ostream& o, wasm::Literal literal); std::ostream& operator<<(std::ostream& o, wasm::Literals literals); -// A GC Struct or Array is a set of values with a type saying how it should be -// interpreted. +// A GC Struct, Array, or String is a set of values with a type saying how it +// should be interpreted. struct GCData { - // The type of this struct or array. + // The type of this struct, array, or string. HeapType type; // The element or field values. diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index c90fdf167..5fff7710b 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -509,6 +509,10 @@ private: if (type.isFunction()) { return true; } + // We can emit a StringConst for a string constant. + if (type.isString()) { + return true; + } // All other reference types cannot be precomputed. Even an immutable GC // reference is not currently something this pass can handle, as it will // evaluate and reevaluate code multiple times in e.g. propagateLocals, see diff --git a/src/tools/execution-results.h b/src/tools/execution-results.h index 925c9b6d8..d12c84d1e 100644 --- a/src/tools/execution-results.h +++ b/src/tools/execution-results.h @@ -116,12 +116,15 @@ struct ExecutionResults { if (values->size() > 0) { std::cout << "[fuzz-exec] note result: " << exp->name << " => "; auto resultType = func->getResults(); - if (resultType.isRef()) { + if (resultType.isRef() && !resultType.isString()) { // Don't print reference values, as funcref(N) contains an index // for example, which is not guaranteed to remain identical after // optimizations. std::cout << resultType << '\n'; } else { + // Non-references can be printed in full. So can strings, since we + // always know how to print them and there is just one string + // type. std::cout << *values << '\n'; } } diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 324ed6fd2..5592273da 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -1153,6 +1153,14 @@ public: if (type.isRef() && type.getHeapType() == HeapType::i31) { return makeI31New(makeConst(value.geti31())); } + if (type.isString()) { + // TODO: more than ascii support + std::string string; + for (auto c : value.getGCData()->values) { + string.push_back(c.getInteger()); + } + return makeStringConst(string); + } TODO_SINGLE_COMPOUND(type); WASM_UNREACHABLE("unsupported constant expression"); } diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 6d25e4398..851c4013c 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1797,8 +1797,54 @@ public: } WASM_UNREACHABLE("unimplemented ref.as_*"); } - Flow visitStringNew(StringNew* curr) { WASM_UNREACHABLE("unimp"); } - Flow visitStringConst(StringConst* curr) { WASM_UNREACHABLE("unimp"); } + Flow visitStringNew(StringNew* curr) { + Flow ptr = visit(curr->ptr); + if (ptr.breaking()) { + return ptr; + } + switch (curr->op) { + case StringNewWTF16Array: { + Flow start = visit(curr->start); + if (start.breaking()) { + return start; + } + Flow end = visit(curr->end); + if (end.breaking()) { + return end; + } + auto ptrData = ptr.getSingleValue().getGCData(); + if (!ptrData) { + trap("null ref"); + } + const auto& ptrDataValues = ptrData->values; + size_t startVal = start.getSingleValue().getUnsigned(); + size_t endVal = end.getSingleValue().getUnsigned(); + if (endVal > ptrDataValues.size()) { + trap("array oob"); + } + Literals contents; + if (endVal > startVal) { + contents.reserve(endVal - startVal); + for (size_t i = startVal; i < endVal; i++) { + contents.push_back(ptrDataValues[i]); + } + } + auto heapType = curr->type.getHeapType(); + return Literal(std::make_shared<GCData>(heapType, contents), heapType); + } + default: + // TODO: others + return Flow(NONCONSTANT_FLOW); + } + } + Flow visitStringConst(StringConst* curr) { + Literals contents; + for (size_t i = 0; i < curr->string.size(); i++) { + contents.push_back(Literal(int32_t(curr->string[i]))); + } + auto heapType = curr->type.getHeapType(); + return Literal(std::make_shared<GCData>(heapType, contents), heapType); + } Flow visitStringMeasure(StringMeasure* curr) { WASM_UNREACHABLE("unimp"); } Flow visitStringEncode(StringEncode* curr) { WASM_UNREACHABLE("unimp"); } Flow visitStringConcat(StringConcat* curr) { WASM_UNREACHABLE("unimp"); } @@ -2121,8 +2167,6 @@ public: NOTE_ENTER("Rethrow"); return Flow(NONCONSTANT_FLOW); } - Flow visitStringNew(StringNew* curr) { return Flow(NONCONSTANT_FLOW); } - Flow visitStringConst(StringConst* curr) { return Flow(NONCONSTANT_FLOW); } Flow visitStringMeasure(StringMeasure* curr) { return Flow(NONCONSTANT_FLOW); } diff --git a/src/wasm-type.h b/src/wasm-type.h index a359ea794..67223705e 100644 --- a/src/wasm-type.h +++ b/src/wasm-type.h @@ -159,6 +159,7 @@ public: bool isSingle() const { return isConcrete() && !isTuple(); } bool isRef() const; bool isFunction() const; + // See literal.h. bool isData() const; // Checks whether a type is a reference and is nullable. This returns false // for a value that is not a reference, that is, for which nullability is @@ -173,6 +174,7 @@ public: bool isNull() const; bool isStruct() const; bool isArray() const; + bool isString() const; bool isDefaultable() const; Nullability getNullability() const; @@ -364,6 +366,7 @@ public: bool isSignature() const; bool isStruct() const; bool isArray() const; + bool isString() const; bool isBottom() const; Signature getSignature() const; diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 34f3d250d..80ff47b17 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -71,7 +71,8 @@ Literal::Literal(const uint8_t init[16]) : type(Type::v128) { Literal::Literal(std::shared_ptr<GCData> gcData, HeapType type) : gcData(gcData), type(type, NonNullable) { - // The type must be a proper type for GC data. + // The type must be a proper type for GC data: either a struct, array, or + // string; or a null. assert((isData() && gcData) || (type.isBottom() && !gcData)); } @@ -577,7 +578,20 @@ std::ostream& operator<<(std::ostream& o, Literal literal) { case HeapType::struct_: case HeapType::array: WASM_UNREACHABLE("invalid type"); - case HeapType::string: + case HeapType::string: { + auto data = literal.getGCData(); + if (!data) { + o << "nullstring"; + } else { + o << "string(\""; + for (auto c : data->values) { + // TODO: more than ascii + o << char(c.getInteger()); + } + o << "\")"; + } + break; + } case HeapType::stringview_wtf8: case HeapType::stringview_wtf16: case HeapType::stringview_iter: diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index 2c26f0e21..cebb1b489 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -897,7 +897,8 @@ bool Type::isFunction() const { bool Type::isData() const { if (isBasic()) { - return false; + // The only basic type that is considered data is a string. + return isString(); } else { auto* info = getTypeInfo(*this); return info->isRef() && info->ref.heapType.isData(); @@ -924,6 +925,8 @@ bool Type::isStruct() const { return isRef() && getHeapType().isStruct(); } bool Type::isArray() const { return isRef() && getHeapType().isArray(); } +bool Type::isString() const { return isRef() && getHeapType().isString(); } + bool Type::isDefaultable() const { // A variable can get a default value if its type is concrete (unreachable // and none have no values, hence no default), and if it's a reference, it @@ -1267,7 +1270,7 @@ bool HeapType::isFunction() const { bool HeapType::isData() const { if (isBasic()) { - return id == struct_ || id == array; + return id == struct_ || id == array || id == string; } else { return getHeapTypeInfo(*this)->isData(); } @@ -1297,6 +1300,8 @@ bool HeapType::isArray() const { } } +bool HeapType::isString() const { return *this == HeapType::string; } + bool HeapType::isBottom() const { if (isBasic()) { switch (getBasic()) { @@ -1672,7 +1677,9 @@ bool SubTyper::isSubType(HeapType a, HeapType b) { case HeapType::any: return a.getBottom() == HeapType::none; case HeapType::eq: - return a == HeapType::i31 || a == HeapType::none || a.isData(); + return a == HeapType::i31 || a == HeapType::none || + a == HeapType::struct_ || a == HeapType::array || a.isStruct() || + a.isArray(); case HeapType::i31: return a == HeapType::none; case HeapType::struct_: diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast new file mode 100644 index 000000000..2852337c8 --- /dev/null +++ b/test/lit/exec/strings.wast @@ -0,0 +1,36 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --output=fuzz-exec and should not be edited. + +;; RUN: wasm-opt %s -all --fuzz-exec -q -o /dev/null 2>&1 | filecheck %s + +(module + (type $array16 (array (mut i16))) + + ;; CHECK: [fuzz-exec] calling new_wtf16_array + ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") + (func "new_wtf16_array" (result stringref) + (string.new_wtf16_array + (array.init_static $array16 + (i32.const 104) ;; h + (i32.const 101) ;; e + (i32.const 108) ;; l + (i32.const 108) ;; l + (i32.const 111) ;; o + ) + (i32.const 1) ;; start from index 1, to chop off the 'h' + (i32.const 5) + ) + ) + + ;; CHECK: [fuzz-exec] calling const + ;; CHECK-NEXT: [fuzz-exec] note result: const => string("world") + (func "const" (result stringref) + (string.const "world") + ) +) +;; CHECK: [fuzz-exec] calling new_wtf16_array +;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") + +;; CHECK: [fuzz-exec] calling const +;; CHECK-NEXT: [fuzz-exec] note result: const => string("world") +;; CHECK-NEXT: [fuzz-exec] comparing const +;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array diff --git a/test/lit/passes/precompute-gc.wast b/test/lit/passes/precompute-gc.wast index f53ced968..0c5b9f8b8 100644 --- a/test/lit/passes/precompute-gc.wast +++ b/test/lit/passes/precompute-gc.wast @@ -23,6 +23,8 @@ ;; NOMNL: (type $B (struct (field (mut f64)))) (type $B (struct (field (mut f64)))) + (type $array16 (array (mut i16))) + (type $func-return-i32 (func (result i32))) ;; CHECK: (import "fuzzing-support" "log-i32" (func $log (param i32))) @@ -1429,4 +1431,42 @@ ) ) ) + + ;; CHECK: (func $strings (type $ref|string|_=>_none) (param $param (ref string)) + ;; CHECK-NEXT: (local $s (ref string)) + ;; CHECK-NEXT: (local.set $s + ;; CHECK-NEXT: (string.const "hello, world") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $strings + ;; CHECK-NEXT: (string.const "hello, world") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $strings + ;; CHECK-NEXT: (string.const "hello, world") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $strings (type $ref|string|_=>_none) (param $param (ref string)) + ;; NOMNL-NEXT: (local $s (ref string)) + ;; NOMNL-NEXT: (local.set $s + ;; NOMNL-NEXT: (string.const "hello, world") + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (call $strings + ;; NOMNL-NEXT: (string.const "hello, world") + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (call $strings + ;; NOMNL-NEXT: (string.const "hello, world") + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + (func $strings (param $param (ref string)) + (local $s (ref string)) + (local.set $s + (string.const "hello, world") + ) + ;; The constant string should be propagated twice, to both of these calls. + (call $strings + (local.get $s) + ) + (call $strings + (local.get $s) + ) + ) ) diff --git a/test/lit/strings.wast b/test/lit/strings.wast index 63139d900..1e38ff894 100644 --- a/test/lit/strings.wast +++ b/test/lit/strings.wast @@ -22,19 +22,21 @@ ;; CHECK: (type $array (array (mut i8))) (type $array (array_subtype (mut i8) data)) - ;; CHECK: (type $none_=>_none (func)) - ;; CHECK: (type $array16 (array (mut i16))) (type $array16 (array_subtype (mut i16) data)) ;; CHECK: (type $stringref_stringview_wtf8_stringview_wtf16_stringview_iter_stringref_stringview_wtf8_stringview_wtf16_stringview_iter_ref|string|_ref|stringview_wtf8|_ref|stringview_wtf16|_ref|stringview_iter|_=>_none (func (param stringref stringview_wtf8 stringview_wtf16 stringview_iter stringref stringview_wtf8 stringview_wtf16 stringview_iter (ref string) (ref stringview_wtf8) (ref stringview_wtf16) (ref stringview_iter)))) + ;; CHECK: (type $ref|string|_=>_none (func (param (ref string)))) + ;; CHECK: (type $stringview_wtf16_=>_none (func (param stringview_wtf16))) ;; CHECK: (type $ref|$array|_ref|$array16|_=>_none (func (param (ref $array) (ref $array16)))) ;; CHECK: (type $stringref_ref|$array|_ref|$array16|_=>_none (func (param stringref (ref $array) (ref $array16)))) + ;; CHECK: (type $none_=>_none (func)) + ;; CHECK: (type $ref|$array|_=>_none (func (param (ref $array)))) ;; CHECK: (type $stringref_=>_i32 (func (param stringref) (result i32))) @@ -109,25 +111,26 @@ ) ) - ;; CHECK: (func $string.const (type $none_=>_none) - ;; CHECK-NEXT: (drop + ;; CHECK: (func $string.const (type $ref|string|_=>_none) (param $param (ref string)) + ;; CHECK-NEXT: (call $string.const ;; CHECK-NEXT: (string.const "foo") ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $string.const ;; CHECK-NEXT: (string.const "foo") ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $string.const ;; CHECK-NEXT: (string.const "bar") ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $string.const - (drop + (func $string.const (param $param (ref string)) + ;; Use calls to avoid precompute removing dropped constants. + (call $string.const (string.const "foo") ) - (drop + (call $string.const (string.const "foo") ;; intentionally repeat the previous one ) - (drop + (call $string.const (string.const "bar") ) ) |