diff options
-rw-r--r-- | src/wasm-interpreter.h | 34 | ||||
-rw-r--r-- | test/lit/passes/precompute-strings.wast | 106 |
2 files changed, 135 insertions, 5 deletions
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index c35920802..5179d6ad0 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1902,6 +1902,16 @@ public: Flow visitStringConst(StringConst* curr) { return Literal(curr->string.toString()); } + + bool hasNonAsciiUpTo(const Literals& values, Index end) { + for (Index i = 0; i < end; ++i) { + if (uint32_t(values[i].geti32()) > 127) { + return true; + } + } + return false; + } + Flow visitStringMeasure(StringMeasure* curr) { // For now we only support JS-style strings. if (curr->op != StringMeasureWTF16View) { @@ -1917,6 +1927,13 @@ public: if (!data) { trap("null ref"); } + + // This is only correct if all the bytes stored in `values` correspond to + // single unicode code points. See `visitStringWTF16Get` for details. + if (hasNonAsciiUpTo(data->values, data->values.size())) { + return Flow(NONCONSTANT_FLOW); + } + return Literal(int32_t(data->values.size())); } Flow visitStringConcat(StringConcat* curr) { @@ -1980,6 +1997,11 @@ public: trap("oob"); } + // We don't handle non-ascii code points correctly yet. + if (hasNonAsciiUpTo(refValues, refValues.size())) { + return Flow(NONCONSTANT_FLOW); + } + for (Index i = 0; i < refValues.size(); i++) { ptrValues[startVal + i] = refValues[i]; } @@ -2095,6 +2117,18 @@ public: if (i >= values.size()) { trap("string oob"); } + + // This naive indexing approach is only correct if the first `i` bytes + // stored in `values` each corresponds to a single unicode code point. To + // implement this correctly in general, we would have to reinterpret the + // bytes as WTF-8, then count up to the `i`th code point, accounting + // properly for code points that would be represented by surrogate pairs in + // WTF-16. Alternatively, we could represent string contents as WTF-16 to + // begin with. + if (hasNonAsciiUpTo(values, i + 1)) { + return Flow(NONCONSTANT_FLOW); + } + return Literal(values[i].geti32()); } Flow visitStringIterNext(StringIterNext* curr) { diff --git a/test/lit/passes/precompute-strings.wast b/test/lit/passes/precompute-strings.wast index f5b1660bc..aa138b289 100644 --- a/test/lit/passes/precompute-strings.wast +++ b/test/lit/passes/precompute-strings.wast @@ -1,12 +1,15 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. -;; RUN: wasm-opt %s --precompute --fuzz-exec -all -S -o - | filecheck %s +;; RUN: wasm-opt %s --precompute -all -S -o - | filecheck %s (module + ;; CHECK: (type $array16 (array (mut i16))) + (type $array16 (array (mut i16))) + ;; CHECK: (func $eq-no (type $0) (result i32) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) - (func $eq-no (export "eq-no") (result i32) + (func $eq-no (result i32) (string.eq (string.const "ab") (string.const "cdefg") @@ -16,7 +19,7 @@ ;; CHECK: (func $eq-yes (type $0) (result i32) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) - (func $eq-yes (export "eq-yes") (result i32) + (func $eq-yes (result i32) (string.eq (string.const "ab") (string.const "ab") @@ -26,11 +29,104 @@ ;; CHECK: (func $concat (type $0) (result i32) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) - (func $concat (export "concat") (result i32) + (func $concat (result i32) (string.eq (string.concat (string.const "a") (string.const "b")) (string.const "ab") ) ) -) + ;; CHECK: (func $length (type $0) (result i32) + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: ) + (func $length (result i32) + (stringview_wtf16.length + (string.as_wtf16 + (string.const "1234567") + ) + ) + ) + + ;; CHECK: (func $length-bad (type $0) (result i32) + ;; CHECK-NEXT: (stringview_wtf16.length + ;; CHECK-NEXT: (string.as_wtf16 + ;; CHECK-NEXT: (string.const "$_\c2\a3_\e2\82\ac_\f0\90\8d\88") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $length-bad (result i32) + ;; Not precomputable because we don't handle unicode yet. + (stringview_wtf16.length + (string.as_wtf16 + ;; $_£_€_𐍈 + (string.const "$_\C2\A3_\E2\82\AC_\F0\90\8D\88") + ) + ) + ) + + ;; CHECK: (func $get_codepoint (type $0) (result i32) + ;; CHECK-NEXT: (i32.const 95) + ;; CHECK-NEXT: ) + (func $get_codepoint (result i32) + ;; This is computable because everything up to the requested index is ascii. Returns 95 ('_'). + (stringview_wtf16.get_codeunit + (string.as_wtf16 + ;; $_£_€_𐍈 + (string.const "$_\C2\A3_\E2\82\AC_\F0\90\8D\88") + ) + (i32.const 1) + ) + ) + + ;; CHECK: (func $get_codepoint-bad (type $0) (result i32) + ;; CHECK-NEXT: (stringview_wtf16.get_codeunit + ;; CHECK-NEXT: (string.as_wtf16 + ;; CHECK-NEXT: (string.const "$_\c2\a3_\e2\82\ac_\f0\90\8d\88") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $get_codepoint-bad (export "get_codepoint-bad") (result i32) + ;; This is not computable because the requested code unit is not ascii. + (stringview_wtf16.get_codeunit + (string.as_wtf16 + ;; $_£_€_𐍈 + (string.const "$_\C2\A3_\E2\82\AC_\F0\90\8D\88") + ) + (i32.const 2) + ) + ) + + ;; CHECK: (func $encode (type $0) (result i32) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + (func $encode (result i32) + (string.encode_wtf16_array + (string.const "$_") + (array.new_default $array16 + (i32.const 20) + ) + (i32.const 0) + ) + ) + + ;; CHECK: (func $encode-bad (type $0) (result i32) + ;; CHECK-NEXT: (string.encode_wtf16_array + ;; CHECK-NEXT: (string.const "$_\c2\a3_\e2\82\ac_\f0\90\8d\88") + ;; CHECK-NEXT: (array.new_default $array16 + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $encode-bad (result i32) + (string.encode_wtf16_array + ;; $_£_€_𐍈 + (string.const "$_\C2\A3_\E2\82\AC_\F0\90\8D\88") + (array.new_default $array16 + (i32.const 20) + ) + (i32.const 0) + ) + ) +) |