summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2024-02-06 15:44:40 -0800
committerGitHub <noreply@github.com>2024-02-06 15:44:40 -0800
commita238cf95a8ee18e14b30a86097858dff19cf94aa (patch)
tree9d7b091d0d13c42ee0343928d49185bda2eb7f66
parent3a41065a27fc4e65d563ae983a06cbe774ad2ea7 (diff)
downloadbinaryen-a238cf95a8ee18e14b30a86097858dff19cf94aa.tar.gz
binaryen-a238cf95a8ee18e14b30a86097858dff19cf94aa.tar.bz2
binaryen-a238cf95a8ee18e14b30a86097858dff19cf94aa.zip
StringLowering: Start to lower instructions (#6281)
-rwxr-xr-xscripts/fuzz_opt.py1
-rw-r--r--src/passes/StringLowering.cpp82
-rw-r--r--test/lit/passes/string-gathering.wast20
-rw-r--r--test/lit/passes/string-lowering-instructions.wast95
4 files changed, 198 insertions, 0 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index f839984c2..251b23d03 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -286,6 +286,7 @@ INITIAL_CONTENTS_IGNORE = [
# TODO: fuzzer and interpreter support for strings
'strings.wast',
'simplify-locals-strings.wast',
+ 'string-lowering-instructions.wast',
# TODO: fuzzer and interpreter support for extern conversions
'extern-conversions.wast',
# ignore DWARF because it is incompatible with multivalue atm
diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp
index 31e41b9e8..e4b3ed865 100644
--- a/src/passes/StringLowering.cpp
+++ b/src/passes/StringLowering.cpp
@@ -191,6 +191,9 @@ struct StringLowering : public StringGathering {
// Remove all HeapType::string etc. in favor of externref.
updateTypes(module);
+ // Replace string.* etc. operations with imported ones.
+ replaceInstructions(module);
+
// Disable the feature here after we lowered everything away.
module->features.disable(FeatureSet::Strings);
}
@@ -225,9 +228,88 @@ struct StringLowering : public StringGathering {
void updateTypes(Module* module) {
TypeMapper::TypeUpdates updates;
+ // With imported strings there is no string/view distinction: each one is
+ // just a JS string, so the string and all view heap types become ext.
updates[HeapType::string] = HeapType::ext;
+ updates[HeapType::stringview_wtf8] = HeapType::ext;
+ updates[HeapType::stringview_wtf16] = HeapType::ext;
+ updates[HeapType::stringview_iter] = HeapType::ext;
TypeMapper(*module, updates).map();
}
+
+ // Names of the imported string functions, set in replaceInstructions().
+ Name fromCharCodeArrayImport;
+ Name fromCodePointImport;
+
+ // The module name that the string function imports are imported from.
+ Name WasmStringsModule = "wasm:js-string";
+
+ // Common import types: nullable (mut i16) array ref; non-null externref.
+ Type nullArray16 = Type(Array(Field(Field::i16, Mutable)), Nullable);
+ Type nnExt = Type(HeapType::ext, NonNullable);
+
+ // Adds a function import of `trueName` from WasmStringsModule with the given
+ // signature. Returns the function's name, which differs only on a conflict.
+ Name addImport(Module* module, Name trueName, Type params, Type results) {
+ auto name = Names::getValidFunctionName(*module, trueName);
+ auto sig = Signature(params, results);
+ Builder builder(*module);
+ auto* func = module->addFunction(builder.makeFunction(name, sig, {}));
+ func->module = WasmStringsModule;
+ func->base = trueName; // the import base keeps the true name regardless
+ return name;
+ }
+
+ void replaceInstructions(Module* module) {
+ // Create every import before the parallel walk below, so that no function
+ // is added during it. Later optimizations can remove unused imports.
+
+ // string.fromCharCodeArray: (array, start, end) -> non-null ext
+ fromCharCodeArrayImport = addImport(
+ module, "fromCharCodeArray", {nullArray16, Type::i32, Type::i32}, nnExt);
+ // string.fromCodePoint: (codepoint) -> non-null ext
+ fromCodePointImport = addImport(module, "fromCodePoint", Type::i32, nnExt);
+
+ // Replace the string instructions, in parallel across functions.
+ struct Replacer : public WalkerPass<PostWalker<Replacer>> {
+ bool isFunctionParallel() override { return true; }
+
+ StringLowering& lowering; // source of the import names and types
+
+ std::unique_ptr<Pass> create() override {
+ return std::make_unique<Replacer>(lowering); // shares the same lowering
+ }
+
+ Replacer(StringLowering& lowering) : lowering(lowering) {}
+
+ void visitStringNew(StringNew* curr) {
+ Builder builder(*getModule());
+ switch (curr->op) {
+ case StringNewWTF16Array:
+ replaceCurrent(builder.makeCall(lowering.fromCharCodeArrayImport,
+ {curr->ptr, curr->start, curr->end},
+ lowering.nnExt));
+ return;
+ case StringNewFromCodePoint:
+ replaceCurrent(builder.makeCall(
+ lowering.fromCodePointImport, {curr->ptr}, lowering.nnExt));
+ return;
+ default:
+ WASM_UNREACHABLE("TODO: all of string.new*");
+ }
+ }
+
+ void visitStringAs(StringAs* curr) {
+ // Views and strings are the same JS string once imported, so just forward
+ // the operand. NOTE(review): confirm the operand's nullability is OK here.
+ replaceCurrent(curr->ref);
+ }
+ };
+
+ Replacer replacer(*this);
+ replacer.run(getPassRunner(), module);
+ replacer.walkModuleCode(module); // presumably non-function code; confirm
+ }
};
Pass* createStringGatheringPass() { return new StringGathering(); }
diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast
index 657858fc0..8c315ddc1 100644
--- a/test/lit/passes/string-gathering.wast
+++ b/test/lit/passes/string-gathering.wast
@@ -27,12 +27,22 @@
;; CHECK: (global $global2 stringref (global.get $string.const_bar))
;; LOWER: (type $0 (func))
+ ;; LOWER: (type $1 (array (mut i16)))
+
+ ;; LOWER: (type $2 (func (param (ref null $1) i32 i32) (result (ref extern))))
+
+ ;; LOWER: (type $3 (func (param i32) (result (ref extern))))
+
;; LOWER: (import "string.const" "0" (global $string.const_bar (ref extern)))
;; LOWER: (import "string.const" "1" (global $string.const_other (ref extern)))
;; LOWER: (import "string.const" "2" (global $global (ref extern)))
+ ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $2) (param (ref null $1) i32 i32) (result (ref extern))))
+
+ ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $3) (param i32) (result (ref extern))))
+
;; LOWER: (global $global2 externref (global.get $string.const_bar))
(global $global2 (ref null string) (string.const "bar"))
@@ -111,6 +121,12 @@
;; Multiple possible reusable globals. Also test ignoring of imports.
(module
;; CHECK: (import "a" "b" (global $import (ref string)))
+ ;; LOWER: (type $0 (array (mut i16)))
+
+ ;; LOWER: (type $1 (func (param (ref null $0) i32 i32) (result (ref extern))))
+
+ ;; LOWER: (type $2 (func (param i32) (result (ref extern))))
+
;; LOWER: (import "a" "b" (global $import (ref extern)))
(import "a" "b" (global $import (ref string)))
@@ -122,6 +138,10 @@
;; LOWER: (import "string.const" "1" (global $global4 (ref extern)))
+ ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $1) (param (ref null $0) i32 i32) (result (ref extern))))
+
+ ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $2) (param i32) (result (ref extern))))
+
;; LOWER: (global $global2 (ref extern) (global.get $global1))
(global $global2 (ref string) (string.const "foo"))
diff --git a/test/lit/passes/string-lowering-instructions.wast b/test/lit/passes/string-lowering-instructions.wast
new file mode 100644
index 000000000..05d555ef0
--- /dev/null
+++ b/test/lit/passes/string-lowering-instructions.wast
@@ -0,0 +1,95 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s
+
+(module
+ ;; CHECK: (type $0 (func))
+
+ ;; CHECK: (type $array16 (array (mut i16)))
+ (type $array16 (array (mut i16)))
+
+ ;; CHECK: (rec
+ ;; CHECK-NEXT: (type $2 (func (param (ref $array16))))
+
+ ;; CHECK: (type $3 (func (param externref externref externref externref)))
+
+ ;; CHECK: (type $4 (func (param (ref null $array16) i32 i32) (result (ref extern))))
+
+ ;; CHECK: (type $5 (func (param i32) (result (ref extern))))
+
+ ;; CHECK: (import "colliding" "name" (func $fromCodePoint (type $0)))
+ (import "colliding" "name" (func $fromCodePoint))
+
+ ;; CHECK: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $4) (param (ref null $array16) i32 i32) (result (ref extern))))
+
+ ;; CHECK: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint_5 (type $5) (param i32) (result (ref extern))))
+
+ ;; CHECK: (func $string.as (type $3) (param $a externref) (param $b externref) (param $c externref) (param $d externref)
+ ;; CHECK-NEXT: (local.set $b
+ ;; CHECK-NEXT: (local.get $a)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $c
+ ;; CHECK-NEXT: (local.get $a)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $d
+ ;; CHECK-NEXT: (local.get $a)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $string.as
+ (param $a stringref)
+ (param $b stringview_wtf8)
+ (param $c stringview_wtf16)
+ (param $d stringview_iter)
+ ;; These operations all vanish in the lowering, as they all become externref
+ ;; (JS strings).
+ (local.set $b
+ (string.as_wtf8
+ (local.get $a)
+ )
+ )
+ (local.set $c
+ (string.as_wtf16
+ (local.get $a)
+ )
+ )
+ (local.set $d
+ (string.as_iter
+ (local.get $a)
+ )
+ )
+ )
+
+ ;; CHECK: (func $string.new.gc (type $2) (param $array16 (ref $array16))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (call $fromCharCodeArray
+ ;; CHECK-NEXT: (local.get $array16)
+ ;; CHECK-NEXT: (i32.const 7)
+ ;; CHECK-NEXT: (i32.const 8)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $string.new.gc (param $array16 (ref $array16))
+ (drop
+ (string.new_wtf16_array
+ (local.get $array16)
+ (i32.const 7)
+ (i32.const 8)
+ )
+ )
+ )
+
+ ;; CHECK: (func $string.from_code_point (type $0)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (call $fromCodePoint_5
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $string.from_code_point
+ (drop
+ (string.from_code_point
+ (i32.const 1)
+ )
+ )
+ )
+)