summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ir/type-updating.h1
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/StringLowering.cpp180
-rw-r--r--src/passes/pass.cpp8
-rw-r--r--src/passes/passes.h1
-rw-r--r--test/lit/help/wasm-opt.test2
-rw-r--r--test/lit/help/wasm2js.test2
-rw-r--r--test/lit/passes/simplify-globals-strings.wast65
-rw-r--r--test/lit/passes/string-gathering.wast96
9 files changed, 290 insertions, 66 deletions
diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h
index 92913699e..ae247b6c7 100644
--- a/src/ir/type-updating.h
+++ b/src/ir/type-updating.h
@@ -443,7 +443,6 @@ public:
std::unordered_map<HeapType, Signature> newSignatures;
-public:
TypeMapper(Module& wasm, const TypeUpdates& mapping)
: GlobalTypeRewriter(wasm), mapping(mapping) {}
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 0e74d4b93..4cb696c21 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -90,6 +90,7 @@ set(passes_SOURCES
SignaturePruning.cpp
SignatureRefining.cpp
SignExtLowering.cpp
+ StringLowering.cpp
Strip.cpp
StripTargetFeatures.cpp
RedundantSetElimination.cpp
diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp
new file mode 100644
index 000000000..f880a50b2
--- /dev/null
+++ b/src/passes/StringLowering.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2024 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Utilities for lowering strings into simpler things.
+//
+// StringGathering collects all string.const operations and stores them in
+// globals, avoiding them appearing in code that can run more than once (which
+// can have overhead in VMs).
+//
+// Building on that, an extended version of StringGathering will also replace
+// those new globals with imported globals of type externref, for use with the
+// string imports proposal. String operations will likewise need to be lowered.
+// TODO
+//
+
+#include <algorithm>
+
+#include "ir/module-utils.h"
+#include "ir/names.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+struct StringGathering : public Pass {
+ // All the strings we found in the module.
+ std::vector<Name> strings;
+
+ // Pointers to all StringConsts, so that we can replace them.
+ using StringPtrs = std::vector<Expression**>;
+ StringPtrs stringPtrs;
+
+ // Main entry point.
+ void run(Module* module) override {
+ processModule(module);
+ addGlobals(module);
+ replaceStrings(module);
+ }
+
+ // Scan the entire wasm to find the relevant strings to populate our global
+ // data structures.
+ void processModule(Module* module) {
+ struct StringWalker : public PostWalker<StringWalker> {
+ StringPtrs& stringPtrs;
+
+ StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {}
+
+ void visitStringConst(StringConst* curr) {
+ stringPtrs.push_back(getCurrentPointer());
+ }
+ };
+
+ ModuleUtils::ParallelFunctionAnalysis<StringPtrs> analysis(
+ *module, [&](Function* func, StringPtrs& stringPtrs) {
+ if (!func->imported()) {
+ StringWalker(stringPtrs).walk(func->body);
+ }
+ });
+
+ // Also walk the global module code (for simplicity, also add it to the
+ // function map, using a "function" key of nullptr).
+ auto& globalStrings = analysis.map[nullptr];
+ StringWalker(globalStrings).walkModuleCode(module);
+
+ // Combine all the strings.
+ std::unordered_set<Name> stringSet;
+ for (auto& [_, currStringPtrs] : analysis.map) {
+ for (auto** stringPtr : currStringPtrs) {
+ stringSet.insert((*stringPtr)->cast<StringConst>()->string);
+ stringPtrs.push_back(stringPtr);
+ }
+ }
+
+ // Sort the strings for determinism (alphabetically).
+ strings = std::vector<Name>(stringSet.begin(), stringSet.end());
+ std::sort(strings.begin(), strings.end());
+ }
+
+ // For each string, the name of the global that replaces it.
+ std::unordered_map<Name, Name> stringToGlobalName;
+
+ Type nnstringref = Type(HeapType::string, NonNullable);
+
+ // Existing globals already in the form we emit can be reused. That is, if
+ // we see
+ //
+ // (global $foo (ref string) (string.const ..))
+ //
+ // then we can just use that as the global for that string. This avoids
+ // repeated executions of the pass adding more and more globals.
+ //
+ // Note that we don't note these in newNames: They are already in the right
+ // sorted position, before any uses, as we use the first of them for each
+ // string. Only actually new names need sorting.
+ //
+ // Any time we reuse a global, we must not modify its body (or else we'd
+ // replace the global that all others read from); we note them here and
+ // avoid them in replaceStrings later to avoid such trampling.
+ std::unordered_set<Expression**> stringPtrsToPreserve;
+
+ void addGlobals(Module* module) {
+ // Note all the new names we create for the sorting later.
+ std::unordered_set<Name> newNames;
+
+ // Find globals to reuse (see comment on stringPtrsToPreserve for context).
+ for (auto& global : module->globals) {
+ if (global->type == nnstringref && !global->imported()) {
+ if (auto* stringConst = global->init->dynCast<StringConst>()) {
+ auto& globalName = stringToGlobalName[stringConst->string];
+ if (!globalName.is()) {
+ // This is the first global for this string, use it.
+ globalName = global->name;
+ stringPtrsToPreserve.insert(&global->init);
+ }
+ }
+ }
+ }
+
+ Builder builder(*module);
+ for (Index i = 0; i < strings.size(); i++) {
+ auto& globalName = stringToGlobalName[strings[i]];
+ if (globalName.is()) {
+ // We are reusing a global for this one.
+ continue;
+ }
+
+ auto& string = strings[i];
+ auto name = Names::getValidGlobalName(
+ *module, std::string("string.const_") + std::string(string.str));
+ globalName = name;
+ newNames.insert(name);
+ auto* stringConst = builder.makeStringConst(string);
+ auto global =
+ builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable);
+ module->addGlobal(std::move(global));
+ }
+
+ // Sort our new globals to the start, as other global initializers may use
+ // them (and it would be invalid for us to appear after a use). This sort is
+ // a simple way to ensure that we validate, but it may be unoptimal (we
+ // leave that for reorder-globals).
+ std::stable_sort(
+ module->globals.begin(),
+ module->globals.end(),
+ [&](const std::unique_ptr<Global>& a, const std::unique_ptr<Global>& b) {
+ return newNames.count(a->name) && !newNames.count(b->name);
+ });
+ }
+
+ void replaceStrings(Module* module) {
+ Builder builder(*module);
+ for (auto** stringPtr : stringPtrs) {
+ if (stringPtrsToPreserve.count(stringPtr)) {
+ continue;
+ }
+ auto* stringConst = (*stringPtr)->cast<StringConst>();
+ auto globalName = stringToGlobalName[stringConst->string];
+ *stringPtr = builder.makeGlobalGet(globalName, nnstringref);
+ }
+ }
+};
+
+Pass* createStringGatheringPass() { return new StringGathering(); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 0b7d3426b..057642ea2 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -475,6 +475,9 @@ void PassRegistry::registerPasses() {
"ssa-nomerge",
"ssa-ify variables so that they have a single assignment, ignoring merges",
createSSAifyNoMergePass);
+ registerPass("string-gathering",
+ "gathers wasm strings to globals",
+ createStringGatheringPass);
registerPass(
"strip", "deprecated; same as strip-debug", createStripDebugPass);
registerPass("stack-check",
@@ -710,6 +713,11 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() {
addIfNoDWARFIssues("simplify-globals");
}
addIfNoDWARFIssues("remove-unused-module-elements");
+ if (options.optimizeLevel >= 2 && wasm->features.hasStrings()) {
+ // Gather strings to globals right before reorder-globals, which will then
+ // sort them properly.
+ addIfNoDWARFIssues("string-gathering");
+ }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
addIfNoDWARFIssues("reorder-globals");
}
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 8e263032a..295772109 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -153,6 +153,7 @@ Pass* createSimplifyLocalsNoTeePass();
Pass* createSimplifyLocalsNoStructurePass();
Pass* createSimplifyLocalsNoTeeNoStructurePass();
Pass* createStackCheckPass();
+Pass* createStringGatheringPass();
Pass* createStripDebugPass();
Pass* createStripDWARFPass();
Pass* createStripProducersPass();
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index fc33fce8d..8732f5e27 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -467,6 +467,8 @@
;; CHECK-NEXT: --stack-check enforce limits on llvm's
;; CHECK-NEXT: __stack_pointer global
;; CHECK-NEXT:
+;; CHECK-NEXT: --string-gathering gathers wasm strings to globals
+;; CHECK-NEXT:
;; CHECK-NEXT: --strip deprecated; same as strip-debug
;; CHECK-NEXT:
;; CHECK-NEXT: --strip-debug strip debug info (including the
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 7db564101..40d608857 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -426,6 +426,8 @@
;; CHECK-NEXT: --stack-check enforce limits on llvm's
;; CHECK-NEXT: __stack_pointer global
;; CHECK-NEXT:
+;; CHECK-NEXT: --string-gathering gathers wasm strings to globals
+;; CHECK-NEXT:
;; CHECK-NEXT: --strip deprecated; same as strip-debug
;; CHECK-NEXT:
;; CHECK-NEXT: --strip-debug strip debug info (including the
diff --git a/test/lit/passes/simplify-globals-strings.wast b/test/lit/passes/simplify-globals-strings.wast
deleted file mode 100644
index 813e2964c..000000000
--- a/test/lit/passes/simplify-globals-strings.wast
+++ /dev/null
@@ -1,65 +0,0 @@
-;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
-;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up.
-
-;; We do not "inline" strings from globals, as that might cause more
-;; allocations to happen. TODO if VMs optimize that, remove this
-
-;; RUN: foreach %s %t wasm-opt --simplify-globals -all -S -o - | filecheck %s
-
-;; Also test with -O3 --gufa to see that no other pass does this kind of thing,
-;; as extra coverage.
-
-;; RUN: foreach %s %t wasm-opt -O3 --gufa -all -S -o - | filecheck %s --check-prefix=O3GUF
-
-(module
- ;; CHECK: (type $0 (func (result anyref)))
-
- ;; CHECK: (global $string stringref (string.const "one"))
- ;; O3GUF: (type $0 (func (result anyref)))
-
- ;; O3GUF: (global $string (ref string) (string.const "one"))
- (global $string stringref (string.const "one"))
-
- ;; CHECK: (global $string-mut (mut stringref) (string.const "two"))
- ;; O3GUF: (global $string-mut (mut (ref string)) (string.const "two"))
- (global $string-mut (mut stringref) (string.const "two"))
-
- ;; CHECK: (export "global" (func $global))
- ;; O3GUF: (export "global" (func $global))
- (export "global" (func $global))
-
- ;; CHECK: (export "written" (func $written))
- ;; O3GUF: (export "written" (func $written))
- (export "written" (func $written))
-
- ;; CHECK: (func $global (type $0) (result anyref)
- ;; CHECK-NEXT: (global.get $string)
- ;; CHECK-NEXT: )
- ;; O3GUF: (func $global (type $0) (result anyref)
- ;; O3GUF-NEXT: (global.get $string)
- ;; O3GUF-NEXT: )
- (func $global (result anyref)
- ;; This should not turn into "one".
- (global.get $string)
- )
-
- ;; CHECK: (func $written (type $0) (result anyref)
- ;; CHECK-NEXT: (global.set $string-mut
- ;; CHECK-NEXT: (string.const "three")
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (global.get $string-mut)
- ;; CHECK-NEXT: )
- ;; O3GUF: (func $written (type $0) (result anyref)
- ;; O3GUF-NEXT: (global.set $string-mut
- ;; O3GUF-NEXT: (string.const "three")
- ;; O3GUF-NEXT: )
- ;; O3GUF-NEXT: (global.get $string-mut)
- ;; O3GUF-NEXT: )
- (func $written (result anyref)
- (global.set $string-mut
- (string.const "three")
- )
- ;; This should not turn into "three".
- (global.get $string-mut)
- )
-)
diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast
new file mode 100644
index 000000000..21fe358a3
--- /dev/null
+++ b/test/lit/passes/string-gathering.wast
@@ -0,0 +1,96 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt --string-gathering -all -S -o - | filecheck %s
+
+;; All the strings should be collected into globals and used from there. They
+;; should also be sorted deterministically (alphabetically).
+
+(module
+ ;; Note that $global will be reused: no new global will be added for "foo".
+ ;; $global2 almost can, but it has the wrong type, so it won't.
+
+ ;; CHECK: (type $0 (func))
+
+ ;; CHECK: (global $string.const_bar (ref string) (string.const "bar"))
+
+ ;; CHECK: (global $string.const_other (ref string) (string.const "other"))
+
+ ;; CHECK: (global $global (ref string) (string.const "foo"))
+ (global $global (ref string) (string.const "foo"))
+
+ ;; CHECK: (global $global2 stringref (global.get $string.const_bar))
+ (global $global2 (ref null string) (string.const "bar"))
+
+ ;; CHECK: (func $a (type $0)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $string.const_bar)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $global)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $a
+ (drop
+ (string.const "bar")
+ )
+ (drop
+ (string.const "foo")
+ )
+ )
+
+ ;; CHECK: (func $b (type $0)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $string.const_bar)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $string.const_other)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $global)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $global2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $b
+ (drop
+ (string.const "bar")
+ )
+ (drop
+ (string.const "other")
+ )
+ ;; Existing global.gets are not modified (but after this pass,
+ ;; SimplifyGlobals could help; though in practice the globals would have
+ ;; been propagated here anyhow).
+ (drop
+ (global.get $global)
+ )
+ (drop
+ (global.get $global2)
+ )
+ )
+)
+
+;; Multiple possible reusable globals. Also test ignoring of imports.
+(module
+ ;; CHECK: (import "a" "b" (global $import (ref string)))
+ (import "a" "b" (global $import (ref string)))
+
+ ;; CHECK: (global $global1 (ref string) (string.const "foo"))
+ (global $global1 (ref string) (string.const "foo"))
+
+ ;; CHECK: (global $global2 (ref string) (global.get $global1))
+ (global $global2 (ref string) (string.const "foo"))
+
+ ;; CHECK: (global $global3 (ref string) (global.get $global1))
+ (global $global3 (ref string) (string.const "foo"))
+
+ ;; CHECK: (global $global4 (ref string) (string.const "bar"))
+ (global $global4 (ref string) (string.const "bar"))
+
+ ;; CHECK: (global $global5 (ref string) (global.get $global4))
+ (global $global5 (ref string) (string.const "bar"))
+
+ ;; CHECK: (global $global6 (ref string) (global.get $global4))
+ (global $global6 (ref string) (string.const "bar"))
+)