diff options
-rw-r--r-- | src/ir/type-updating.h | 1 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/StringLowering.cpp | 180 | ||||
-rw-r--r-- | src/passes/pass.cpp | 8 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | test/lit/help/wasm-opt.test | 2 | ||||
-rw-r--r-- | test/lit/help/wasm2js.test | 2 | ||||
-rw-r--r-- | test/lit/passes/simplify-globals-strings.wast | 65 | ||||
-rw-r--r-- | test/lit/passes/string-gathering.wast | 96 |
9 files changed, 290 insertions, 66 deletions
diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h index 92913699e..ae247b6c7 100644 --- a/src/ir/type-updating.h +++ b/src/ir/type-updating.h @@ -443,7 +443,6 @@ public: std::unordered_map<HeapType, Signature> newSignatures; -public: TypeMapper(Module& wasm, const TypeUpdates& mapping) : GlobalTypeRewriter(wasm), mapping(mapping) {} diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 0e74d4b93..4cb696c21 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -90,6 +90,7 @@ set(passes_SOURCES SignaturePruning.cpp SignatureRefining.cpp SignExtLowering.cpp + StringLowering.cpp Strip.cpp StripTargetFeatures.cpp RedundantSetElimination.cpp diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp new file mode 100644 index 000000000..f880a50b2 --- /dev/null +++ b/src/passes/StringLowering.cpp @@ -0,0 +1,180 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Utilities for lowering strings into simpler things. +// +// StringGathering collects all string.const operations and stores them in +// globals, avoiding them appearing in code that can run more than once (which +// can have overhead in VMs). +// +// Building on that, an extended version of StringGathering will also replace +// those new globals with imported globals of type externref, for use with the +// string imports proposal. String operations will likewise need to be lowered. +// TODO +// + +#include <algorithm> + +#include "ir/module-utils.h" +#include "ir/names.h" +#include "pass.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +struct StringGathering : public Pass { + // All the strings we found in the module. + std::vector<Name> strings; + + // Pointers to all StringConsts, so that we can replace them. + using StringPtrs = std::vector<Expression**>; + StringPtrs stringPtrs; + + // Main entry point. + void run(Module* module) override { + processModule(module); + addGlobals(module); + replaceStrings(module); + } + + // Scan the entire wasm to find the relevant strings to populate our global + // data structures. + void processModule(Module* module) { + struct StringWalker : public PostWalker<StringWalker> { + StringPtrs& stringPtrs; + + StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {} + + void visitStringConst(StringConst* curr) { + stringPtrs.push_back(getCurrentPointer()); + } + }; + + ModuleUtils::ParallelFunctionAnalysis<StringPtrs> analysis( + *module, [&](Function* func, StringPtrs& stringPtrs) { + if (!func->imported()) { + StringWalker(stringPtrs).walk(func->body); + } + }); + + // Also walk the global module code (for simplicity, also add it to the + // function map, using a "function" key of nullptr). + auto& globalStrings = analysis.map[nullptr]; + StringWalker(globalStrings).walkModuleCode(module); + + // Combine all the strings. + std::unordered_set<Name> stringSet; + for (auto& [_, currStringPtrs] : analysis.map) { + for (auto** stringPtr : currStringPtrs) { + stringSet.insert((*stringPtr)->cast<StringConst>()->string); + stringPtrs.push_back(stringPtr); + } + } + + // Sort the strings for determinism (alphabetically). + strings = std::vector<Name>(stringSet.begin(), stringSet.end()); + std::sort(strings.begin(), strings.end()); + } + + // For each string, the name of the global that replaces it. + std::unordered_map<Name, Name> stringToGlobalName; + + Type nnstringref = Type(HeapType::string, NonNullable); + + // Existing globals already in the form we emit can be reused. That is, if + // we see + // + // (global $foo (ref string) (string.const ..)) + // + // then we can just use that as the global for that string. This avoids + // repeated executions of the pass adding more and more globals. + // + // Note that we don't note these in newNames: They are already in the right + // sorted position, before any uses, as we use the first of them for each + // string. Only actually new names need sorting. + // + // Any time we reuse a global, we must not modify its body (or else we'd + // replace the global that all others read from); we note them here and + // avoid them in replaceStrings later to avoid such trampling. + std::unordered_set<Expression**> stringPtrsToPreserve; + + void addGlobals(Module* module) { + // Note all the new names we create for the sorting later. + std::unordered_set<Name> newNames; + + // Find globals to reuse (see comment on stringPtrsToPreserve for context). + for (auto& global : module->globals) { + if (global->type == nnstringref && !global->imported()) { + if (auto* stringConst = global->init->dynCast<StringConst>()) { + auto& globalName = stringToGlobalName[stringConst->string]; + if (!globalName.is()) { + // This is the first global for this string, use it. + globalName = global->name; + stringPtrsToPreserve.insert(&global->init); + } + } + } + } + + Builder builder(*module); + for (Index i = 0; i < strings.size(); i++) { + auto& globalName = stringToGlobalName[strings[i]]; + if (globalName.is()) { + // We are reusing a global for this one. + continue; + } + + auto& string = strings[i]; + auto name = Names::getValidGlobalName( + *module, std::string("string.const_") + std::string(string.str)); + globalName = name; + newNames.insert(name); + auto* stringConst = builder.makeStringConst(string); + auto global = + builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable); + module->addGlobal(std::move(global)); + } + + // Sort our new globals to the start, as other global initializers may use + // them (and it would be invalid for us to appear after a use). This sort is + // a simple way to ensure that we validate, but it may be unoptimal (we + // leave that for reorder-globals). + std::stable_sort( + module->globals.begin(), + module->globals.end(), + [&](const std::unique_ptr<Global>& a, const std::unique_ptr<Global>& b) { + return newNames.count(a->name) && !newNames.count(b->name); + }); + } + + void replaceStrings(Module* module) { + Builder builder(*module); + for (auto** stringPtr : stringPtrs) { + if (stringPtrsToPreserve.count(stringPtr)) { + continue; + } + auto* stringConst = (*stringPtr)->cast<StringConst>(); + auto globalName = stringToGlobalName[stringConst->string]; + *stringPtr = builder.makeGlobalGet(globalName, nnstringref); + } + } +}; + +Pass* createStringGatheringPass() { return new StringGathering(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 0b7d3426b..057642ea2 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -475,6 +475,9 @@ void PassRegistry::registerPasses() { "ssa-nomerge", "ssa-ify variables so that they have a single assignment, ignoring merges", createSSAifyNoMergePass); + registerPass("string-gathering", + "gathers wasm strings to globals", + createStringGatheringPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", @@ -710,6 +713,11 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() { addIfNoDWARFIssues("simplify-globals"); } addIfNoDWARFIssues("remove-unused-module-elements"); + if (options.optimizeLevel >= 2 && wasm->features.hasStrings()) { + // Gather strings to globals right before reorder-globals, which will then + // sort them properly. + addIfNoDWARFIssues("string-gathering"); + } if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { addIfNoDWARFIssues("reorder-globals"); } diff --git a/src/passes/passes.h b/src/passes/passes.h index 8e263032a..295772109 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -153,6 +153,7 @@ Pass* createSimplifyLocalsNoTeePass(); Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); +Pass* createStringGatheringPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index fc33fce8d..8732f5e27 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -467,6 +467,8 @@ ;; CHECK-NEXT: --stack-check enforce limits on llvm's ;; CHECK-NEXT: __stack_pointer global ;; CHECK-NEXT: +;; CHECK-NEXT: --string-gathering gathers wasm strings to globals +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 7db564101..40d608857 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -426,6 +426,8 @@ ;; CHECK-NEXT: --stack-check enforce limits on llvm's ;; CHECK-NEXT: __stack_pointer global ;; CHECK-NEXT: +;; CHECK-NEXT: --string-gathering gathers wasm strings to globals +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/passes/simplify-globals-strings.wast b/test/lit/passes/simplify-globals-strings.wast deleted file mode 100644 index 813e2964c..000000000 --- a/test/lit/passes/simplify-globals-strings.wast +++ /dev/null @@ -1,65 +0,0 @@ -;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. - -;; We do not "inline" strings from globals, as that might cause more -;; allocations to happen. TODO if VMs optimize that, remove this - -;; RUN: foreach %s %t wasm-opt --simplify-globals -all -S -o - | filecheck %s - -;; Also test with -O3 --gufa to see that no other pass does this kind of thing, -;; as extra coverage. - -;; RUN: foreach %s %t wasm-opt -O3 --gufa -all -S -o - | filecheck %s --check-prefix=O3GUF - -(module - ;; CHECK: (type $0 (func (result anyref))) - - ;; CHECK: (global $string stringref (string.const "one")) - ;; O3GUF: (type $0 (func (result anyref))) - - ;; O3GUF: (global $string (ref string) (string.const "one")) - (global $string stringref (string.const "one")) - - ;; CHECK: (global $string-mut (mut stringref) (string.const "two")) - ;; O3GUF: (global $string-mut (mut (ref string)) (string.const "two")) - (global $string-mut (mut stringref) (string.const "two")) - - ;; CHECK: (export "global" (func $global)) - ;; O3GUF: (export "global" (func $global)) - (export "global" (func $global)) - - ;; CHECK: (export "written" (func $written)) - ;; O3GUF: (export "written" (func $written)) - (export "written" (func $written)) - - ;; CHECK: (func $global (type $0) (result anyref) - ;; CHECK-NEXT: (global.get $string) - ;; CHECK-NEXT: ) - ;; O3GUF: (func $global (type $0) (result anyref) - ;; O3GUF-NEXT: (global.get $string) - ;; O3GUF-NEXT: ) - (func $global (result anyref) - ;; This should not turn into "one". - (global.get $string) - ) - - ;; CHECK: (func $written (type $0) (result anyref) - ;; CHECK-NEXT: (global.set $string-mut - ;; CHECK-NEXT: (string.const "three") - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (global.get $string-mut) - ;; CHECK-NEXT: ) - ;; O3GUF: (func $written (type $0) (result anyref) - ;; O3GUF-NEXT: (global.set $string-mut - ;; O3GUF-NEXT: (string.const "three") - ;; O3GUF-NEXT: ) - ;; O3GUF-NEXT: (global.get $string-mut) - ;; O3GUF-NEXT: ) - (func $written (result anyref) - (global.set $string-mut - (string.const "three") - ) - ;; This should not turn into "three". - (global.get $string-mut) - ) -) diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast new file mode 100644 index 000000000..21fe358a3 --- /dev/null +++ b/test/lit/passes/string-gathering.wast @@ -0,0 +1,96 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt --string-gathering -all -S -o - | filecheck %s + +;; All the strings should be collected into globals and used from there. They +;; should also be sorted deterministically (alphabetically). + +(module + ;; Note that $global will be reused: no new global will be added for "foo". + ;; $global2 almost can, but it has the wrong type, so it won't. + + ;; CHECK: (type $0 (func)) + + ;; CHECK: (global $string.const_bar (ref string) (string.const "bar")) + + ;; CHECK: (global $string.const_other (ref string) (string.const "other")) + + ;; CHECK: (global $global (ref string) (string.const "foo")) + (global $global (ref string) (string.const "foo")) + + ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) + (global $global2 (ref null string) (string.const "bar")) + + ;; CHECK: (func $a (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_bar) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $global) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a + (drop + (string.const "bar") + ) + (drop + (string.const "foo") + ) + ) + + ;; CHECK: (func $b (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_bar) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_other) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $global) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $global2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $b + (drop + (string.const "bar") + ) + (drop + (string.const "other") + ) + ;; Existing global.gets are not modified (but after this pass, + ;; SimplifyGlobals could help; though in practice the globals would have + ;; been propagated here anyhow). + (drop + (global.get $global) + ) + (drop + (global.get $global2) + ) + ) +) + +;; Multiple possible reusable globals. Also test ignoring of imports. +(module + ;; CHECK: (import "a" "b" (global $import (ref string))) + (import "a" "b" (global $import (ref string))) + + ;; CHECK: (global $global1 (ref string) (string.const "foo")) + (global $global1 (ref string) (string.const "foo")) + + ;; CHECK: (global $global2 (ref string) (global.get $global1)) + (global $global2 (ref string) (string.const "foo")) + + ;; CHECK: (global $global3 (ref string) (global.get $global1)) + (global $global3 (ref string) (string.const "foo")) + + ;; CHECK: (global $global4 (ref string) (string.const "bar")) + (global $global4 (ref string) (string.const "bar")) + + ;; CHECK: (global $global5 (ref string) (global.get $global4)) + (global $global5 (ref string) (string.const "bar")) + + ;; CHECK: (global $global6 (ref string) (global.get $global4)) + (global $global6 (ref string) (string.const "bar")) +) |