1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
|
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Utilities for lowering strings into simpler things.
//
// StringGathering collects all string.const operations and stores them in
// globals, avoiding them appearing in code that can run more than once (which
// can have overhead in VMs).
//
// StringLowering does the same, and also replaces those new globals with
// imported globals of type externref, for use with the string imports proposal.
// String operations will likewise need to be lowered. TODO
//
#include <algorithm>
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/type-updating.h"
#include "pass.h"
#include "support/json.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm {
struct StringGathering : public Pass {
// All the strings we found in the module.
std::vector<Name> strings;
// Pointers to all StringConsts, so that we can replace them.
using StringPtrs = std::vector<Expression**>;
StringPtrs stringPtrs;
// Main entry point.
void run(Module* module) override {
processModule(module);
addGlobals(module);
replaceStrings(module);
}
// Scan the entire wasm to find the relevant strings to populate our global
// data structures.
void processModule(Module* module) {
struct StringWalker : public PostWalker<StringWalker> {
StringPtrs& stringPtrs;
StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {}
void visitStringConst(StringConst* curr) {
stringPtrs.push_back(getCurrentPointer());
}
};
ModuleUtils::ParallelFunctionAnalysis<StringPtrs> analysis(
*module, [&](Function* func, StringPtrs& stringPtrs) {
if (!func->imported()) {
StringWalker(stringPtrs).walk(func->body);
}
});
// Also walk the global module code (for simplicity, also add it to the
// function map, using a "function" key of nullptr).
auto& globalStrings = analysis.map[nullptr];
StringWalker(globalStrings).walkModuleCode(module);
// Combine all the strings.
std::unordered_set<Name> stringSet;
for (auto& [_, currStringPtrs] : analysis.map) {
for (auto** stringPtr : currStringPtrs) {
stringSet.insert((*stringPtr)->cast<StringConst>()->string);
stringPtrs.push_back(stringPtr);
}
}
// Sort the strings for determinism (alphabetically).
strings = std::vector<Name>(stringSet.begin(), stringSet.end());
std::sort(strings.begin(), strings.end());
}
// For each string, the name of the global that replaces it.
std::unordered_map<Name, Name> stringToGlobalName;
Type nnstringref = Type(HeapType::string, NonNullable);
// Existing globals already in the form we emit can be reused. That is, if
// we see
//
// (global $foo (ref string) (string.const ..))
//
// then we can just use that as the global for that string. This avoids
// repeated executions of the pass adding more and more globals.
//
// Note that we don't note these in newNames: They are already in the right
// sorted position, before any uses, as we use the first of them for each
// string. Only actually new names need sorting.
//
// Any time we reuse a global, we must not modify its body (or else we'd
// replace the global that all others read from); we note them here and
// avoid them in replaceStrings later to avoid such trampling.
std::unordered_set<Expression**> stringPtrsToPreserve;
void addGlobals(Module* module) {
// Note all the new names we create for the sorting later.
std::unordered_set<Name> newNames;
// Find globals to reuse (see comment on stringPtrsToPreserve for context).
for (auto& global : module->globals) {
if (global->type == nnstringref && !global->imported()) {
if (auto* stringConst = global->init->dynCast<StringConst>()) {
auto& globalName = stringToGlobalName[stringConst->string];
if (!globalName.is()) {
// This is the first global for this string, use it.
globalName = global->name;
stringPtrsToPreserve.insert(&global->init);
}
}
}
}
Builder builder(*module);
for (Index i = 0; i < strings.size(); i++) {
auto& globalName = stringToGlobalName[strings[i]];
if (globalName.is()) {
// We are reusing a global for this one.
continue;
}
auto& string = strings[i];
auto name = Names::getValidGlobalName(
*module, std::string("string.const_") + std::string(string.str));
globalName = name;
newNames.insert(name);
auto* stringConst = builder.makeStringConst(string);
auto global =
builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable);
module->addGlobal(std::move(global));
}
// Sort our new globals to the start, as other global initializers may use
// them (and it would be invalid for us to appear after a use). This sort is
// a simple way to ensure that we validate, but it may be unoptimal (we
// leave that for reorder-globals).
std::stable_sort(
module->globals.begin(),
module->globals.end(),
[&](const std::unique_ptr<Global>& a, const std::unique_ptr<Global>& b) {
return newNames.count(a->name) && !newNames.count(b->name);
});
}
void replaceStrings(Module* module) {
Builder builder(*module);
for (auto** stringPtr : stringPtrs) {
if (stringPtrsToPreserve.count(stringPtr)) {
continue;
}
auto* stringConst = (*stringPtr)->cast<StringConst>();
auto globalName = stringToGlobalName[stringConst->string];
*stringPtr = builder.makeGlobalGet(globalName, nnstringref);
}
}
};
struct StringLowering : public StringGathering {
void run(Module* module) override {
if (!module->features.has(FeatureSet::Strings)) {
return;
}
// First, run the gathering operation so all string.consts are in one place.
StringGathering::run(module);
// Lower the string.const globals into imports.
makeImports(module);
// Remove all HeapType::string etc. in favor of externref.
updateTypes(module);
// Disable the feature here after we lowered everything away.
module->features.disable(FeatureSet::Strings);
}
void makeImports(Module* module) {
Index importIndex = 0;
json::Value stringArray;
stringArray.setArray();
std::vector<Name> importedStrings;
for (auto& global : module->globals) {
if (global->init) {
if (auto* c = global->init->dynCast<StringConst>()) {
global->module = "string.const";
global->base = std::to_string(importIndex);
importIndex++;
global->init = nullptr;
auto str = json::Value::make(std::string(c->string.str).c_str());
stringArray.push_back(str);
}
}
}
// Add a custom section with the JSON.
std::stringstream stream;
stringArray.stringify(stream);
auto str = stream.str();
auto vec = std::vector<char>(str.begin(), str.end());
module->customSections.emplace_back(
CustomSection{"string.consts", std::move(vec)});
}
void updateTypes(Module* module) {
TypeMapper::TypeUpdates updates;
updates[HeapType::string] = HeapType::ext;
TypeMapper(*module, updates).map();
}
};
Pass* createStringGatheringPass() { return new StringGathering(); }
Pass* createStringLoweringPass() { return new StringLowering(); }
} // namespace wasm
|