1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
|
/*
* Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Optimize away trivial tuples. When values are bundled together in a tuple, we
// are limited in how we can optimize them in the various local-related passes,
// like this:
//
// (local.set $tuple
// (tuple.make 3 (A) (B) (C)))
// (use
// (tuple.extract 3 0
// (local.get $tuple)))
//
// If there are no other uses, then we just need one of the three elements. By
// lowering them to three separate locals, other passes can remove the other
// two.
//
// Specifically, this pass seeks out tuple locals that have these properties:
//
// * They are always written by either a tuple.make or another tuple local with
// these properties.
// * They are always used either in tuple.extract or they are copied to another
// tuple local with these properties.
//
// The set of those tuple locals can be easily optimized into individual locals,
// as the tuple does not "escape" into, say, a return value.
//
// TODO: Blocks etc. might be handled here, but it's not clear if we want to:
// there are situations where multivalue leads to smaller code using
// those constructs. Atm this pass should only remove things that are
// definitely worth lowering.
//
#include <pass.h>
#include <support/unique_deferring_queue.h>
#include <wasm-builder.h>
#include <wasm.h>
namespace wasm {
struct TupleOptimization : public WalkerPass<PostWalker<TupleOptimization>> {
bool isFunctionParallel() override { return true; }
// Create a fresh instance of this pass (used by the pass runner when running
// function-parallel passes).
std::unique_ptr<Pass> create() override {
return std::make_unique<TupleOptimization>();
}
// Track the number of uses for each tuple local. We consider a use as a
// local.get, a set, or a tee. A tee counts as two uses (since it both sets
// and gets, and so we must see that it is both used and uses properly).
std::vector<Index> uses;
// Tracks which tuple local uses are valid, that is, follow the properties
// above. If we have more uses than valid uses then we must have an invalid
// one, and the local cannot be optimized.
std::vector<Index> validUses;
// When one tuple local copies the value of another, we need to track the
// index that was copied, as if the source ends up bad then the target is bad
// as well.
//
// This is a symmetrical map, that is, we consider copies to work both ways:
//
// x \in copiedIndexed[y] <==> y \in copiedIndexed[x]
//
std::vector<std::unordered_set<Index>> copiedIndexes;
// Entry point: gathers per-local info over the whole function body, then
// rewrites the optimizable tuple locals in optimize().
void doWalkFunction(Function* func) {
// If tuples are not enabled, or there are no tuple locals, then there is no
// work to do.
if (!getModule()->features.hasMultivalue()) {
return;
}
bool hasTuple = false;
for (auto var : func->vars) {
if (var.isTuple()) {
hasTuple = true;
break;
}
}
if (!hasTuple) {
return;
}
// Prepare global data structures before we collect info.
// NOTE(review): these vectors are resized but never cleared, so stale counts
// would survive if one instance walked two functions. This assumes the pass
// runner creates a fresh instance per function (see create() above) —
// confirm against the pass-runner implementation.
auto numLocals = func->getNumLocals();
uses.resize(numLocals);
validUses.resize(numLocals);
copiedIndexes.resize(numLocals);
// Walk the code to collect info.
Super::doWalkFunction(func);
// Analyze and optimize.
optimize(func);
}
// Count every get of a tuple local as a use. Whether that use is *valid* is
// determined by the consumer (visitLocalSet / visitTupleExtract below).
void visitLocalGet(LocalGet* curr) {
if (curr->type.isTuple()) {
uses[curr->index]++;
}
}
void visitLocalSet(LocalSet* curr) {
if (getFunction()->getLocalType(curr->index).isTuple()) {
// See comment above about tees (we consider their set and get each a
// separate use).
uses[curr->index] += curr->isTee() ? 2 : 1;
auto* value = curr->value;
// We need the input to the local to be another such local (from a tee, or
// a get), or a tuple.make.
if (auto* tee = value->dynCast<LocalSet>()) {
assert(tee->isTee());
// We don't want to count anything as valid if the inner tee is
// unreachable. In that case the outer tee is also unreachable, of
// course, and in fact they might not even have the same tuple type or
// the inner one might not even be a tuple (since we are in unreachable
// code, that is possible). We could try to optimize unreachable tees in
// some cases, but it's simpler to let DCE simplify the code first.
if (tee->type != Type::unreachable) {
validUses[tee->index]++;
validUses[curr->index]++;
// Record the copy in both directions (see copiedIndexes above).
copiedIndexes[tee->index].insert(curr->index);
copiedIndexes[curr->index].insert(tee->index);
}
} else if (auto* get = value->dynCast<LocalGet>()) {
validUses[get->index]++;
validUses[curr->index]++;
copiedIndexes[get->index].insert(curr->index);
copiedIndexes[curr->index].insert(get->index);
} else if (value->is<TupleMake>()) {
validUses[curr->index]++;
}
}
}
// A tuple.extract reading directly from a tuple local is a valid use of that
// local.
void visitTupleExtract(TupleExtract* curr) {
// We need the input to be a local, either from a tee or a get.
if (auto* set = curr->tuple->dynCast<LocalSet>()) {
validUses[set->index]++;
} else if (auto* get = curr->tuple->dynCast<LocalGet>()) {
validUses[get->index]++;
}
}
// Using the collected counts, find the tuple locals whose every use is
// valid, create scalar locals for their lanes, and rewrite the code.
void optimize(Function* func) {
auto numLocals = func->getNumLocals();
// Find the set of bad indexes. We add each such candidate to a worklist
// that we will then flow to find all those corrupted.
std::vector<bool> bad(numLocals);
UniqueDeferredQueue<Index> work;
for (Index i = 0; i < uses.size(); i++) {
// Every valid use was also counted as a use, so this must hold.
assert(validUses[i] <= uses[i]);
if (uses[i] > 0 && validUses[i] < uses[i]) {
// This is a bad tuple.
work.push(i);
}
}
// Flow badness forward.
while (!work.empty()) {
auto i = work.pop();
if (bad[i]) {
continue;
}
bad[i] = true;
// Anything that copies to/from a bad local is bad as well.
for (auto target : copiedIndexes[i]) {
work.push(target);
}
}
// Good indexes we can optimize are tuple locals with uses that are not bad.
std::vector<bool> good(numLocals);
bool hasGood = false;
for (Index i = 0; i < uses.size(); i++) {
if (uses[i] > 0 && !bad[i]) {
good[i] = true;
hasGood = true;
}
}
if (!hasGood) {
return;
}
// We found things to optimize! Create new non-tuple locals for their
// contents, and then rewrite the code to use those according to the
// mapping from tuple locals to normal ones. The mapping maps a tuple local
// to the base index used for its contents: an index and several others
// right after it, depending on the tuple size.
std::unordered_map<Index, Index> tupleToNewBaseMap;
for (Index i = 0; i < good.size(); i++) {
if (!good[i]) {
continue;
}
auto newBase = func->getNumLocals();
tupleToNewBaseMap[i] = newBase;
Index lastNewIndex = 0;
// Add one scalar local per lane of the tuple; they must end up
// contiguous, starting at newBase.
for (auto t : func->getLocalType(i)) {
Index newIndex = Builder::addVar(func, t);
if (lastNewIndex == 0) {
// This is the first new local we added (0 is an impossible value,
// since tuple locals exist, hence index 0 was already taken), so it
// must be equal to the base.
assert(newIndex == newBase);
} else {
// This must be right after the former.
assert(newIndex == lastNewIndex + 1);
}
lastNewIndex = newIndex;
}
}
MapApplier mapApplier(tupleToNewBaseMap);
mapApplier.walkFunctionInModule(func, getModule());
}
// Second walker: rewrites sets/gets/extracts of the optimizable tuple locals
// into operations on the new scalar locals, using the map built above.
struct MapApplier : public PostWalker<MapApplier> {
std::unordered_map<Index, Index>& tupleToNewBaseMap;
MapApplier(std::unordered_map<Index, Index>& tupleToNewBaseMap)
: tupleToNewBaseMap(tupleToNewBaseMap) {}
// Gets the new base index if there is one, or 0 if not (0 is an impossible
// value for a new index, as local index 0 was taken before, as tuple
// locals existed).
Index getNewBaseIndex(Index i) {
auto iter = tupleToNewBaseMap.find(i);
if (iter == tupleToNewBaseMap.end()) {
return 0;
}
return iter->second;
}
// Given a local.get or local.set, return the new base index for the local
// index used there. Returns 0 (an impossible value, see above) otherwise.
Index getSetOrGetBaseIndex(Expression* setOrGet) {
Index index;
if (auto* set = setOrGet->dynCast<LocalSet>()) {
index = set->index;
} else if (auto* get = setOrGet->dynCast<LocalGet>()) {
index = get->index;
} else {
return 0;
}
return getNewBaseIndex(index);
}
// Replacing a local.tee requires some care, since we might have
//
// (local.set
// (local.tee
// ..
//
// We replace the local.tee with a block of sets of the new non-tuple
// locals, and the outer set must then (1) keep those around and also (2)
// identify the local that was tee'd, so we know what to get (which has been
// replaced by the block). To make that simple keep a map of the things that
// replaced tees.
std::unordered_map<Expression*, LocalSet*> replacedTees;
void visitLocalSet(LocalSet* curr) {
// Helper that performs the replacement and, for tees, records it so the
// enclosing set/extract can find the original tee (see replacedTees).
auto replace = [&](Expression* replacement) {
if (curr->isTee()) {
replacedTees[replacement] = curr;
}
replaceCurrent(replacement);
};
if (auto targetBase = getNewBaseIndex(curr->index)) {
Builder builder(*getModule());
auto type = getFunction()->getLocalType(curr->index);
auto* value = curr->value;
if (auto* make = value->dynCast<TupleMake>()) {
// Write each of the tuple.make fields into the proper local.
std::vector<Expression*> sets;
for (Index i = 0; i < type.size(); i++) {
auto* value = make->operands[i];
sets.push_back(builder.makeLocalSet(targetBase + i, value));
}
replace(builder.makeBlock(sets));
return;
}
std::vector<Expression*> contents;
auto iter = replacedTees.find(value);
if (iter != replacedTees.end()) {
// The input to us was a tee that has been replaced. The actual value
// we read from (the tee) can be found in replacedTees. Also, we
// need to keep around the replacement of the tee.
contents.push_back(value);
value = iter->second;
}
// This is a copy of a tuple local into another. Copy all the fields
// between them.
Index sourceBase = getSetOrGetBaseIndex(value);
// The target is being optimized, so the source must be as well, or else
// we were confused earlier and the target should not be.
assert(sourceBase);
// The source and target may have different element types due to
// subtyping (but their sizes must be equal).
auto sourceType = value->type;
assert(sourceType.size() == type.size());
for (Index i = 0; i < type.size(); i++) {
auto* get = builder.makeLocalGet(sourceBase + i, sourceType[i]);
contents.push_back(builder.makeLocalSet(targetBase + i, get));
}
replace(builder.makeBlock(contents));
}
}
void visitTupleExtract(TupleExtract* curr) {
auto* value = curr->tuple;
Expression* extraContents = nullptr;
auto iter = replacedTees.find(value);
if (iter != replacedTees.end()) {
// The input to us was a tee that has been replaced. Handle it as in
// visitLocalSet.
extraContents = value;
value = iter->second;
}
auto type = value->type;
// Leave unreachable code alone (see the note on unreachable tees above);
// DCE can clean it up first.
if (type == Type::unreachable) {
return;
}
Index sourceBase = getSetOrGetBaseIndex(value);
// A zero base means this local was not optimized; leave the extract as is.
if (!sourceBase) {
return;
}
Builder builder(*getModule());
auto i = curr->index;
// Read the single lane we need directly from its scalar local.
auto* get = builder.makeLocalGet(sourceBase + i, type[i]);
if (extraContents) {
// Keep the tee's replacement (its side effects) before the get.
replaceCurrent(builder.makeSequence(extraContents, get));
} else {
replaceCurrent(get);
}
}
};
};
// Factory for the pass registry; the caller takes ownership of the result.
Pass* createTupleOptimizationPass() {
return new TupleOptimization();
}
} // namespace wasm
|