1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
|
/*
* Copyright 2017 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Eliminate redundant local.sets: if a local already has a particular
// value, we don't need to set it again. A common case here is loops
// that start at zero, since the default value is initialized to
// zero anyhow.
//
// A risk here is that we extend live ranges, e.g. we may use the default
// value at the very end of a function, keeping that local alive throughout.
// For that reason it is probably better to run this near the end of
// optimization, and especially after coalesce-locals. A final vaccum
// should be done after it, as this pass can leave around drop()s of
// values no longer necessary.
//
// So far this tracks constant values, and for everything else it considers
// them unique (so each local.set of a non-constant is a unique value, each
// merge is a unique value, etc.; there is no sophisticated value numbering
// here).
//
#include <cfg/cfg-traversal.h>
#include <ir/literal-utils.h>
#include <ir/numbering.h>
#include <ir/properties.h>
#include <ir/utils.h>
#include <pass.h>
#include <support/small_set.h>
#include <support/unique_deferring_queue.h>
#include <wasm-builder.h>
#include <wasm.h>
namespace wasm {
// Map each local index to its current value number (which is computed in
// ValueNumbering).
using LocalValues = std::vector<Index>;
namespace {
// information in a basic block
struct Info {
LocalValues start, end; // the local values at the start and end of the block
std::vector<Expression**> items;
};
struct RedundantSetElimination
: public WalkerPass<CFGWalker<RedundantSetElimination,
Visitor<RedundantSetElimination>,
Info>> {
bool isFunctionParallel() override { return true; }
std::unique_ptr<Pass> create() override {
return std::make_unique<RedundantSetElimination>();
}
// Branches outside of the function can be ignored, as we only look at locals
// which vanish when we leave.
bool ignoreBranchesOutsideOfFunc = true;
Index numLocals;
// In rare cases we make a change to a type that requires a refinalize.
bool refinalize = false;
// cfg traversal work
static void doVisitLocalGet(RedundantSetElimination* self,
Expression** currp) {
if (self->currBasicBlock) {
self->currBasicBlock->contents.items.push_back(currp);
}
}
static void doVisitLocalSet(RedundantSetElimination* self,
Expression** currp) {
if (self->currBasicBlock) {
self->currBasicBlock->contents.items.push_back(currp);
}
}
// main entry point
void doWalkFunction(Function* func) {
numLocals = func->getNumLocals();
if (numLocals == 0) {
return; // nothing to do
}
// Create a unique value for use to mark unseen locations.
unseenValue = valueNumbering.getUniqueValue();
// create the CFG by walking the IR
CFGWalker<RedundantSetElimination, Visitor<RedundantSetElimination>, Info>::
doWalkFunction(func);
// flow values across blocks
flowValues(func);
// remove redundant sets
optimize(func);
if (refinalize) {
ReFinalize().walkFunctionInModule(func, this->getModule());
}
}
// Use a value numbering for the values of expressions.
ValueNumbering valueNumbering;
// In additon to valueNumbering, each block has values for each merge.
std::unordered_map<BasicBlock*, std::unordered_map<Index, Index>>
blockMergeValues;
// A value that indicates we haven't seen this location yet.
Index unseenValue;
Index getUniqueValue() {
auto value = valueNumbering.getUniqueValue();
#ifdef RSE_DEBUG
std::cout << "new unique value " << value << '\n';
#endif
return value;
}
Index getValue(Literals lit) {
auto value = valueNumbering.getValue(lit);
#ifdef RSE_DEBUG
std::cout << "lit value " << value << '\n';
#endif
return value;
}
Index getBlockMergeValue(BasicBlock* block, Index index) {
auto& mergeValues = blockMergeValues[block];
auto iter = mergeValues.find(index);
if (iter != mergeValues.end()) {
return iter->second;
}
#ifdef RSE_DEBUG
std::cout << "new block-merge value for " << block << " : " << index
<< '\n';
#endif
return mergeValues[index] = getUniqueValue();
}
bool isBlockMergeValue(BasicBlock* block, Index index, Index value) {
auto iter = blockMergeValues.find(block);
if (iter == blockMergeValues.end()) {
return false;
}
auto& mergeValues = iter->second;
auto iter2 = mergeValues.find(index);
if (iter2 == mergeValues.end()) {
return false;
}
return value == iter2->second;
}
Index getValue(Expression* expr, LocalValues& currValues) {
if (auto* get = expr->dynCast<LocalGet>()) {
// a copy of whatever that was
return currValues[get->index];
}
auto value = valueNumbering.getValue(expr);
#ifdef RSE_DEBUG
std::cout << "expr value " << value << '\n';
#endif
return value;
}
// flowing
void flowValues(Function* func) {
for (auto& block : basicBlocks) {
LocalValues& start = block->contents.start;
start.resize(numLocals);
if (block.get() == entry) {
// params are complex values we can't optimize; vars are zeros
for (Index i = 0; i < numLocals; i++) {
auto type = func->getLocalType(i);
if (func->isParam(i)) {
#ifdef RSE_DEBUG
std::cout << "new param value for " << i << '\n';
#endif
start[i] = getUniqueValue();
} else if (!LiteralUtils::canMakeZero(type)) {
#ifdef RSE_DEBUG
std::cout << "new unique value for non-zeroable " << i << '\n';
#endif
start[i] = getUniqueValue();
} else {
start[i] = getValue(Literal::makeZeros(type));
}
}
} else {
// other blocks have all unseen values to begin with
for (Index i = 0; i < numLocals; i++) {
start[i] = unseenValue;
}
}
// the ends all begin unseen
LocalValues& end = block->contents.end;
end.resize(numLocals);
for (Index i = 0; i < numLocals; i++) {
end[i] = unseenValue;
}
}
// keep working while stuff is flowing. we use a unique deferred queue
// which ensures both FIFO and that we don't do needless work - if
// A and B reach C, and both queue C, we only want to do C at the latest
// time, when we have information from all those reaching it.
UniqueDeferredQueue<BasicBlock*> work;
work.push(entry);
while (!work.empty()) {
auto* curr = work.pop();
#ifdef RSE_DEBUG
std::cout << "flow block " << curr << '\n';
#endif
// process a block: first, update its start based on those reaching it
if (!curr->in.empty()) {
if (curr->in.size() == 1) {
// just copy the pred, nothing to merge
curr->contents.start = (*curr->in.begin())->contents.end;
} else {
// perform a merge
auto in = curr->in;
for (Index i = 0; i < numLocals; i++) {
auto old = curr->contents.start[i];
// If we already had a merge value here, keep it.
// TODO This may have some false positives, as we may e.g. have
// a single pred that first gives us x, then later y after
// flow led to a merge, and we may see x and y at the same
// time due to flow from a successor, and then it looks like
// we need a merge but we don't. avoiding that would require
// more memory and is probably not worth it, but might be
// worth investigating
// NB While suboptimal, this simplification provides a simple proof
// of convergence. We prove that, in each fixed block+local,
// the value number at the end is nondecreasing across
// iterations, by induction on the iteration:
// * The first iteration is on the entry block. It increases
// the value number at the end from 0 (unseen) to something
// else (a value number for 0 for locals, a unique value
// for params; all >0).
// * Induction step: assuming the property holds for all past
// iterations, consider the current iteration. Of our
// predecessors, those that we iterated on have the property;
// those that we haven't will have 0 (unseen).
// * If we assign to that local in this block, that will be
// the value in the output, forever, and it is greater
// than the initial value of 0.
// * If we see different values coming in, we create a merge
// value number. Its number is higher than everything
// else since we give it the next available number, so we
// do not decrease in this iteration, and we will output
// the same value in the future too (here is where we use
// the simplification property).
// * Otherwise, we will flow the incoming value through,
// and it did not decrease (by induction), so neither do
// we.
// Finally, given value numbers are nondecreasing, we must
// converge since we only keep working as long as we see new
// values at the end of a block.
//
// Not that we don't trust this proof, but the convergence
// property (value numbers at block ends do not decrease) is
// verified later down.
if (isBlockMergeValue(curr, i, old)) {
continue;
}
auto iter = in.begin();
auto value = (*iter)->contents.end[i];
iter++;
while (iter != in.end()) {
auto otherValue = (*iter)->contents.end[i];
if (value == unseenValue) {
value = otherValue;
} else if (otherValue == unseenValue) {
// nothing to do, other has no information
} else if (value != otherValue) {
// 2 different values, this is a merged value
value = getBlockMergeValue(curr, i);
break; // no more work once we see a merge
}
iter++;
}
curr->contents.start[i] = value;
}
}
}
#ifdef RSE_DEBUG
dump("start", curr->contents.start);
#endif
// flow values through it, then add those we can reach if they need an
// update.
auto currValues = curr->contents.start; // we'll modify this as we go
auto& items = curr->contents.items;
for (auto** item : items) {
if (auto* set = (*item)->dynCast<LocalSet>()) {
auto* value = Properties::getFallthrough(
set->value, getPassOptions(), *getModule());
currValues[set->index] = getValue(value, currValues);
}
}
if (currValues == curr->contents.end) {
// nothing changed, so no more work to do
// note that the first iteration this is always not the case,
// since end contains unseen (and then the comparison ends on
// the first element)
continue;
}
// update the end state and update children
#ifndef NDEBUG
// verify the convergence property mentioned in the NB comment
// above: the value numbers at the end must be nondecreasing
for (Index i = 0; i < numLocals; i++) {
assert(currValues[i] >= curr->contents.end[i]);
}
#endif
curr->contents.end.swap(currValues);
#ifdef RSE_DEBUG
dump("end ", curr->contents.end);
#endif
for (auto* next : curr->out) {
work.push(next);
}
}
}
// optimizing
void optimize(Function* func) {
// Find which locals are refinable, that is, that when we see a global.get
// of them we may consider switching to another local index that has the
// same value but in a refined type. Computing which locals are relevant for
// that optimization is efficient because it avoids a bunch of work below
// for hashing numbers etc.
std::vector<bool> isRefinable(numLocals, false);
for (Index i = 0; i < numLocals; i++) {
// TODO: we could also note which locals have "maximal" types, where no
// other local is a refinement of them
if (func->getLocalType(i).isRef()) {
isRefinable[i] = true;
}
}
// in each block, run the values through the sets,
// and remove redundant sets when we see them
for (auto& block : basicBlocks) {
auto currValues = block->contents.start; // we'll modify this as we go
auto& items = block->contents.items;
// Set up the equivalences at the beginning of the block. We'll update
// them as we go, so we can use them at any point in the middle. This data
// structure maps a value number to the local indexes that have that
// value.
//
// Note that the set here must be ordered to avoid nondeterminism when
// picking between multiple equally-good indexes (we'll pick the first in
// the iteration, which will have the lowest index).
std::unordered_map<Index, SmallSet<Index, 3>> valueToLocals;
assert(currValues.size() == numLocals);
for (Index i = 0; i < numLocals; i++) {
if (isRefinable[i]) {
valueToLocals[currValues[i]].insert(i);
}
}
for (auto** item : items) {
if (auto* set = (*item)->dynCast<LocalSet>()) {
auto oldValue = currValues[set->index];
auto* value = Properties::getFallthrough(
set->value, getPassOptions(), *getModule());
auto newValue = getValue(value, currValues);
auto index = set->index;
if (newValue == oldValue) {
remove(item);
} else {
// update for later steps
currValues[index] = newValue;
if (isRefinable[index]) {
valueToLocals[oldValue].erase(index);
valueToLocals[newValue].insert(index);
}
}
continue;
}
// For gets, see if there is another index with that value, of a more
// refined type.
auto* get = (*item)->dynCast<LocalGet>();
if (!isRefinable[get->index]) {
continue;
}
for (auto i : valueToLocals[getValue(get, currValues)]) {
auto currType = func->getLocalType(get->index);
auto possibleType = func->getLocalType(i);
if (possibleType != currType &&
Type::isSubType(possibleType, currType)) {
// We found an improvement!
get->index = i;
get->type = possibleType;
refinalize = true;
}
}
}
}
}
void remove(Expression** item) {
auto* set = (*item)->cast<LocalSet>();
auto* value = set->value;
if (!set->isTee()) {
auto* drop = ExpressionManipulator::convert<LocalSet, Drop>(set);
drop->value = value;
drop->finalize();
} else {
// If we are replacing the set with something of a more specific type,
// then we need to refinalize, for example:
//
// (struct.get $X 0
// (local.tee $x
// (..something of type $Y, a subtype of $X..)
// )
// )
//
// After the replacement the struct.get will read from $Y, whose field may
// have a more refined type.
if (value->type != set->type) {
refinalize = true;
}
*item = value;
}
}
// debugging
void dump(BasicBlock* block) {
std::cout << "====\n";
if (block) {
std::cout << "block: " << block << '\n';
for (auto* out : block->out) {
std::cout << " goes to " << out << '\n';
}
}
for (Index i = 0; i < block->contents.start.size(); i++) {
std::cout << " start[" << i << "] = " << block->contents.start[i]
<< '\n';
}
for (auto** item : block->contents.items) {
std::cout << " " << *item << '\n';
}
std::cout << "====\n";
}
void dump(const char* desc, LocalValues& values) {
std::cout << desc << ": ";
for (auto x : values) {
std::cout << x << ' ';
}
std::cout << '\n';
}
};
} // namespace
Pass* createRedundantSetEliminationPass() {
return new RedundantSetElimination();
}
} // namespace wasm
|