1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
|
/*
* Copyright 2022 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Finds types which are only created in assignments to immutable globals. For
// such types we can replace a struct.get with a global.get when there is a
// single possible global, or if there are two then with this pattern:
//
// (struct.get $foo i
// (..ref..))
// =>
// (select
// (value1)
// (value2)
// (ref.eq
// (..ref..)
// (global.get $global1)))
//
// That is a valid transformation if there are only two struct.news of $foo, it
// is created in two immutable globals $global1 and $global2, the field is
// immutable, the values of field |i| in them are value1 and value2
// respectively, and $foo has no subtypes. In that situation, the reference must
// be one of those two, so we can compare the reference to the globals and pick
// the right value there. (We can also handle subtypes, if we look at their
// values as well, see below.)
//
// The benefit of this optimization is primarily in the case of constant values
// that we can heavily optimize, like function references (constant function
// refs let us inline, etc.). Function references cannot be directly compared,
// so we cannot use ConstantFieldPropagation or such with an extension to
// multiple values, as the select pattern shown above can't be used - it needs a
// comparison. But we can compare structs, so if the function references are in
// vtables, and the vtables follow the above pattern, then we can optimize.
//
// TODO: Only do the case with a select when shrinkLevel == 0?
//
#include <variant>
#include "ir/bits.h"
#include "ir/debuginfo.h"
#include "ir/find_all.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/possible-constant.h"
#include "ir/subtypes.h"
#include "ir/utils.h"
#include "pass.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm {
namespace {
struct GlobalStructInference : public Pass {
  // Only modifies struct.get operations.
  bool requiresNonNullableLocalFixups() override { return false; }

  // Maps optimizable struct types to the globals whose init is a struct.new of
  // them.
  //
  // We will remove unoptimizable types from here, so in practice, if a type is
  // optimizable it will have an entry here, and not if not.
  std::unordered_map<HeapType, std::vector<Name>> typeGlobals;

  void run(Module* module) override {
    // This pass only does anything for GC (struct) types.
    if (!module->features.hasGC()) {
      return;
    }

    // We assume we can see all struct.news of the relevant types, which is
    // only sound in a closed world.
    if (!getPassOptions().closedWorld) {
      Fatal() << "GSI requires --closed-world";
    }

    // First, find all the information we need. We need to know which struct
    // types are created in functions, because we will not be able to optimize
    // those.

    using HeapTypes = std::unordered_set<HeapType>;

    ModuleUtils::ParallelFunctionAnalysis<HeapTypes> analysis(
      *module, [&](Function* func, HeapTypes& types) {
        if (func->imported()) {
          return;
        }

        // Note every heap type that a struct.new in this function creates.
        for (auto* structNew : FindAll<StructNew>(func->body).list) {
          auto type = structNew->type;
          if (type.isRef()) {
            types.insert(type.getHeapType());
          }
        }
      });

    // We cannot optimize types that appear in a struct.new in a function, which
    // we just collected and merge now.
    HeapTypes unoptimizable;

    for (auto& [func, types] : analysis.map) {
      for (auto type : types) {
        unoptimizable.insert(type);
      }
    }

    // Process the globals.
    for (auto& global : module->globals) {
      if (global->imported()) {
        continue;
      }

      // We cannot optimize a type that appears in a non-toplevel location in a
      // global init.
      for (auto* structNew : FindAll<StructNew>(global->init).list) {
        auto type = structNew->type;
        if (type.isRef() && structNew != global->init) {
          unoptimizable.insert(type.getHeapType());
        }
      }

      if (!global->init->type.isRef()) {
        continue;
      }

      auto type = global->init->type.getHeapType();

      // The global's declared type must match the init's type. If not, say if
      // we had a global declared as type |any| but that contains (ref $A), then
      // that is not something we can optimize, as ref.eq on a global.get of
      // that global will not validate. (This should not be a problem after
      // GlobalSubtyping runs, which will specialize the type of the global.)
      if (global->type != global->init->type) {
        unoptimizable.insert(type);
        continue;
      }

      // We cannot optimize mutable globals.
      if (global->mutable_) {
        unoptimizable.insert(type);
        continue;
      }

      // Finally, if this is a struct.new then it is one we can optimize; note
      // it.
      if (global->init->is<StructNew>()) {
        typeGlobals[type].push_back(global->name);
      }
    }

    // A struct.get might also read from any of the subtypes. As a result, an
    // unoptimizable type makes all its supertypes unoptimizable as well.
    // TODO: this could be specific per field (and not all supers have all
    //       fields)
    // Iterate on a copy to avoid invalidation as we insert.
    auto unoptimizableCopy = unoptimizable;
    for (auto type : unoptimizableCopy) {
      while (1) {
        unoptimizable.insert(type);

        // Also erase the globals, as we will never read them anyhow. This can
        // allow us to skip unneeded work, when we check if typeGlobals is
        // empty, below.
        typeGlobals.erase(type);

        auto super = type.getDeclaredSuperType();
        if (!super) {
          break;
        }
        type = *super;
      }
    }

    // Similarly, propagate global names: if one type has [global1], then a get
    // of any supertype might access that, so propagate to them.
    auto typeGlobalsCopy = typeGlobals;
    for (auto& [type, globals] : typeGlobalsCopy) {
      auto curr = type;
      while (1) {
        auto super = curr.getDeclaredSuperType();
        if (!super) {
          break;
        }
        curr = *super;

        // As above, avoid adding pointless data for anything unoptimizable.
        if (!unoptimizable.count(curr)) {
          for (auto global : globals) {
            typeGlobals[curr].push_back(global);
          }
        }
      }
    }

    if (typeGlobals.empty()) {
      // We found nothing we can optimize.
      return;
    }

    // The above loop on typeGlobalsCopy is on an unsorted data structure, and
    // that can lead to nondeterminism in typeGlobals. Sort the vectors there to
    // ensure determinism.
    for (auto& [type, globals] : typeGlobals) {
      std::sort(globals.begin(), globals.end());
    }

    // We are looking for the case where we can pick between two values using a
    // single comparison. More than two values, or more than a single
    // comparison, lead to tradeoffs that may not be worth it.
    //
    // Note that situation may involve more than two globals. For example we may
    // have three relevant globals, but two may have the same value. In that
    // case we can compare against the third:
    //
    //  $global0: (struct.new $Type (i32.const 42))
    //  $global1: (struct.new $Type (i32.const 42))
    //  $global2: (struct.new $Type (i32.const 1337))
    //
    //  (struct.get $Type (ref))
    //    =>
    //  (select
    //    (i32.const 1337)
    //    (i32.const 42)
    //    (ref.eq (ref) $global2))
    //
    // To discover these situations, we compute and group the possible values
    // that can be read from a particular struct.get, using the following data
    // structure.
    struct Value {
      // A value is either a constant, or if not, then we point to whatever
      // expression it is.
      std::variant<PossibleConstantValues, Expression*> content;

      // The list of globals that have this Value. In the example from above,
      // the Value for 42 would list globals = [$global0, $global1].
      // TODO: SmallVector?
      std::vector<Name> globals;

      bool isConstant() const {
        return std::get_if<PossibleConstantValues>(&content);
      }

      const PossibleConstantValues& getConstant() const {
        assert(isConstant());
        return std::get<PossibleConstantValues>(content);
      }

      Expression* getExpression() const {
        assert(!isConstant());
        return std::get<Expression*>(content);
      }
    };

    // Constant expressions are easy to handle, and we can emit a select as in
    // the last example. But we can handle non-constant ones too, by un-nesting
    // the relevant global. Imagine we have this:
    //
    //  (global $g (struct.new $S
    //    (struct.new $T ..)
    //
    // We have a nested struct.new here. That is not a constant value, but we
    // can turn it into a global.get:
    //
    //  (global $g.nested (struct.new $T ..)
    //  (global $g (struct.new $S
    //    (global.get $g.nested)
    //
    // After this un-nesting we end up with a global.get of an immutable global,
    // which is constant. Note that this adds a global and may increase code
    // size slightly, but if it lets us infer constant values that may lead to
    // devirtualization and other large benefits. Later passes can also re-nest.
    //
    // We do most of our optimization work in parallel, but we cannot add
    // globals in parallel, so instead we note the places we need to un-nest in
    // this data structure and process them at the end.
    struct GlobalToUnnest {
      // The global we want to refer to a nested part of, by un-nesting it. The
      // global contains a struct.new, and we want to refer to one of the
      // operands of the struct.new directly, which we can do by moving it out
      // to its own new global.
      Name global;

      // The index of the struct.new in the global named |global|.
      Index index;

      // The global.get that should refer to the new global. At the end, after
      // we create a new global and have a name for it, we update this get to
      // point to it.
      GlobalGet* get;
    };

    using GlobalsToUnnest = std::vector<GlobalToUnnest>;

    struct FunctionOptimizer : PostWalker<FunctionOptimizer> {
    private:
      // The pass instance, used to read the (immutable, at this point)
      // typeGlobals map.
      GlobalStructInference& parent;

      // Per-function list of un-nesting work, filled here and processed
      // serially after the parallel phase.
      GlobalsToUnnest& globalsToUnnest;

    public:
      FunctionOptimizer(GlobalStructInference& parent,
                        GlobalsToUnnest& globalsToUnnest)
        : parent(parent), globalsToUnnest(globalsToUnnest) {}

      // Set when a replacement may have refined a type, in which case we must
      // refinalize the function at the end.
      bool refinalize = false;

      void visitStructGet(StructGet* curr) {
        auto type = curr->ref->type;
        if (type == Type::unreachable) {
          return;
        }

        // We must ignore the case of a non-struct heap type, that is, a bottom
        // type (which is all that is left after we've already ruled out
        // unreachable). Such things will not be in typeGlobals, which we are
        // checking now anyhow.
        auto heapType = type.getHeapType();
        auto iter = parent.typeGlobals.find(heapType);
        if (iter == parent.typeGlobals.end()) {
          return;
        }

        // This cannot be a bottom type as we found it in the typeGlobals map,
        // which only contains types of struct.news.
        assert(heapType.isStruct());

        // The field must be immutable.
        auto fieldIndex = curr->index;
        auto& field = heapType.getStruct().fields[fieldIndex];
        if (field.mutable_ == Mutable) {
          return;
        }

        const auto& globals = iter->second;
        // Defensive check: with no globals there is nothing to read from.
        if (globals.size() == 0) {
          return;
        }

        auto& wasm = *getModule();
        Builder builder(wasm);

        if (globals.size() == 1) {
          // Leave it to other passes to infer the constant value of the field,
          // if there is one: just change the reference to the global, which
          // will unlock those other optimizations. Note we must trap if the ref
          // is null, so add RefAsNonNull here.
          auto global = globals[0];
          auto globalType = wasm.getGlobal(global)->type;
          if (globalType != curr->ref->type) {
            // The struct.get will now read from something of the type of the
            // global, which is different, so the field being read might be
            // refined, which could change the struct.get's type.
            refinalize = true;
          }
          // No need to worry about atomic gets here. We will still read from
          // the same memory location as before and preserve all side effects
          // (including synchronization) that were previously present. The
          // memory location is immutable anyway, so there cannot be any writes
          // to synchronize with in the first place.
          curr->ref = builder.makeSequence(
            builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
            builder.makeGlobalGet(global, globalType));
          return;
        }

        // TODO: SmallVector?
        std::vector<Value> values;

        // Scan the relevant struct.new operands.
        auto fieldType = field.type;
        for (Index i = 0; i < globals.size(); i++) {
          Name global = globals[i];
          auto* structNew = wasm.getGlobal(global)->init->cast<StructNew>();

          // The value that is read from this struct.new.
          Value value;

          // Find the value read from the struct and represent it as a Value.
          PossibleConstantValues constant;
          if (structNew->isWithDefault()) {
            // A default-initialized field reads as the zero of its type.
            constant.note(Literal::makeZero(fieldType));
            value.content = constant;
          } else {
            Expression* operand = structNew->operands[fieldIndex];
            constant.note(operand, wasm);
            if (constant.isConstant()) {
              value.content = constant;
            } else {
              value.content = operand;
            }
          }

          // If the value is constant, it may be grouped as mentioned before.
          // See if it matches anything we've seen before.
          bool grouped = false;
          if (value.isConstant()) {
            for (auto& oldValue : values) {
              if (oldValue.isConstant() &&
                  oldValue.getConstant() == value.getConstant()) {
                // Add us to this group.
                oldValue.globals.push_back(global);
                grouped = true;
                break;
              }
            }
          }
          if (!grouped) {
            // This is a new value, so create a new group, unless we've seen too
            // many unique values. In that case, give up.
            if (values.size() == 2) {
              return;
            }
            value.globals.push_back(global);
            values.push_back(value);
          }
        }

        // Helper for optimization: Given a Value, returns what we should read
        // for it.
        auto getReadValue = [&](const Value& value) -> Expression* {
          Expression* ret;
          if (value.isConstant()) {
            // This is known to be a constant, so simply emit an expression for
            // that constant, and handle if the field is packed.
            auto* expr = value.getConstant().makeExpression(wasm);
            ret = Bits::makePackedFieldGet(expr, field, curr->signed_, wasm);
          } else {
            // Otherwise, this is non-constant, so we are in the situation where
            // we want to un-nest the value out of the struct.new it is in. Note
            // that for later work, as we cannot add a global in parallel.

            // There can only be one global in a value that is not constant,
            // which is the global we want to read from.
            assert(value.globals.size() == 1);

            // Create a global.get with temporary name, leaving only the
            // updating of the name to later work.
            auto* get = builder.makeGlobalGet(value.globals[0],
                                              value.getExpression()->type);
            globalsToUnnest.emplace_back(
              GlobalToUnnest{value.globals[0], fieldIndex, get});
            ret = get;
          }
          // This value replaces the struct.get, so it should have the same
          // source location.
          debuginfo::copyOriginalToReplacement(curr, ret, getFunction());
          return ret;
        };

        // We have some globals (at least 2), and so must have at least one
        // value. And we have already exited if we have more than 2 values (see
        // the early return above) so that only leaves 1 and 2.
        if (values.size() == 1) {
          // The case of 1 value is simple: trap if the ref is null, and
          // otherwise return the value. We must also fence if the get was
          // seqcst. No additional work is necessary for an acquire get because
          // there cannot have been any writes to this immutable field that it
          // would synchronize with.
          Expression* replacement =
            builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref));
          if (curr->order == MemoryOrder::SeqCst) {
            replacement =
              builder.blockify(replacement, builder.makeAtomicFence());
          }
          replaceCurrent(
            builder.blockify(replacement, getReadValue(values[0])));
          return;
        }
        assert(values.size() == 2);

        // We have two values. Check that we can pick between them using a
        // single comparison. While doing so, ensure that the index we can check
        // on is 0, that is, the first value has a single global.
        if (values[0].globals.size() == 1) {
          // The checked global is already in index 0.
        } else if (values[1].globals.size() == 1) {
          // Flip so the value to check is in index 0.
          std::swap(values[0], values[1]);
        } else {
          // Both indexes have more than one option, so we'd need more than one
          // comparison. Give up.
          return;
        }

        // Excellent, we can optimize here! Emit a select.
        auto checkGlobal = values[0].globals[0];
        // Compute the left and right values before the next line, as the order
        // of their execution matters (they may note globals for un-nesting).
        auto* left = getReadValue(values[0]);
        auto* right = getReadValue(values[1]);
        // Note that we must trap on null, so add a ref.as_non_null here. We
        // must also add a fence if this get is seqcst. As before, no extra work
        // is necessary for an acquire get because there cannot be a write it
        // synchronizes with.
        Expression* getGlobal =
          builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type);
        if (curr->order == MemoryOrder::SeqCst) {
          getGlobal =
            builder.makeSequence(builder.makeAtomicFence(), getGlobal);
        }
        replaceCurrent(builder.makeSelect(
          builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
                            getGlobal),
          left,
          right));
      }

      void visitFunction(Function* func) {
        // Refinalize once, at the end, rather than after each change.
        if (refinalize) {
          ReFinalize().walkFunctionInModule(func, getModule());
        }
      }
    };

    // Find the optimization opportunities in parallel.
    ModuleUtils::ParallelFunctionAnalysis<GlobalsToUnnest> optimization(
      *module, [&](Function* func, GlobalsToUnnest& globalsToUnnest) {
        if (func->imported()) {
          return;
        }

        FunctionOptimizer optimizer(*this, globalsToUnnest);
        optimizer.walkFunctionInModule(func, module);
      });

    // Un-nest any globals as needed, using the deterministic order of the
    // functions in the module.
    Builder builder(*module);
    // Tracks whether we created any new globals, which then need reordering.
    auto addedGlobals = false;
    for (auto& func : module->functions) {
      // Each work item here is a global with a struct.new, from which we want
      // to read a particular index, from a particular global.get.
      for (auto& [globalName, index, get] : optimization.map[func.get()]) {
        auto* global = module->getGlobal(globalName);
        auto* structNew = global->init->cast<StructNew>();
        assert(index < structNew->operands.size());
        auto*& operand = structNew->operands[index];

        // If we already un-nested this then we don't need to repeat that work.
        if (auto* nestedGet = operand->dynCast<GlobalGet>()) {
          // We already un-nested, and this global.get refers to the new global.
          // Simply copy the target.
          get->name = nestedGet->name;
          assert(get->type == nestedGet->type);
        } else {
          // Add a new global, initialized to the operand.
          auto newName = Names::getValidGlobalName(
            *module,
            global->name.toString() + ".unnested." + std::to_string(index));
          module->addGlobal(builder.makeGlobal(
            newName, get->type, operand, Builder::Immutable));
          // Replace the operand with a get of that new global, and update the
          // original get to read the same.
          operand = builder.makeGlobalGet(newName, get->type);
          get->name = newName;
          addedGlobals = true;
        }
      }
    }

    if (addedGlobals) {
      // Sort the globals so that added ones appear before their uses.
      PassRunner runner(module);
      runner.add("reorder-globals-always");
      runner.setIsNested(true);
      runner.run();
    }
  }
};
} // anonymous namespace
// Factory function through which the pass registry instantiates this pass.
Pass* createGlobalStructInferencePass() {
  return new GlobalStructInference();
}
} // namespace wasm
|