summaryrefslogtreecommitdiff
path: root/src/passes/SimplifyLocals.cpp
blob: 6f8b346b5932f8838adb7deda073d5e3d15c72aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
/*
 * Copyright 2015 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Locals-related optimizations
//
// This "sinks" set_locals, pushing them to the next get_local where possible,
// and removing the set if there are no gets remaining (the latter is
// particularly useful in ssa mode, but not only).
//
// We also note where set_locals coalesce: if all breaks of a block set
// a specific local, we can use a block return value for it, in effect
// removing multiple set_locals and replacing them with one that the
// block returns to. Further optimization rounds then have the opportunity
// to remove that set_local as well. TODO: support partial traces; right
// now, whenever control flow splits, we invalidate everything.
//
// After this pass, some locals may be completely unused. reorder-locals
// can get rid of those (the operation is trivial there after it sorts by use
// frequency).
//
// This pass has options:
//
//   * Tee: allow teeing, i.e., sinking a local with more than one use,
//          and so after sinking we have a tee for the first use.
//   * Structure: create block and if return values, by merging the
//                internal set_locals into one on the outside,
//                that can itself then be sunk further.
//
// There is also an option to disallow nesting entirely, which disallows
// Tee and Structure from those 2 options, and also disallows any sinking
// operation that would create nesting. This keeps the IR flat while
// removing redundant locals.
//

#include <wasm.h>
#include <wasm-builder.h>
#include <wasm-traversal.h>
#include <pass.h>
#include <ir/count.h>
#include <ir/effects.h>
#include <ir/find_all.h>
#include <ir/manipulation.h>

namespace wasm {

// Helper classes

struct SetLocalRemover : public PostWalker<SetLocalRemover> {
  std::vector<Index>* numGetLocals;

  void visitSetLocal(SetLocal *curr) {
    if ((*numGetLocals)[curr->index] == 0) {
      auto* value = curr->value;
      if (curr->isTee()) {
        replaceCurrent(value);
      } else {
        Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr);
        drop->value = value;
        drop->finalize();
      }
    }
  }
};

// Main class

template<bool allowTee = true, bool allowStructure = true, bool allowNesting = true>
struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>> {
  bool isFunctionParallel() override { return true; }

  Pass* create() override { return new SimplifyLocals<allowTee, allowStructure, allowNesting>(); }

  // information for a set_local we can sink
  struct SinkableInfo {
    Expression** item;
    EffectAnalyzer effects;

    SinkableInfo(Expression** item, PassOptions& passOptions) : item(item), effects(passOptions, *item) {}
  };

  // a list of sinkables in a linear execution trace
  typedef std::map<Index, SinkableInfo> Sinkables;

  // locals in current linear execution trace, which we try to sink
  Sinkables sinkables;

  // Information about an exit from a block: the break, and the
  // sinkables. For the final exit from a block (falling off)
  // exitter is null.
  struct BlockBreak {
    Expression** brp;
    Sinkables sinkables;
  };

  // a list of all sinkable traces that exit a block. the last
  // is falling off the end, others are branches. this is used for
  // block returns
  std::map<Name, std::vector<BlockBreak>> blockBreaks;

  // blocks that we can't produce a block return value for them.
  // (switch target, or some other reason)
  std::set<Name> unoptimizableBlocks;

  // A stack of sinkables from the current traversal state. When
  // execution reaches an if-else, it splits, and can then
  // be merged on return.
  std::vector<Sinkables> ifStack;

  // whether we need to run an additional cycle
  bool anotherCycle;

  // whether this is the first cycle, in which we always disallow teeing
  bool firstCycle;

  // local => # of get_locals for it
  GetLocalCounter getCounter;

  static void doNoteNonLinear(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    auto* curr = *currp;
    if (curr->is<Break>()) {
      auto* br = curr->cast<Break>();
      if (br->value) {
        // value means the block already has a return value
        self->unoptimizableBlocks.insert(br->name);
      } else {
        self->blockBreaks[br->name].push_back({ currp, std::move(self->sinkables) });
      }
    } else if (curr->is<Block>()) {
      return; // handled in visitBlock
    } else if (curr->is<If>()) {
      assert(!curr->cast<If>()->ifFalse); // if-elses are handled by doNoteIfElse* methods
    } else if (curr->is<Switch>()) {
      auto* sw = curr->cast<Switch>();
      for (auto target : sw->targets) {
        self->unoptimizableBlocks.insert(target);
      }
      self->unoptimizableBlocks.insert(sw->default_);
      // TODO: we could use this info to stop gathering data on these blocks
    }
    self->sinkables.clear();
  }

  static void doNoteIfElseCondition(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    // we processed the condition of this if-else, and now control flow branches
    // into either the true or the false sides
    assert((*currp)->cast<If>()->ifFalse);
    self->sinkables.clear();
  }

  static void doNoteIfElseTrue(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    // we processed the ifTrue side of this if-else, save it on the stack
    assert((*currp)->cast<If>()->ifFalse);
    self->ifStack.push_back(std::move(self->sinkables));
  }

  static void doNoteIfElseFalse(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    // we processed the ifFalse side of this if-else, we can now try to
    // mere with the ifTrue side and optimize a return value, if possible
    auto* iff = (*currp)->cast<If>();
    assert(iff->ifFalse);
    if (allowStructure) {
      self->optimizeIfReturn(iff, currp, self->ifStack.back());
    }
    self->ifStack.pop_back();
    self->sinkables.clear();
  }

  void visitBlock(Block* curr) {
    bool hasBreaks = curr->name.is() && blockBreaks[curr->name].size() > 0;

    if (allowStructure) {
      optimizeBlockReturn(curr); // can modify blockBreaks
    }

    // post-block cleanups
    if (curr->name.is()) {
      if (unoptimizableBlocks.count(curr->name)) {
        sinkables.clear();
        unoptimizableBlocks.erase(curr->name);
      }

      if (hasBreaks) {
        // more than one path to here, so nonlinear
        sinkables.clear();
        blockBreaks.erase(curr->name);
      }
    }
  }

  void visitLoop(Loop* curr) {
    if (allowStructure) {
      loops.push_back(this->getCurrentPointer());
    }
  }

  void visitGetLocal(GetLocal *curr) {
    auto found = sinkables.find(curr->index);
    if (found != sinkables.end()) {
      auto* set = (*found->second.item)->template cast<SetLocal>(); // the set we may be sinking
      bool oneUse = firstCycle || getCounter.num[curr->index] == 1;
      auto* get = set->value->template dynCast<GetLocal>(); // the set's value may be a get (i.e., the set is a copy)
      // if nesting is not allowed, and this might cause nesting, check if the sink would cause such a thing
      if (!allowNesting) {
        // a get is always ok to sink
        if (!get) {
          assert(expressionStack.size() >= 2);
          assert(expressionStack[expressionStack.size() - 1] == curr);
          auto* parent = expressionStack[expressionStack.size() - 2];
          bool parentIsSet = parent->template is<SetLocal>();
          // if the parent of this get is a set, we can sink into the set's value,
          // it would not be nested.
          if (!parentIsSet) {
            return;
          }
        }
      }
      // we can optimize here
      if (!allowNesting && get && !oneUse) {
        // if we can't nest 's a copy with multiple uses, then we can't create
        // a tee, and we can't nop the origin, but we can at least switch to
        // the copied index, which may make the origin unneeded eventually.
        curr->index = get->index;
        anotherCycle = true;
        return;
      }
      // sink it, and nop the origin
      if (oneUse) {
        // with just one use, we can sink just the value
        this->replaceCurrent(set->value);
      } else {
        this->replaceCurrent(set);
        assert(!set->isTee());
        set->setTee(true);
      }
      // reuse the getlocal that is dying
      *found->second.item = curr;
      ExpressionManipulator::nop(curr);
      sinkables.erase(found);
      anotherCycle = true;
    }
  }

  void visitDrop(Drop* curr) {
    // collapse drop-tee into set, which can occur if a get was sunk into a tee
    auto* set = curr->value->dynCast<SetLocal>();
    if (set) {
      assert(set->isTee());
      set->setTee(false);
      this->replaceCurrent(set);
    }
  }

  void checkInvalidations(EffectAnalyzer& effects) {
    // TODO: this is O(bad)
    std::vector<Index> invalidated;
    for (auto& sinkable : sinkables) {
      if (effects.invalidates(sinkable.second.effects)) {
        invalidated.push_back(sinkable.first);
      }
    }
    for (auto index : invalidated) {
      sinkables.erase(index);
    }
  }

  // a full expression stack is used when !allowNesting, so that we can check if
  // a sink would cause nesting
  std::vector<Expression*> expressionStack;

  static void visitPre(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    Expression* curr = *currp;

    EffectAnalyzer effects(self->getPassOptions());
    if (effects.checkPre(curr)) {
      self->checkInvalidations(effects);
    }

    if (!allowNesting) {
      self->expressionStack.push_back(curr);
    }
  }

  static void visitPost(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    // perform main SetLocal processing here, since we may be the result of
    // replaceCurrent, i.e., the visitor was not called.
    auto* set = (*currp)->dynCast<SetLocal>();

    if (set) {
      // if we see a set that was already potentially-sinkable, then the previous
      // store is dead, leave just the value
      auto found = self->sinkables.find(set->index);
      if (found != self->sinkables.end()) {
        auto* previous = (*found->second.item)->template cast<SetLocal>();
        assert(!previous->isTee());
        auto* previousValue = previous->value;
        Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(previous);
        drop->value = previousValue;
        drop->finalize();
        self->sinkables.erase(found);
        self->anotherCycle = true;
      }
    }

    EffectAnalyzer effects(self->getPassOptions());
    if (effects.checkPost(*currp)) {
      self->checkInvalidations(effects);
    }

    if (set && self->canSink(set)) {
      Index index = set->index;
      assert(self->sinkables.count(index) == 0);
      self->sinkables.emplace(std::make_pair(index, SinkableInfo(currp, self->getPassOptions())));
    }

    if (!allowNesting) {
      self->expressionStack.pop_back();
    }
  }

  bool canSink(SetLocal* set) {
    // we can never move a tee
    if (set->isTee()) return false;
    // if in the first cycle, or not allowing tees, then we cannot sink if >1 use as that would make a tee
    if ((firstCycle || !allowTee) && getCounter.num[set->index] > 1) return false;
    return true;
  }

  std::vector<Block*> blocksToEnlarge;
  std::vector<If*> ifsToEnlarge;
  std::vector<Expression**> loops;

  void optimizeBlockReturn(Block* block) {
    if (!block->name.is() || unoptimizableBlocks.count(block->name) > 0) {
      return;
    }
    auto breaks = std::move(blockBreaks[block->name]);
    blockBreaks.erase(block->name);
    if (breaks.size() == 0) return; // block has no branches TODO we might optimize trivial stuff here too
    assert(!(*breaks[0].brp)->template cast<Break>()->value); // block does not already have a return value (if one break has one, they all do)
    // look for a set_local that is present in them all
    bool found = false;
    Index sharedIndex = -1;
    for (auto& sinkable : sinkables) {
      Index index = sinkable.first;
      bool inAll = true;
      for (size_t j = 0; j < breaks.size(); j++) {
        if (breaks[j].sinkables.count(index) == 0) {
          inAll = false;
          break;
        }
      }
      if (inAll) {
        sharedIndex = index;
        found = true;
        break;
      }
    }
    if (!found) return;
    // If one of our brs is a br_if, then we will give it a value. since
    // the value executes before the condition, it is dangerous if we are
    // moving code out of the condition,
    //  (br_if
    //   (block
    //    ..use $x..
    //    (set_local $x ..)
    //   )
    //  )
    // =>
    //  (br_if
    //   (tee_local $x ..) ;; this now affects the use!
    //   (block
    //    ..use $x..
    //   )
    //  )
    // so we must check for that.
    for (size_t j = 0; j < breaks.size(); j++) {
      // move break set_local's value to the break
      auto* breakSetLocalPointer = breaks[j].sinkables.at(sharedIndex).item;
      auto* brp = breaks[j].brp;
      auto* br = (*brp)->template cast<Break>();
      auto* set = (*breakSetLocalPointer)->template cast<SetLocal>();
      if (br->condition) {
        // TODO: optimize
        FindAll<SetLocal> findAll(br->condition);
        for (auto* otherSet : findAll.list) {
          if (otherSet == set) {
            // the set is indeed in the condition, so we can't just move it
            // but maybe there are no effects? see if, ignoring the set
            // itself, there is any risk
            Nop nop;
            *breakSetLocalPointer = &nop;
            EffectAnalyzer condition(this->getPassOptions(), br->condition);
            EffectAnalyzer value(this->getPassOptions(), set);
            *breakSetLocalPointer = set;
            if (condition.invalidates(value)) {
              // indeed, we can't do this, stop
              return;
            }
            break; // we found set in the list, can stop now
          }
        }
      }
    }
    // Great, this local is set in them all, we can optimize!
    if (block->list.size() == 0 || !block->list.back()->is<Nop>()) {
      // We can't do this here, since we can't push to the block -
      // it would invalidate sinkable pointers. So we queue a request
      // to grow the block at the end of the turn, we'll get this next
      // cycle.
      blocksToEnlarge.push_back(block);
      return;
    }
    // move block set_local's value to the end, in return position, and nop the set
    auto* blockSetLocalPointer = sinkables.at(sharedIndex).item;
    auto* value = (*blockSetLocalPointer)->template cast<SetLocal>()->value;
    block->list[block->list.size() - 1] = value;
    block->type = value->type;
    ExpressionManipulator::nop(*blockSetLocalPointer);
    for (size_t j = 0; j < breaks.size(); j++) {
      // move break set_local's value to the break
      auto* breakSetLocalPointer = breaks[j].sinkables.at(sharedIndex).item;
      auto* brp = breaks[j].brp;
      auto* br = (*brp)->template cast<Break>();
      assert(!br->value);
      // if the break is conditional, then we must set the value here - if the break is not reached, we must still have the new value in the local
      auto* set = (*breakSetLocalPointer)->template cast<SetLocal>();
      if (br->condition) {
        br->value = set;
        set->setTee(true);
        *breakSetLocalPointer = this->getModule()->allocator.template alloc<Nop>();
        // in addition, as this is a conditional br that now has a value, it now returns a value, so it must be dropped
        br->finalize();
        *brp = Builder(*this->getModule()).makeDrop(br);
      } else {
        br->value = set->value;
        ExpressionManipulator::nop(set);
      }
    }
    // finally, create a set_local on the block itself
    auto* newSetLocal = Builder(*this->getModule()).makeSetLocal(sharedIndex, block);
    this->replaceCurrent(newSetLocal);
    sinkables.clear();
    anotherCycle = true;
  }

  // optimize set_locals from both sides of an if into a return value
  void optimizeIfReturn(If* iff, Expression** currp, Sinkables& ifTrue) {
    assert(iff->ifFalse);
    // if this if already has a result, or is unreachable code, we have
    // nothing to do
    if (iff->type != none) return;
    // We now have the sinkables from both sides of the if, and can look
    // for something to sink. That is either a shared index on both sides,
    // *or* if one side is unreachable, we can sink anything from the other,
    //   (if
    //     (..)
    //     (br $x)
    //     (set_local $y (..))
    //   )
    //    =>
    //   (set_local $y
    //     (if (result i32)
    //       (..)
    //       (br $x)
    //       (..)
    //     )
    //   )
    Sinkables& ifFalse = sinkables;
    Index goodIndex = -1;
    bool found = false;
    if (iff->ifTrue->type == unreachable) {
      assert(iff->ifFalse->type != unreachable); // since the if type is none
      if (!ifFalse.empty()) {
        goodIndex = ifFalse.begin()->first;
        found = true;
      }
    } else if (iff->ifFalse->type == unreachable) {
      assert(iff->ifTrue->type != unreachable); // since the if type is none
      if (!ifTrue.empty()) {
        goodIndex = ifTrue.begin()->first;
        found = true;
      }
    } else {
      // Look for a shared index.
      for (auto& sinkable : ifTrue) {
        Index index = sinkable.first;
        if (ifFalse.count(index) > 0) {
          goodIndex = index;
          found = true;
          break;
        }
      }
    }
    if (!found) return;
    // great, we can optimize!
    // ensure we have a place to write the return values for, if not, we
    // need another cycle
    auto* ifTrueBlock  = iff->ifTrue->dynCast<Block>();
    if (iff->ifTrue->type != unreachable) {
      if (!ifTrueBlock  || ifTrueBlock->list.size() == 0  || !ifTrueBlock->list.back()->is<Nop>()) {
        ifsToEnlarge.push_back(iff);
        return;
      }
    }
    auto* ifFalseBlock = iff->ifFalse->dynCast<Block>();
    if (iff->ifFalse->type != unreachable) {
      if (!ifFalseBlock  || ifFalseBlock->list.size() == 0  || !ifFalseBlock->list.back()->is<Nop>()) {
        ifsToEnlarge.push_back(iff);
        return;
      }
    }
    // all set, go
    if (iff->ifTrue->type != unreachable) {
      auto *ifTrueItem = ifTrue.at(goodIndex).item;
      ifTrueBlock->list[ifTrueBlock->list.size() - 1] = (*ifTrueItem)->template cast<SetLocal>()->value;
      ExpressionManipulator::nop(*ifTrueItem);
      ifTrueBlock->finalize();
      assert(ifTrueBlock->type != none);
    }
    if (iff->ifFalse->type != unreachable) {
      auto *ifFalseItem = ifFalse.at(goodIndex).item;
      ifFalseBlock->list[ifFalseBlock->list.size() - 1] = (*ifFalseItem)->template cast<SetLocal>()->value;
      ExpressionManipulator::nop(*ifFalseItem);
      ifFalseBlock->finalize();
      assert(ifFalseBlock->type != none);
    }
    iff->finalize(); // update type
    assert(iff->type != none);
    // finally, create a set_local on the iff itself
    auto* newSetLocal = Builder(*this->getModule()).makeSetLocal(goodIndex, iff);
    *currp = newSetLocal;
    anotherCycle = true;
  }

  // override scan to add a pre and a post check task to all nodes
  static void scan(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) {
    self->pushTask(visitPost, currp);

    auto* curr = *currp;

    if (curr->is<If>() && curr->cast<If>()->ifFalse) {
      // handle if-elses in a special manner, using the ifStack
      self->pushTask(SimplifyLocals<allowTee, allowStructure, allowNesting>::doNoteIfElseFalse, currp);
      self->pushTask(SimplifyLocals<allowTee, allowStructure, allowNesting>::scan, &curr->cast<If>()->ifFalse);
      self->pushTask(SimplifyLocals<allowTee, allowStructure, allowNesting>::doNoteIfElseTrue, currp);
      self->pushTask(SimplifyLocals<allowTee, allowStructure, allowNesting>::scan, &curr->cast<If>()->ifTrue);
      self->pushTask(SimplifyLocals<allowTee, allowStructure, allowNesting>::doNoteIfElseCondition, currp);
      self->pushTask(SimplifyLocals<allowTee, allowStructure, allowNesting>::scan, &curr->cast<If>()->condition);
    } else {
      WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::scan(self, currp);
    }

    self->pushTask(visitPre, currp);
  }

  void doWalkFunction(Function* func) {
    // scan get_locals
    getCounter.analyze(func);
    // multiple passes may be required per function, consider this:
    //    x = load
    //    y = store
    //    c(x, y)
    // the load cannot cross the store, but y can be sunk, after which so can x.
    //
    // we start with a cycle focusing on single-use locals, which are easy to
    // sink (we don't need to put a set), and a good match for common compiler
    // output patterns. further cycles do fully general sinking.
    firstCycle = true;
    do {
      anotherCycle = false;
      // main operation
      WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func);
      // enlarge blocks that were marked, for the next round
      if (blocksToEnlarge.size() > 0) {
        for (auto* block : blocksToEnlarge) {
          block->list.push_back(this->getModule()->allocator.template alloc<Nop>());
        }
        blocksToEnlarge.clear();
        anotherCycle = true;
      }
      // enlarge ifs that were marked, for the next round
      if (ifsToEnlarge.size() > 0) {
        for (auto* iff : ifsToEnlarge) {
          auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue);
          iff->ifTrue = ifTrue;
          if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) {
            ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>());
          }
          auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse);
          iff->ifFalse = ifFalse;
          if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) {
            ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>());
          }
        }
        ifsToEnlarge.clear();
        anotherCycle = true;
      }
      // handle loops (we can't modify set_locals in the main pass, as they are
      // being tracked)
      for (auto* currp : loops) {
        auto* curr = (*currp)->template cast<Loop>();
        // Optimizing a loop return value is trivial: just see if it contains
        // a set_local, and pull that out.
        if (auto* set = curr->body->template dynCast<SetLocal>()) {
          if (isConcreteType(set->value->type)) {
            curr->body = set->value;
            set->value = curr;
            curr->finalize(curr->body->type);
            *currp = set;
            anotherCycle = true;
          }
        }
      }
      loops.clear();
      // clean up
      sinkables.clear();
      blockBreaks.clear();
      unoptimizableBlocks.clear();
      if (firstCycle) {
        firstCycle = false;
        anotherCycle = true;
      }
    } while (anotherCycle);
    // Finally, after optimizing a function, we can see if we have set_locals
    // for a local with no remaining gets, in which case, we can
    // remove the set.
    // First, recount get_locals
    getCounter.analyze(func);
    // Second, remove unneeded sets
    SetLocalRemover remover;
    remover.numGetLocals = &getCounter.num;
    remover.walkFunction(func);
  }
};

Pass *createSimplifyLocalsPass() {
  return new SimplifyLocals<true, true>();
}

Pass *createSimplifyLocalsNoTeePass() {
  return new SimplifyLocals<false, true>();
}

Pass *createSimplifyLocalsNoStructurePass() {
  return new SimplifyLocals<true, false>();
}

Pass *createSimplifyLocalsNoTeeNoStructurePass() {
  return new SimplifyLocals<false, false>();
}

Pass *createSimplifyLocalsNoNestingPass() {
  return new SimplifyLocals<false, false, false>();
}

} // namespace wasm