summaryrefslogtreecommitdiff
path: root/src/passes/Precompute.cpp
blob: 93f2f1d69473f493848bdfdf0a5d542d6529a9b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
/*
 * Copyright 2016 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Computes code at compile time where possible, replacing it with the
// computed constant.
//
// The "propagate" variant of this pass also propagates constants across
// sets and gets, which implements a standard constant propagation.
//
// Possible nondeterminism: WebAssembly NaN signs are nondeterministic,
// and this pass may optimize e.g. a float 0 / 0 into +nan while a VM may
// emit -nan, which can be a noticeable difference if the bits are
// looked at.
//

#include "ir/iteration.h"
#include "ir/literal-utils.h"
#include "ir/local-graph.h"
#include "ir/manipulation.h"
#include "ir/properties.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "support/small_vector.h"
#include "wasm-builder.h"
#include "wasm-interpreter.h"
#include "wasm.h"

namespace wasm {

// A map of gets to their constant values. If a get does not have a constant
// value then it does not appear in the map (that avoids allocating for the
// majority of gets).
using GetValues = std::unordered_map<LocalGet*, Literals>;

// A map of values on the heap. This maps the expressions that create the
// heap data (struct.new, array.new, etc.) to the data they are created with.
// Each such expression gets its own GCData created for it. This allows
// computing identity between locals referring to the same GCData, by seeing
// if they point to the same thing.
//
// Note that a source expression may create different data each time it is
// reached in a loop,
//
// (loop
//  (if ..
//   (local.set $x
//    (struct.new ..
//   )
//  )
//  ..compare $x to something..
// )
//
// Just like in SSA form, this is not a problem because the loop entry must
// have a merge, if a value entering the loop might be noticed. In SSA form
// that means a phi is created, and identity is set there. In our
// representation, the merge will cause a local.get of $x to have more
// possible input values than that struct.new, which means we will not infer
// a value for it, and not attempt to say anything about comparisons of $x.
using HeapValues = std::unordered_map<Expression*, std::shared_ptr<GCData>>;

// Precomputes an expression. Errors if we hit anything that can't be
// precomputed. Inherits most of its functionality from
// ConstantExpressionRunner, which it shares with the C-API, but adds handling
// of GetValues computed during the precompute pass.
class PrecomputingExpressionRunner
  : public ConstantExpressionRunner<PrecomputingExpressionRunner> {

  using Super = ConstantExpressionRunner<PrecomputingExpressionRunner>;

  // Concrete values of gets computed during the pass, which the runner does not
  // know about since it only records values of sets it visits.
  const GetValues& getValues;

  HeapValues& heapValues;

  // Limit evaluation depth for 2 reasons: first, it is highly unlikely
  // that we can do anything useful to precompute a hugely nested expression
  // (we should succed at smaller parts of it first). Second, a low limit is
  // helpful to avoid platform differences in native stack sizes.
  static const Index MAX_DEPTH = 50;

  // Limit loop iterations since loops might be infinite. Since we are going to
  // replace the expression and must preserve side effects, we limit this to the
  // very first iteration because a side effect would be necessary to achieve
  // more than one iteration before becoming concrete.
  static const Index MAX_LOOP_ITERATIONS = 1;

public:
  PrecomputingExpressionRunner(Module* module,
                               const GetValues& getValues,
                               HeapValues& heapValues,
                               bool replaceExpression)
    : ConstantExpressionRunner<PrecomputingExpressionRunner>(
        module,
        replaceExpression ? FlagValues::PRESERVE_SIDEEFFECTS
                          : FlagValues::DEFAULT,
        MAX_DEPTH,
        MAX_LOOP_ITERATIONS),
      getValues(getValues), heapValues(heapValues) {}

  Flow visitLocalGet(LocalGet* curr) {
    auto iter = getValues.find(curr);
    if (iter != getValues.end()) {
      auto values = iter->second;
      assert(values.isConcrete());
      return Flow(values);
    }
    return ConstantExpressionRunner<
      PrecomputingExpressionRunner>::visitLocalGet(curr);
  }

  // TODO: Use immutability for values
  Flow visitStructNew(StructNew* curr) {
    auto flow = Super::visitStructNew(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitStructSet(StructSet* curr) { return Flow(NONCONSTANT_FLOW); }
  Flow visitStructGet(StructGet* curr) {
    if (curr->ref->type == Type::unreachable || curr->ref->type.isNull()) {
      return Flow(NONCONSTANT_FLOW);
    }
    switch (curr->order) {
      case MemoryOrder::Unordered:
        // This can always be precomputed.
        break;
      case MemoryOrder::SeqCst:
        // This can never be precomputed away because it synchronizes with other
        // threads.
        return Flow(NONCONSTANT_FLOW);
      case MemoryOrder::AcqRel:
        // This synchronizes only with writes to the same data, so it can still
        // be precomputed if the data is not shared with other threads.
        if (curr->ref->type.getHeapType().isShared()) {
          return Flow(NONCONSTANT_FLOW);
        }
        break;
    }
    // If this field is immutable then we may be able to precompute this, as
    // if we also created the data in this function (or it was created in an
    // immutable global) then we know the value in the field. If it is
    // immutable, call the super method which will do the rest here. That
    // includes checking for the data being properly created, as if it was
    // not then we will not have a constant value for it, which means the
    // local.get of that value will stop us.
    auto& field = curr->ref->type.getHeapType().getStruct().fields[curr->index];
    if (field.mutable_ == Mutable) {
      return Flow(NONCONSTANT_FLOW);
    }
    return Super::visitStructGet(curr);
  }
  Flow visitArrayNew(ArrayNew* curr) {
    auto flow = Super::visitArrayNew(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitArrayNewFixed(ArrayNewFixed* curr) {
    auto flow = Super::visitArrayNewFixed(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitArraySet(ArraySet* curr) { return Flow(NONCONSTANT_FLOW); }
  Flow visitArrayGet(ArrayGet* curr) {
    if (curr->ref->type != Type::unreachable && !curr->ref->type.isNull()) {
      // See above with struct.get
      auto element = curr->ref->type.getHeapType().getArray().element;
      if (element.mutable_ == Immutable) {
        return Super::visitArrayGet(curr);
      }
    }

    // Otherwise, we've failed to precompute.
    return Flow(NONCONSTANT_FLOW);
  }
  // ArrayLen is not disallowed here as it is an immutable property.
  Flow visitArrayCopy(ArrayCopy* curr) { return Flow(NONCONSTANT_FLOW); }

  // Generates heap info for a heap-allocating expression.
  template<typename T> Flow getHeapCreationFlow(Flow flow, T* curr) {
    // We must return a literal that refers to the canonical location for this
    // source expression, so that each time we compute a specific struct.new
    // we get the same identity.
    std::shared_ptr<GCData>& canonical = heapValues[curr];
    std::shared_ptr<GCData> newGCData = flow.getSingleValue().getGCData();
    if (!canonical) {
      canonical = std::make_shared<GCData>(*newGCData);
    } else {
      *canonical = *newGCData;
    }
    return Literal(canonical, curr->type.getHeapType());
  }

  Flow visitStringNew(StringNew* curr) {
    if (curr->op != StringNewWTF16Array) {
      // TODO: handle other string ops. For now we focus on JS-like strings.
      return Flow(NONCONSTANT_FLOW);
    }

    // string.encode_wtf16_array is effectively an Array read operation, so
    // just like ArrayGet above we must check for immutability.
    auto refType = curr->ref->type;
    if (refType.isRef()) {
      auto heapType = refType.getHeapType();
      if (heapType.isArray()) {
        if (heapType.getArray().element.mutable_ == Immutable) {
          return Super::visitStringNew(curr);
        }
      }
    }

    // Otherwise, this is mutable or unreachable or otherwise uninteresting.
    return Flow(NONCONSTANT_FLOW);
  }

  Flow visitStringEncode(StringEncode* curr) {
    // string.encode_wtf16_array is effectively an Array write operation, so
    // just like ArraySet and ArrayCopy above we must mark it as disallowed
    // (due to side effects). (And we do not support other operations than
    // string.encode_wtf16_array anyhow.)
    return Flow(NONCONSTANT_FLOW);
  }
};

struct Precompute
  : public WalkerPass<
      PostWalker<Precompute, UnifiedExpressionVisitor<Precompute>>> {
  bool isFunctionParallel() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<Precompute>(propagate);
  }

  bool propagate = false;

  Precompute(bool propagate) : propagate(propagate) {}

  GetValues getValues;
  HeapValues heapValues;

  bool canPartiallyPrecompute;

  void doWalkFunction(Function* func) {
    // Perform partial precomputing only when the optimization level is non-
    // trivial, as it is slower and less likely to help.
    canPartiallyPrecompute = getPassOptions().optimizeLevel >= 2;

    // Walk the function and precompute things.
    Super::doWalkFunction(func);
    partiallyPrecompute(func);
    if (!propagate) {
      return;
    }
    // When propagating, we can utilize the graph of local operations to
    // precompute the values from a local.set to a local.get. This populates
    // getValues which is then used by a subsequent walk that applies those
    // values.
    bool propagated = propagateLocals(func);
    if (propagated) {
      // We found constants to propagate and entered them in getValues. Do
      // another walk to apply them and perhaps other optimizations that are
      // unlocked.
      Super::doWalkFunction(func);
      // We could also try to partially precompute again, but that is a somewhat
      // heavy operation, so we only do it the first time, and leave such things
      // for later runs of this pass and for --converge.
    }
    // Note that in principle even more cycles could find further work here, in
    // very rare cases. To avoid constructing a LocalGraph again just for that
    // unlikely chance, we leave such things for later.
  }

  template<typename T> void reuseConstantNode(T* curr, Flow flow) {
    if (flow.values.isConcrete()) {
      // reuse a const / ref.null / ref.func node if there is one
      if (curr->value && flow.values.size() == 1) {
        Literal singleValue = flow.getSingleValue();
        if (singleValue.type.isNumber()) {
          if (auto* c = curr->value->template dynCast<Const>()) {
            c->value = singleValue;
            c->finalize();
            curr->finalize();
            return;
          }
        } else if (singleValue.isNull()) {
          if (auto* n = curr->value->template dynCast<RefNull>()) {
            n->finalize(singleValue.type);
            curr->finalize();
            return;
          }
        } else if (singleValue.type.isRef() &&
                   singleValue.type.getHeapType().isSignature()) {
          if (auto* r = curr->value->template dynCast<RefFunc>()) {
            r->func = singleValue.getFunc();
            auto heapType = getModule()->getFunction(r->func)->type;
            r->finalize(Type(heapType, NonNullable));
            curr->finalize();
            return;
          }
        }
      }
      curr->value = flow.getConstExpression(*getModule());
    } else {
      curr->value = nullptr;
    }
    curr->finalize();
  }

  void visitExpression(Expression* curr) {
    // TODO: if local.get, only replace with a constant if we don't care about
    // size...?
    if (Properties::isConstantExpression(curr) || curr->is<Nop>()) {
      return;
    }
    // try to evaluate this into a const
    Flow flow = precomputeExpression(curr);
    if (!canEmitConstantFor(flow.values)) {
      return;
    }
    if (flow.breaking()) {
      if (flow.breakTo == NONCONSTANT_FLOW) {
        // This cannot be turned into a constant, but perhaps we can partially
        // precompute it.
        considerPartiallyPrecomputing(curr);
        return;
      }
      if (flow.breakTo == RETURN_FLOW) {
        // this expression causes a return. if it's already a return, reuse the
        // node
        if (auto* ret = curr->dynCast<Return>()) {
          reuseConstantNode(ret, flow);
        } else {
          Builder builder(*getModule());
          replaceCurrent(builder.makeReturn(
            flow.values.isConcrete() ? flow.getConstExpression(*getModule())
                                     : nullptr));
        }
        return;
      }
      // this expression causes a break, emit it directly. if it's already a br,
      // reuse the node.
      if (auto* br = curr->dynCast<Break>()) {
        br->name = flow.breakTo;
        br->condition = nullptr;
        reuseConstantNode(br, flow);
      } else {
        Builder builder(*getModule());
        replaceCurrent(builder.makeBreak(
          flow.breakTo,
          flow.values.isConcrete() ? flow.getConstExpression(*getModule())
                                   : nullptr));
      }
      return;
    }
    // this was precomputed
    if (flow.values.isConcrete()) {
      replaceCurrent(flow.getConstExpression(*getModule()));
    } else {
      ExpressionManipulator::nop(curr);
    }
  }

  void visitBlock(Block* curr) {
    // When block precomputation fails, it can lead to quadratic slowness due to
    // the "tower of blocks" pattern used to implement switches:
    //
    //  (block
    //    (block
    //      ...
    //        (block
    //          (br_table ..
    //
    // If we try to precompute each block here, and fail on each, then we end up
    // doing quadratic work. This is also wasted work as once a nested block
    // fails to precompute there is not really a chance to succeed on the
    // parent. If we do *not* fail to precompute, however, then we do want to
    // precompute such nested blocks, e.g.:
    //
    //  (block $out
    //    (block
    //      (br $out)
    //    )
    //  )
    //
    // Here we *can* precompute the inner block, so when we get to the outer one
    // we see this:
    //
    //  (block $out
    //    (br $out)
    //  )
    //
    // And that precomputes to nothing. Therefore when we see a child of the
    // block that is another block (it failed to precompute to something
    // simpler) then we leave early here.
    //
    // Note that in theory we could still precompute here if wasm had
    // instructions that allow such things, e.g.:
    //
    //  (block $out
    //    (block
    //      (cause side effect1)
    //      (cause side effect2)
    //    )
    //    (undo those side effects exactly)
    //  )
    //
    // We are forced to invent a side effect that we can precisely undo (unlike,
    // say locals - a local.set would persist outside of the block, and even if
    // we did another set to the original value, this pass doesn't track values
    // that way). Only with that can we make the inner block un-precomputable
    // (because there are side effects) but the outer one is (because those
    // effects are undone). Note that it is critical that we have two things in
    // the block, so that we can't precompute it to one of them (which is what
    // we did to the br in the previous example). Note also that this is still
    // optimizable using other passes, as merge-blocks will fold the two blocks
    // together.
    if (!curr->list.empty() && curr->list[0]->is<Block>()) {
      // The first child is a block, that is, it could not be simplified, so
      // this looks like the "tower of blocks" pattern. Avoid quadratic time
      // here as explained above. (We could also look at other children of the
      // block, but the only real-world pattern identified so far is on the
      // first child, so keep things simple here.)
      return;
    }

    // Otherwise, precompute normally like all other expressions.
    visitExpression(curr);
  }

  // If we failed to precompute a constant, perhaps we can still precompute part
  // of an expression. Specifically, consider this case:
  //
  //  (A
  //    (select
  //      (B)
  //      (C)
  //      (condition)
  //    )
  //  )
  //
  // Perhaps we can compute A(B) and A(C). If so, we can emit a better select:
  //
  //  (select
  //    (constant result of A(B))
  //    (constant result of A(C))
  //    (condition)
  //  )
  //
  // Note that in general for code size we want to move operations *out* of
  // selects and ifs (OptimizeInstructions does that), but here we are
  // computing two constants which replace three expressions, so it is
  // worthwhile.
  //
  // To do such partial precomputing, in the main pass we note selects that look
  // promising. If we find any then we do a second pass later just for that (as
  // doing so requires walking up the stack in a manner that we want to avoid in
  // the main pass for overhead reasons; see below).
  //
  // Note that selects are all we really need here: Other passes would turn an
  // if into a select if the arms are simple enough, and only in those cases
  // (simple arms) do we have a chance at partially precomputing. For example,
  // if an arm is a constant then we can, but if it is a call then we can't.)
  // However, there are cases like an if with arms with side effects that end in
  // precomputable things, that are missed atm TODO
  std::unordered_set<Select*> partiallyPrecomputable;

  void considerPartiallyPrecomputing(Expression* curr) {
    if (!canPartiallyPrecompute) {
      return;
    }

    if (auto* select = curr->dynCast<Select>()) {
      // We only have a reasonable hope of success if the select arms are things
      // like constants or global gets. At a first approximation, allow the set
      // of things we allow in constant initializers (but we can probably allow
      // more here TODO).
      //
      // We also ignore selects with no parent (that are the entire function
      // body) as then there is nothing to optimize into their arms.
      auto& wasm = *getModule();
      if (Properties::isValidConstantExpression(wasm, select->ifTrue) &&
          Properties::isValidConstantExpression(wasm, select->ifFalse) &&
          getFunction()->body != select) {
        partiallyPrecomputable.insert(select);
      }
    }
  }

  // To partially precompute selects we walk up the stack from them, like this:
  //
  //  (A
  //    (B
  //      (select
  //        (C)
  //        (D)
  //        (condition)
  //      )
  //    )
  //  )
  //
  // First we try to apply B to C and D. If that works, we arrive at this:
  //
  //  (A
  //    (select
  //      (constant result of B(C))
  //      (constant result of B(D))
  //      (condition)
  //    )
  //  )
  //
  // We can then proceed to perhaps apply A. However, even if we failed to apply
  // B then we can try to apply A and B together, because that combination may
  // succeed where incremental work fails, for example:
  //
  //  (global $C
  //    (struct.new    ;; outer
  //      (struct.new  ;; inner
  //        (i32.const 10)
  //      )
  //    )
  //  )
  //
  //  (struct.get    ;; outer
  //    (struct.get  ;; inner
  //      (select
  //        (global.get $C)
  //        (global.get $D)
  //        (condition)
  //      )
  //    )
  //  )
  //
  // Applying the inner struct.get to $C leads us to the inner struct.new, but
  // that is an interior pointer in the global - it is not something we can
  // refer to using a global.get, so precomputing it fails. However, when we
  // apply both struct.gets at once we arrive at the outer struct.new, which is
  // in fact the global $C, and we succeed.
  void partiallyPrecompute(Function* func) {
    if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) {
      // Nothing to do.
      return;
    }

    // Walk the function to find the parent stacks of the promising selects. We
    // copy the stacks and process them later. We do it like this because if we
    // wanted to process stacks as we reached them then we'd trip over
    // ourselves: when we optimize we replace a parent, but that parent is an
    // expression we'll reach later in the walk, so modifying it is unsafe.
    struct StackFinder : public ExpressionStackWalker<StackFinder> {
      Precompute& parent;

      StackFinder(Precompute& parent) : parent(parent) {}

      // We will later iterate on this in the order of insertion, which keeps
      // things deterministic, and also usually lets us do consecutive work
      // like a select nested in another select's condition, simply because we
      // will traverse the selects in postorder (however, because we cannot
      // always succeed in an incremental manner - see the comment on this
      // function - it is possible in theory that some work can happen only in a
      // later execution of the pass).
      InsertOrderedMap<Select*, ExpressionStack> stackMap;

      void visitSelect(Select* curr) {
        if (parent.partiallyPrecomputable.count(curr)) {
          stackMap[curr] = expressionStack;
        }
      }
    } stackFinder(*this);
    stackFinder.walkFunction(func);

    // Note which expressions we've modified as we go, as it is invalid to
    // modify more than once. This could happen in theory in a situation like
    // this:
    //
    //  (ternary.f32.max  ;; fictional instruction for explanatory purposes
    //    (select ..)
    //    (select ..)
    //    (f32.infinity)
    //  )
    //
    // When we consider the first select we can see that the computation result
    // is always infinity, so we can optimize here and replace the ternary. Then
    // the same thing happens with the second select, causing the ternary to be
    // replaced again, which is unsafe because it no longer exists after we
    // precomputed it the first time. (Note that in this example the result is
    // the same either way, but at least in theory an instruction could exist
    // for whom there was a difference.) In practice it does not seem that wasm
    // has instructions capable of this atm but this code is still useful to
    // guard against future problems, and as a minor speedup (quickly skip code
    // if it was already modified).
    std::unordered_set<Expression*> modified;

    for (auto& [select, stack] : stackFinder.stackMap) {
      // Each stack ends in the select itself, and contains more than the select
      // itself (otherwise we'd have ignored the select), i.e., the select has a
      // parent that we can try to optimize into the arms.
      assert(stack.back() == select);
      assert(stack.size() >= 2);
      Index selectIndex = stack.size() - 1;
      assert(selectIndex >= 1);

      if (modified.count(select)) {
        // This select was modified; go to the next one.
        continue;
      }

      // Go up through the parents, until we can't do any more work. At each
      // parent we'll try to execute it and all intermediate parents into the
      // select arms.
      for (Index parentIndex = selectIndex - 1; parentIndex != Index(-1);
           parentIndex--) {
        auto* parent = stack[parentIndex];
        if (modified.count(parent)) {
          // This parent was modified; exit the loop on parents as no upper
          // parent is valid to try either.
          break;
        }

        // If the parent lacks a concrete type then we can't move it into the
        // select: the select needs a concrete (and non-tuple) type. For example
        // if the parent is a drop or is unreachable, those are things we don't
        // want to handle, and we stop here (once we see one such parent we
        // can't expect to make any more progress).
        if (!parent->type.isConcrete() || parent->type.isTuple()) {
          break;
        }

        // We are precomputing the select arms, but leaving the condition as-is.
        // If the condition breaks to the parent, then we can't move the parent
        // into the select arms:
        //
        //  (block $name ;; this must stay outside of the select
        //    (select
        //      (B)
        //      (C)
        //      (block ;; condition
        //        (br_if $target
        //
        // Ignore all control flow for simplicity, as they aren't interesting
        // for us, and other passes should have removed them anyhow.
        if (Properties::isControlFlowStructure(parent)) {
          break;
        }

        // This looks promising, so try to precompute here. What we do is
        // precompute twice, once with the select replaced with the left arm,
        // and once with the right. If both succeed then we can create a new
        // select (with the same condition as before) whose arms are the
        // precomputed values.
        auto isValidPrecomputation = [&](const Flow& flow) {
          // For now we handle simple concrete values. We could also handle
          // breaks in principle TODO
          return canEmitConstantFor(flow.values) && !flow.breaking() &&
                 flow.values.isConcrete();
        };

        // Find the pointer to the select in its immediate parent so that we can
        // replace it first with one arm and then the other.
        auto** pointerToSelect =
          getChildPointerInImmediateParent(stack, selectIndex, func);
        *pointerToSelect = select->ifTrue;
        auto ifTrue = precomputeExpression(parent);
        if (isValidPrecomputation(ifTrue)) {
          *pointerToSelect = select->ifFalse;
          auto ifFalse = precomputeExpression(parent);
          if (isValidPrecomputation(ifFalse)) {
            // Wonderful, we can precompute here! The select can now contain the
            // computed values in its arms.
            select->ifTrue = ifTrue.getConstExpression(*getModule());
            select->ifFalse = ifFalse.getConstExpression(*getModule());
            select->finalize();

            // The parent of the select is now replaced by the select.
            auto** pointerToParent =
              getChildPointerInImmediateParent(stack, parentIndex, func);
            *pointerToParent = select;

            // Update state for further iterations: Mark everything modified and
            // move the select to the parent's location.
            for (Index i = parentIndex; i <= selectIndex; i++) {
              modified.insert(stack[i]);
            }
            selectIndex = parentIndex;
            stack[selectIndex] = select;
            stack.resize(selectIndex + 1);
          }
        }

        // Whether we succeeded to precompute here or not, restore the parent's
        // pointer to its original state (if we precomputed, the parent is no
        // longer in use, but there is no harm in modifying it).
        *pointerToSelect = select;
      }
    }
  }

  void visitFunction(Function* curr) {
    // removing breaks can alter types
    ReFinalize().walkFunctionInModule(curr, getModule());
  }

private:
  // Precompute an expression, returning a flow, which may be a constant
  // (that we can replace the expression with if replaceExpression is set).
  Flow precomputeExpression(Expression* curr, bool replaceExpression = true) {
    Flow flow;
    try {
      flow = PrecomputingExpressionRunner(
               getModule(), getValues, heapValues, replaceExpression)
               .visit(curr);
    } catch (PrecomputingExpressionRunner::NonconstantException&) {
      return Flow(NONCONSTANT_FLOW);
    }
    // If we are replacing the expression, then the resulting value must be of
    // a type we can emit a constant for.
    if (!flow.breaking() && replaceExpression &&
        !canEmitConstantFor(flow.values)) {
      return Flow(NONCONSTANT_FLOW);
    }
    return flow;
  }

  // Precomputes the value of an expression, as opposed to the expression
  // itself. This differs from precomputeExpression in that we care about
  // the value the expression will have, which we cannot necessary replace
  // the expression with. For example,
  //  (local.tee (i32.const 1))
  // will have value 1 which we can optimize here, but in precomputeExpression
  // we could not do anything.
  Literals precomputeValue(Expression* curr) {
    // Note that we set replaceExpression to false, as we just care about
    // the value here.
    Flow flow = precomputeExpression(curr, false /* replaceExpression */);
    if (flow.breaking()) {
      return {};
    }
    return flow.values;
  }

  // Propagates values around. Returns whether we propagated.
  bool propagateLocals(Function* func) {
    bool propagated = false;

    // Using the graph of get-set interactions, do a constant-propagation type
    // operation: note which sets are assigned locals, then see if that lets us
    // compute other sets as locals (since some of the gets they read may be
    // constant). We do this lazily as most locals do not end up with constant
    // values that we can propagate.
    LazyLocalGraph localGraph(func, getModule());

    // A map of sets to their constant values. If a set does not appear here
    // then it is not constant, like |getValues|.
    std::unordered_map<LocalSet*, Literals> setValues;

    // The work list, which will contain sets and gets that have just been
    // found to have a constant value. As we only add them to the list when they
    // are found to be constant, each can only be added once, and a simple
    // vector is enough here (which we can make a small vector to avoid any
    // allocation in small-enough functions).
    SmallVector<Expression*, 10> work;

    // Given a set, see if it has a constant value. If so, note that on
    // setValues and add to the work list.
    auto checkConstantSet = [&](LocalSet* set) {
      if (setValues.count(set)) {
        // Already known to be constant.
        return;
      }

      // Precompute the value. Note that this executes the code from scratch
      // each time we reach this point, and so we need to be careful about
      // repeating side effects if those side effects are expressed *in the
      // value*. A case where that can happen is GC data (each struct.new
      // creates a new, unique struct, even if the data is equal), and so
      // PrecomputingExpressionRunner has special logic to make sure that
      // reference identity is preserved properly.
      //
      // (Other side effects are fine; if an expression does a call and we
      // somehow know the entire expression precomputes to a 42, then we can
      // propagate that 42 along to the users, regardless of whatever the call
      // did globally.)
      auto values = precomputeValue(
        Properties::getFallthrough(set->value, getPassOptions(), *getModule()));

      // We precomputed the *fallthrough* value (which allows us to look through
      // some things that would otherwise block us). But in some cases, like a
      // ref.cast, the fallthrough value can have an incompatible type for the
      // entire expression, which would be invalid for us to propagate, e.g.:
      //
      //  (ref.cast (ref struct)
      //    (ref.null any)
      //  )
      //
      // In such a case the value cannot actually fall through. Ignore such
      // cases (which other optimizations can handle) by making sure that we
      // only propagate a valid subtype.
      if (values.isConcrete() &&
          Type::isSubType(values.getType(), set->value->type)) {
        setValues[set] = values;
        work.push_back(set);
      }
    };

    // The same, for a get.
    auto checkConstantGet = [&](LocalGet* get) {
      if (getValues.count(get)) {
        // Already known to be constant.
        return;
      }

      // For this get to have constant value, all sets must agree on a constant.
      Literals values;
      bool first = true;
      for (auto* set : localGraph.getSets(get)) {
        Literals curr;
        if (set == nullptr) {
          if (getFunction()->isVar(get->index)) {
            auto localType = getFunction()->getLocalType(get->index);
            if (!localType.isDefaultable()) {
              // This is a nondefaultable local that seems to read the default
              // value at the function entry. This is either an internal error
              // or a case of unreachable code; the latter is possible as
              // LocalGraph is not precise in unreachable code. Give up.
              return;
            } else {
              curr = Literal::makeZeros(localType);
            }
          } else {
            // It's a param, so the value is non-constant. Give up.
            return;
          }
        } else {
          // If there is an entry for the set, use that constant. Otherwise, the
          // set is not constant, and we give up.
          auto iter = setValues.find(set);
          if (iter == setValues.end()) {
            return;
          }
          curr = iter->second;
        }

        // We found a concrete value, so there is a chance, if it matches all
        // the rest.
        assert(curr.isConcrete());
        if (first) {
          // This is the first ever value we see. All later ones must match it.
          values = curr;
          first = false;
        } else if (values != curr) {
          // This later value is not the same as before, give up.
          return;
        }
      }

      if (values.isConcrete()) {
        // We found a constant value!
        getValues[get] = values;
        work.push_back(get);
        propagated = true;
      } else {
        // If it is not concrete then, since we early-exited before on any
        // possible problem, there must be no sets for this get, which means it
        // is in unreachable code. In that case, we never switched |first| from
        // true to false.
        assert(first == true);
        // We could optimize using unreachability here, but we leave that for
        // other passes.
      }
    };

    // Check all gets and sets to find which are constant, mark them as such,
    // and add propagation work based on that.
    for (auto& [curr, _] : localGraph.getLocations()) {
      if (auto* set = curr->dynCast<LocalSet>()) {
        checkConstantSet(set);
      } else {
        checkConstantGet(curr->cast<LocalGet>());
      }
    }

    // Propagate constant values while work remains.
    while (!work.empty()) {
      auto* curr = work.back();
      work.pop_back();

      // This get or set is a constant value. Check if the things it influences
      // become constant.
      if (auto* set = curr->dynCast<LocalSet>()) {
        for (auto* get : localGraph.getSetInfluences(set)) {
          checkConstantGet(get);
        }
      } else {
        auto* get = curr->cast<LocalGet>();
        for (auto* set : localGraph.getGetInfluences(get)) {
          checkConstantSet(set);
        }
      }
    }

    return propagated;
  }

  bool canEmitConstantFor(const Literals& values) {
    for (auto& value : values) {
      if (!canEmitConstantFor(value)) {
        return false;
      }
    }
    return true;
  }

  bool canEmitConstantFor(const Literal& value) {
    // A null is fine to emit a constant for - we'll emit a RefNull. Otherwise,
    // see below about references to GC data.
    if (value.isNull()) {
      return true;
    }
    return canEmitConstantFor(value.type);
  }

  bool canEmitConstantFor(Type type) {
    // A function is fine to emit a constant for - we'll emit a RefFunc, which
    // is compact and immutable, so there can't be a problem.
    if (type.isFunction()) {
      return true;
    }
    // We can emit a StringConst for a string constant.
    if (type.isString()) {
      return true;
    }
    // All other reference types cannot be precomputed. Even an immutable GC
    // reference is not currently something this pass can handle, as it will
    // evaluate and reevaluate code multiple times in e.g. propagateLocals, see
    // the comment above.
    if (type.isRef()) {
      return false;
    }

    return true;
  }

  // Helpers for partial precomputing.

  // Given a stack of expressions and the index of an expression in it, find
  // the pointer to that expression in the parent. This gives us a pointer that
  // allows us to replace the expression.
  Expression** getChildPointerInImmediateParent(const ExpressionStack& stack,
                                                Index index,
                                                Function* func) {
    if (index == 0) {
      // There is nothing above this expression, so the pointer referring to it
      // is the function's body.
      return &func->body;
    }

    auto* child = stack[index];
    auto childIterator = ChildIterator(stack[index - 1]);
    for (auto** currChild : childIterator.children) {
      if (*currChild == child) {
        return currChild;
      }
    }

    WASM_UNREACHABLE("child not found in parent");
  }
};

Pass* createPrecomputePass() { return new Precompute(false); }

Pass* createPrecomputePropagatePass() { return new Precompute(true); }

} // namespace wasm