/*
 * Copyright 2016 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Inlining.
//
// Two versions are provided: inlining and inlining-optimizing. You
// probably want the optimizing version, which will optimize locations
// we inlined into, as inlining by itself creates a block to house the
// inlined code, some temp locals, etc., which can usually be removed
// by optimizations. Note that the two versions use the same heuristics,
// so we don't take into account the overhead if you don't optimize
// afterwards. The non-optimizing version is mainly useful for debugging,
// or if you intend to run a full set of optimizations anyhow on
// everything later.
//

#include <atomic>

#include "ir/branch-utils.h"
#include "ir/debuginfo.h"
#include "ir/drop.h"
#include "ir/eh-utils.h"
#include "ir/element-utils.h"
#include "ir/find_all.h"
#include "ir/literal-utils.h"
#include "ir/localize.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/type-updating.h"
#include "ir/utils.h"
#include "parsing.h"
#include "pass.h"
#include "passes/opt-utils.h"
#include "wasm-builder.h"
#include "wasm.h"

namespace wasm {

namespace {

enum class InliningMode {
  // We do not know yet if this function can be inlined, as that has
  // not been computed yet.
  Unknown,
  // This function cannot be inlined in any way.
  Uninlineable,
  // This function can be inlined fully, that is, normally: the entire function
  // can be inlined. This is in contrast to split/partial inlining, see below.
  Full,
  // This function cannot be inlined normally, but we can use split inlining,
  // using pattern "A" or "B" (see below).
  SplitPatternA,
  SplitPatternB
};

// Useful info on a function, helping us decide if we can inline it.
struct FunctionInfo {
  std::atomic<Index> refs;
  Index size;
  bool hasCalls;
  bool hasLoops;
  bool hasTryDelegate;
  // Something is used globally if there is a reference to it in a table or
  // export etc.
  bool usedGlobally;
  // We consider a function to be a trivial call if the body is just a call with
  // trivial arguments, like this:
  //
  //  (func $forward (param $x) (param $y)
  //    (call $target (local.get $x) (local.get $y))
  //  )
  //
  // Specifically the body must be a call, and the operands to the call must be
  // of size 1 (generally, LocalGet or Const).
  bool isTrivialCall;
  InliningMode inliningMode;

  FunctionInfo() { clear(); }

  void clear() {
    refs = 0;
    size = 0;
    hasCalls = false;
    hasLoops = false;
    hasTryDelegate = false;
    usedGlobally = false;
    isTrivialCall = false;
    inliningMode = InliningMode::Unknown;
  }

  // Provide an explicit = operator as the |refs| field lacks one by default.
  FunctionInfo& operator=(const FunctionInfo& other) {
    refs = other.refs.load();
    size = other.size;
    hasCalls = other.hasCalls;
    hasLoops = other.hasLoops;
    hasTryDelegate = other.hasTryDelegate;
    usedGlobally = other.usedGlobally;
    isTrivialCall = other.isTrivialCall;
    inliningMode = other.inliningMode;
    return *this;
  }

  // See pass.h for how defaults for these options were chosen.
  bool worthFullInlining(PassOptions& options) {
    // Until we have proper support for try-delegate, ignore such functions.
    // FIXME https://github.com/WebAssembly/binaryen/issues/3634
    if (hasTryDelegate) {
      return false;
    }
    // If it's small enough that we always want to inline such things, do so.
    if (size <= options.inlining.alwaysInlineMaxSize) {
      return true;
    }
    // If it has one use, then inlining it would likely reduce code size, at
    // least for reasonable function sizes.
    if (refs == 1 && !usedGlobally &&
        size <= options.inlining.oneCallerInlineMaxSize) {
      return true;
    }
    // If it's so big that we have no flexible options that could allow it,
    // do not inline.
    if (size > options.inlining.flexibleInlineMaxSize) {
      return false;
    }
    // More than one use, so we can't eliminate it after inlining, and inlining
    // it will hurt code size. Stop if we are focused on size or not heavily
    // focused on speed.
    if (options.shrinkLevel > 0 || options.optimizeLevel < 3) {
      return false;
    }
    if (hasCalls) {
      // This has calls. If it is just a trivial call itself then inline, as we
      // will save a call that way - basically we skip a trampoline in the
      // middle - but if it is something more complex, leave it alone, as we may
      // not help much (and with recursion we may end up with a wasteful
      // increase in code size).
      //
      // Note that inlining trivial calls may increase code size, e.g. if they
      // use a parameter more than once (forcing us after inlining to save that
      // value to a local, etc.), but here we are optimizing for speed and not
      // size, so we risk it.
      return isTrivialCall;
    }
    // This doesn't have calls. Inline if loops do not prevent us (normally, a
    // loop suggests a lot of work and so inlining is less useful).
    return !hasLoops || options.inlining.allowFunctionsWithLoops;
  }
};

static bool canHandleParams(Function* func) {
  // We cannot inline a function if we cannot handle placing its params in
  // locals, as all params become locals.
  for (auto param : func->getParams()) {
    if (!TypeUpdating::canHandleAsLocal(param)) {
      return false;
    }
  }
  return true;
}

using NameInfoMap = std::unordered_map<Name, FunctionInfo>;

struct FunctionInfoScanner
  : public WalkerPass<PostWalker<FunctionInfoScanner>> {
  bool isFunctionParallel() override { return true; }

  FunctionInfoScanner(NameInfoMap& infos) : infos(infos) {}

  std::unique_ptr<Pass> create() override {
    return std::make_unique<FunctionInfoScanner>(infos);
  }

  void visitLoop(Loop* curr) {
    // having a loop
    infos[getFunction()->name].hasLoops = true;
  }

  void visitCall(Call* curr) {
    // can't add a new element in parallel
    assert(infos.count(curr->target) > 0);
    infos[curr->target].refs++;
    // having a call
    infos[getFunction()->name].hasCalls = true;
  }

  // N.B.: CallIndirect and CallRef are intentionally omitted here, as we only
  //       note direct calls. Direct calls can lead to infinite recursion,
  //       which we need to avoid. Indirect calls may in theory be optimized
  //       to direct calls later, but we take that risk: if we do manage to
  //       turn an indirect call into a direct one, it can be a big speedup,
  //       so we do want to inline code that has such indirect calls.

  void visitTry(Try* curr) {
    if (curr->isDelegate()) {
      infos[getFunction()->name].hasTryDelegate = true;
    }
  }

  void visitRefFunc(RefFunc* curr) {
    assert(infos.count(curr->func) > 0);
    infos[curr->func].refs++;
  }

  void visitFunction(Function* curr) {
    auto& info = infos[curr->name];

    if (!canHandleParams(curr)) {
      info.inliningMode = InliningMode::Uninlineable;
    }

    info.size = Measurer::measure(curr->body);

    if (auto* call = curr->body->dynCast<Call>()) {
      if (info.size == call->operands.size() + 1) {
        // This function body is a call with some trivial (size 1) operands like
        // LocalGet or Const, so it is a trivial call.
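        // (For example, (call $target (local.get $x) (local.get $y))
        // measures 3: the call itself plus two operands of size 1 each,
        // matching operands.size() + 1.)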
        info.isTrivialCall = true;
      }
    }
  }

private:
  NameInfoMap& infos;
};

struct InliningAction {
  Expression** callSite;
  Function* contents;
  bool insideATry;

  // An optional name hint can be provided, which will then be used in the name
  // of the block we put the inlined code in. Using a unique name hint in each
  // inlining can reduce the risk of name overlaps (which cause fixup work in
  // UniqueNameMapper::uniquify).
  Index nameHint = 0;

  InliningAction(Expression** callSite,
                 Function* contents,
                 bool insideATry,
                 Index nameHint = 0)
    : callSite(callSite), contents(contents), insideATry(insideATry),
      nameHint(nameHint) {}
};

struct InliningState {
  // Maps functions worth inlining to the mode with which we can inline them.
  std::unordered_map<Name, InliningMode> inlinableFunctions;
  // function name => actions that can be performed in it
  std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction;
};

struct Planner : public WalkerPass<TryDepthWalker<Planner>> {
  bool isFunctionParallel() override { return true; }

  Planner(InliningState* state) : state(state) {}

  std::unique_ptr<Pass> create() override {
    return std::make_unique<Planner>(state);
  }

  void visitCall(Call* curr) {
    // Plan to inline if we know this is valid to inline, and if the call is
    // actually performed - if it is dead code, it is pointless to inline.
    // We also cannot inline a function into itself.
    bool isUnreachable;
    if (curr->isReturn) {
      // Tail calls are only actually unreachable if an argument is
      // unreachable.
      isUnreachable = std::any_of(
        curr->operands.begin(), curr->operands.end(), [](Expression* op) {
          return op->type == Type::unreachable;
        });
    } else {
      isUnreachable = curr->type == Type::unreachable;
    }
    if (state->inlinableFunctions.count(curr->target) && !isUnreachable &&
        curr->target != getFunction()->name) {
      // can't add a new element in parallel
      assert(state->actionsForFunction.count(getFunction()->name) > 0);
      state->actionsForFunction[getFunction()->name].emplace_back(
        getCurrentPointer(),
        getModule()->getFunction(curr->target),
        tryDepth > 0);
    }
  }

private:
  InliningState* state;
};

struct Updater : public TryDepthWalker<Updater> {
  Module* module;
  std::map<Index, Index> localMapping;
  Name returnName;
  Type resultType;
  bool isReturn;
  Builder* builder;
  PassOptions& options;

  struct ReturnCallInfo {
    // The original `return_call` or `return_call_indirect` or `return_call_ref`
    // with its operands replaced with `local.get`s.
    Expression* call;
    // The branch that is serving as the "return" part of the original
    // `return_call`.
    Break* branch;
  };

  // Collect information on return_calls in the inlined body. Each will be
  // turned into a branch out of the original inlined body, followed by a
  // non-return version of the original `return_call`, followed by a branch
  // out to the caller. The branch labels will be filled in at the end of the
  // walk.
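  //
  // As a rough sketch (block and label names here are illustrative), a
  // return_call inside a try in the inlined body, like
  //
  //  (return_call $target (..operands..))
  //
  // ends up, after the walk below, as
  //
  //  (block ;; the new inlined body
  //    (block $__return_call0
  //      ..the original inlined body, where the return_call was replaced
  //        by local.sets of its operands and (br $__return_call0)..
  //      ..a branch out to the caller, taken if we did not reach the
  //        return_call..
  //    )
  //    (call $target (..local.gets..)) ;; the downgraded, non-return call
  //  )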
  std::vector<ReturnCallInfo> returnCallInfos;

  Updater(PassOptions& options) : options(options) {}

  void visitReturn(Return* curr) {
    replaceCurrent(builder->makeBreak(returnName, curr->value));
  }

  template<typename T> void handleReturnCall(T* curr, Signature sig) {
    if (isReturn || !curr->isReturn) {
      // If the inlined callsite was already a return_call, then we can keep
      // return_calls in the inlined function rather than downgrading them.
      // That is, if A->B and B->C and both those calls are return_calls
      // then after inlining A->B we want to now have A->C be a
      // return_call.
      return;
    }

    if (tryDepth == 0) {
      // Return calls in inlined functions should only break out of
      // the scope of the inlined code, not the entire function they
      // are being inlined into. To achieve this, make the call a
      // non-return call and add a break. This does not cause
      // unbounded stack growth because inlining and return calling
      // both avoid creating a new stack frame.
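      //
      // As a sketch, (return_call $target (..)) becomes
      //
      //  (br $return (call $target (..)))  ;; if the results are concrete
      //
      // or, if there are no results,
      //
      //  (call $target (..)) (br $return)
      //
      // where $return stands for the label we branch to instead of
      // returning.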
      curr->isReturn = false;
      curr->type = sig.results;
      // There might still be unreachable children causing this to be
      // unreachable.
      curr->finalize();
      if (sig.results.isConcrete()) {
        replaceCurrent(builder->makeBreak(returnName, curr));
      } else {
        replaceCurrent(builder->blockify(curr, builder->makeBreak(returnName)));
      }
    } else {
      // Set the children to locals as necessary, then add a branch out of the
      // inlined body. The branch label will be set later when we create branch
      // targets for the calls.
      Block* childBlock = ChildLocalizer(curr, getFunction(), *module, options)
                            .getChildrenReplacement();
      Break* branch = builder->makeBreak(Name());
      childBlock->list.push_back(branch);
      childBlock->type = Type::unreachable;
      replaceCurrent(childBlock);

      curr->isReturn = false;
      curr->type = sig.results;
      returnCallInfos.push_back({curr, branch});
    }
  }

  void visitCall(Call* curr) {
    handleReturnCall(curr, module->getFunction(curr->target)->getSig());
  }

  void visitCallIndirect(CallIndirect* curr) {
    handleReturnCall(curr, curr->heapType.getSignature());
  }

  void visitCallRef(CallRef* curr) {
    Type targetType = curr->target->type;
    if (!targetType.isSignature()) {
      // We don't know what type the call should return, but it will also never
      // be reached, so we don't need to do anything here.
      return;
    }
    handleReturnCall(curr, targetType.getHeapType().getSignature());
  }

  void visitLocalGet(LocalGet* curr) {
    curr->index = localMapping[curr->index];
  }

  void visitLocalSet(LocalSet* curr) {
    curr->index = localMapping[curr->index];
  }

  void walk(Expression*& curr) {
    PostWalker<Updater>::walk(curr);
    if (returnCallInfos.empty()) {
      return;
    }

    Block* body = builder->blockify(curr);
    curr = body;
    auto blockNames = BranchUtils::BranchAccumulator::get(body);

    for (Index i = 0; i < returnCallInfos.size(); ++i) {
      auto& info = returnCallInfos[i];

      // Add a block containing the previous body and a branch up to the caller.
      // Give the block a name that will allow this return_call's original
      // callsite to branch out of it then execute the call before returning to
      // the caller.
      auto name = Names::getValidName(
        "__return_call", [&](Name test) { return !blockNames.count(test); }, i);
      blockNames.insert(name);
      info.branch->name = name;
      Block* oldBody = builder->makeBlock(body->list, body->type);
      body->list.clear();

      if (resultType.isConcrete()) {
        body->list.push_back(builder->makeBlock(
          name, {builder->makeBreak(returnName, oldBody)}, Type::none));
      } else {
        oldBody->list.push_back(builder->makeBreak(returnName));
        oldBody->name = name;
        oldBody->type = Type::none;
        body->list.push_back(oldBody);
      }
      body->list.push_back(info.call);
      body->finalize(resultType);
    }
  }
};

// Core inlining logic. Modifies the outside function (adding locals as
// needed) by copying the inlined code into it.
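//
// As a rough sketch of the common case (a plain call, not a return_call and
// not inside a try; names here are illustrative), a call site
//
//  (call $foo (X))
//
// becomes
//
//  (block $__inlined_func$foo (result ..)
//    (local.set $param (X)) ;; assign the operands to the new locals
//    ..a copy of $foo's body, with local indices remapped, and returns
//      replaced by branches to $__inlined_func$foo..
//  )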
static void doCodeInlining(Module* module,
                           Function* into,
                           const InliningAction& action,
                           PassOptions& options) {
  Function* from = action.contents;
  auto* call = (*action.callSite)->cast<Call>();

  // Works for return_call, too
  Type retType = module->getFunction(call->target)->getResults();

  // Build the block that will contain the inlined contents.
  Builder builder(*module);
  auto* block = builder.makeBlock();
  auto name = std::string("__inlined_func$") + from->name.toString();
  if (action.nameHint) {
    name += '$' + std::to_string(action.nameHint);
  }
  block->name = Name(name);

  // In the unlikely event that the function already has a branch target with
  // this name, fix that up, as otherwise we can get unexpected capture of our
  // branches, that is, we could end up with this:
  //
  //  (block $X             ;; a new block we add as the target of returns
  //    (from's contents
  //      (block $X         ;; a block in from's contents with a colliding name
  //        (br $X          ;; a new br we just added that replaces a return
  //
  // Here the br wants to go to the very outermost block, to represent a
  // return from the inlined function's code, but it ends up captured by an
  // internal block. We also need to be careful of the call's children:
  //
  //  (block $X             ;; a new block we add as the target of returns
  //    (local.set $param
  //      (call's first parameter
  //        (br $X)         ;; nested br in call's first parameter
  //      )
  //    )
  //
  // (In this case we could use a second block and define the named block $X
  // after the call's parameters, but that adds work for an extremely rare
  // situation.) The latter case does not apply if the call is a
  // return_call inside a try, because in that case the call's
  // children do not appear inside the same block as the inlined body.
  bool hoistCall = call->isReturn && action.insideATry;
  if (BranchUtils::hasBranchTarget(from->body, block->name) ||
      (!hoistCall && BranchUtils::BranchSeeker::has(call, block->name))) {
    auto fromNames = BranchUtils::getBranchTargets(from->body);
    auto callNames = hoistCall ? BranchUtils::NameSet{}
                               : BranchUtils::BranchAccumulator::get(call);
    block->name = Names::getValidName(block->name, [&](Name test) {
      return !fromNames.count(test) && !callNames.count(test);
    });
  }

  // Prepare to update the inlined code's locals and other things.
  Updater updater(options);
  updater.setFunction(into);
  updater.module = module;
  updater.resultType = from->getResults();
  updater.returnName = block->name;
  updater.isReturn = call->isReturn;
  updater.builder = &builder;
  // Set up a locals mapping
  for (Index i = 0; i < from->getNumLocals(); i++) {
    updater.localMapping[i] = builder.addVar(into, from->getLocalType(i));
  }

  if (hoistCall) {
    // Wrap the existing function body in a block we can branch out of before
    // entering the inlined function body. This block must have a name that is
    // different from any other block name above the branch.
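    //
    // As a rough sketch (names are illustrative), the original function
    //
    //  (func $into ..body containing the return_call..)
    //
    // ends up as
    //
    //  (func $into
    //    (block $__original_body
    //      ..body, with the return_call replaced by local.sets of its
    //        operands and (br $__original_body)..
    //      (return)
    //    )
    //    ..the inlined block, sequenced after the original body..
    //  )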
    auto intoNames = BranchUtils::BranchAccumulator::get(into->body);
    auto bodyName =
      Names::getValidName(Name("__original_body"),
                          [&](Name test) { return !intoNames.count(test); });
    if (retType.isConcrete()) {
      into->body = builder.makeBlock(
        bodyName, {builder.makeReturn(into->body)}, Type::none);
    } else {
      into->body = builder.makeBlock(
        bodyName, {into->body, builder.makeReturn()}, Type::none);
    }

    // Sequence the inlined function body after the original caller body.
    into->body = builder.makeSequence(into->body, block, retType);

    // Replace the original callsite with an expression that assigns the
    // operands into the params and branches out of the original body.
    auto numParams = from->getParams().size();
    if (numParams) {
      auto* branchBlock = builder.makeBlock();
      for (Index i = 0; i < numParams; i++) {
        branchBlock->list.push_back(
          builder.makeLocalSet(updater.localMapping[i], call->operands[i]));
      }
      branchBlock->list.push_back(builder.makeBreak(bodyName));
      branchBlock->finalize(Type::unreachable);
      *action.callSite = branchBlock;
    } else {
      *action.callSite = builder.makeBreak(bodyName);
    }
  } else {
    // Assign the operands into the params
    for (Index i = 0; i < from->getParams().size(); i++) {
      block->list.push_back(
        builder.makeLocalSet(updater.localMapping[i], call->operands[i]));
    }
    // Zero out the vars (as we may be in a loop, and may depend on their
    // zero-init value).
    for (Index i = 0; i < from->vars.size(); i++) {
      auto type = from->vars[i];
      if (!LiteralUtils::canMakeZero(type)) {
        // Non-zeroable locals do not need to be zeroed out. As they have no
        // zero value they by definition should not be used before being written
        // to, so any value we set here would not be observed anyhow.
        continue;
      }
      block->list.push_back(
        builder.makeLocalSet(updater.localMapping[from->getVarIndexBase() + i],
                             LiteralUtils::makeZero(type, *module)));
    }
    if (call->isReturn) {
      assert(!action.insideATry);
      if (retType.isConcrete()) {
        *action.callSite = builder.makeReturn(block);
      } else {
        *action.callSite = builder.makeSequence(block, builder.makeReturn());
      }
    } else {
      *action.callSite = block;
    }
  }

  // Generate and update the inlined contents
  auto* contents = ExpressionManipulator::copy(from->body, *module);
  debuginfo::copyBetweenFunctions(from->body, contents, from, into);
  updater.walk(contents);
  block->list.push_back(contents);
  block->type = retType;

  // The ReFinalize below will handle propagating unreachability if we need to
  // do so, that is, if the call was reachable but now the inlined content we
  // replaced it with was unreachable. The opposite case requires special
  // handling: ReFinalize works under the assumption that code can become
  // unreachable, but it does not go back from that state. But inlining can
  // cause that:
  //
  //  (call $A                               ;; an unreachable call
  //    (unreachable)
  //  )
  // =>
  //  (block $__inlined_A_body (result i32)  ;; reachable code after inlining
  //    (unreachable)
  //  )
  //
  // That is, if the called function wraps the input parameter in a block with a
  // declared type, then the block is not unreachable. And then we might error
  // if the outside expects the code to be unreachable - perhaps it only
  // validates that way. To fix this, if the call was unreachable then we make
  // the inlined code unreachable as well. That also maximizes DCE
  // opportunities by propagating unreachability as much as possible.
  //
  // (Note that we don't need to do this for a return_call, which is always
  // unreachable anyhow.)
  if (call->type == Type::unreachable && !call->isReturn) {
    // Make the replacement code unreachable. Note that we can't just add an
    // unreachable at the end, as the block might have breaks to it (returns are
    // transformed into those).
    Expression* old = block;
    if (old->type.isConcrete()) {
      old = builder.makeDrop(old);
    }
    *action.callSite = builder.makeSequence(old, builder.makeUnreachable());
  }
}

// Updates the outer function after we inline into it. This is a general
// operation that does not depend on what we inlined, it just makes sure that we
// refinalize everything, have no duplicate break labels, etc.
static void updateAfterInlining(Module* module, Function* into) {
  // Anything we inlined into may now have non-unique label names, fix it up.
  // Note that we must do this before refinalization, as otherwise duplicate
  // block labels can lead to errors (the IR must be valid before we
  // refinalize).
  wasm::UniqueNameMapper::uniquify(into->body);
  // Inlining unreachable contents can make things in the function we inlined
  // into unreachable.
  ReFinalize().walkFunctionInModule(into, module);
  // New locals we added may require fixups for nondefaultability.
  // FIXME Is this not done automatically?
  TypeUpdating::handleNonDefaultableLocals(into, *module);
}

static void doInlining(Module* module,
                       Function* into,
                       const InliningAction& action,
                       PassOptions& options) {
  doCodeInlining(module, into, action, options);
  updateAfterInlining(module, into);
}

// A map of function names to the inlining actions we've decided to actually
// perform in them.
using ChosenActions = std::unordered_map<Name, std::vector<InliningAction>>;

// A pass that calls doInlining() on a bunch of actions that were chosen to
// be performed.
struct DoInlining : public Pass {
  bool isFunctionParallel() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<DoInlining>(chosenActions);
  }

  DoInlining(const ChosenActions& chosenActions)
    : chosenActions(chosenActions) {}

  void runOnFunction(Module* module, Function* func) override {
    auto iter = chosenActions.find(func->name);
    // We must be called on a function that we actually want to inline into.
    assert(iter != chosenActions.end());
    const auto& actions = iter->second;
    assert(!actions.empty());

    // Inline all the code first, then update func once at the end (which saves
    // e.g. running ReFinalize after each action, of which there might be many).
    for (auto action : actions) {
      doCodeInlining(module, func, action, getPassOptions());
    }
    updateAfterInlining(module, func);
  }

private:
  const ChosenActions& chosenActions;
};

//
// Function splitting / partial inlining / inlining of conditions.
//
// A function may be too costly to inline, but it may be profitable to
// *partially* inline it. The specific cases optimized here are functions with a
// condition,
//
//  function foo(x) {
//    if (x) return;
//    ..lots and lots of other code..
//  }
//
// If the other code after the if is large enough or costly enough, then we will
// not inline the entire function. But it is useful to inline the condition.
// Consider this caller:
//
//  function caller(x) {
//    foo(0);
//    foo(x);
//  }
//
// If we inline the condition, we end up with this:
//
//  function caller(x) {
//    if (0) foo(0);
//    if (x) foo(x);
//  }
//
// By inlining the condition out of foo() we gain two benefits:
//
//  * In the first call here the condition is zero, which means we can
//    statically optimize out the call entirely.
//  * Even if we can't do that (as in the second call) if at runtime we see the
//    condition is false then we avoid the call. That means we perform what is
//    hopefully a cheap branch instead of a call and then a branch.
//
// The cost to doing this is an increase in code size, and so this is only done
// when optimizing heavily for speed.
//
// To implement partial inlining we split the function to be inlined. Starting
// with
//
//  function foo(x) {
//    if (x) return;
//    ..lots and lots of other code..
//  }
//
// We create the "inlineable" part of it, and the code that is "outlined":
//
//  function foo$inlineable(x) {
//    if (x) return;
//    foo$outlined(x);
//  }
//  function foo$outlined(x) {
//    ..lots and lots of other code..
//  }
//
// (Note how if the second function were inlined into the first, we would end
// up where we started, with the original function.) After splitting the
// function in this way, we simply inline the inlineable part using the normal
// mechanism for that. That ends up replacing  foo(x);  with
//
//  if (x) foo$outlined(x);
//
// which is what we wanted.
//
// To reduce the complexity of this feature, it is implemented almost entirely
// in its own class, FunctionSplitter. The main inlining logic just calls out to
// the splitter to check if a function is worth splitting, and to get the split
// part if so.
//

struct FunctionSplitter {
  Module* module;
  PassOptions& options;

  FunctionSplitter(Module* module, PassOptions& options)
    : module(module), options(options) {}

  // Check if a function could be split in order to at least inline part of
  // it, in a worthwhile manner.
  //
  // Even if this returns a split inlining mode, we may not end up inlining the
  // function, as the main inlining logic has a few other considerations to take
  // into account (like limitations on which functions can be inlined into in
  // each iteration, the number of iterations, etc.). Therefore this function
  // may only find out if we *can* split, but not actually do any splitting.
  //
  // Note that to avoid wasteful work, this function may return "Full" inlining
  // mode instead of a split inlining. That is, if it detects that a partial
  // inlining will trigger a follow-up full inline of the split function,
  // then it will instead return "InliningMode::Full" directly. In more detail,
  // imagine we have
  //
  //   foo(10);
  //
  // which calls
  //
  //   func foo(x) {
  //     if (x) {
  //       CODE
  //     }
  //   }
  //
  // If we partially inline then the call becomes
  //
  //   if (10) {
  //     outlined-CODE()
  //   }
  //
  // That is, we've only inlined the |if| out of foo(), and we call a function
  // that contains the code in CODE. But if CODE is small enough to be inlined
  // then a later iteration of the inliner will do just that. The result of this
  // split inlining and then full inlining of the outlined code is exactly the
  // same as if we normally inlined from the beginning, but it adds more work,
  // so we'd like to avoid it, which we do by seeing when a split would be
  // followed by inlining the remainder, and then we return Full here and just
  // inline it all normally.
  //
  // Note that the above issue shows that we may in some cases inline more than
  // the normal inlining limit. That is, if the inlining limit is 20, and CODE
  // in the example above was 19, then foo()'s size was 21 (when we add the if
  // and get of |x|). That leads to the situation where foo() is too big to
  // normally inline, but the outlined CODE can then be inlined. And this shows
  // that we end up inlining a function of size 21, even though it is larger
  // than our inlining limit. We consider this acceptable because it only
  // occurs on functions slightly larger than the inlining limit, and only if
  // they have the simple forms that split inlining recognizes, that we think
  // are useful to inline. Alternatively, if we wanted to avoid this, it would
  // add complexity because we'd need to recognize the outlined function with
  // CODE in it and *not* inline it even though it is small enough, after
  // splitting, to be inlined. That is, splitting creates smaller functions, so
  // it can lead to more inlining (but, again, that makes sense since we only
  // split on very specific patterns we believe are worth handling in that
  // manner).
  InliningMode getSplitDrivenInliningMode(Function* func, FunctionInfo& info) {
    const Index MaxIfs = options.inlining.partialInliningIfs;
    assert(MaxIfs > 0);

    auto* body = func->body;

    // If the body is a block, and we have breaks to that block, then we cannot
    // outline any code - we can't outline a break without the break's target.
    if (auto* block = body->dynCast<Block>()) {
      if (BranchUtils::BranchSeeker::has(block, block->name)) {
        return InliningMode::Uninlineable;
      }
    }

    // All the patterns we look for right now start with an if at the very top
    // of the function.
    auto* iff = getIf(body);
    if (!iff) {
      return InliningMode::Uninlineable;
    }

    // If the condition is not very simple, the benefits of this optimization
    // are not obvious.
    if (!isSimple(iff->condition)) {
      return InliningMode::Uninlineable;
    }

    // Pattern A: Check if the function begins with
    //
    //  if (simple) return;
    //
    // TODO: support a return value
    if (!iff->ifFalse && func->getResults() == Type::none &&
        iff->ifTrue->is<Return>()) {
      // The body must be a block, because if it were not then the function
      // would be easily inlineable (just an if with a simple condition and a
      // return), and we would not even attempt to do splitting.
      assert(body->is<Block>());

      auto outlinedFunctionSize = info.size - Measurer::measure(iff);
      // If the outlined function would be worth a normal inline, skip the
      // intermediate state and inline fully now. Note that if full inlining
      // is disabled we will not do this, and instead inline partially.
      if (!func->noFullInline &&
          outlinedFunctionWorthInlining(info, outlinedFunctionSize)) {
        return InliningMode::Full;
      }

      return InliningMode::SplitPatternA;
    }

    // Pattern B: Represents a function whose entire body looks like
    //
    //  if (A_1) {
    //    ..heavy work..
    //  }
    //  ..
    //  if (A_k) {
    //    ..heavy work..
    //  }
    //  B; // an optional final value (which can be a return value)
    //
    // where there is a small number of such ifs with arguments A1..A_k, and
    // A_1..A_k and B (if the final value B exists) are very simple.
    //
    // Also, each if body must either be unreachable, or it must have type none
    // and have no returns. If it is unreachable, for example because it is a
    // return, then we will just return a value in the inlineable function:
    //
    //  if (A_i) {
    //    return outlined(..);
    //  }
    //
    // Or, if an if body has type none, then for now we assume that we do not
    // need to return a value from there, which makes things simpler, and in
    // that case we just do this, which continues onward in the function:
    //
    //  if (A_i) {
    //    outlined(..);
    //  }
    //
    // TODO: handle a possible returned value in this case as well.
    //
    // Note that the if body type must be unreachable or none, as this is an if
    // without an else.

    // Find the number of ifs.
    Index numIfs = 0;
    while (getIf(body, numIfs) && numIfs <= MaxIfs) {
      numIfs++;
    }
    if (numIfs == 0 || numIfs > MaxIfs) {
      return InliningMode::Uninlineable;
    }

    // Look for a final item after the ifs.
    auto* finalItem = getItem(body, numIfs);

    // The final item must be simple (or not exist, which is simple enough).
    if (finalItem && !isSimple(finalItem)) {
      return InliningMode::Uninlineable;
    }

    // There must be no other items after the optional final one.
    if (finalItem && getItem(body, numIfs + 1)) {
      return InliningMode::Uninlineable;
    }
    // This has the general shape we seek. Check each if.
    for (Index i = 0; i < numIfs; i++) {
      auto* iff = getIf(body, i);
      // The if must have a simple condition and no else arm.
      if (!isSimple(iff->condition) || iff->ifFalse) {
        return InliningMode::Uninlineable;
      }
      if (iff->ifTrue->type == Type::none) {
        // This must have no returns.
        if (!FindAll<Return>(iff->ifTrue).list.empty()) {
          return InliningMode::Uninlineable;
        }
      } else {
        // This is an if without an else, and so the type is either none or
        // unreachable, and we ruled out none before.
        assert(iff->ifTrue->type == Type::unreachable);
      }
    }
    // Success, this matches the pattern.

    // If the outlined function will be worth inlining normally, skip the
    // intermediate state and inline fully now. (As above, if full inlining is
    // disabled, we only partially inline.)
    if (numIfs == 1) {
      auto outlinedFunctionSize = Measurer::measure(iff->ifTrue);
      if (!func->noFullInline &&
          outlinedFunctionWorthInlining(info, outlinedFunctionSize)) {
        return InliningMode::Full;
      }
    }

    return InliningMode::SplitPatternB;
  }

  // Returns the function we should inline, after we split the function into two
  // pieces as described above (that is, in the example above, this would return
  // foo$inlineable).
  //
  // This is called when we are definitely inlining the function, and so it will
  // perform the splitting (if that has not already been done before).
  Function* getInlineableSplitFunction(Function* func,
                                       InliningMode inliningMode) {
    assert(inliningMode == InliningMode::SplitPatternA ||
           inliningMode == InliningMode::SplitPatternB);
    auto& split = splits[func->name];

    if (!split.inlineable) {
      // We haven't performed the split, do it now.
      split.inlineable = doSplit(func, inliningMode);
    }

    return split.inlineable;
  }

  // Clean up. When we are done we no longer need the inlineable functions on
  // the module, as they have been inlined into all the places we wanted them
  // for.
  //
  // Returns a list of the names of the functions we split.
  std::vector<Name> finish() {
    std::vector<Name> ret;
    std::unordered_set<Name> inlineableNames;
    for (auto& [func, split] : splits) {
      auto* inlineable = split.inlineable;
      if (inlineable) {
        inlineableNames.insert(inlineable->name);
        ret.push_back(func);
      }
    }
    module->removeFunctions([&](Function* func) {
      return inlineableNames.find(func->name) != inlineableNames.end();
    });
    return ret;
  }

private:
  // Information about splitting a function.
  struct Split {
    // The inlineable function out of the two that we generate by splitting.
    // That is, foo$inlineable from above.
    Function* inlineable = nullptr;

    // The outlined function, that is, foo$outlined from above.
    Function* outlined = nullptr;
  };

  // All the splitting we have already performed.
  //
  // Note that this maps from function names, and not Function*, as the main
  // inlining code can remove functions as it goes, but we can rely on names
  // staying constant.
  std::unordered_map<Name, Split> splits;

  bool outlinedFunctionWorthInlining(FunctionInfo& origin, Index sizeEstimate) {
    FunctionInfo info;
    // Start with a copy of the origin's info, and apply the size estimate.
    // This is not accurate; for example, the origin function may have loops
    // or calls even though this section may not.
    // This is a conservative estimate, that is, it will return true only when
    // it should, but might return false when a more precise analysis would
    // return true. And it is a practical estimation to avoid extra future work.
    info = origin;
    info.size = sizeEstimate;
    return info.worthFullInlining(options);
  }

  Function* doSplit(Function* func, InliningMode inliningMode) {
    Builder builder(*module);

    if (inliningMode == InliningMode::SplitPatternA) {
      // Note that "A" in the name here identifies this as being a split from
      // pattern A. The second pattern B will have B in the name.
      Function* inlineable = copyFunction(func, "inlineable-A");
      auto* outlined = copyFunction(func, "outlined-A");

      // The inlineable function should only have the if, which will call the
      // outlined function with a flipped condition.
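      //
      // Roughly (names are illustrative), for the original
      //
      //  (func $foo
      //    (if (condition) (return))
      //    ..lots of other code..
      //  )
      //
      // the inlineable copy becomes
      //
      //  (func $byn-split-inlineable-A$foo
      //    (if (i32.eqz (condition))
      //      (call $byn-split-outlined-A$foo (..forwarded args..))
      //    )
      //  )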
      auto* inlineableIf = getIf(inlineable->body);
      inlineableIf->condition =
        builder.makeUnary(EqZInt32, inlineableIf->condition);
      inlineableIf->ifTrue = builder.makeCall(
        outlined->name, getForwardedArgs(func, builder), Type::none);
      inlineable->body = inlineableIf;

      // The outlined function no longer needs the initial if.
      auto& outlinedList = outlined->body->cast<Block>()->list;
      outlinedList.erase(outlinedList.begin());

      return inlineable;
    }

    assert(inliningMode == InliningMode::SplitPatternB);

    Function* inlineable = copyFunction(func, "inlineable-B");

    const Index MaxIfs = options.inlining.partialInliningIfs;
    assert(MaxIfs > 0);

    // The inlineable function should only have the ifs, which will call the
    // outlined heavy work.
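    //
    // Roughly (names are illustrative), each
    //
    //  (if (A_i)
    //    ..heavy work..
    //  )
    //
    // in the inlineable copy becomes
    //
    //  (if (A_i)
    //    (call $byn-split-outlined-B$foo (..forwarded args..))
    //  )
    //
    // with the call wrapped in a return if it produces a value.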
    for (Index i = 0; i < MaxIfs; i++) {
      // For each if, create an outlined function with the body of that if,
      // and call that from the if.
      auto* inlineableIf = getIf(inlineable->body, i);
      if (!inlineableIf) {
        break;
      }
      auto* outlined = copyFunction(func, "outlined-B");
      outlined->body = inlineableIf->ifTrue;

      // The outlined function either returns the same results as the original
      // one, or nothing, depending on whether a value is returned here.
      auto valueReturned =
        func->getResults() != Type::none && outlined->body->type != Type::none;
      outlined->setResults(valueReturned ? func->getResults() : Type::none);
      inlineableIf->ifTrue = builder.makeCall(outlined->name,
                                              getForwardedArgs(func, builder),
                                              outlined->getResults());
      if (valueReturned) {
        inlineableIf->ifTrue = builder.makeReturn(inlineableIf->ifTrue);
      }
    }

    return inlineable;
  }

  Function* copyFunction(Function* func, std::string prefix) {
    // TODO: We copy quite a lot more than we need here, and throw stuff out.
    //       It is simple to just copy the entire thing to get the params and
    //       results and all that, but we could be more efficient.
    prefix = "byn-split-" + prefix;
    return ModuleUtils::copyFunction(
      func,
      *module,
      Names::getValidFunctionName(*module,
                                  prefix + '$' + func->name.toString()));
  }

  // Get the i-th item in a sequence of initial items in an expression. That
  // is, if the expression is a block, it may contain several such items;
  // otherwise there is a single item, the expression itself. This basically
  // provides a simpler interface than checking whether something is a block
  // when there is just one item.
  //
  // Returns nullptr if there is no such item.
  static Expression* getItem(Expression* curr, Index i = 0) {
    if (auto* block = curr->dynCast<Block>()) {
      auto& list = block->list;
      if (i < list.size()) {
        return list[i];
      }
    }
    if (i == 0) {
      return curr;
    }
    return nullptr;
  }

  // Get the i-th if in a sequence of initial ifs in an expression. If no such
  // if exists, returns nullptr.
  static If* getIf(Expression* curr, Index i = 0) {
    auto* item = getItem(curr, i);
    if (!item) {
      return nullptr;
    }
    if (auto* iff = item->dynCast<If>()) {
      return iff;
    }
    return nullptr;
  }

  // Checks if an expression is very simple - something simple enough that we
  // are willing to inline it in this optimization. This should basically take
  // almost no cost at all to compute.
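  //
  // For example, (i32.eqz (local.get $x)) and (ref.is_null (global.get $g))
  // count as simple here, while calls, loads, and anything unreachable do
  // not.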
  bool isSimple(Expression* curr) {
    // For now, support local and global gets, and unary operations.
    // TODO: Generalize? Use costs.h?
    if (curr->type == Type::unreachable) {
      return false;
    }
    if (curr->is<GlobalGet>() || curr->is<LocalGet>()) {
      return true;
    }
    if (auto* unary = curr->dynCast<Unary>()) {
      return isSimple(unary->value);
    }
    if (auto* is = curr->dynCast<RefIsNull>()) {
      return isSimple(is->value);
    }
    return false;
  }

  // Returns a list of local.gets, one for each of the parameters to the
  // function. This forwards the arguments passed to the inlineable function to
  // the outlined one.
  std::vector<Expression*> getForwardedArgs(Function* func, Builder& builder) {
    std::vector<Expression*> args;
    for (Index i = 0; i < func->getNumParams(); i++) {
      args.push_back(builder.makeLocalGet(i, func->getLocalType(i)));
    }
    return args;
  }
};

struct Inlining : public Pass {
  // This pass changes locals and parameters.
  // FIXME DWARF updating does not handle local changes yet.
  bool invalidatesDWARF() override { return true; }

  // whether to optimize where we inline
  bool optimize = false;

  // the information for each function, recomputed in each iteration
  NameInfoMap infos;

  std::unique_ptr<FunctionSplitter> functionSplitter;

  Module* module = nullptr;

  void run(Module* module_) override {
    module = module_;

    // There is no point in doing more iterations than the number of
    // functions, as that means we are infinitely recursing (which should be
    // very rare in practice, but it is possible that a recursive call can
    // look like it is worth inlining).
    Index iterationNumber = 0;

    auto numOriginalFunctions = module->functions.size();

    // Track in how many iterations a function was inlined into. We are willing
    // to inline many times into a function within an iteration, as e.g. that
    // helps the case of many calls of a small getter. However, if we only do
    // more inlining in separate iterations then it is likely code that was the
    // result of previous inlinings that is now being inlined into. That is, an
    // old inlining added a call to somewhere, and now we are inlining into that
    // call. This is typically recursion, which to some extent can help, but
    // then like loop unrolling it loses its benefit quickly, so set a limit
    // here.
    //
    // In addition to inlining into a function, we track how many times we do
    // other potentially repetitive operations like splitting a function before
    // inlining, as any such repetitive operation should be limited in how many
    // times we perform it. (An exception is how many times we inlined a
    // function, which we do not want to limit - it can be profitable to inline
    // a call into a great many callsites, over many iterations.)
    //
    // (Track names here, and not Function pointers, as we can remove functions
    // while inlining, and it may be confusing during debugging to have a
    // pointer to something that was removed.)
    std::unordered_map<Name, Index> iterationCounts;

    const size_t MaxIterationsForFunc = 5;

    while (iterationNumber <= numOriginalFunctions) {
#ifdef INLINING_DEBUG
      std::cout << "inlining loop iter " << iterationNumber
                << " (numFunctions: " << module->functions.size() << ")\n";
#endif
      iterationNumber++;

      std::unordered_set<Function*> inlinedInto;

      prepare();
      iteration(inlinedInto);

      if (inlinedInto.empty()) {
        return;
      }

#ifdef INLINING_DEBUG
      std::cout << "  inlined into " << inlinedInto.size() << " funcs.\n";
#endif

      for (auto* func : inlinedInto) {
        EHUtils::handleBlockNestedPops(func, *module);
      }

      for (auto* func : inlinedInto) {
        if (++iterationCounts[func->name] >= MaxIterationsForFunc) {
          return;
        }
      }

      if (functionSplitter) {
        auto splitNames = functionSplitter->finish();
        for (auto name : splitNames) {
          if (++iterationCounts[name] >= MaxIterationsForFunc) {
            return;
          }
        }
      }
    }
  }

  void prepare() {
    infos.clear();
    // fill in info, as we operate on it in parallel (each function to its own
    // entry)
    for (auto& func : module->functions) {
      infos[func->name];
    }
    {
      FunctionInfoScanner scanner(infos);
      scanner.run(getPassRunner(), module);
      scanner.walkModuleCode(module);
    }
    for (auto& ex : module->exports) {
      if (ex->kind == ExternalKind::Function) {
        infos[ex->value].usedGlobally = true;
      }
    }
    if (module->start.is()) {
      infos[module->start].usedGlobally = true;
    }

    // When optimizing heavily for speed, we may potentially split functions
    // in order to inline parts of them, if partialInliningIfs is enabled.
    auto& options = getPassOptions();
    if (options.optimizeLevel >= 3 && !options.shrinkLevel &&
        options.inlining.partialInliningIfs) {
      functionSplitter = std::make_unique<FunctionSplitter>(module, options);
    }
  }

  void iteration(std::unordered_set<Function*>& inlinedInto) {
    // decide which to inline
    InliningState state;
    ModuleUtils::iterDefinedFunctions(*module, [&](Function* func) {
      InliningMode inliningMode = getInliningMode(func->name);
      assert(inliningMode != InliningMode::Unknown);
      if (inliningMode != InliningMode::Uninlineable) {
        state.inlinableFunctions[func->name] = inliningMode;
      }
    });
    if (state.inlinableFunctions.empty()) {
      return;
    }
    // Fill in actionsForFunction, as we operate on it in parallel (each
    // function to its own entry). Also generate a vector of the function names
    // so that in the later loop we can iterate on it deterministically and
    // without iterator invalidation.
    std::vector<Name> funcNames;
    for (auto& func : module->functions) {
      state.actionsForFunction[func->name];
      funcNames.push_back(func->name);
    }

    // Find and plan inlinings in parallel. This discovers inlining
    // opportunities, by themselves, but does not yet take into account
    // interactions between them (e.g. we don't want to both inline into a
    // function and then inline it as well).
    Planner(&state).run(getPassRunner(), module);

    // Choose which inlinings to perform. We do this sequentially so that we
    // can consider interactions between them, and avoid nondeterminism.
    ChosenActions chosenActions;

    // How many uses (calls of the function) we inlined.
    std::unordered_map<Name, Index> inlinedUses;

    for (auto name : funcNames) {
      auto* func = module->getFunction(name);
      // if we've inlined a function, don't inline into it in this iteration,
      // avoid risk of races
      // note that we do not risk stalling progress, as each iteration() will
      // inline at least one call before hitting this
      if (inlinedUses.count(func->name)) {
        continue;
      }
      for (auto& action : state.actionsForFunction[name]) {
        auto* inlinedFunction = action.contents;
        // if we've inlined into a function, don't inline it in this iteration,
        // avoid risk of races
        // note that we do not risk stalling progress, as each iteration() will
        // inline at least one call before hitting this
        if (inlinedInto.count(inlinedFunction)) {
          continue;
        }
        Name inlinedName = inlinedFunction->name;
        if (!isUnderSizeLimit(func->name, inlinedName)) {
          continue;
        }

        // Success - we can inline.
#ifdef INLINING_DEBUG
        std::cout << "inline " << inlinedName << " into " << func->name << '\n';
#endif

        // Update the action for the actual inlining we have chosen to perform
        // (when splitting, we will actually inline one of the split pieces and
        // not the original function itself; note how even if we do that then
        // we are still removing a call to the original function here, and so
        // we do not need to change anything else lower down - we still want to
        // note that we got rid of one use of the original function).
        action.contents = getActuallyInlinedFunction(action.contents);
        action.nameHint = inlinedNameHint++;
        chosenActions[func->name].push_back(action);
        inlinedUses[inlinedName]++;
        inlinedInto.insert(func);
        assert(inlinedUses[inlinedName] <= infos[inlinedName].refs);
      }
    }

    if (chosenActions.empty()) {
      // We found nothing to do.
      return;
    }

    // Perform the inlinings in parallel (sequentially inside each function we
    // inline into, but in parallel between them). If we are optimizing, do so
    // as well.
    {
      PassUtils::FilteredPassRunner runner(
        module, inlinedInto, getPassRunner()->options);
      runner.setIsNested(true);
      runner.add(std::make_unique<DoInlining>(chosenActions));
      if (optimize) {
        OptUtils::addUsefulPassesAfterInlining(runner);
      }
      runner.run();
    }

    // remove functions that we no longer need after inlining
    module->removeFunctions([&](Function* func) {
      auto name = func->name;
      auto& info = infos[name];
      return inlinedUses.count(name) && inlinedUses[name] == info.refs &&
             !info.usedGlobally;
    });
  }

  // See explanation in InliningAction.
  Index inlinedNameHint = 0;

  // Decide for a given function whether to inline, and if so in what mode.
  InliningMode getInliningMode(Name name) {
    auto* func = module->getFunction(name);
    auto& info = infos[name];

    if (info.inliningMode != InliningMode::Unknown) {
      return info.inliningMode;
    }

    // Check if the function itself is worth inlining as it is.
    if (!func->noFullInline && info.worthFullInlining(getPassOptions())) {
      return info.inliningMode = InliningMode::Full;
    }

    // Otherwise, check if we can at least inline part of it, if we are
    // interested in such things.
    if (!func->noPartialInline && functionSplitter) {
      info.inliningMode = functionSplitter->getSplitDrivenInliningMode(
        module->getFunction(name), info);
      return info.inliningMode;
    }

    // Cannot be fully or partially inlined => uninlineable.
    info.inliningMode = InliningMode::Uninlineable;
    return info.inliningMode;
  }

  // Gets the actual function to be inlined. Normally this is the function
  // itself, but if it is a function that we must first split (i.e., we only
  // want to partially inline it) then it will be the inlineable part of the
  // split.
  //
  // This is called right before actually performing the inlining, that is, we
  // are guaranteed to inline after this.
  Function* getActuallyInlinedFunction(Function* func) {
    InliningMode inliningMode = infos[func->name].inliningMode;
    // If we want to inline this function itself, do so.
    if (inliningMode == InliningMode::Full) {
      return func;
    }

    // Otherwise, this is a case where we want to inline part of it, after
    // splitting.
    assert(functionSplitter);
    return functionSplitter->getInlineableSplitFunction(func, inliningMode);
  }

  // Checks if the combined size of the code after inlining is under the
  // absolute size limit. We have an absolute limit in order to avoid
  // extremely-large sizes after inlining, as they may hit limits in VMs and/or
  // slow down startup (measurements there indicate something like ~1 second to
  // optimize a 100K function). See e.g.
  // https://github.com/WebAssembly/binaryen/pull/3730#issuecomment-867939138
  // https://github.com/emscripten-core/emscripten/issues/13899#issuecomment-825073344
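  //
  // For example, two functions measuring 60,000 and 50,000 expressions
  // combine to 110,000 expressions, and 110,000 * BytesPerExpr exceeds the
  // 400K limit below whenever BytesPerExpr >= 4, in which case we would not
  // inline.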
  bool isUnderSizeLimit(Name target, Name source) {
    // Estimate the combined binary size from the number of instructions.
    auto combinedSize = infos[target].size + infos[source].size;
    auto estimatedBinarySize = Measurer::BytesPerExpr * combinedSize;
    // The limit is arbitrary, but based on the links above. It is a very high
    // value that should appear very rarely in practice (for example, it does
    // not occur on the Emscripten benchmark suite of real-world codebases).
    const Index MaxCombinedBinarySize = 400 * 1024;
    return estimatedBinarySize < MaxCombinedBinarySize;
  }
};

} // anonymous namespace

//
// InlineMain
//
// Inline __original_main into main, if they exist. This works around the odd
// thing that clang/llvm currently do, where __original_main contains the user's
// actual main (this is done as a workaround for main having two different
// possible signatures).
//

static const char* MAIN = "main";
static const char* ORIGINAL_MAIN = "__original_main";

struct InlineMainPass : public Pass {
  void run(Module* module) override {
    auto* main = module->getFunctionOrNull(MAIN);
    auto* originalMain = module->getFunctionOrNull(ORIGINAL_MAIN);
    if (!main || main->imported() || !originalMain ||
        originalMain->imported()) {
      return;
    }
    FindAllPointers<Call> calls(main->body);
    Expression** callSite = nullptr;
    for (auto* call : calls.list) {
      if ((*call)->cast<Call>()->target == ORIGINAL_MAIN) {
        if (callSite) {
          // More than one call site.
          return;
        }
        callSite = call;
      }
    }
    if (!callSite) {
      // No call at all.
      return;
    }
    doInlining(module,
               main,
               InliningAction(callSite, originalMain, true),
               getPassOptions());
  }
};

Pass* createInliningPass() { return new Inlining(); }

Pass* createInliningOptimizingPass() {
  auto* ret = new Inlining();
  ret->optimize = true;
  return ret;
}

Pass* createInlineMainPass() { return new InlineMainPass(); }

} // namespace wasm