summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/Inlining.cpp68
-rw-r--r--test/lit/passes/inlining_optimize-level=3.wast204
2 files changed, 253 insertions, 19 deletions
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index f4baf706d..7a66f32a4 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -127,6 +127,14 @@ struct FunctionInfoScanner
(*infos)[getFunction()->name].hasCalls = true;
}
+ // N.B.: CallIndirect and CallRef are intentionally omitted here, as we only
+ // note direct calls. Direct calls can lead to infinite recursion
+ // which we need to avoid, while indirect ones may in theory be
+ // optimized to direct calls later, but we take that risk - which is
+ // worthwhile as if we do manage to turn an indirect call into something
+ // else then it can be a big speedup, so we do want to inline code that
+ // has such indirect calls.
+
void visitTry(Try* curr) {
if (curr->isDelegate()) {
(*infos)[getFunction()->name].hasTryDelegate = true;
@@ -335,26 +343,51 @@ struct Inlining : public Pass {
// the information for each function. recomputed in each iteraction
NameInfoMap infos;
- Index iterationNumber;
-
void run(PassRunner* runner, Module* module) override {
- Index numFunctions = module->functions.size();
- // keep going while we inline, to handle nesting. TODO: optimize
- iterationNumber = 0;
- // no point to do more iterations than the number of functions, as
- // it means we infinitely recursing (which should
- // be very rare in practice, but it is possible that a recursive call
- // can look like it is worth inlining)
- while (iterationNumber <= numFunctions) {
+ // No point to do more iterations than the number of functions, as it means
+ // we are infinitely recursing (which should be very rare in practice, but
+ // it is possible that a recursive call can look like it is worth inlining).
+ Index iterationNumber = 0;
+
+ auto numOriginalFunctions = module->functions.size();
+
+ // Track in how many iterations a function was inlined into. We are willing
+ // to inline many times into a function within an iteration, as e.g. that
+ // helps the case of many calls of a small getter. However, if we only do
+ // more inlining in separate iterations then it is likely code that was the
+ // result of previous inlinings that is now being inlined into. That is, an
+ // old inlining added a call to somewhere, and now we are inlining into that
+ // call. This is typically recursion, which to some extent can help, but
+ // then like loop unrolling it loses its benefit quickly, so set a limit
+ // here.
+ std::unordered_map<Function*, Index> iterationsInlinedInto;
+
+ const size_t MaxIterationsForFunc = 5;
+
+ while (iterationNumber <= numOriginalFunctions) {
#ifdef INLINING_DEBUG
std::cout << "inlining loop iter " << iterationNumber
- << " (numFunctions: " << numFunctions << ")\n";
+ << " (numFunctions: " << module->functions.size() << ")\n";
#endif
+
calculateInfos(module);
- if (!iteration(runner, module)) {
+
+ iterationNumber++;
+ std::unordered_set<Function*> inlinedInto;
+ iteration(runner, module, inlinedInto);
+ if (inlinedInto.empty()) {
return;
}
- iterationNumber++;
+
+#ifdef INLINING_DEBUG
+ std::cout << " inlined into " << inlinedInto.size() << " funcs.\n";
+#endif
+
+ for (auto* func : inlinedInto) {
+ if (++iterationsInlinedInto[func] >= MaxIterationsForFunc) {
+ return;
+ }
+ }
}
}
@@ -380,7 +413,9 @@ struct Inlining : public Pass {
}
}
- bool iteration(PassRunner* runner, Module* module) {
+ void iteration(PassRunner* runner,
+ Module* module,
+ std::unordered_set<Function*>& inlinedInto) {
// decide which to inline
InliningState state;
ModuleUtils::iterDefinedFunctions(*module, [&](Function* func) {
@@ -389,7 +424,7 @@ struct Inlining : public Pass {
}
});
if (state.worthInlining.size() == 0) {
- return false;
+ return;
}
// fill in actionsForFunction, as we operate on it in parallel (each
// function to its own entry)
@@ -401,7 +436,6 @@ struct Inlining : public Pass {
// perform inlinings TODO: parallelize
std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined
// which functions were inlined into
- std::unordered_set<Function*> inlinedInto;
for (auto& func : module->functions) {
// if we've inlined a function, don't inline into it in this iteration,
// avoid risk of races
@@ -446,8 +480,6 @@ struct Inlining : public Pass {
return inlinedUses.count(name) && inlinedUses[name] == info.refs &&
!info.usedGlobally;
});
- // return whether we did any work
- return inlinedUses.size() > 0;
}
// Checks if the combined size of the code after inlining is under the
diff --git a/test/lit/passes/inlining_optimize-level=3.wast b/test/lit/passes/inlining_optimize-level=3.wast
index ad5f2b44e..c9e60fc47 100644
--- a/test/lit/passes/inlining_optimize-level=3.wast
+++ b/test/lit/passes/inlining_optimize-level=3.wast
@@ -4,6 +4,8 @@
;; RUN: foreach %s %t wasm-opt --inlining --optimize-level=3 -S -o - | filecheck %s
(module
+ ;; CHECK: (type $i32_=>_i32 (func (param i32) (result i32)))
+
;; CHECK: (type $none_=>_i32 (func (result i32)))
;; CHECK: (type $none_=>_none (func))
@@ -19,6 +21,14 @@
(table 1 1 funcref)
(elem (i32.const 0) $no-loops-but-one-use-but-tabled)
+ ;; CHECK: (export "A" (func $recursive-inlining-1))
+
+ ;; CHECK: (export "B" (func $recursive-inlining-2))
+
+ ;; CHECK: (export "BA" (func $b-recursive-inlining-1))
+
+ ;; CHECK: (export "BB" (func $b-recursive-inlining-2))
+
;; CHECK: (func $yes (result i32)
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
@@ -220,5 +230,197 @@
(drop (call $no-loops-but-one-use-but-exported))
(drop (call $no-loops-but-one-use-but-tabled))
)
-)
+ ;; Two functions that call each other. (Exported, so that they are not removed
+ ;; after inlining.) We should only perform a limited amount of inlining here -
+ ;; a little might help, but like loop unrolling it loses its benefit quickly.
+ ;; Specifically here we will see the infinite recursion after one inlining,
+ ;; and stop (since we do not inline a method into itself).
+
+ ;; CHECK: (func $recursive-inlining-1 (param $x i32) (result i32)
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (block $__inlined_func$recursive-inlining-2 (result i32)
+ ;; CHECK-NEXT: (local.set $1
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (call $recursive-inlining-1
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $recursive-inlining-1 (export "A") (param $x i32) (result i32)
+ (call $recursive-inlining-2
+ (local.get $x)
+ )
+ )
+
+ ;; CHECK: (func $recursive-inlining-2 (param $x i32) (result i32)
+ ;; CHECK-NEXT: (call $recursive-inlining-1
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $recursive-inlining-2 (export "B") (param $x i32) (result i32)
+ (call $recursive-inlining-1
+ (local.get $x)
+ )
+ )
+
+ ;; As above, but both call the second. The first will inline the second
+ ;; several times before hitting the limit (of around 5).
+
+ ;; CHECK: (func $b-recursive-inlining-1 (param $x i32) (result i32)
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (local $2 i32)
+ ;; CHECK-NEXT: (local $3 i32)
+ ;; CHECK-NEXT: (local $4 i32)
+ ;; CHECK-NEXT: (local $5 i32)
+ ;; CHECK-NEXT: (block $__inlined_func$b-recursive-inlining-2 (result i32)
+ ;; CHECK-NEXT: (local.set $1
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (block $__inlined_func$b-recursive-inlining-20 (result i32)
+ ;; CHECK-NEXT: (local.set $2
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (block $__inlined_func$b-recursive-inlining-21 (result i32)
+ ;; CHECK-NEXT: (local.set $3
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (block $__inlined_func$b-recursive-inlining-22 (result i32)
+ ;; CHECK-NEXT: (local.set $4
+ ;; CHECK-NEXT: (local.get $3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (block $__inlined_func$b-recursive-inlining-23 (result i32)
+ ;; CHECK-NEXT: (local.set $5
+ ;; CHECK-NEXT: (local.get $4)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (call $b-recursive-inlining-2
+ ;; CHECK-NEXT: (local.get $5)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $b-recursive-inlining-1 (export "BA") (param $x i32) (result i32)
+ (call $b-recursive-inlining-2
+ (local.get $x)
+ )
+ )
+
+ ;; CHECK: (func $b-recursive-inlining-2 (param $x i32) (result i32)
+ ;; CHECK-NEXT: (call $b-recursive-inlining-2
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $b-recursive-inlining-2 (export "BB") (param $x i32) (result i32)
+ (call $b-recursive-inlining-2
+ (local.get $x)
+ )
+ )
+
+ ;; Verify that we can do a large number (larger than the limit of ~5 we just
+ ;; saw for recursion) of calls into a single function, if we can do it all in
+ ;; a single iteration (which is the usual case, and the case here).
+
+ ;; CHECK: (func $call-many-getters
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter0
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter1
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter2
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter3
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter4
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter5
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter6
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter7
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block
+ ;; CHECK-NEXT: (block $__inlined_func$getter8
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $call-many-getters
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ (call $getter)
+ )
+
+ (func $getter
+ (drop (i32.const 1))
+ )
+)
+.