diff options
author | Alon Zakai <azakai@google.com> | 2021-08-09 18:06:36 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-10 01:06:36 +0000 |
commit | 1545bbdaec52c8d81e0e8f74afebe73caadc6828 (patch) | |
tree | d0a3f70d31d46324df4022c9b484dbd948aa4682 | |
parent | d1fb1e4742e259ebeec7119518169597e75b0b9d (diff) | |
download | binaryen-1545bbdaec52c8d81e0e8f74afebe73caadc6828.tar.gz binaryen-1545bbdaec52c8d81e0e8f74afebe73caadc6828.tar.bz2 binaryen-1545bbdaec52c8d81e0e8f74afebe73caadc6828.zip |
Improve optimization of call_ref into direct calls (#4068)
First, move the tiny pattern of call-ref-of-ref-func from Directize
into OptimizeInstructions. This is important because Directize is
a global optimization pass - it looks at the table to see if a
CallIndirect can be turned into a direct call. We only run global
passes at the end of the pipeline, but we don't need any global
data for call-ref of a ref-func, and OptimizeInstructions is the
place for such patterns.
Second, extend that to also handle fallthrough values. This is
less simple, but as call_ref is so inefficient, it's worth doing all
we can.
-rw-r--r-- | src/passes/Directize.cpp | 9 | ||||
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 83 | ||||
-rw-r--r-- | test/lit/passes/directize_all-features.wast | 25 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions-call_ref.wast | 203 | ||||
-rw-r--r-- | test/passes/Oz_fuzz-exec_all-features.txt | 4 |
5 files changed, 289 insertions, 35 deletions
diff --git a/src/passes/Directize.cpp b/src/passes/Directize.cpp index d8b7f82d1..0f04b5f4c 100644 --- a/src/passes/Directize.cpp +++ b/src/passes/Directize.cpp @@ -77,15 +77,6 @@ struct FunctionDirectizer : public WalkerPass<PostWalker<FunctionDirectizer>> { } } - void visitCallRef(CallRef* curr) { - if (auto* ref = curr->target->dynCast<RefFunc>()) { - // We know the target! - replaceCurrent( - Builder(*getModule()) - .makeCall(ref->func, curr->operands, curr->type, curr->isReturn)); - } - } - void doWalkFunction(Function* func) { WalkerPass<PostWalker<FunctionDirectizer>>::doWalkFunction(func); if (changedTypes) { diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 51a2095ea..e6854cf24 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -34,6 +34,7 @@ #include <ir/manipulation.h> #include <ir/match.h> #include <ir/properties.h> +#include <ir/type-updating.h> #include <ir/utils.h> #include <pass.h> #include <support/threads.h> @@ -1116,6 +1117,88 @@ struct OptimizeInstructions } } + void visitCallRef(CallRef* curr) { + if (curr->target->type == Type::unreachable) { + // The call_ref is not reached; leave this for DCE. + return; + } + + if (auto* ref = curr->target->dynCast<RefFunc>()) { + // We know the target! + replaceCurrent( + Builder(*getModule()) + .makeCall(ref->func, curr->operands, curr->type, curr->isReturn)); + return; + } + + auto features = getModule()->features; + + // It is possible the target is not a function reference, but we can infer + // the fallthrough value there. It takes more work to optimize this case, + // but it is pretty important to allow a call_ref to become a fast direct + // call, so make the effort. + if (auto* ref = + Properties::getFallthrough(curr->target, getPassOptions(), features) + ->dynCast<RefFunc>()) { + // Check if the fallthrough make sense. We may have cast it to a different + // type, which would be a problem - we'd be replacing a call_ref to one + // type with a direct call to a function of another type. That would trap + // at runtime; be careful not to emit invalid IR here. + if (curr->target->type.getHeapType() != ref->type.getHeapType()) { + return; + } + Builder builder(*getModule()); + if (curr->operands.empty()) { + // No operands, so this is simple and there is nothing to reorder: just + // emit: + // + // (block + // (drop curr->target) + // (call ref.func-from-curr->target) + // ) + replaceCurrent(builder.makeSequence( + builder.makeDrop(curr->target), + builder.makeCall(ref->func, {}, curr->type, curr->isReturn))); + return; + } + + // In the presence of operands, we must execute the code in curr->target + // after the last operand and before the call happens. Interpose at the + // last operand: + // + // (call ref.func-from-curr->target) + // (operand1) + // (..) + // (operandN-1) + // (block + // (local.set $temp (operandN)) + // (drop curr->target) + // (local.get $temp) + // ) + // ) + auto* lastOperand = curr->operands.back(); + auto lastOperandType = lastOperand->type; + if (lastOperandType == Type::unreachable) { + // The call_ref is not reached; leave this for DCE. + return; + } + if (!TypeUpdating::canHandleAsLocal(lastOperandType)) { + // We cannot create a local, so we must give up. + return; + } + Index tempLocal = builder.addVar( + getFunction(), + TypeUpdating::getValidLocalType(lastOperandType, features)); + auto* set = builder.makeLocalSet(tempLocal, lastOperand); + auto* drop = builder.makeDrop(curr->target); + auto* get = TypeUpdating::fixLocalGet( + builder.makeLocalGet(tempLocal, lastOperandType), *getModule()); + curr->operands.back() = builder.makeBlock({set, drop, get}); + replaceCurrent(builder.makeCall( + ref->func, curr->operands, curr->type, curr->isReturn)); + } + } + void visitRefEq(RefEq* curr) { // Identical references compare equal. if (areConsecutiveInputsEqual(curr->left, curr->right)) { diff --git a/test/lit/passes/directize_all-features.wast b/test/lit/passes/directize_all-features.wast index d9010a76d..54cf07556 100644 --- a/test/lit/passes/directize_all-features.wast +++ b/test/lit/passes/directize_all-features.wast @@ -494,28 +494,3 @@ ) ) ) -;; call_ref -(module - ;; CHECK: (type $i32_i32_=>_none (func (param i32 i32))) - - ;; CHECK: (func $foo (param $0 i32) (param $1 i32) - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - (func $foo (param i32) (param i32) - (unreachable) - ) - ;; CHECK: (func $bar (param $x i32) (param $y i32) - ;; CHECK-NEXT: (call $foo - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (local.get $y) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $bar (param $x i32) (param $y i32) - (call_ref - (local.get $x) - (local.get $y) - (ref.func $foo) - ) - ) -) - diff --git a/test/lit/passes/optimize-instructions-call_ref.wast b/test/lit/passes/optimize-instructions-call_ref.wast new file mode 100644 index 000000000..0a45b9e4d --- /dev/null +++ b/test/lit/passes/optimize-instructions-call_ref.wast @@ -0,0 +1,203 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_test.py and could be cleaned up. + +;; RUN: wasm-opt %s --remove-unused-names --optimize-instructions --all-features -S -o - | filecheck %s +;; remove-unused-names is to allow fallthrough computations to work on blocks + +(module + ;; CHECK: (type $i32_i32_=>_none (func (param i32 i32))) + (type $i32_i32_=>_none (func (param i32 i32))) + + ;; CHECK: (type $none_=>_i32 (func (result i32))) + (type $none_=>_i32 (func (result i32))) + + ;; CHECK: (type $none_=>_none (func)) + (type $none_=>_none (func)) + + ;; CHECK: (type $data_=>_none (func (param dataref))) + (type $data_=>_none (func (param (ref data)))) + + ;; CHECK: (type $i32_=>_none (func (param i32))) + + ;; CHECK: (elem declare func $fallthrough-no-params $fallthrough-non-nullable $foo $return-nothing) + + ;; CHECK: (func $foo (param $0 i32) (param $1 i32) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $foo (param i32) (param i32) + (unreachable) + ) + ;; CHECK: (func $call_ref-to-direct (param $x i32) (param $y i32) + ;; CHECK-NEXT: (call $foo + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $call_ref-to-direct (param $x i32) (param $y i32) + ;; This call_ref should become a direct call. + (call_ref + (local.get $x) + (local.get $y) + (ref.func $foo) + ) + ) + + ;; CHECK: (func $fallthrough (param $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (call $foo + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref $i32_i32_=>_none)) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.func $foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $fallthrough (param $x i32) + ;; This call_ref should become a direct call, even though it doesn't have a + ;; simple ref.func as the target - we need to look into the fallthrough, and + ;; handle things with locals. + (call_ref + ;; Write to $x before the block, and write to it in the block; we should not + ;; reorder these things as the side effects could alter what value appears + ;; in the get of $x. (There is a risk of reordering here if we naively moved + ;; the call target (the block) to before the first parameter (the tee). + ;; Instead, we append it after the second param (the get) which keeps the + ;; ordering as it was.) + (local.tee $x + (i32.const 1) + ) + (local.get $x) + (block (result (ref $i32_i32_=>_none)) + (local.set $x + (i32.const 2) + ) + (ref.func $foo) + ) + ) + ) + + ;; CHECK: (func $fallthrough-no-params (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref $none_=>_i32)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (ref.func $fallthrough-no-params) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $fallthrough-no-params) + ;; CHECK-NEXT: ) + (func $fallthrough-no-params (result i32) + ;; A fallthrough appears here, but there are no operands so this is easier to + ;; optimize: we can just drop the call_ref's target before the call. + (call_ref + (block (result (ref $none_=>_i32)) + (nop) + (ref.func $fallthrough-no-params) + ) + ) + ) + + ;; CHECK: (func $fallthrough-non-nullable (param $x dataref) + ;; CHECK-NEXT: (local $1 (ref null data)) + ;; CHECK-NEXT: (call $fallthrough-non-nullable + ;; CHECK-NEXT: (block (result dataref) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref $data_=>_none)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (ref.func $fallthrough-non-nullable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $fallthrough-non-nullable (param $x (ref data)) + ;; A fallthrough appears here, and in addition the last operand is non- + ;; nullable, which means we must be careful when we create a temp local for + ;; it: the local should be nullable, and gets of it should use a + ;; ref.as_non_null so that we validate. + (call_ref + (local.get $x) + (block (result (ref $data_=>_none)) + (nop) + (ref.func $fallthrough-non-nullable) + ) + ) + ) + + ;; CHECK: (func $fallthrough-bad-type (result i32) + ;; CHECK-NEXT: (call_ref + ;; CHECK-NEXT: (ref.cast + ;; CHECK-NEXT: (ref.func $return-nothing) + ;; CHECK-NEXT: (rtt.canon $none_=>_i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $fallthrough-bad-type (result i32) + ;; A fallthrough appears here, and we cast the function type to something else + ;; in a way that is bad: the actual target function has a different return + ;; type than the cast type. The cast will fail at runtime, and we should not + ;; emit non-validating code here, which would happen if we replace the + ;; call_ref that returns nothing with a call that returns an i32. + (call_ref + (ref.cast + (ref.func $return-nothing) + (rtt.canon $none_=>_i32) + ) + ) + ) + + ;; Helper function for the above test. + ;; CHECK: (func $return-nothing + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $return-nothing) + + ;; CHECK: (func $fallthrough-unreachable + ;; CHECK-NEXT: (call_ref + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (block (result (ref $i32_i32_=>_none)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (ref.func $foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $fallthrough-unreachable + ;; If the call is not reached, do not optimize it. + (call_ref + (unreachable) + (unreachable) + (block (result (ref $i32_i32_=>_none)) + (nop) + (ref.func $foo) + ) + ) + ) + + ;; CHECK: (func $ignore-unreachable + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $ignore-unreachable + ;; Ignore an unreachable call_ref target entirely. + (call_ref + (unreachable) + ) + ) +) diff --git a/test/passes/Oz_fuzz-exec_all-features.txt b/test/passes/Oz_fuzz-exec_all-features.txt index b0f4c6448..ddeacdc65 100644 --- a/test/passes/Oz_fuzz-exec_all-features.txt +++ b/test/passes/Oz_fuzz-exec_all-features.txt @@ -351,7 +351,9 @@ (call $log (i32.const 2) ) - (call $a-void-func) + (call $log + (i32.const 1337) + ) (call $log (i32.const 3) ) |