diff options
-rw-r--r-- | src/passes/Precompute.cpp | 330 | ||||
-rw-r--r-- | test/lit/passes/precompute-partial.wast | 788 |
2 files changed, 1106 insertions, 12 deletions
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 4178342a2..5baf2331f 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -27,16 +27,19 @@ // looked at. // -#include <ir/literal-utils.h> -#include <ir/local-graph.h> -#include <ir/manipulation.h> -#include <ir/properties.h> -#include <ir/utils.h> -#include <pass.h> -#include <support/unique_deferring_queue.h> -#include <wasm-builder.h> -#include <wasm-interpreter.h> -#include <wasm.h> +#include "ir/effects.h" +#include "ir/iteration.h" +#include "ir/literal-utils.h" +#include "ir/local-graph.h" +#include "ir/manipulation.h" +#include "ir/properties.h" +#include "ir/utils.h" +#include "pass.h" +#include "support/insert_ordered.h" +#include "support/unique_deferring_queue.h" +#include "wasm-builder.h" +#include "wasm-interpreter.h" +#include "wasm.h" namespace wasm { @@ -210,9 +213,16 @@ struct Precompute GetValues getValues; HeapValues heapValues; + bool canPartiallyPrecompute; + void doWalkFunction(Function* func) { + // Perform partial precomputing only when the optimization level is non- + // trivial, as it is slower and less likely to help. + canPartiallyPrecompute = getPassOptions().optimizeLevel >= 2; + // Walk the function and precompute things. super::doWalkFunction(func); + partiallyPrecompute(func); if (!propagate) { return; } @@ -226,11 +236,13 @@ struct Precompute // another walk to apply them and perhaps other optimizations that are // unlocked. super::doWalkFunction(func); + // We could also try to partially precompute again, but that is a somewhat + // heavy operation, so we only do it the first time, and leave such things + // for later runs of this pass and for --converge. } // Note that in principle even more cycles could find further work here, in // very rare cases. To avoid constructing a LocalGraph again just for that - // unlikely chance, we leave such things for later runs of this pass and for - // --converge. + // unlikely chance, we leave such things for later. } template<typename T> void reuseConstantNode(T* curr, Flow flow) { @@ -281,6 +293,9 @@ struct Precompute } if (flow.breaking()) { if (flow.breakTo == NONCONSTANT_FLOW) { + // This cannot be turned into a constant, but perhaps we can partially + // precompute it. + considerPartiallyPrecomputing(curr); return; } if (flow.breakTo == RETURN_FLOW) { @@ -319,6 +334,273 @@ struct Precompute } } + // If we failed to precompute a constant, perhaps we can still precompute part + // of an expression. Specifically, consider this case: + // + // (A + // (select + // (B) + // (C) + // (condition) + // ) + // ) + // + // Perhaps we can compute A(B) and A(C). If so, we can emit a better select: + // + // (select + // (constant result of A(B)) + // (constant result of A(C)) + // (condition) + // ) + // + // Note that in general for code size we want to move operations *out* of + // selects and ifs (OptimizeInstructions does that), but here we are + // computing two constants which replace three expressions, so it is + // worthwhile. + // + // To do such partial precomputing, in the main pass we note selects that look + // promising. If we find any then we do a second pass later just for that (as + // doing so requires walking up the stack in a manner that we want to avoid in + // the main pass for overhead reasons; see below). + // + // Note that selects are all we really need here: Other passes would turn an + // if into a select if the arms are simple enough, and only in those cases + // (simple arms) do we have a chance at partially precomputing. For example, + // if an arm is a constant then we can, but if it is a call then we can't.) + // However, there are cases like an if with arms with side effects that end in + // precomputable things, that are missed atm TODO + std::unordered_set<Select*> partiallyPrecomputable; + + void considerPartiallyPrecomputing(Expression* curr) { + if (!canPartiallyPrecompute) { + return; + } + + if (auto* select = curr->dynCast<Select>()) { + // We only have a reasonable hope of success if the select arms are things + // like constants or global gets. At a first approximation, allow the set + // of things we allow in constant initializers (but we can probably allow + // more here TODO). + // + // We also ignore selects with no parent (that are the entire function + // body) as then there is nothing to optimize into their arms. + auto& wasm = *getModule(); + if (Properties::isValidConstantExpression(wasm, select->ifTrue) && + Properties::isValidConstantExpression(wasm, select->ifFalse) && + getFunction()->body != select) { + partiallyPrecomputable.insert(select); + } + } + } + + // To partially precompute selects we walk up the stack from them, like this: + // + // (A + // (B + // (select + // (C) + // (D) + // (condition) + // ) + // ) + // ) + // + // First we try to apply B to C and D. If that works, we arrive at this: + // + // (A + // (select + // (constant result of B(C)) + // (constant result of B(D)) + // (condition) + // ) + // ) + // + // We can then proceed to perhaps apply A. However, even if we failed to apply + // B then we can try to apply A and B together, because that combination may + // succeed where incremental work fails, for example: + // + // (global $C + // (struct.new ;; outer + // (struct.new ;; inner + // (i32.const 10) + // ) + // ) + // ) + // + // (struct.get ;; outer + // (struct.get ;; inner + // (select + // (global.get $C) + // (global.get $D) + // (condition) + // ) + // ) + // ) + // + // Applying the inner struct.get to $C leads us to the inner struct.new, but + // that is an interior pointer in the global - it is not something we can + // refer to using a global.get, so precomputing it fails. However, when we + // apply both struct.gets at once we arrive at the outer struct.new, which is + // in fact the global $C, and we succeed. + void partiallyPrecompute(Function* func) { + if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) { + // Nothing to do. + return; + } + + // Walk the function to find the parent stacks of the promising selects. We + // copy the stacks and process them later. We do it like this because if we + // wanted to process stacks as we reached them then we'd trip over + // ourselves: when we optimize we replace a parent, but that parent is an + // expression we'll reach later in the walk, so modifying it is unsafe. + struct StackFinder : public ExpressionStackWalker<StackFinder> { + Precompute& parent; + + StackFinder(Precompute& parent) : parent(parent) {} + + // We will later iterate on this in the order of insertion, which keeps + // things deterministic, and also usually lets us do consecutive work + // like a select nested in another select's condition, simply because we + // will traverse the selects in postorder (however, because we cannot + // always succeed in an incremental manner - see the comment on this + // function - it is possible in theory that some work can happen only in a + // later execution of the pass). + InsertOrderedMap<Select*, ExpressionStack> stackMap; + + void visitSelect(Select* curr) { + if (parent.partiallyPrecomputable.count(curr)) { + stackMap[curr] = expressionStack; + } + } + } stackFinder(*this); + stackFinder.walkFunction(func); + + // Note which expressions we've modified as we go, as it is invalid to + // modify more than once. This could happen in theory in a situation like + // this: + // + // (ternary.f32.max ;; fictional instruction for explanatory purposes + // (select ..) + // (select ..) + // (f32.infinity) + // ) + // + // When we consider the first select we can see that the computation result + // is always infinity, so we can optimize here and replace the ternary. Then + // the same thing happens with the second select, causing the ternary to be + // replaced again, which is unsafe because it no longer exists after we + // precomputed it the first time. (Note that in this example the result is + // the same either way, but at least in theory an instruction could exist + // for whom there was a difference.) In practice it does not seem that wasm + // has instructions capable of this atm but this code is still useful to + // guard against future problems, and as a minor speedup (quickly skip code + // if it was already modified). + std::unordered_set<Expression*> modified; + + for (auto& [select, stack] : stackFinder.stackMap) { + // Each stack ends in the select itself, and contains more than the select + // itself (otherwise we'd have ignored the select), i.e., the select has a + // parent that we can try to optimize into the arms. + assert(stack.back() == select); + assert(stack.size() >= 2); + Index selectIndex = stack.size() - 1; + assert(selectIndex >= 1); + + if (modified.count(select)) { + // This select was modified; go to the next one. + continue; + } + + // Go up through the parents, until we can't do any more work. At each + // parent we'll try to execute it and all intermediate parents into the + // select arms. + for (Index parentIndex = selectIndex - 1; parentIndex != Index(-1); + parentIndex--) { + auto* parent = stack[parentIndex]; + if (modified.count(parent)) { + // This parent was modified; exit the loop on parents as no upper + // parent is valid to try either. + break; + } + + // If the parent lacks a concrete type then we can't move it into the + // select: the select needs a concrete (and non-tuple) type. For example + // if the parent is a drop or is unreachable, those are things we don't + // want to handle, and we stop here (once we see one such parent we + // can't expect to make any more progress). + if (!parent->type.isConcrete() || parent->type.isTuple()) { + break; + } + + // We are precomputing the select arms, but leaving the condition as-is. + // If the condition breaks to the parent, then we can't move the parent + // into the select arms: + // + // (block $name ;; this must stay outside of the select + // (select + // (B) + // (C) + // (block ;; condition + // (br_if $target + // + // Ignore all control flow for simplicity, as they aren't interesting + // for us, and other passes should have removed them anyhow. + if (Properties::isControlFlowStructure(parent)) { + break; + } + + // This looks promising, so try to precompute here. What we do is + // precompute twice, once with the select replaced with the left arm, + // and once with the right. If both succeed then we can create a new + // select (with the same condition as before) whose arms are the + // precomputed values. + auto isValidPrecomputation = [&](const Flow& flow) { + // For now we handle simple concrete values. We could also handle + // breaks in principle TODO + return canEmitConstantFor(flow.values) && !flow.breaking() && + flow.values.isConcrete(); + }; + + // Find the pointer to the select in its immediate parent so that we can + // replace it first with one arm and then the other. + auto** pointerToSelect = + getChildPointerInImmediateParent(stack, selectIndex, func); + *pointerToSelect = select->ifTrue; + auto ifTrue = precomputeExpression(parent); + if (isValidPrecomputation(ifTrue)) { + *pointerToSelect = select->ifFalse; + auto ifFalse = precomputeExpression(parent); + if (isValidPrecomputation(ifFalse)) { + // Wonderful, we can precompute here! The select can now contain the + // computed values in its arms. + select->ifTrue = ifTrue.getConstExpression(*getModule()); + select->ifFalse = ifFalse.getConstExpression(*getModule()); + select->finalize(); + + // The parent of the select is now replaced by the select. + auto** pointerToParent = + getChildPointerInImmediateParent(stack, parentIndex, func); + *pointerToParent = select; + + // Update state for further iterations: Mark everything modified and + // move the select to the parent's location. + for (Index i = parentIndex; i <= selectIndex; i++) { + modified.insert(stack[i]); + } + selectIndex = parentIndex; + stack[selectIndex] = select; + stack.resize(selectIndex + 1); + } + } + + // Whether we succeeded to precompute here or not, restore the parent's + // pointer to its original state (if we precomputed, the parent is no + // longer in use, but there is no harm in modifying it). + *pointerToSelect = select; + } + } + } + void visitFunction(Function* curr) { // removing breaks can alter types ReFinalize().walkFunctionInModule(curr, getModule()); @@ -531,6 +813,30 @@ private: return true; } + + // Helpers for partial precomputing. + + // Given a stack of expressions and the index of an expression in it, find + // the pointer to that expression in the parent. This gives us a pointer that + // allows us to replace the expression. + Expression** getChildPointerInImmediateParent(const ExpressionStack& stack, + Index index, + Function* func) { + if (index == 0) { + // There is nothing above this expression, so the pointer referring to it + // is the function's body. + return &func->body; + } + + auto* child = stack[index]; + for (auto** currChild : ChildIterator(stack[index - 1]).children) { + if (*currChild == child) { + return currChild; + } + } + + WASM_UNREACHABLE("child not found in parent"); + } }; Pass* createPrecomputePass() { return new Precompute(false); } diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast new file mode 100644 index 000000000..86a55f56a --- /dev/null +++ b/test/lit/passes/precompute-partial.wast @@ -0,0 +1,788 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. + +;; RUN: foreach %s %t wasm-opt --precompute --optimize-level=2 -all -S -o - | filecheck %s + +(module + ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-1 (param $param i32) (result i32) + ;; The eqz can be applied to the select arms. + (i32.eqz + (select + (i32.const 42) + (i32.const 1337) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $simple-2 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-2 (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (i32.const 10) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $simple-3 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-3 (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 20) + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $simple-4 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-4 (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $not-left (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $not-left (param $param i32) (result i32) + (i32.eqz + (select + (local.get $param) ;; this cannot be precomputed, so we do nothing + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $not-right (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $not-right (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (local.get $param) ;; this cannot be precomputed, so we do nothing + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $not-neither (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $not-neither (param $param i32) (result i32) + (i32.eqz + (select + (local.get $param) ;; these cannot be precomputed, + (local.get $param) ;; so we do nothing + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $binary (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: (i32.const 21) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary (param $param i32) (result i32) + (i32.add + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + (i32.const 1) + ) + ) + + ;; CHECK: (func $binary-2 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: (i32.const 21) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary-2 (param $param i32) (result i32) + ;; As above but with the select in the other arm. + (i32.add + (i32.const 1) + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $binary-3 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary-3 (param $param i32) (result i32) + ;; Two selects. We do not optimize here (but in theory could, as the + ;; condition is identical - other passes should merge that). + (i32.add + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + (select + (i32.const 30) + (i32.const 40) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $unreachable (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $unreachable (param $param i32) (result i32) + ;; We should ignore unreachable code like this. We would need to make sure + ;; to emit the proper type on the outside, and it's simpler to just defer + ;; this to DCE. + (i32.eqz + (select + (i32.const 0) + (i32.const 0) + (unreachable) + ) + ) + ) + + ;; CHECK: (func $tuple (type $1) (param $param i32) (result i32 i32) + ;; CHECK-NEXT: (tuple.make 2 + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tuple (param $param i32) (result i32 i32) + ;; We should ignore tuples, as select outputs cannot be tuples. + (tuple.make 2 + (select + (i32.const 0) + (i32.const 1) + (local.get $param) + ) + (i32.const 2) + ) + ) + + ;; CHECK: (func $control-flow (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (block $target (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (br_if $target + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $control-flow (param $param i32) (result i32) + ;; We ignore control flow structures to avoid issues with removing them (as + ;; the condition might refer to them, as in this testcase). + (block $target (result i32) + (select + (i32.const 0) + (i32.const 1) + ;; If we precomputed the block into the select arms, this br_if + ;; would have nowhere to go. + (br_if $target + (i32.const 1) + (local.get $param) + ) + ) + ) + ) + + ;; CHECK: (func $break (type $0) (param $x i32) (result i32) + ;; CHECK-NEXT: (block $label (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (br_if $label + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $break (param $x i32) (result i32) + ;; We should change nothing here: we do not partially precompute breaks yet + ;; TODO + (block $label (result i32) + (drop + (br_if $label + (select + (i32.const 0) + (i32.const 1) + (local.get $x) + ) + (i32.const 2) + ) + ) + (i32.const 3) + ) + ) + + ;; CHECK: (func $toplevel (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $toplevel (param $param i32) (result i32) + ;; There is nothing to do for a select with no parent, but do not error at + ;; least. + (select + (i32.const 0) + (i32.const 10) + (local.get $param) + ) + ) + + ;; CHECK: (func $toplevel-nested (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $toplevel-nested (param $param i32) (result i32) + ;; The inner select can be optimized: here we apply the outer select onto + ;; the inner one (the same as any other thing we apply into the select arms, + ;; but it happens to be a select itself). + (select + (i32.const 0) + (i32.const 10) + (select + (i32.const 0) + (i32.const 20) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $toplevel-nested-flip (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $toplevel-nested-flip (param $param i32) (result i32) + ;; As above but with inner select arms flipped. That flips the result. + (select + (i32.const 0) + (i32.const 10) + (select + (i32.const 20) + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $toplevel-nested-indirect (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $toplevel-nested-indirect (param $param i32) (result i32) + ;; As above, with an instruction in the middle. Again, we can optimize. + (select + (i32.const 0) + (i32.const 10) + (i32.eqz + (select + (i32.const 0) + (i32.const 20) + (local.get $param) + ) + ) + ) + ) + + ;; CHECK: (func $nested (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested (param $param i32) (result i32) + ;; As above, with an outer eqz as well. Now both the outer and inner selects + ;; can be optimized, and after the inner one is, it can be optimized with + ;; the outer one as well, leaving a single select and no eqz. + (i32.eqz + (select + (i32.const 0) + (i32.const 10) + (i32.eqz + (select + (i32.const 0) + (i32.const 20) + (local.get $param) + ) + ) + ) + ) + ) + + ;; CHECK: (func $nested-arms (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 50) + ;; CHECK-NEXT: (i32.const 60) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested-arms (param $param i32) (result i32) + ;; We do nothing for selects nested directly in select arms, but do not + ;; error at least. Note that we could apply the eqz two levels deep TODO + (i32.eqz + (select + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + (select + (i32.const 30) + (i32.const 40) + (local.get $param) + ) + (select + (i32.const 50) + (i32.const 60) + (local.get $param) + ) + ) + ) + ) + + ;; CHECK: (func $nested-indirect-arms (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested-indirect-arms (param $param i32) (result i32) + ;; As above, but now there is something in the middle, that can be optimized + ;; into those selects. + (i32.eqz + (select + (i32.eqz + (select + (i32.const 0) + (i32.const 10) + (local.get $param) + ) + ) + (i32.eqz + (select + (i32.const 20) + (i32.const 0) + (local.get $param) + ) + ) + (i32.eqz + (select + (i32.const 0) + (i32.const 30) + (local.get $param) + ) + ) + ) + ) + ) +) + +;; References. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $vtable (sub (struct (field funcref)))) + (type $vtable (sub (struct funcref))) + + ;; CHECK: (type $specific-func (sub (func (result i32)))) + (type $specific-func (sub (func (result i32)))) + + ;; CHECK: (type $specific-func.sub (sub $specific-func (func (result i32)))) + (type $specific-func.sub (sub $specific-func (func (result i32)))) + + ;; CHECK: (type $vtable.sub (sub $vtable (struct (field (ref $specific-func))))) + (type $vtable.sub (sub $vtable (struct (field (ref $specific-func))))) + ) + + ;; CHECK: (global $A$vtable (ref $vtable) (struct.new $vtable + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: )) + (global $A$vtable (ref $vtable) (struct.new $vtable + (ref.func $A$func) + )) + + ;; CHECK: (global $B$vtable (ref $vtable) (struct.new $vtable + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: )) + (global $B$vtable (ref $vtable) (struct.new $vtable + (ref.func $B$func) + )) + + ;; CHECK: (func $test-expanded (type $0) (param $x i32) (result funcref) + ;; CHECK-NEXT: (select (result (ref $specific-func)) + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded (param $x i32) (result funcref) + ;; We can apply the struct.get to the select arms: As the globals are all + ;; immutable, we can read the function references from them, and emit a + ;; select on those values, saving the struct.get operation entirely. + ;; + ;; Note that this test also checks updating the type of the select, which + ;; initially returned a struct, and afterwards returns a func. + (struct.get $vtable 0 + (select + (global.get $A$vtable) + (global.get $B$vtable) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $test-subtyping (type $0) (param $x i32) (result funcref) + ;; CHECK-NEXT: (select (result (ref $specific-func)) + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-subtyping (param $x i32) (result funcref) + ;; As above, but now we have struct.news directly in the arms, and one is + ;; of a subtype of the final result (which should not prevent optimization). + (struct.get $vtable.sub 0 + (select + (struct.new $vtable.sub + (ref.func $A$func) + ) + (struct.new $vtable.sub + (ref.func $B$func) ;; this function is of a subtype of the field type + ) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $test-expanded-twice (type $5) (param $x i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded-twice (param $x i32) (result i32) + ;; As $test-expanded, but we have two operations that can be applied. Both + ;; references are non-null, so the select arms will become 0. + (ref.is_null + (struct.get $vtable 0 + (select + (global.get $A$vtable) + (global.get $B$vtable) + (local.get $x) + ) + ) + ) + ) + + ;; CHECK: (func $test-expanded-twice-stop (type $6) (param $x i32) + ;; CHECK-NEXT: (call $send-i32 + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded-twice-stop (param $x i32) + ;; As $test-expanded-twice, but we stop after two expansions when we fail on + ;; the call. + (call $send-i32 + (ref.is_null + (struct.get $vtable 0 + (select + (global.get $A$vtable) + (global.get $B$vtable) + (local.get $x) + ) + ) + ) + ) + ) + + ;; CHECK: (func $send-i32 (type $6) (param $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $send-i32 (param $x i32) + ;; Helper for above. + ) + + ;; CHECK: (func $binary (type $5) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary (param $param i32) (result i32) + ;; This is a common pattern in J2Wasm output. Note that this is optimized + ;; because immutable globals can be compared at compile time. + (ref.eq + (select + (global.get $A$vtable) + (global.get $B$vtable) + (local.get $param) + ) + (global.get $A$vtable) + ) + ) + + ;; CHECK: (func $test-trap (type $0) (param $x i32) (result funcref) + ;; CHECK-NEXT: (struct.get $vtable 0 + ;; CHECK-NEXT: (select (result (ref null $vtable)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: (global.get $B$vtable) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-trap (param $x i32) (result funcref) + ;; One arm has a null, which makes the struct.get trap, so we ignore this + ;; for now. TODO: handle traps + (struct.get $vtable 0 + (select + (ref.null $vtable) + (global.get $B$vtable) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $A$func (type $specific-func) (result i32) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $A$func (type $specific-func) (result i32) + ;; Helper for above. + (i32.const 1) + ) + + ;; CHECK: (func $B$func (type $specific-func.sub) (result i32) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + (func $B$func (type $specific-func.sub) (result i32) + ;; Helper for above. + (i32.const 2) + ) +) + +;; References with nested globals. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $vtable (sub (struct (field funcref)))) + (type $vtable (sub (struct funcref))) + + ;; CHECK: (type $itable (sub (struct (field (ref $vtable))))) + (type $itable (sub (struct (ref $vtable)))) + ) + + ;; CHECK: (global $A$itable (ref $itable) (struct.new $itable + ;; CHECK-NEXT: (struct.new $vtable + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: )) + (global $A$itable (ref $itable) (struct.new $itable + (struct.new $vtable + (ref.func $A$func) + ) + )) + + ;; CHECK: (global $B$itable (ref $itable) (struct.new $itable + ;; CHECK-NEXT: (struct.new $vtable + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: )) + (global $B$itable (ref $itable) (struct.new $itable + (struct.new $vtable + (ref.func $B$func) + ) + )) + + ;; CHECK: (func $test-expanded (type $3) (param $x i32) (result funcref) + ;; CHECK-NEXT: (select (result (ref $2)) + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded (param $x i32) (result funcref) + ;; Nesting in the global declarations means we cannot precompute the inner + ;; struct.get by itself (as that would return the inner struct.new), but + ;; when we do the outer struct.get, it all comes together. This is a common + ;; pattern in J2Wasm output. + (struct.get $vtable 0 + (struct.get $itable 0 + (select + (global.get $A$itable) + (global.get $B$itable) + (local.get $x) + ) + ) + ) + ) + + ;; CHECK: (func $test-expanded-almost (type $4) (param $x i32) (result anyref) + ;; CHECK-NEXT: (struct.get $itable 0 + ;; CHECK-NEXT: (select (result (ref $itable)) + ;; CHECK-NEXT: (global.get $A$itable) + ;; CHECK-NEXT: (global.get $B$itable) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded-almost (param $x i32) (result anyref) + ;; As above, but without the outer struct.get. We get "stuck" with the + ;; inner part of the global, as explained there, and fail to optimize here. + (struct.get $itable 0 + (select + (global.get $A$itable) + (global.get $B$itable) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $A$func (type $2) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $A$func + ;; Helper for above. + ) + + ;; CHECK: (func $B$func (type $2) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $B$func + ;; Helper for above. + ) +) |