summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/Precompute.cpp330
-rw-r--r--test/lit/passes/precompute-partial.wast788
2 files changed, 1106 insertions, 12 deletions
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index 4178342a2..5baf2331f 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -27,16 +27,19 @@
// looked at.
//
-#include <ir/literal-utils.h>
-#include <ir/local-graph.h>
-#include <ir/manipulation.h>
-#include <ir/properties.h>
-#include <ir/utils.h>
-#include <pass.h>
-#include <support/unique_deferring_queue.h>
-#include <wasm-builder.h>
-#include <wasm-interpreter.h>
-#include <wasm.h>
+#include "ir/effects.h"
+#include "ir/iteration.h"
+#include "ir/literal-utils.h"
+#include "ir/local-graph.h"
+#include "ir/manipulation.h"
+#include "ir/properties.h"
+#include "ir/utils.h"
+#include "pass.h"
+#include "support/insert_ordered.h"
+#include "support/unique_deferring_queue.h"
+#include "wasm-builder.h"
+#include "wasm-interpreter.h"
+#include "wasm.h"
namespace wasm {
@@ -210,9 +213,16 @@ struct Precompute
GetValues getValues;
HeapValues heapValues;
+ bool canPartiallyPrecompute;
+
void doWalkFunction(Function* func) {
+ // Perform partial precomputing only when the optimization level is non-
+ // trivial, as it is slower and less likely to help.
+ canPartiallyPrecompute = getPassOptions().optimizeLevel >= 2;
+
// Walk the function and precompute things.
super::doWalkFunction(func);
+ partiallyPrecompute(func);
if (!propagate) {
return;
}
@@ -226,11 +236,13 @@ struct Precompute
// another walk to apply them and perhaps other optimizations that are
// unlocked.
super::doWalkFunction(func);
+ // We could also try to partially precompute again, but that is a somewhat
+ // heavy operation, so we only do it the first time, and leave such things
+ // for later runs of this pass and for --converge.
}
// Note that in principle even more cycles could find further work here, in
// very rare cases. To avoid constructing a LocalGraph again just for that
- // unlikely chance, we leave such things for later runs of this pass and for
- // --converge.
+ // unlikely chance, we leave such things for later.
}
template<typename T> void reuseConstantNode(T* curr, Flow flow) {
@@ -281,6 +293,9 @@ struct Precompute
}
if (flow.breaking()) {
if (flow.breakTo == NONCONSTANT_FLOW) {
+ // This cannot be turned into a constant, but perhaps we can partially
+ // precompute it.
+ considerPartiallyPrecomputing(curr);
return;
}
if (flow.breakTo == RETURN_FLOW) {
@@ -319,6 +334,273 @@ struct Precompute
}
}
+ // If we failed to precompute a constant, perhaps we can still precompute part
+ // of an expression. Specifically, consider this case:
+ //
+ // (A
+ // (select
+ // (B)
+ // (C)
+ // (condition)
+ // )
+ // )
+ //
+ // Perhaps we can compute A(B) and A(C). If so, we can emit a better select:
+ //
+ // (select
+ // (constant result of A(B))
+ // (constant result of A(C))
+ // (condition)
+ // )
+ //
+ // Note that in general for code size we want to move operations *out* of
+ // selects and ifs (OptimizeInstructions does that), but here we are
+ // computing two constants which replace three expressions, so it is
+ // worthwhile.
+ //
+ // To do such partial precomputing, in the main pass we note selects that look
+ // promising. If we find any then we do a second pass later just for that (as
+ // doing so requires walking up the stack in a manner that we want to avoid in
+ // the main pass for overhead reasons; see below).
+ //
+ // Note that selects are all we really need here: Other passes would turn an
+ // if into a select if the arms are simple enough, and only in those cases
+ // (simple arms) do we have a chance at partially precomputing. For example,
+ // if an arm is a constant then we can, but if it is a call then we can't.
+ // However, there are cases like an if with arms with side effects that end in
+ // precomputable things, that are missed atm TODO
+ std::unordered_set<Select*> partiallyPrecomputable;
+
+ void considerPartiallyPrecomputing(Expression* curr) {
+ if (!canPartiallyPrecompute) {
+ return;
+ }
+
+ if (auto* select = curr->dynCast<Select>()) {
+ // We only have a reasonable hope of success if the select arms are things
+ // like constants or global gets. At a first approximation, allow the set
+ // of things we allow in constant initializers (but we can probably allow
+ // more here TODO).
+ //
+ // We also ignore selects with no parent (that are the entire function
+ // body) as then there is nothing to optimize into their arms.
+ auto& wasm = *getModule();
+ if (Properties::isValidConstantExpression(wasm, select->ifTrue) &&
+ Properties::isValidConstantExpression(wasm, select->ifFalse) &&
+ getFunction()->body != select) {
+ partiallyPrecomputable.insert(select);
+ }
+ }
+ }
+
+ // To partially precompute selects we walk up the stack from them, like this:
+ //
+ // (A
+ // (B
+ // (select
+ // (C)
+ // (D)
+ // (condition)
+ // )
+ // )
+ // )
+ //
+ // First we try to apply B to C and D. If that works, we arrive at this:
+ //
+ // (A
+ // (select
+ // (constant result of B(C))
+ // (constant result of B(D))
+ // (condition)
+ // )
+ // )
+ //
+ // We can then proceed to perhaps apply A. However, even if we failed to apply
+ // B then we can try to apply A and B together, because that combination may
+ // succeed where incremental work fails, for example:
+ //
+ // (global $C
+ // (struct.new ;; outer
+ // (struct.new ;; inner
+ // (i32.const 10)
+ // )
+ // )
+ // )
+ //
+ // (struct.get ;; outer
+ // (struct.get ;; inner
+ // (select
+ // (global.get $C)
+ // (global.get $D)
+ // (condition)
+ // )
+ // )
+ // )
+ //
+ // Applying the inner struct.get to $C leads us to the inner struct.new, but
+ // that is an interior pointer in the global - it is not something we can
+ // refer to using a global.get, so precomputing it fails. However, when we
+ // apply both struct.gets at once we arrive at the outer struct.new, which is
+ // in fact the global $C, and we succeed.
+ void partiallyPrecompute(Function* func) {
+ if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) {
+ // Nothing to do.
+ return;
+ }
+
+ // Walk the function to find the parent stacks of the promising selects. We
+ // copy the stacks and process them later. We do it like this because if we
+ // wanted to process stacks as we reached them then we'd trip over
+ // ourselves: when we optimize we replace a parent, but that parent is an
+ // expression we'll reach later in the walk, so modifying it is unsafe.
+ struct StackFinder : public ExpressionStackWalker<StackFinder> {
+ Precompute& parent;
+
+ StackFinder(Precompute& parent) : parent(parent) {}
+
+ // We will later iterate on this in the order of insertion, which keeps
+ // things deterministic, and also usually lets us do consecutive work
+ // like a select nested in another select's condition, simply because we
+ // will traverse the selects in postorder (however, because we cannot
+ // always succeed in an incremental manner - see the comment on this
+ // function - it is possible in theory that some work can happen only in a
+ // later execution of the pass).
+ InsertOrderedMap<Select*, ExpressionStack> stackMap;
+
+ void visitSelect(Select* curr) {
+ if (parent.partiallyPrecomputable.count(curr)) {
+ stackMap[curr] = expressionStack;
+ }
+ }
+ } stackFinder(*this);
+ stackFinder.walkFunction(func);
+
+ // Note which expressions we've modified as we go, as it is invalid to
+ // modify more than once. This could happen in theory in a situation like
+ // this:
+ //
+ // (ternary.f32.max ;; fictional instruction for explanatory purposes
+ // (select ..)
+ // (select ..)
+ // (f32.infinity)
+ // )
+ //
+ // When we consider the first select we can see that the computation result
+ // is always infinity, so we can optimize here and replace the ternary. Then
+ // the same thing happens with the second select, causing the ternary to be
+ // replaced again, which is unsafe because it no longer exists after we
+ // precomputed it the first time. (Note that in this example the result is
+ // the same either way, but at least in theory an instruction could exist
+ // for which there was a difference.) In practice it does not seem that wasm
+ // has instructions capable of this atm but this code is still useful to
+ // guard against future problems, and as a minor speedup (quickly skip code
+ // if it was already modified).
+ std::unordered_set<Expression*> modified;
+
+ for (auto& [select, stack] : stackFinder.stackMap) {
+ // Each stack ends in the select itself, and contains more than the select
+ // itself (otherwise we'd have ignored the select), i.e., the select has a
+ // parent that we can try to optimize into the arms.
+ assert(stack.back() == select);
+ assert(stack.size() >= 2);
+ Index selectIndex = stack.size() - 1;
+ assert(selectIndex >= 1);
+
+ if (modified.count(select)) {
+ // This select was modified; go to the next one.
+ continue;
+ }
+
+ // Go up through the parents, until we can't do any more work. At each
+ // parent we'll try to execute it and all intermediate parents into the
+ // select arms.
+ for (Index parentIndex = selectIndex - 1; parentIndex != Index(-1);
+ parentIndex--) {
+ auto* parent = stack[parentIndex];
+ if (modified.count(parent)) {
+ // This parent was modified; exit the loop on parents as no upper
+ // parent is valid to try either.
+ break;
+ }
+
+ // If the parent lacks a concrete type then we can't move it into the
+ // select: the select needs a concrete (and non-tuple) type. For example
+ // if the parent is a drop or is unreachable, those are things we don't
+ // want to handle, and we stop here (once we see one such parent we
+ // can't expect to make any more progress).
+ if (!parent->type.isConcrete() || parent->type.isTuple()) {
+ break;
+ }
+
+ // We are precomputing the select arms, but leaving the condition as-is.
+ // If the condition breaks to the parent, then we can't move the parent
+ // into the select arms:
+ //
+ // (block $name ;; this must stay outside of the select
+ // (select
+ // (B)
+ // (C)
+ // (block ;; condition
+ // (br_if $target
+ //
+ // Ignore all control flow structures for simplicity, as they aren't
+ // interesting for us, and other passes should have removed them anyhow.
+ if (Properties::isControlFlowStructure(parent)) {
+ break;
+ }
+
+ // This looks promising, so try to precompute here. What we do is
+ // precompute twice, once with the select replaced with the left arm,
+ // and once with the right. If both succeed then we can create a new
+ // select (with the same condition as before) whose arms are the
+ // precomputed values.
+ auto isValidPrecomputation = [&](const Flow& flow) {
+ // For now we handle simple concrete values. We could also handle
+ // breaks in principle TODO
+ return canEmitConstantFor(flow.values) && !flow.breaking() &&
+ flow.values.isConcrete();
+ };
+
+ // Find the pointer to the select in its immediate parent so that we can
+ // replace it first with one arm and then the other.
+ auto** pointerToSelect =
+ getChildPointerInImmediateParent(stack, selectIndex, func);
+ *pointerToSelect = select->ifTrue;
+ auto ifTrue = precomputeExpression(parent);
+ if (isValidPrecomputation(ifTrue)) {
+ *pointerToSelect = select->ifFalse;
+ auto ifFalse = precomputeExpression(parent);
+ if (isValidPrecomputation(ifFalse)) {
+ // Wonderful, we can precompute here! The select can now contain the
+ // computed values in its arms.
+ select->ifTrue = ifTrue.getConstExpression(*getModule());
+ select->ifFalse = ifFalse.getConstExpression(*getModule());
+ select->finalize();
+
+ // The parent of the select is now replaced by the select.
+ auto** pointerToParent =
+ getChildPointerInImmediateParent(stack, parentIndex, func);
+ *pointerToParent = select;
+
+ // Update state for further iterations: Mark everything modified and
+ // move the select to the parent's location.
+ for (Index i = parentIndex; i <= selectIndex; i++) {
+ modified.insert(stack[i]);
+ }
+ selectIndex = parentIndex;
+ stack[selectIndex] = select;
+ stack.resize(selectIndex + 1);
+ }
+ }
+
+ // Whether we succeeded to precompute here or not, restore the parent's
+ // pointer to its original state (if we precomputed, the parent is no
+ // longer in use, but there is no harm in modifying it).
+ *pointerToSelect = select;
+ }
+ }
+ }
+
void visitFunction(Function* curr) {
// removing breaks can alter types
ReFinalize().walkFunctionInModule(curr, getModule());
@@ -531,6 +813,30 @@ private:
return true;
}
+
+ // Helpers for partial precomputing.
+
+ // Given a stack of expressions and the index of an expression in it, find
+ // the pointer to that expression in the parent. This gives us a pointer that
+ // allows us to replace the expression.
+ Expression** getChildPointerInImmediateParent(const ExpressionStack& stack,
+ Index index,
+ Function* func) {
+ if (index == 0) {
+ // There is nothing above this expression, so the pointer referring to it
+ // is the function's body.
+ return &func->body;
+ }
+
+ auto* child = stack[index];
+ for (auto** currChild : ChildIterator(stack[index - 1]).children) {
+ if (*currChild == child) {
+ return currChild;
+ }
+ }
+
+ WASM_UNREACHABLE("child not found in parent");
+ }
};
Pass* createPrecomputePass() { return new Precompute(false); }
diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast
new file mode 100644
index 000000000..86a55f56a
--- /dev/null
+++ b/test/lit/passes/precompute-partial.wast
@@ -0,0 +1,788 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt --precompute --optimize-level=2 -all -S -o - | filecheck %s
+
+(module
+ ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $simple-1 (param $param i32) (result i32)
+ ;; The eqz can be applied to the select arms.
+ (i32.eqz
+ (select
+ (i32.const 42)
+ (i32.const 1337)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $simple-2 (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $simple-2 (param $param i32) (result i32)
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $simple-3 (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $simple-3 (param $param i32) (result i32)
+ (i32.eqz
+ (select
+ (i32.const 20)
+ (i32.const 0)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $simple-4 (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $simple-4 (param $param i32) (result i32)
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 0)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $not-left (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $not-left (param $param i32) (result i32)
+ (i32.eqz
+ (select
+ (local.get $param) ;; this cannot be precomputed, so we do nothing
+ (i32.const 0)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $not-right (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $not-right (param $param i32) (result i32)
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (local.get $param) ;; this cannot be precomputed, so we do nothing
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $not-neither (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $not-neither (param $param i32) (result i32)
+ (i32.eqz
+ (select
+ (local.get $param) ;; these cannot be precomputed,
+ (local.get $param) ;; so we do nothing
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $binary (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: (i32.const 21)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $binary (param $param i32) (result i32)
+ (i32.add
+ (select
+ (i32.const 10)
+ (i32.const 20)
+ (local.get $param)
+ )
+ (i32.const 1)
+ )
+ )
+
+ ;; CHECK: (func $binary-2 (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 11)
+ ;; CHECK-NEXT: (i32.const 21)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $binary-2 (param $param i32) (result i32)
+ ;; As above but with the select in the other arm.
+ (i32.add
+ (i32.const 1)
+ (select
+ (i32.const 10)
+ (i32.const 20)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $binary-3 (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (i32.const 20)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 30)
+ ;; CHECK-NEXT: (i32.const 40)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $binary-3 (param $param i32) (result i32)
+ ;; Two selects. We do not optimize here (but in theory could, as the
+ ;; condition is identical - other passes should merge that).
+ (i32.add
+ (select
+ (i32.const 10)
+ (i32.const 20)
+ (local.get $param)
+ )
+ (select
+ (i32.const 30)
+ (i32.const 40)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $unreachable (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (unreachable)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $unreachable (param $param i32) (result i32)
+ ;; We should ignore unreachable code like this. We would need to make sure
+ ;; to emit the proper type on the outside, and it's simpler to just defer
+ ;; this to DCE.
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 0)
+ (unreachable)
+ )
+ )
+ )
+
+ ;; CHECK: (func $tuple (type $1) (param $param i32) (result i32 i32)
+ ;; CHECK-NEXT: (tuple.make 2
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $tuple (param $param i32) (result i32 i32)
+ ;; We should ignore tuples, as select outputs cannot be tuples.
+ (tuple.make 2
+ (select
+ (i32.const 0)
+ (i32.const 1)
+ (local.get $param)
+ )
+ (i32.const 2)
+ )
+ )
+
+ ;; CHECK: (func $control-flow (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (block $target (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (br_if $target
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $control-flow (param $param i32) (result i32)
+ ;; We ignore control flow structures to avoid issues with removing them (as
+ ;; the condition might refer to them, as in this testcase).
+ (block $target (result i32)
+ (select
+ (i32.const 0)
+ (i32.const 1)
+ ;; If we precomputed the block into the select arms, this br_if
+ ;; would have nowhere to go.
+ (br_if $target
+ (i32.const 1)
+ (local.get $param)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $break (type $0) (param $x i32) (result i32)
+ ;; CHECK-NEXT: (block $label (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (br_if $label
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $break (param $x i32) (result i32)
+ ;; We should change nothing here: we do not partially precompute breaks yet
+ ;; TODO
+ (block $label (result i32)
+ (drop
+ (br_if $label
+ (select
+ (i32.const 0)
+ (i32.const 1)
+ (local.get $x)
+ )
+ (i32.const 2)
+ )
+ )
+ (i32.const 3)
+ )
+ )
+
+ ;; CHECK: (func $toplevel (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $toplevel (param $param i32) (result i32)
+ ;; There is nothing to do for a select with no parent, but do not error at
+ ;; least.
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (local.get $param)
+ )
+ )
+
+ ;; CHECK: (func $toplevel-nested (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $toplevel-nested (param $param i32) (result i32)
+ ;; The inner select can be optimized: here we apply the outer select onto
+ ;; the inner one (the same as any other thing we apply into the select arms,
+ ;; but it happens to be a select itself).
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (select
+ (i32.const 0)
+ (i32.const 20)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $toplevel-nested-flip (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $toplevel-nested-flip (param $param i32) (result i32)
+ ;; As above but with inner select arms flipped. That flips the result.
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (select
+ (i32.const 20)
+ (i32.const 0)
+ (local.get $param)
+ )
+ )
+ )
+
+ ;; CHECK: (func $toplevel-nested-indirect (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $toplevel-nested-indirect (param $param i32) (result i32)
+ ;; As above, with an instruction in the middle. Again, we can optimize.
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 20)
+ (local.get $param)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $nested (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $nested (param $param i32) (result i32)
+ ;; As above, with an outer eqz as well. Now both the outer and inner selects
+ ;; can be optimized, and after the inner one is, it can be optimized with
+ ;; the outer one as well, leaving a single select and no eqz.
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 20)
+ (local.get $param)
+ )
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $nested-arms (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (i32.const 20)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 30)
+ ;; CHECK-NEXT: (i32.const 40)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 50)
+ ;; CHECK-NEXT: (i32.const 60)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $nested-arms (param $param i32) (result i32)
+ ;; We do nothing for selects nested directly in select arms, but do not
+ ;; error at least. Note that we could apply the eqz two levels deep TODO
+ (i32.eqz
+ (select
+ (select
+ (i32.const 10)
+ (i32.const 20)
+ (local.get $param)
+ )
+ (select
+ (i32.const 30)
+ (i32.const 40)
+ (local.get $param)
+ )
+ (select
+ (i32.const 50)
+ (i32.const 60)
+ (local.get $param)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $nested-indirect-arms (type $0) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (i32.eqz
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $nested-indirect-arms (param $param i32) (result i32)
+ ;; As above, but now there is something in the middle, that can be optimized
+ ;; into those selects.
+ (i32.eqz
+ (select
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 10)
+ (local.get $param)
+ )
+ )
+ (i32.eqz
+ (select
+ (i32.const 20)
+ (i32.const 0)
+ (local.get $param)
+ )
+ )
+ (i32.eqz
+ (select
+ (i32.const 0)
+ (i32.const 30)
+ (local.get $param)
+ )
+ )
+ )
+ )
+ )
+)
+
+;; References.
+(module
+ (rec
+ ;; CHECK: (rec
+ ;; CHECK-NEXT: (type $vtable (sub (struct (field funcref))))
+ (type $vtable (sub (struct funcref)))
+
+ ;; CHECK: (type $specific-func (sub (func (result i32))))
+ (type $specific-func (sub (func (result i32))))
+
+ ;; CHECK: (type $specific-func.sub (sub $specific-func (func (result i32))))
+ (type $specific-func.sub (sub $specific-func (func (result i32))))
+
+ ;; CHECK: (type $vtable.sub (sub $vtable (struct (field (ref $specific-func)))))
+ (type $vtable.sub (sub $vtable (struct (field (ref $specific-func)))))
+ )
+
+ ;; CHECK: (global $A$vtable (ref $vtable) (struct.new $vtable
+ ;; CHECK-NEXT: (ref.func $A$func)
+ ;; CHECK-NEXT: ))
+ (global $A$vtable (ref $vtable) (struct.new $vtable
+ (ref.func $A$func)
+ ))
+
+ ;; CHECK: (global $B$vtable (ref $vtable) (struct.new $vtable
+ ;; CHECK-NEXT: (ref.func $B$func)
+ ;; CHECK-NEXT: ))
+ (global $B$vtable (ref $vtable) (struct.new $vtable
+ (ref.func $B$func)
+ ))
+
+ ;; CHECK: (func $test-expanded (type $0) (param $x i32) (result funcref)
+ ;; CHECK-NEXT: (select (result (ref $specific-func))
+ ;; CHECK-NEXT: (ref.func $A$func)
+ ;; CHECK-NEXT: (ref.func $B$func)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-expanded (param $x i32) (result funcref)
+ ;; We can apply the struct.get to the select arms: As the globals are all
+ ;; immutable, we can read the function references from them, and emit a
+ ;; select on those values, saving the struct.get operation entirely.
+ ;;
+ ;; Note that this test also checks updating the type of the select, which
+ ;; initially returned a struct, and afterwards returns a func.
+ (struct.get $vtable 0
+ (select
+ (global.get $A$vtable)
+ (global.get $B$vtable)
+ (local.get $x)
+ )
+ )
+ )
+
+ ;; CHECK: (func $test-subtyping (type $0) (param $x i32) (result funcref)
+ ;; CHECK-NEXT: (select (result (ref $specific-func))
+ ;; CHECK-NEXT: (ref.func $A$func)
+ ;; CHECK-NEXT: (ref.func $B$func)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-subtyping (param $x i32) (result funcref)
+ ;; As above, but now we have struct.news directly in the arms, and one is
+ ;; of a subtype of the final result (which should not prevent optimization).
+ (struct.get $vtable.sub 0
+ (select
+ (struct.new $vtable.sub
+ (ref.func $A$func)
+ )
+ (struct.new $vtable.sub
+ (ref.func $B$func) ;; this function is of a subtype of the field type
+ )
+ (local.get $x)
+ )
+ )
+ )
+
+ ;; CHECK: (func $test-expanded-twice (type $5) (param $x i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-expanded-twice (param $x i32) (result i32)
+ ;; As $test-expanded, but we have two operations that can be applied. Both
+ ;; references are non-null, so the select arms will become 0.
+ (ref.is_null
+ (struct.get $vtable 0
+ (select
+ (global.get $A$vtable)
+ (global.get $B$vtable)
+ (local.get $x)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $test-expanded-twice-stop (type $6) (param $x i32)
+ ;; CHECK-NEXT: (call $send-i32
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-expanded-twice-stop (param $x i32)
+ ;; As $test-expanded-twice, but we stop after two expansions when we fail on
+ ;; the call.
+ (call $send-i32
+ (ref.is_null
+ (struct.get $vtable 0
+ (select
+ (global.get $A$vtable)
+ (global.get $B$vtable)
+ (local.get $x)
+ )
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $send-i32 (type $6) (param $x i32)
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: )
+ (func $send-i32 (param $x i32)
+ ;; Helper for above.
+ )
+
+ ;; CHECK: (func $binary (type $5) (param $param i32) (result i32)
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $binary (param $param i32) (result i32)
+ ;; This is a common pattern in J2Wasm output. Note that this is optimized
+ ;; because immutable globals can be compared at compile time.
+ (ref.eq
+ (select
+ (global.get $A$vtable)
+ (global.get $B$vtable)
+ (local.get $param)
+ )
+ (global.get $A$vtable)
+ )
+ )
+
+ ;; CHECK: (func $test-trap (type $0) (param $x i32) (result funcref)
+ ;; CHECK-NEXT: (struct.get $vtable 0
+ ;; CHECK-NEXT: (select (result (ref null $vtable))
+ ;; CHECK-NEXT: (ref.null none)
+ ;; CHECK-NEXT: (global.get $B$vtable)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-trap (param $x i32) (result funcref)
+ ;; One arm has a null, which makes the struct.get trap, so we ignore this
+ ;; for now. TODO: handle traps
+ (struct.get $vtable 0
+ (select
+ (ref.null $vtable)
+ (global.get $B$vtable)
+ (local.get $x)
+ )
+ )
+ )
+
+ ;; CHECK: (func $A$func (type $specific-func) (result i32)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ (func $A$func (type $specific-func) (result i32)
+ ;; Helper for above.
+ (i32.const 1)
+ )
+
+ ;; CHECK: (func $B$func (type $specific-func.sub) (result i32)
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ (func $B$func (type $specific-func.sub) (result i32)
+ ;; Helper for above.
+ (i32.const 2)
+ )
+)
+
+;; References with nested globals.
+(module
+ (rec
+ ;; CHECK: (rec
+ ;; CHECK-NEXT: (type $vtable (sub (struct (field funcref))))
+ (type $vtable (sub (struct funcref)))
+
+ ;; CHECK: (type $itable (sub (struct (field (ref $vtable)))))
+ (type $itable (sub (struct (ref $vtable))))
+ )
+
+ ;; CHECK: (global $A$itable (ref $itable) (struct.new $itable
+ ;; CHECK-NEXT: (struct.new $vtable
+ ;; CHECK-NEXT: (ref.func $A$func)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: ))
+ (global $A$itable (ref $itable) (struct.new $itable
+ (struct.new $vtable
+ (ref.func $A$func)
+ )
+ ))
+
+ ;; CHECK: (global $B$itable (ref $itable) (struct.new $itable
+ ;; CHECK-NEXT: (struct.new $vtable
+ ;; CHECK-NEXT: (ref.func $B$func)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: ))
+ (global $B$itable (ref $itable) (struct.new $itable
+ (struct.new $vtable
+ (ref.func $B$func)
+ )
+ ))
+
+ ;; CHECK: (func $test-expanded (type $3) (param $x i32) (result funcref)
+ ;; CHECK-NEXT: (select (result (ref $2))
+ ;; CHECK-NEXT: (ref.func $A$func)
+ ;; CHECK-NEXT: (ref.func $B$func)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-expanded (param $x i32) (result funcref)
+ ;; Nesting in the global declarations means we cannot precompute the inner
+ ;; struct.get by itself (as that would return the inner struct.new), but
+ ;; when we do the outer struct.get, it all comes together. This is a common
+ ;; pattern in J2Wasm output.
+ (struct.get $vtable 0
+ (struct.get $itable 0
+ (select
+ (global.get $A$itable)
+ (global.get $B$itable)
+ (local.get $x)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $test-expanded-almost (type $4) (param $x i32) (result anyref)
+ ;; CHECK-NEXT: (struct.get $itable 0
+ ;; CHECK-NEXT: (select (result (ref $itable))
+ ;; CHECK-NEXT: (global.get $A$itable)
+ ;; CHECK-NEXT: (global.get $B$itable)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test-expanded-almost (param $x i32) (result anyref)
+ ;; As above, but without the outer struct.get. We get "stuck" with the
+ ;; inner part of the global, as explained there, and fail to optimize here.
+ (struct.get $itable 0
+ (select
+ (global.get $A$itable)
+ (global.get $B$itable)
+ (local.get $x)
+ )
+ )
+ )
+
+ ;; CHECK: (func $A$func (type $2)
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: )
+ (func $A$func
+ ;; Helper for above.
+ )
+
+ ;; CHECK: (func $B$func (type $2)
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: )
+ (func $B$func
+ ;; Helper for above.
+ )
+)