author | Alon Zakai <azakai@google.com> | 2024-07-25 11:16:45 -0700
committer | GitHub <noreply@github.com> | 2024-07-25 11:16:45 -0700
commit | 9cc1cb1ca66e89cbe7b7b5b52897f3bee3ee422c (patch)
tree | 632a9c57123fc25374d981acb2cb40600e3ea4f0
parent | d903dd30f6426b8eb07605cae01baf4158364e2d (diff)
download | binaryen-9cc1cb1ca66e89cbe7b7b5b52897f3bee3ee422c.tar.gz binaryen-9cc1cb1ca66e89cbe7b7b5b52897f3bee3ee422c.tar.bz2 binaryen-9cc1cb1ca66e89cbe7b7b5b52897f3bee3ee422c.zip
Cost analysis: Remove "Unacceptable" hack (#6782)
We marked various expressions as having cost "Unacceptable", fixed at 100, to
ensure we never moved them out of an If arm, etc. Giving them such a high
cost avoids that problem - the cost is higher than the limit we have for moving
code from conditional to unconditional execution - but it also makes the total
cost unrealistic. For example, a function with one such instruction plus an add
(cost 1) would end up with cost 101, so removing the add would look
insignificant, which causes issues for anything that wants to compare costs
(like Monomorphization).
To fix this, adjust some costs. The main change here is to give casts a cost of 5.
I measured this in depth (see the attached benchmark scripts), and it is
clear that in both V8 and SpiderMonkey the cost of a cast is high enough to
make it not worth turning an If with a ref.test arm into a select (which would
always execute the test).
The other costs adjusted here matter a lot less, because they are on operations
that have side effects, so the optimizer will not move them from conditional to
unconditional execution anyhow, but I tried to make them a bit more
realistic while removing "Unacceptable":
* Give most atomic operations the cost of 10 we've been using for atomic loads/
stores. Wait and notify should perhaps be slower, but it seems like assuming
fast switching is the more relevant model.
* Give growth operations a cost of 20 and throw operations a cost of 10. These
numbers are entirely made up, as I am not sure how to measure them in
a useful way (but, again, this should not matter much since they have side
effects).
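
To make the arithmetic concrete, here is a small standalone sketch of how the new
constants relate to the threshold in RemoveUnusedBrs.cpp. The names and structure
below are illustrative only (this is not Binaryen's actual code); the numeric
values are the ones introduced or kept by this change.

```cpp
// Illustrative sketch only: the constants mirror src/ir/cost.h and
// src/passes/RemoveUnusedBrs.cpp after this change; the surrounding code is
// hypothetical.
#include <cstdint>
#include <iostream>

using CostType = uint32_t;

// New named costs replacing the old Unacceptable = 100.
const CostType AtomicCost = 10;
const CostType ThrowCost = 10;
const CostType CastCost = 5;

// Existing limit for moving code from conditional to unconditional execution.
const CostType TooCostlyToRunUnconditionally = 8;

int main() {
  // Old scheme: a cast cost 100, so a cast plus an add (cost 1) totalled 101,
  // and the add was invisible to anything comparing costs.
  CostType oldTotal = 100 + 1;
  // New scheme: the same code costs 6, so removing the add is a visible
  // improvement, which is what passes like Monomorphization need.
  CostType newTotal = CastCost + 1;
  std::cout << "old total: " << oldTotal << ", new total: " << newTotal << '\n';

  // Casts, throws, and atomics still stay conditional by default: per the new
  // static_asserts, atomics and throws meet the threshold, and casts sit just
  // above half of it, so they are only run unconditionally when optimizing
  // for size.
  std::cout << "cast too costly at default settings: "
            << (CastCost > TooCostlyToRunUnconditionally / 2) << '\n';
  std::cout << "atomic too costly: "
            << (AtomicCost >= TooCostlyToRunUnconditionally) << '\n';
}
```

With these values, a function's total cost is again dominated by what it actually
does rather than by a single sentinel constant.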
-rw-r--r-- | scripts/benchmarking/bench.js | 159
-rw-r--r-- | scripts/benchmarking/bench.wat | 283
-rw-r--r-- | src/ir/cost.h | 54
-rw-r--r-- | src/passes/RemoveUnusedBrs.cpp | 10
-rw-r--r-- | test/lit/passes/remove-unused-brs_levels.wast | 143
5 files changed, 615 insertions, 34 deletions
diff --git a/scripts/benchmarking/bench.js b/scripts/benchmarking/bench.js new file mode 100644 index 000000000..d1284a241 --- /dev/null +++ b/scripts/benchmarking/bench.js @@ -0,0 +1,159 @@ + +// Benchmarking script. This runs on compiled bench.wat and prints out timings. +// +// Usage: +// +// * wasm-opt scripts/benchmarking/bench.wat -all --inline-functions-with-loops --always-inline-max-function-size=1000 --inlining --precompute-propagate --optimize-instructions --inlining --simplify-locals --coalesce-locals --vacuum --remove-unused-module-elements -o bench.wasm -g +// * Inspect the optimized wasm to see that inlining etc. worked properly +// (we rely on inlining to let us write bench.wat in a short/simple form, and +// we use very specific optimizations in order to not optimize away the +// differences we care about). +// * d8 bench.js -- bench.wasm +// etc. +// + +// Shell integration. +if (typeof console === 'undefined') { + console = { log: print }; +} +var tempRet0; +var binary; +if (typeof process === 'object' && typeof require === 'function' /* node.js detection */) { + var args = process.argv.slice(2); + binary = require('fs').readFileSync(args[0]); + if (!binary.buffer) binary = new Uint8Array(binary); +} else { + var args; + if (typeof scriptArgs != 'undefined') { + args = scriptArgs; + } else if (typeof arguments != 'undefined') { + args = arguments; + } + if (typeof readbuffer === 'function') { + binary = new Uint8Array(readbuffer(args[0])); + } else { + binary = read(args[0], 'binary'); + } +} + +// Create the wasm. +const module = new WebAssembly.Module(binary); +const instance = new WebAssembly.Instance(module, {}); +const exports = instance.exports; + +// Create the benchmarkers. +function makeBenchmarker(name) { + return { + name: name, + func: exports[name], + time: 0, + sum: 0, + iters: 0, + }; +} + +const benchmarkers = [ + makeBenchmarker('len'), + makeBenchmarker('and'), + makeBenchmarker('iff-both'), + makeBenchmarker('or'), + makeBenchmarker('iff-either'), + makeBenchmarker('select'), + makeBenchmarker('iff-nextor'), + makeBenchmarker('select-three'), + makeBenchmarker('iff-three'), +]; + +// We'll call the benchmark functions in random orders. Random orders avoid any +// interaction between the benchmarks from causing bias in the results. +// +// An alternative to randomly ordering the benchmarks in each iteration would be +// to fully benchmark one, then do the next, so there are large amounts of time +// between them, but that also allows them to become more like microbenchmarks +// where the branch predictor etc. might display very favorable behavior. +// Interleaving them makes things slightly more realistic. +// +// If we have too many benchmarks then eventually computing all orders ahead of +// time will not work, but so long as we can, it is faster this way rather than +// to compute random orders on the fly as we go. +function makeOrders(prefix) { + // Given a prefix of an order, like [] or [0, 3], return all the possible + // orders beginning with that prefix. + + // We cannot repeat anything already seen. + const seen = new Set(); + for (var x of prefix) { + seen.add(x); + } + + // Starting from the prefix, extend it by one item in all valid ways. + const extensions = []; + for (var i = 0; i < benchmarkers.length; i++) { + if (!seen.has(i)) { + extensions.push(prefix.concat(i)); + } + } + + if (prefix.length == benchmarkers.length - 1) { + // The extensions are complete orders; stop the recursion. 
+ return extensions; + } + + // Recursively generate the full orders. + const ret = []; + for (var extension of extensions) { + for (var order of makeOrders(extension)) { + ret.push(order); + } + } + return ret; +} + +const orders = makeOrders([]); + +// Params. +const M = 10000000; +const N = 100; + +console.log('iters :', M); +console.log('list len :', N); +console.log('benchmarkers:', benchmarkers.length); +console.log('orderings :', orders.length); + +// Create a long linked list of objects of both type $A and $B. +var list = null; +for (var i = 0; i < N; i++) { + list = Math.random() < 0.5 ? exports.makeA(list) : exports.makeB(list); +} + +console.log('benchmarking...'); + +// Call the benchmark functions. + +for (var i = 0; i < M; i++) { + const order = orders[Math.floor(Math.random() * orders.length)]; + for (var k = 0; k < benchmarkers.length; k++) { + const benchmarker = benchmarkers[order[k]]; + const start = performance.now(); + const result = benchmarker.func(list); + benchmarker.time += performance.now() - start; + benchmarker.sum += result; + benchmarker.iters++; + } +} + +for (var benchmarker of benchmarkers) { + if (benchmarker.iters != M) { + throw 'wat'; + } +} + +console.log(); +for (var benchmarker of benchmarkers) { + console.log(`${benchmarker.name} time: \t${benchmarker.time}`) +} +console.log(); +for (var benchmarker of benchmarkers) { + console.log(`${benchmarker.name} mean sum: \t${benchmarker.sum / M}`) +} + diff --git a/scripts/benchmarking/bench.wat b/scripts/benchmarking/bench.wat new file mode 100644 index 000000000..87d54b66b --- /dev/null +++ b/scripts/benchmarking/bench.wat @@ -0,0 +1,283 @@ +;; See bench.js. + +(module + ;; A chain of three types. Each type has a "next" field, so we can form linked + ;; lists. + (type $A (sub (struct (field $next (ref null $A))))) + (type $B (sub $A (struct (field $next (ref null $A))))) + (type $C (sub $B (struct (field $next (ref null $A))))) + + (type $func (func (param (ref $A)) (result i32))) + + ;; Internal helper to iterate over a linked list and call a function on each + ;; item, and return the sum of those calls' results. + (func $iter (param $list (ref null $A)) (param $func (ref $func)) (result i32) + (local $sum i32) + (loop $loop + (if + (ref.is_null + (local.get $list) + ) + (then + (return + (local.get $sum) + ) + ) + (else + (local.set $sum + (i32.add + (local.get $sum) + (call_ref $func + (ref.as_non_null + (local.get $list) + ) + (local.get $func) + ) + ) + ) + (local.set $list + (struct.get $A $next + (local.get $list) + ) + ) + (br $loop) + ) + ) + ) + ) + + ;; Using the helper, and depending on inlining to optimize this, lets us + ;; write the exports concisely. First, code to compute the length of the list + ;; (for comparison purposes). + (func $len (export "len") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + ;; Add one each time this is called. + (ref.func $one) + ) + ) + (func $one (param $list (ref $A)) (result i32) + (i32.const 1) + ) + + ;; At each point in the linked list, check if both the current and next item + ;; are inputs are $B, using an if to short-circuit when possible. 
+ (func $iff-both (export "iff-both") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-iff-both) + ) + ) + (func $do-iff-both (param $list (ref $A)) (result i32) + (if (result i32) + (ref.test (ref $B) + (struct.get $A $next + (local.get $list) + ) + ) + (then + (ref.test (ref $B) + (local.get $list) + ) + ) + (else + (i32.const 0) + ) + ) + ) + + ;; The same computation, but using an and, so both tests always execute. + (func $and (export "and") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-and) + ) + ) + (func $do-and (param $list (ref $A)) (result i32) + (i32.and + (ref.test (ref $B) + (struct.get $A $next + (local.get $list) + ) + ) + (ref.test (ref $B) + (local.get $list) + ) + ) + ) + + ;; Similar, but return 1 if either test succeeds (using an if). + (func $iff-either (export "iff-either") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-iff-either) + ) + ) + (func $do-iff-either (param $list (ref $A)) (result i32) + (if (result i32) + (ref.test (ref $B) + (struct.get $A $next + (local.get $list) + ) + ) + (then + (i32.const 1) + ) + (else + (ref.test (ref $B) + (local.get $list) + ) + ) + ) + ) + + + ;; The same computation, but using an or, so both tests always execute. + (func $or (export "or") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-or) + ) + ) + (func $do-or (param $list (ref $A)) (result i32) + (i32.or + (ref.test (ref $B) + (struct.get $A $next + (local.get $list) + ) + ) + (ref.test (ref $B) + (local.get $list) + ) + ) + ) + + ;; Use a select to do a test of "is next null ? 0 : test curr". + (func $select (export "select") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-select) + ) + ) + (func $do-select (param $list (ref $A)) (result i32) + (select + (i32.const 0) + (ref.test (ref $B) + (local.get $list) + ) + (ref.is_null + (struct.get $A $next + (local.get $list) + ) + ) + ) + ) + + ;; Use an iff to do the same. + (func $iff-nextor (export "iff-nextor") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-iff-nextor) + ) + ) + (func $do-iff-nextor (param $list (ref $A)) (result i32) + (if (result i32) + (ref.is_null + (struct.get $A $next + (local.get $list) + ) + ) + (then + (i32.const 0) + ) + (else + (ref.test (ref $B) + (local.get $list) + ) + ) + ) + ) + + ;; Use an if over three tests: "test if next is B or C depending on if curr is + ;; B." + (func $iff-three (export "iff-three") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-iff-three) + ) + ) + (func $do-iff-three (param $list (ref $A)) (result i32) + (local $next (ref null $A)) + (local.set $next + (struct.get $A $next + (local.get $list) + ) + ) + (if (result i32) + (ref.test (ref $B) + (local.get $list) + ) + (then + (ref.test (ref $C) + (local.get $next) + ) + ) + (else + (ref.test (ref $B) + (local.get $next) + ) + ) + ) + ) + + ;; Use a select for the same. + (func $select-three (export "select-three") (param $list (ref $A)) (result i32) + (call $iter + (local.get $list) + (ref.func $do-select-three) + ) + ) + (func $do-select-three (param $list (ref $A)) (result i32) + (local $next (ref null $A)) + (local.set $next + (struct.get $A $next + (local.get $list) + ) + ) + (select + (ref.test (ref $C) + (local.get $next) + ) + (ref.test (ref $B) + (local.get $next) + ) + (ref.test (ref $B) + (local.get $list) + ) + ) + ) + + ;; Creation functions. 
+ + (func $makeA (export "makeA") (param $next (ref null $A)) (result anyref) + (struct.new $A + (local.get $next) + ) + ) + + (func $makeB (export "makeB") (param $next (ref null $A)) (result anyref) + (struct.new $B + (local.get $next) + ) + ) + + (func $makeC (export "makeC") (param $next (ref null $A)) (result anyref) + ;; This function is not used in benchmarks yet, but it keeps the type $C + ;; alive, which prevents $B from looking like it could be final, which might + ;; allow the optimizer to simplify more than we want. + (struct.new $C + (local.get $next) + ) + ) +) + diff --git a/src/ir/cost.h b/src/ir/cost.h index 6622561b8..38c74a203 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -31,11 +31,17 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { CostType cost; - // A cost that is extremely high, something that is far, far more expensive - // than a fast operation like an add. This cost is so high it is unacceptable - // to add any more of it, say by an If=>Select operation (which would execute - // both arms; if either arm contains an unacceptable cost, we do not do it). - static const CostType Unacceptable = 100; + // The cost of a "slow" atomic operation like RMW. + static const CostType AtomicCost = 10; + + // The cost of throwing a wasm exception. This does not include the cost of + // catching it (which might be in another function than the one we are + // considering). + static const CostType ThrowCost = 10; + + // The cost of a cast. This can vary depending on the engine and on the type, + // but usually requires some loads and comparisons. + static const CostType CastCost = 5; CostType maybeVisit(Expression* curr) { return curr ? visit(curr) : 0; } @@ -85,26 +91,27 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { CostType visitGlobalGet(GlobalGet* curr) { return 1; } CostType visitGlobalSet(GlobalSet* curr) { return 2 + visit(curr->value); } CostType visitLoad(Load* curr) { - return 1 + visit(curr->ptr) + 10 * curr->isAtomic; + return 1 + visit(curr->ptr) + AtomicCost * curr->isAtomic; } CostType visitStore(Store* curr) { - return 2 + visit(curr->ptr) + visit(curr->value) + 10 * curr->isAtomic; + return 2 + visit(curr->ptr) + visit(curr->value) + + AtomicCost * curr->isAtomic; } CostType visitAtomicRMW(AtomicRMW* curr) { - return Unacceptable + visit(curr->ptr) + visit(curr->value); + return AtomicCost + visit(curr->ptr) + visit(curr->value); } CostType visitAtomicCmpxchg(AtomicCmpxchg* curr) { - return Unacceptable + visit(curr->ptr) + visit(curr->expected) + + return AtomicCost + visit(curr->ptr) + visit(curr->expected) + visit(curr->replacement); } CostType visitAtomicWait(AtomicWait* curr) { - return Unacceptable + visit(curr->ptr) + visit(curr->expected) + + return AtomicCost + visit(curr->ptr) + visit(curr->expected) + visit(curr->timeout); } CostType visitAtomicNotify(AtomicNotify* curr) { - return Unacceptable + visit(curr->ptr) + visit(curr->notifyCount); + return AtomicCost + visit(curr->ptr) + visit(curr->notifyCount); } - CostType visitAtomicFence(AtomicFence* curr) { return Unacceptable; } + CostType visitAtomicFence(AtomicFence* curr) { return AtomicCost; } CostType visitConst(Const* curr) { return 1; } CostType visitUnary(Unary* curr) { CostType ret = 0; @@ -516,7 +523,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { CostType visitReturn(Return* curr) { return maybeVisit(curr->value); } CostType visitMemorySize(MemorySize* curr) { return 1; } CostType 
visitMemoryGrow(MemoryGrow* curr) { - return Unacceptable + visit(curr->delta); + // TODO: This should perhaps be higher for shared memories. + return 20 + visit(curr->delta); } CostType visitMemoryInit(MemoryInit* curr) { return 6 + visit(curr->dest) + visit(curr->offset) + visit(curr->size); @@ -572,7 +580,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { } CostType visitTableSize(TableSize* curr) { return 1; } CostType visitTableGrow(TableGrow* curr) { - return Unacceptable + visit(curr->value) + visit(curr->delta); + return 20 + visit(curr->value) + visit(curr->delta); } CostType visitTableFill(TableFill* curr) { return 6 + visit(curr->dest) + visit(curr->value) + visit(curr->size); @@ -589,14 +597,14 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { return visit(curr->body); } CostType visitThrow(Throw* curr) { - CostType ret = Unacceptable; + CostType ret = ThrowCost; for (auto* child : curr->operands) { ret += visit(child); } return ret; } - CostType visitRethrow(Rethrow* curr) { return Unacceptable; } - CostType visitThrowRef(ThrowRef* curr) { return Unacceptable; } + CostType visitRethrow(Rethrow* curr) { return ThrowCost; } + CostType visitThrowRef(ThrowRef* curr) { return ThrowCost; } CostType visitTupleMake(TupleMake* curr) { CostType ret = 0; for (auto* child : curr->operands) { @@ -612,19 +620,15 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { CostType visitRefI31(RefI31* curr) { return 3 + visit(curr->value); } CostType visitI31Get(I31Get* curr) { return 2 + visit(curr->i31); } CostType visitRefTest(RefTest* curr) { - // Casts have a very high cost because in the VM they end up implemented as - // a combination of loads and branches. Given they contain branches, we do - // not want to add any more such work. - return Unacceptable + nullCheckCost(curr->ref) + visit(curr->ref); + return CastCost + nullCheckCost(curr->ref) + visit(curr->ref); } CostType visitRefCast(RefCast* curr) { - return Unacceptable + nullCheckCost(curr->ref) + visit(curr->ref); + return CastCost + nullCheckCost(curr->ref) + visit(curr->ref); } CostType visitBrOn(BrOn* curr) { - // BrOn of a null can be fairly fast, but anything else is a cast check - // basically, and an unacceptable cost. + // BrOn of a null can be fairly fast, but anything else is a cast check. CostType base = - curr->op == BrOnNull || curr->op == BrOnNonNull ? 2 : Unacceptable; + curr->op == BrOnNull || curr->op == BrOnNonNull ? 2 : CastCost; return base + nullCheckCost(curr->ref) + maybeVisit(curr->ref); } CostType visitStructNew(StructNew* curr) { diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 893cf9039..fcb4ea56a 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -88,8 +88,14 @@ static bool canTurnIfIntoBrIf(Expression* ifCondition, // * https://github.com/WebAssembly/binaryen/issues/5983 const Index TooCostlyToRunUnconditionally = 8; -static_assert(TooCostlyToRunUnconditionally < CostAnalyzer::Unacceptable, - "We never run code unconditionally if it has unacceptable cost"); +// Some costs are known to be too high to move from conditional to unconditional +// execution. 
+static_assert(CostAnalyzer::AtomicCost >= TooCostlyToRunUnconditionally, + "We never run atomics unconditionally"); +static_assert(CostAnalyzer::ThrowCost >= TooCostlyToRunUnconditionally, + "We never run throws unconditionally"); +static_assert(CostAnalyzer::CastCost > TooCostlyToRunUnconditionally / 2, + "We only run casts unconditionally when optimizing for size"); static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions, Index cost) { diff --git a/test/lit/passes/remove-unused-brs_levels.wast b/test/lit/passes/remove-unused-brs_levels.wast index 927cce867..6e521aa16 100644 --- a/test/lit/passes/remove-unused-brs_levels.wast +++ b/test/lit/passes/remove-unused-brs_levels.wast @@ -3,9 +3,17 @@ ;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=1 -o - | filecheck %s --check-prefix=SHRINK_1 ;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=2 -o - | filecheck %s --check-prefix=SHRINK_2 - (module - ;; SHRINK_0: (func $selectify-division (type $0) (param $x i32) (result i32) + ;; SHRINK_0: (type $struct (sub (struct))) + ;; SHRINK_1: (type $struct (sub (struct))) + ;; SHRINK_2: (type $struct (sub (struct))) + (type $struct (sub (struct))) + ;; SHRINK_0: (type $substruct (sub $struct (struct))) + ;; SHRINK_1: (type $substruct (sub $struct (struct))) + ;; SHRINK_2: (type $substruct (sub $struct (struct))) + (type $substruct (sub $struct (struct))) + + ;; SHRINK_0: (func $selectify-division (type $2) (param $x i32) (result i32) ;; SHRINK_0-NEXT: (if (result i32) ;; SHRINK_0-NEXT: (i32.eq ;; SHRINK_0-NEXT: (local.get $x) @@ -22,7 +30,7 @@ ;; SHRINK_0-NEXT: ) ;; SHRINK_0-NEXT: ) ;; SHRINK_0-NEXT: ) - ;; SHRINK_1: (func $selectify-division (type $0) (param $x i32) (result i32) + ;; SHRINK_1: (func $selectify-division (type $2) (param $x i32) (result i32) ;; SHRINK_1-NEXT: (select ;; SHRINK_1-NEXT: (i32.div_s ;; SHRINK_1-NEXT: (local.get $x) @@ -35,7 +43,7 @@ ;; SHRINK_1-NEXT: ) ;; SHRINK_1-NEXT: ) ;; SHRINK_1-NEXT: ) - ;; SHRINK_2: (func $selectify-division (type $0) (param $x i32) (result i32) + ;; SHRINK_2: (func $selectify-division (type $2) (param $x i32) (result i32) ;; SHRINK_2-NEXT: (select ;; SHRINK_2-NEXT: (i32.div_s ;; SHRINK_2-NEXT: (local.get $x) @@ -68,7 +76,7 @@ ) ) - ;; SHRINK_0: (func $selectify-division2 (type $0) (param $x i32) (result i32) + ;; SHRINK_0: (func $selectify-division2 (type $2) (param $x i32) (result i32) ;; SHRINK_0-NEXT: (if (result i32) ;; SHRINK_0-NEXT: (i32.eq ;; SHRINK_0-NEXT: (local.get $x) @@ -88,7 +96,7 @@ ;; SHRINK_0-NEXT: ) ;; SHRINK_0-NEXT: ) ;; SHRINK_0-NEXT: ) - ;; SHRINK_1: (func $selectify-division2 (type $0) (param $x i32) (result i32) + ;; SHRINK_1: (func $selectify-division2 (type $2) (param $x i32) (result i32) ;; SHRINK_1-NEXT: (if (result i32) ;; SHRINK_1-NEXT: (i32.eq ;; SHRINK_1-NEXT: (local.get $x) @@ -108,7 +116,7 @@ ;; SHRINK_1-NEXT: ) ;; SHRINK_1-NEXT: ) ;; SHRINK_1-NEXT: ) - ;; SHRINK_2: (func $selectify-division2 (type $0) (param $x i32) (result i32) + ;; SHRINK_2: (func $selectify-division2 (type $2) (param $x i32) (result i32) ;; SHRINK_2-NEXT: (select ;; SHRINK_2-NEXT: (i32.div_s ;; SHRINK_2-NEXT: (i32.div_s @@ -146,4 +154,125 @@ ) ) ) + + ;; SHRINK_0: (func $if-tests (type $3) (param $x (ref $struct)) (param $y (ref $struct)) (result i32) + ;; SHRINK_0-NEXT: (if (result i32) + ;; SHRINK_0-NEXT: (ref.test (ref $substruct) + ;; SHRINK_0-NEXT: (local.get $x) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: (then + ;; SHRINK_0-NEXT: (ref.test (ref $substruct) + ;; SHRINK_0-NEXT: (local.get $y) + ;; 
SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: (else + ;; SHRINK_0-NEXT: (i32.const 0) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_1: (func $if-tests (type $3) (param $x (ref $struct)) (param $y (ref $struct)) (result i32) + ;; SHRINK_1-NEXT: (select + ;; SHRINK_1-NEXT: (ref.test (ref $substruct) + ;; SHRINK_1-NEXT: (local.get $y) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: (i32.const 0) + ;; SHRINK_1-NEXT: (ref.test (ref $substruct) + ;; SHRINK_1-NEXT: (local.get $x) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_2: (func $if-tests (type $3) (param $x (ref $struct)) (param $y (ref $struct)) (result i32) + ;; SHRINK_2-NEXT: (select + ;; SHRINK_2-NEXT: (ref.test (ref $substruct) + ;; SHRINK_2-NEXT: (local.get $y) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: (i32.const 0) + ;; SHRINK_2-NEXT: (ref.test (ref $substruct) + ;; SHRINK_2-NEXT: (local.get $x) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: ) + (func $if-tests (param $x (ref $struct)) (param $y (ref $struct)) (result i32) + ;; An If with a test in one arm (and also in a condition). We do not + ;; normally turn this into a Select, as the test is more costly than the If. + ;; But we do so when optimizing for size. + ;; + ;; TODO: Consider optimizing this because the other arm is a 0, which means + ;; the Select can turn into an And later. + (if (result i32) + (ref.test (ref $substruct) + (local.get $x) + ) + (then + (ref.test (ref $substruct) + (local.get $y) + ) + ) + (else + (i32.const 0) + ) + ) + ) + + ;; SHRINK_0: (func $if-tests-arms (type $4) (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32) + ;; SHRINK_0-NEXT: (if (result i32) + ;; SHRINK_0-NEXT: (ref.test (ref $substruct) + ;; SHRINK_0-NEXT: (local.get $x) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: (then + ;; SHRINK_0-NEXT: (ref.test (ref $substruct) + ;; SHRINK_0-NEXT: (local.get $y) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: (else + ;; SHRINK_0-NEXT: (ref.test (ref $substruct) + ;; SHRINK_0-NEXT: (local.get $z) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_0-NEXT: ) + ;; SHRINK_1: (func $if-tests-arms (type $4) (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32) + ;; SHRINK_1-NEXT: (select + ;; SHRINK_1-NEXT: (ref.test (ref $substruct) + ;; SHRINK_1-NEXT: (local.get $y) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: (ref.test (ref $substruct) + ;; SHRINK_1-NEXT: (local.get $z) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: (ref.test (ref $substruct) + ;; SHRINK_1-NEXT: (local.get $x) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_1-NEXT: ) + ;; SHRINK_2: (func $if-tests-arms (type $4) (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32) + ;; SHRINK_2-NEXT: (select + ;; SHRINK_2-NEXT: (ref.test (ref $substruct) + ;; SHRINK_2-NEXT: (local.get $y) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: (ref.test (ref $substruct) + ;; SHRINK_2-NEXT: (local.get $z) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: (ref.test (ref $substruct) + ;; SHRINK_2-NEXT: (local.get $x) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: ) + ;; SHRINK_2-NEXT: ) + (func $if-tests-arms (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32) + ;; As above but now with a test in both arms. The outcomes are the same. 
+ (if (result i32) + (ref.test (ref $substruct) + (local.get $x) + ) + (then + (ref.test (ref $substruct) + (local.get $y) + ) + ) + (else + (ref.test (ref $substruct) + (local.get $z) + ) + ) + ) + ) ) |