 scripts/benchmarking/bench.js                 | 159
 scripts/benchmarking/bench.wat                | 283
 src/ir/cost.h                                 |  54
 src/passes/RemoveUnusedBrs.cpp                |  10
 test/lit/passes/remove-unused-brs_levels.wast | 143
 5 files changed, 615 insertions(+), 34 deletions(-)
diff --git a/scripts/benchmarking/bench.js b/scripts/benchmarking/bench.js
new file mode 100644
index 000000000..d1284a241
--- /dev/null
+++ b/scripts/benchmarking/bench.js
@@ -0,0 +1,159 @@
+
+// Benchmarking script. This runs on compiled bench.wat and prints out timings.
+//
+// Usage:
+//
+// * wasm-opt scripts/benchmarking/bench.wat -all --inline-functions-with-loops --always-inline-max-function-size=1000 --inlining --precompute-propagate --optimize-instructions --inlining --simplify-locals --coalesce-locals --vacuum --remove-unused-module-elements -o bench.wasm -g
+// * Inspect the optimized wasm to see that inlining etc. worked properly
+// (we rely on inlining to let us write bench.wat in a short/simple form, and
+// we use very specific optimizations in order to not optimize away the
+// differences we care about).
+// * d8 bench.js -- bench.wasm
+// etc.
+//
+
+// Shell integration.
+if (typeof console === 'undefined') {
+ console = { log: print };
+}
+var tempRet0;
+var binary;
+if (typeof process === 'object' && typeof require === 'function' /* node.js detection */) {
+ var args = process.argv.slice(2);
+ binary = require('fs').readFileSync(args[0]);
+ if (!binary.buffer) binary = new Uint8Array(binary);
+} else {
+ var args;
+ if (typeof scriptArgs != 'undefined') {
+ args = scriptArgs;
+ } else if (typeof arguments != 'undefined') {
+ args = arguments;
+ }
+ if (typeof readbuffer === 'function') {
+ binary = new Uint8Array(readbuffer(args[0]));
+ } else {
+ binary = read(args[0], 'binary');
+ }
+}
+
+// Create the wasm.
+const module = new WebAssembly.Module(binary);
+const instance = new WebAssembly.Instance(module, {});
+const exports = instance.exports;
+
+// Create the benchmarkers.
+function makeBenchmarker(name) {
+ return {
+ name: name,
+ func: exports[name],
+ time: 0,
+ sum: 0,
+ iters: 0,
+ };
+}
+
+const benchmarkers = [
+ makeBenchmarker('len'),
+ makeBenchmarker('and'),
+ makeBenchmarker('iff-both'),
+ makeBenchmarker('or'),
+ makeBenchmarker('iff-either'),
+ makeBenchmarker('select'),
+ makeBenchmarker('iff-nextor'),
+ makeBenchmarker('select-three'),
+ makeBenchmarker('iff-three'),
+];
+
+// We'll call the benchmark functions in random orders. Randomizing the order
+// prevents interactions between the benchmarks from biasing the results.
+//
+// An alternative to randomly ordering the benchmarks in each iteration would be
+// to fully benchmark one, then do the next, so there are large amounts of time
+// between them, but that also allows them to become more like microbenchmarks
+// where the branch predictor etc. might display very favorable behavior.
+// Interleaving them makes things slightly more realistic.
+//
+// If we had too many benchmarks then computing all the orders ahead of time
+// would not be feasible, but so long as we can, precomputing is faster than
+// generating a random order on the fly in each iteration (see the sketch
+// after this file's diff).
+function makeOrders(prefix) {
+ // Given a prefix of an order, like [] or [0, 3], return all the possible
+ // orders beginning with that prefix.
+
+ // We cannot repeat anything already seen.
+ const seen = new Set();
+ for (var x of prefix) {
+ seen.add(x);
+ }
+
+ // Starting from the prefix, extend it by one item in all valid ways.
+ const extensions = [];
+ for (var i = 0; i < benchmarkers.length; i++) {
+ if (!seen.has(i)) {
+ extensions.push(prefix.concat(i));
+ }
+ }
+
+ if (prefix.length == benchmarkers.length - 1) {
+ // The extensions are complete orders; stop the recursion.
+ return extensions;
+ }
+
+ // Recursively generate the full orders.
+ const ret = [];
+ for (var extension of extensions) {
+ for (var order of makeOrders(extension)) {
+ ret.push(order);
+ }
+ }
+ return ret;
+}
+
+const orders = makeOrders([]);
+
+// Params.
+const M = 10000000;
+const N = 100;
+
+console.log('iters :', M);
+console.log('list len :', N);
+console.log('benchmarkers:', benchmarkers.length);
+console.log('orderings :', orders.length);
+
+// Create a long linked list of objects of both type $A and $B.
+var list = null;
+for (var i = 0; i < N; i++) {
+ list = Math.random() < 0.5 ? exports.makeA(list) : exports.makeB(list);
+}
+
+console.log('benchmarking...');
+
+// Call the benchmark functions.
+
+for (var i = 0; i < M; i++) {
+ const order = orders[Math.floor(Math.random() * orders.length)];
+ for (var k = 0; k < benchmarkers.length; k++) {
+ const benchmarker = benchmarkers[order[k]];
+ const start = performance.now();
+ const result = benchmarker.func(list);
+ benchmarker.time += performance.now() - start;
+ benchmarker.sum += result;
+ benchmarker.iters++;
+ }
+}
+
+for (var benchmarker of benchmarkers) {
+ if (benchmarker.iters != M) {
+ throw 'wat';
+ }
+}
+
+console.log();
+for (var benchmarker of benchmarkers) {
+ console.log(`${benchmarker.name} time: \t${benchmarker.time}`)
+}
+console.log();
+for (var benchmarker of benchmarkers) {
+ console.log(`${benchmarker.name} mean sum: \t${benchmarker.sum / M}`)
+}
+
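
A note on the ordering scheme in bench.js above: with the 9 benchmarkers in this
script, makeOrders([]) yields 9! = 362,880 full permutations, which is still
cheap enough to precompute and sample from. If the benchmark count grew much
further, shuffling an index array per iteration (Fisher-Yates) would be the
usual on-the-fly alternative. A minimal sketch in JS, standalone and not part of
the diff:

    // Produce one random order of 0..n-1 per iteration instead of precomputing
    // every permutation up front (n would be benchmarkers.length above).
    function randomOrder(n) {
      const order = Array.from({length: n}, (_, i) => i);
      for (let i = n - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1)); // uniform in [0, i]
        [order[i], order[j]] = [order[j], order[i]];   // swap
      }
      return order;
    }
    // Usage: const order = randomOrder(benchmarkers.length);
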
diff --git a/scripts/benchmarking/bench.wat b/scripts/benchmarking/bench.wat
new file mode 100644
index 000000000..87d54b66b
--- /dev/null
+++ b/scripts/benchmarking/bench.wat
@@ -0,0 +1,283 @@
+;; See bench.js.
+
+(module
+ ;; A chain of three types. Each type has a "next" field, so we can form linked
+ ;; lists.
+ (type $A (sub (struct (field $next (ref null $A)))))
+ (type $B (sub $A (struct (field $next (ref null $A)))))
+ (type $C (sub $B (struct (field $next (ref null $A)))))
+
+ (type $func (func (param (ref $A)) (result i32)))
+
+ ;; Internal helper that iterates over a linked list, calls a function on each
+ ;; item, and returns the sum of those calls' results.
+ (func $iter (param $list (ref null $A)) (param $func (ref $func)) (result i32)
+ (local $sum i32)
+ (loop $loop
+ (if
+ (ref.is_null
+ (local.get $list)
+ )
+ (then
+ (return
+ (local.get $sum)
+ )
+ )
+ (else
+ (local.set $sum
+ (i32.add
+ (local.get $sum)
+ (call_ref $func
+ (ref.as_non_null
+ (local.get $list)
+ )
+ (local.get $func)
+ )
+ )
+ )
+ (local.set $list
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (br $loop)
+ )
+ )
+ )
+ )
+
+ ;; Using the helper, and depending on inlining to optimize this, lets us
+ ;; write the exports concisely. First, code to compute the length of the list
+ ;; (for comparison purposes).
+ (func $len (export "len") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ ;; Add one each time this is called.
+ (ref.func $one)
+ )
+ )
+ (func $one (param $list (ref $A)) (result i32)
+ (i32.const 1)
+ )
+
+ ;; At each point in the linked list, check whether both the current and the
+ ;; next item are $B, using an if to short-circuit when possible (a JS
+ ;; analogue of this comparison follows this file's diff).
+ (func $iff-both (export "iff-both") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-iff-both)
+ )
+ )
+ (func $do-iff-both (param $list (ref $A)) (result i32)
+ (if (result i32)
+ (ref.test (ref $B)
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (then
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ )
+ (else
+ (i32.const 0)
+ )
+ )
+ )
+
+ ;; The same computation, but using an and, so both tests always execute.
+ (func $and (export "and") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-and)
+ )
+ )
+ (func $do-and (param $list (ref $A)) (result i32)
+ (i32.and
+ (ref.test (ref $B)
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ )
+ )
+
+ ;; Similar, but return 1 if either test succeeds (using an if).
+ (func $iff-either (export "iff-either") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-iff-either)
+ )
+ )
+ (func $do-iff-either (param $list (ref $A)) (result i32)
+ (if (result i32)
+ (ref.test (ref $B)
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (then
+ (i32.const 1)
+ )
+ (else
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ )
+ )
+ )
+
+
+ ;; The same computation, but using an or, so both tests always execute.
+ (func $or (export "or") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-or)
+ )
+ )
+ (func $do-or (param $list (ref $A)) (result i32)
+ (i32.or
+ (ref.test (ref $B)
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ )
+ )
+
+ ;; Use a select to do a test of "is next null ? 0 : test curr".
+ (func $select (export "select") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-select)
+ )
+ )
+ (func $do-select (param $list (ref $A)) (result i32)
+ (select
+ (i32.const 0)
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ (ref.is_null
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ )
+ )
+
+ ;; Use an iff to do the same.
+ (func $iff-nextor (export "iff-nextor") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-iff-nextor)
+ )
+ )
+ (func $do-iff-nextor (param $list (ref $A)) (result i32)
+ (if (result i32)
+ (ref.is_null
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (then
+ (i32.const 0)
+ )
+ (else
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ )
+ )
+ )
+
+ ;; Use an if over three tests: "test whether next is B or C, depending on
+ ;; whether curr is B."
+ (func $iff-three (export "iff-three") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-iff-three)
+ )
+ )
+ (func $do-iff-three (param $list (ref $A)) (result i32)
+ (local $next (ref null $A))
+ (local.set $next
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (if (result i32)
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ (then
+ (ref.test (ref $C)
+ (local.get $next)
+ )
+ )
+ (else
+ (ref.test (ref $B)
+ (local.get $next)
+ )
+ )
+ )
+ )
+
+ ;; Use a select for the same.
+ (func $select-three (export "select-three") (param $list (ref $A)) (result i32)
+ (call $iter
+ (local.get $list)
+ (ref.func $do-select-three)
+ )
+ )
+ (func $do-select-three (param $list (ref $A)) (result i32)
+ (local $next (ref null $A))
+ (local.set $next
+ (struct.get $A $next
+ (local.get $list)
+ )
+ )
+ (select
+ (ref.test (ref $C)
+ (local.get $next)
+ )
+ (ref.test (ref $B)
+ (local.get $next)
+ )
+ (ref.test (ref $B)
+ (local.get $list)
+ )
+ )
+ )
+
+ ;; Creation functions.
+
+ (func $makeA (export "makeA") (param $next (ref null $A)) (result anyref)
+ (struct.new $A
+ (local.get $next)
+ )
+ )
+
+ (func $makeB (export "makeB") (param $next (ref null $A)) (result anyref)
+ (struct.new $B
+ (local.get $next)
+ )
+ )
+
+ (func $makeC (export "makeC") (param $next (ref null $A)) (result anyref)
+ ;; This function is not used in benchmarks yet, but it keeps the type $C
+ ;; alive, which prevents $B from looking final; if $B were final, the
+ ;; optimizer might simplify more than we want.
+ (struct.new $C
+ (local.get $next)
+ )
+ )
+)
+
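
For intuition about what bench.wat above compares: the iff-* functions
short-circuit (the second test runs only when needed), while the and/or/select
variants evaluate both tests unconditionally. A small JS analogue of the
iff-both versus and pair, with a stand-in testB in place of the wasm ref.test
(illustrative only):

    // Count how many tests execute under each shape.
    let calls = 0;
    const testB = (x) => { calls++; return x ? 1 : 0; };

    // Mirrors $do-iff-both: test next first, only test curr if that passed.
    const iffBoth = (curr, next) => (testB(next) ? testB(curr) : 0);
    // Mirrors $do-and: both tests always execute.
    const andBoth = (curr, next) => testB(next) & testB(curr);

    calls = 0; iffBoth(1, 0); console.log('iff, next fails:', calls); // 1
    calls = 0; andBoth(1, 0); console.log('and, next fails:', calls); // 2

The select variants likewise evaluate all of their operands, but avoid a
branch; that branch-versus-extra-work tradeoff is what the cost changes below
are tuning.
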
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 6622561b8..38c74a203 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -31,11 +31,17 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
CostType cost;
- // A cost that is extremely high, something that is far, far more expensive
- // than a fast operation like an add. This cost is so high it is unacceptable
- // to add any more of it, say by an If=>Select operation (which would execute
- // both arms; if either arm contains an unacceptable cost, we do not do it).
- static const CostType Unacceptable = 100;
+ // The cost of a "slow" atomic operation like RMW.
+ static const CostType AtomicCost = 10;
+
+ // The cost of throwing a wasm exception. This does not include the cost of
+ // catching it (which might happen in a function other than the one we are
+ // considering).
+ static const CostType ThrowCost = 10;
+
+ // The cost of a cast. This can vary depending on the engine and on the type,
+ // but usually requires some loads and comparisons.
+ static const CostType CastCost = 5;
CostType maybeVisit(Expression* curr) { return curr ? visit(curr) : 0; }
@@ -85,26 +91,27 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
CostType visitGlobalGet(GlobalGet* curr) { return 1; }
CostType visitGlobalSet(GlobalSet* curr) { return 2 + visit(curr->value); }
CostType visitLoad(Load* curr) {
- return 1 + visit(curr->ptr) + 10 * curr->isAtomic;
+ return 1 + visit(curr->ptr) + AtomicCost * curr->isAtomic;
}
CostType visitStore(Store* curr) {
- return 2 + visit(curr->ptr) + visit(curr->value) + 10 * curr->isAtomic;
+ return 2 + visit(curr->ptr) + visit(curr->value) +
+ AtomicCost * curr->isAtomic;
}
CostType visitAtomicRMW(AtomicRMW* curr) {
- return Unacceptable + visit(curr->ptr) + visit(curr->value);
+ return AtomicCost + visit(curr->ptr) + visit(curr->value);
}
CostType visitAtomicCmpxchg(AtomicCmpxchg* curr) {
- return Unacceptable + visit(curr->ptr) + visit(curr->expected) +
+ return AtomicCost + visit(curr->ptr) + visit(curr->expected) +
visit(curr->replacement);
}
CostType visitAtomicWait(AtomicWait* curr) {
- return Unacceptable + visit(curr->ptr) + visit(curr->expected) +
+ return AtomicCost + visit(curr->ptr) + visit(curr->expected) +
visit(curr->timeout);
}
CostType visitAtomicNotify(AtomicNotify* curr) {
- return Unacceptable + visit(curr->ptr) + visit(curr->notifyCount);
+ return AtomicCost + visit(curr->ptr) + visit(curr->notifyCount);
}
- CostType visitAtomicFence(AtomicFence* curr) { return Unacceptable; }
+ CostType visitAtomicFence(AtomicFence* curr) { return AtomicCost; }
CostType visitConst(Const* curr) { return 1; }
CostType visitUnary(Unary* curr) {
CostType ret = 0;
@@ -516,7 +523,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
CostType visitReturn(Return* curr) { return maybeVisit(curr->value); }
CostType visitMemorySize(MemorySize* curr) { return 1; }
CostType visitMemoryGrow(MemoryGrow* curr) {
- return Unacceptable + visit(curr->delta);
+ // TODO: This should perhaps be higher for shared memories.
+ return 20 + visit(curr->delta);
}
CostType visitMemoryInit(MemoryInit* curr) {
return 6 + visit(curr->dest) + visit(curr->offset) + visit(curr->size);
@@ -572,7 +580,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
}
CostType visitTableSize(TableSize* curr) { return 1; }
CostType visitTableGrow(TableGrow* curr) {
- return Unacceptable + visit(curr->value) + visit(curr->delta);
+ return 20 + visit(curr->value) + visit(curr->delta);
}
CostType visitTableFill(TableFill* curr) {
return 6 + visit(curr->dest) + visit(curr->value) + visit(curr->size);
@@ -589,14 +597,14 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
return visit(curr->body);
}
CostType visitThrow(Throw* curr) {
- CostType ret = Unacceptable;
+ CostType ret = ThrowCost;
for (auto* child : curr->operands) {
ret += visit(child);
}
return ret;
}
- CostType visitRethrow(Rethrow* curr) { return Unacceptable; }
- CostType visitThrowRef(ThrowRef* curr) { return Unacceptable; }
+ CostType visitRethrow(Rethrow* curr) { return ThrowCost; }
+ CostType visitThrowRef(ThrowRef* curr) { return ThrowCost; }
CostType visitTupleMake(TupleMake* curr) {
CostType ret = 0;
for (auto* child : curr->operands) {
@@ -612,19 +620,15 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
CostType visitRefI31(RefI31* curr) { return 3 + visit(curr->value); }
CostType visitI31Get(I31Get* curr) { return 2 + visit(curr->i31); }
CostType visitRefTest(RefTest* curr) {
- // Casts have a very high cost because in the VM they end up implemented as
- // a combination of loads and branches. Given they contain branches, we do
- // not want to add any more such work.
- return Unacceptable + nullCheckCost(curr->ref) + visit(curr->ref);
+ return CastCost + nullCheckCost(curr->ref) + visit(curr->ref);
}
CostType visitRefCast(RefCast* curr) {
- return Unacceptable + nullCheckCost(curr->ref) + visit(curr->ref);
+ return CastCost + nullCheckCost(curr->ref) + visit(curr->ref);
}
CostType visitBrOn(BrOn* curr) {
- // BrOn of a null can be fairly fast, but anything else is a cast check
- // basically, and an unacceptable cost.
+ // BrOn of a null can be fairly fast, but anything else is a cast check.
CostType base =
- curr->op == BrOnNull || curr->op == BrOnNonNull ? 2 : Unacceptable;
+ curr->op == BrOnNull || curr->op == BrOnNonNull ? 2 : CastCost;
return base + nullCheckCost(curr->ref) + maybeVisit(curr->ref);
}
CostType visitStructNew(StructNew* curr) {
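
To see how the new constants above combine, here is a small sketch of the
arithmetic in plain JS, mirroring two of the visit* formulas (an illustration
only, not the real CostAnalyzer):

    // Constants from this diff.
    const AtomicCost = 10, CastCost = 5;

    // visitLoad: 1 + visit(ptr) + AtomicCost * isAtomic
    const loadCost = (ptrCost, isAtomic) =>
      1 + ptrCost + AtomicCost * (isAtomic ? 1 : 0);
    // visitRefTest: CastCost + nullCheckCost(ref) + visit(ref)
    const refTestCost = (refCost, nullCheck) => CastCost + nullCheck + refCost;

    console.log(loadCost(0, false)); // 1: plain load of a zero-cost pointer
    console.log(loadCost(0, true));  // 11: the atomic version adds AtomicCost
    console.log(refTestCost(0, 0));  // 5: a cast of a non-nullable, zero-cost ref

Under this model a single ref.test costs 5 rather than the old Unacceptable
value of 100, which is what lets RemoveUnusedBrs below even consider running it
unconditionally.
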
diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp
index 893cf9039..fcb4ea56a 100644
--- a/src/passes/RemoveUnusedBrs.cpp
+++ b/src/passes/RemoveUnusedBrs.cpp
@@ -88,8 +88,14 @@ static bool canTurnIfIntoBrIf(Expression* ifCondition,
// * https://github.com/WebAssembly/binaryen/issues/5983
const Index TooCostlyToRunUnconditionally = 8;
-static_assert(TooCostlyToRunUnconditionally < CostAnalyzer::Unacceptable,
- "We never run code unconditionally if it has unacceptable cost");
+// Some costs are known to be too high to move from conditional to unconditional
+// execution.
+static_assert(CostAnalyzer::AtomicCost >= TooCostlyToRunUnconditionally,
+ "We never run atomics unconditionally");
+static_assert(CostAnalyzer::ThrowCost >= TooCostlyToRunUnconditionally,
+ "We never run throws unconditionally");
+static_assert(CostAnalyzer::CastCost > TooCostlyToRunUnconditionally / 2,
+ "We only run casts unconditionally when optimizing for size");
static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions,
Index cost) {
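
The static_asserts above pin down the relationships the new constants must
satisfy: atomics and throws (cost 10) are at or above the threshold of 8, so
they are never made unconditional, while a cast (cost 5) is above half the
threshold, so it only runs unconditionally when optimizing for size. A
standalone check of the inequalities (not the pass's actual logic):

    const TooCostlyToRunUnconditionally = 8;
    const AtomicCost = 10, ThrowCost = 10, CastCost = 5;

    console.log(AtomicCost >= TooCostlyToRunUnconditionally);  // true
    console.log(ThrowCost >= TooCostlyToRunUnconditionally);   // true
    console.log(CastCost > TooCostlyToRunUnconditionally / 2); // true
    console.log(CastCost < TooCostlyToRunUnconditionally);     // true: a lone cast
    // still fits under the full threshold, matching the shrink-level behavior
    // in the updated test below.
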
diff --git a/test/lit/passes/remove-unused-brs_levels.wast b/test/lit/passes/remove-unused-brs_levels.wast
index 927cce867..6e521aa16 100644
--- a/test/lit/passes/remove-unused-brs_levels.wast
+++ b/test/lit/passes/remove-unused-brs_levels.wast
@@ -3,9 +3,17 @@
;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=1 -o - | filecheck %s --check-prefix=SHRINK_1
;; RUN: wasm-opt %s --remove-unused-brs -all -S --shrink-level=2 -o - | filecheck %s --check-prefix=SHRINK_2
-
(module
- ;; SHRINK_0: (func $selectify-division (type $0) (param $x i32) (result i32)
+ ;; SHRINK_0: (type $struct (sub (struct)))
+ ;; SHRINK_1: (type $struct (sub (struct)))
+ ;; SHRINK_2: (type $struct (sub (struct)))
+ (type $struct (sub (struct)))
+ ;; SHRINK_0: (type $substruct (sub $struct (struct)))
+ ;; SHRINK_1: (type $substruct (sub $struct (struct)))
+ ;; SHRINK_2: (type $substruct (sub $struct (struct)))
+ (type $substruct (sub $struct (struct)))
+
+ ;; SHRINK_0: (func $selectify-division (type $2) (param $x i32) (result i32)
;; SHRINK_0-NEXT: (if (result i32)
;; SHRINK_0-NEXT: (i32.eq
;; SHRINK_0-NEXT: (local.get $x)
@@ -22,7 +30,7 @@
;; SHRINK_0-NEXT: )
;; SHRINK_0-NEXT: )
;; SHRINK_0-NEXT: )
- ;; SHRINK_1: (func $selectify-division (type $0) (param $x i32) (result i32)
+ ;; SHRINK_1: (func $selectify-division (type $2) (param $x i32) (result i32)
;; SHRINK_1-NEXT: (select
;; SHRINK_1-NEXT: (i32.div_s
;; SHRINK_1-NEXT: (local.get $x)
@@ -35,7 +43,7 @@
;; SHRINK_1-NEXT: )
;; SHRINK_1-NEXT: )
;; SHRINK_1-NEXT: )
- ;; SHRINK_2: (func $selectify-division (type $0) (param $x i32) (result i32)
+ ;; SHRINK_2: (func $selectify-division (type $2) (param $x i32) (result i32)
;; SHRINK_2-NEXT: (select
;; SHRINK_2-NEXT: (i32.div_s
;; SHRINK_2-NEXT: (local.get $x)
@@ -68,7 +76,7 @@
)
)
- ;; SHRINK_0: (func $selectify-division2 (type $0) (param $x i32) (result i32)
+ ;; SHRINK_0: (func $selectify-division2 (type $2) (param $x i32) (result i32)
;; SHRINK_0-NEXT: (if (result i32)
;; SHRINK_0-NEXT: (i32.eq
;; SHRINK_0-NEXT: (local.get $x)
@@ -88,7 +96,7 @@
;; SHRINK_0-NEXT: )
;; SHRINK_0-NEXT: )
;; SHRINK_0-NEXT: )
- ;; SHRINK_1: (func $selectify-division2 (type $0) (param $x i32) (result i32)
+ ;; SHRINK_1: (func $selectify-division2 (type $2) (param $x i32) (result i32)
;; SHRINK_1-NEXT: (if (result i32)
;; SHRINK_1-NEXT: (i32.eq
;; SHRINK_1-NEXT: (local.get $x)
@@ -108,7 +116,7 @@
;; SHRINK_1-NEXT: )
;; SHRINK_1-NEXT: )
;; SHRINK_1-NEXT: )
- ;; SHRINK_2: (func $selectify-division2 (type $0) (param $x i32) (result i32)
+ ;; SHRINK_2: (func $selectify-division2 (type $2) (param $x i32) (result i32)
;; SHRINK_2-NEXT: (select
;; SHRINK_2-NEXT: (i32.div_s
;; SHRINK_2-NEXT: (i32.div_s
@@ -146,4 +154,125 @@
)
)
)
+
+ ;; SHRINK_0: (func $if-tests (type $3) (param $x (ref $struct)) (param $y (ref $struct)) (result i32)
+ ;; SHRINK_0-NEXT: (if (result i32)
+ ;; SHRINK_0-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (then
+ ;; SHRINK_0-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_0-NEXT: (local.get $y)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (else
+ ;; SHRINK_0-NEXT: (i32.const 0)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_1: (func $if-tests (type $3) (param $x (ref $struct)) (param $y (ref $struct)) (result i32)
+ ;; SHRINK_1-NEXT: (select
+ ;; SHRINK_1-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_1-NEXT: (local.get $y)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (i32.const 0)
+ ;; SHRINK_1-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_2: (func $if-tests (type $3) (param $x (ref $struct)) (param $y (ref $struct)) (result i32)
+ ;; SHRINK_2-NEXT: (select
+ ;; SHRINK_2-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_2-NEXT: (local.get $y)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: (i32.const 0)
+ ;; SHRINK_2-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ (func $if-tests (param $x (ref $struct)) (param $y (ref $struct)) (result i32)
+ ;; An If with a ref.test in one arm (and another in the condition). We do
+ ;; not normally turn this into a Select, as a Select executes both arms and
+ ;; running the test unconditionally is too costly. But we do so when
+ ;; optimizing for size.
+ ;;
+ ;; TODO: Consider optimizing this because the other arm is a 0, which means
+ ;; the Select can turn into an And later.
+ (if (result i32)
+ (ref.test (ref $substruct)
+ (local.get $x)
+ )
+ (then
+ (ref.test (ref $substruct)
+ (local.get $y)
+ )
+ )
+ (else
+ (i32.const 0)
+ )
+ )
+ )
+
+ ;; SHRINK_0: (func $if-tests-arms (type $4) (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32)
+ ;; SHRINK_0-NEXT: (if (result i32)
+ ;; SHRINK_0-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_0-NEXT: (local.get $x)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (then
+ ;; SHRINK_0-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_0-NEXT: (local.get $y)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: (else
+ ;; SHRINK_0-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_0-NEXT: (local.get $z)
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_0-NEXT: )
+ ;; SHRINK_1: (func $if-tests-arms (type $4) (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32)
+ ;; SHRINK_1-NEXT: (select
+ ;; SHRINK_1-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_1-NEXT: (local.get $y)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_1-NEXT: (local.get $z)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_1-NEXT: (local.get $x)
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_1-NEXT: )
+ ;; SHRINK_2: (func $if-tests-arms (type $4) (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32)
+ ;; SHRINK_2-NEXT: (select
+ ;; SHRINK_2-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_2-NEXT: (local.get $y)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_2-NEXT: (local.get $z)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: (ref.test (ref $substruct)
+ ;; SHRINK_2-NEXT: (local.get $x)
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ ;; SHRINK_2-NEXT: )
+ (func $if-tests-arms (param $x (ref $struct)) (param $y (ref $struct)) (param $z (ref $struct)) (result i32)
+ ;; As above but now with a test in both arms. The outcomes are the same.
+ (if (result i32)
+ (ref.test (ref $substruct)
+ (local.get $x)
+ )
+ (then
+ (ref.test (ref $substruct)
+ (local.get $y)
+ )
+ )
+ (else
+ (ref.test (ref $substruct)
+ (local.get $z)
+ )
+ )
+ )
+ )
)
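
Tying the numbers back to $if-tests and $if-tests-arms above: a Select executes
both arms, so the extra work it adds over the If is the cost of whichever arm
the If would have skipped, which here is at most one ref.test (about 5 under
the constants in this diff). A back-of-the-envelope sketch of that reading, not
the pass's code:

    // Arm costs for the two new test functions, using CastCost = 5.
    const CastCost = 5;
    const ifTests     = { then: CastCost, else: 1 };        // ref.test vs i32.const
    const ifTestsArms = { then: CastCost, else: CastCost }; // ref.test in both arms

    // The extra cost of a select over an if is the arm that would have been
    // skipped; the worst case is the more expensive arm.
    for (const [name, arms] of Object.entries({ ifTests, ifTestsArms })) {
      console.log(name, 'worst-case extra cost:', Math.max(arms.then, arms.else));
    }

The lit output above shows this extra cost being accepted only at shrink levels
1 and 2, not at the default shrink level 0.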