summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ir/properties.cpp16
-rw-r--r--src/passes/OptimizeInstructions.cpp80
-rw-r--r--test/lit/passes/optimize-instructions-mvp.wast331
3 files changed, 392 insertions, 35 deletions
diff --git a/src/ir/properties.cpp b/src/ir/properties.cpp
index 63b7cedd1..05dae897e 100644
--- a/src/ir/properties.cpp
+++ b/src/ir/properties.cpp
@@ -20,14 +20,18 @@
namespace wasm::Properties {
bool isGenerative(Expression* curr, FeatureSet features) {
- // Practically no wasm instructions are generative. Exceptions occur only in
- // GC atm.
- if (!features.hasGC()) {
- return false;
- }
-
struct Scanner : public PostWalker<Scanner> {
bool generative = false;
+
+ void visitCall(Call* curr) {
+ // TODO: We could in principle look at the called function to see if it is
+ // generative. To do that we'd need to compute generativity like we
+ // compute global effects (we can't just peek from here, as the
+ // other function might be modified in parallel).
+ generative = true;
+ }
+ void visitCallIndirect(CallIndirect* curr) { generative = true; }
+ void visitCallRef(CallRef* curr) { generative = true; }
void visitStructNew(StructNew* curr) { generative = true; }
void visitArrayNew(ArrayNew* curr) { generative = true; }
void visitArrayNewFixed(ArrayNewFixed* curr) { generative = true; }
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 28eaf0fba..7c14c4172 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -2294,14 +2294,57 @@ private:
// Information about our locals
std::vector<LocalInfo> localInfo;
+ // Checks if the first is a local.tee and the second a local.get of that same
+ // index. This is useful in the methods right below us, as it is a common
+ // pattern where two consecutive inputs are equal despite being syntactically
+ // different.
+ bool areMatchingTeeAndGet(Expression* left, Expression* right) {
+ if (auto* set = left->dynCast<LocalSet>()) {
+ if (auto* get = right->dynCast<LocalGet>()) {
+ if (set->isTee() && get->index == set->index) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
// Check if two consecutive inputs to an instruction are equal. As they are
// consecutive, no code can execeute in between them, which simplies the
// problem here (and which is the case we care about in this pass, which does
// simple peephole optimizations - all we care about is a single instruction
// at a time, and its inputs).
- //
- // This also checks that the inputs are removable (but we do not assume the
- // caller will always remove them).
+ bool areConsecutiveInputsEqual(Expression* left, Expression* right) {
+ // When we look for a tee/get pair, we can consider the fallthrough values
+ // for the first, as the fallthrough happens last (however, we must use
+ // NoTeeBrIf as we do not want to look through the tee). We cannot do this
+ // on the second, however, as there could be effects in the middle.
+ // TODO: Use effects here perhaps.
+ auto& passOptions = getPassOptions();
+ left =
+ Properties::getFallthrough(left,
+ passOptions,
+ *getModule(),
+ Properties::FallthroughBehavior::NoTeeBrIf);
+ if (areMatchingTeeAndGet(left, right)) {
+ return true;
+ }
+
+ // Ignore extraneous things and compare them syntactically. We can also
+ // look at the full fallthrough for both sides now.
+ left = Properties::getFallthrough(left, passOptions, *getModule());
+ right = Properties::getFallthrough(right, passOptions, *getModule());
+ if (!ExpressionAnalyzer::equal(left, right)) {
+ return false;
+ }
+
+ // To be equal, they must also be known to return the same result
+ // deterministically.
+ return !Properties::isGenerative(left, getModule()->features);
+ }
+
+ // Similar to areConsecutiveInputsEqual() but also checks if we can remove
+ // them (but we do not assume the caller will always remove them).
bool areConsecutiveInputsEqualAndRemovable(Expression* left,
Expression* right) {
// First, check for side effects. If there are any, then we can't even
@@ -2316,18 +2359,7 @@ private:
return false;
}
- // Ignore extraneous things and compare them structurally.
- left = Properties::getFallthrough(left, passOptions, *getModule());
- right = Properties::getFallthrough(right, passOptions, *getModule());
- if (!ExpressionAnalyzer::equal(left, right)) {
- return false;
- }
- // To be equal, they must also be known to return the same result
- // deterministically.
- if (Properties::isGenerative(left, getModule()->features)) {
- return false;
- }
- return true;
+ return areConsecutiveInputsEqual(left, right);
}
// Check if two consecutive inputs to an instruction are equal and can also be
@@ -2341,25 +2373,17 @@ private:
// side effects at all in the middle. For example, a Const in between is ok.
bool areConsecutiveInputsEqualAndFoldable(Expression* left,
Expression* right) {
- if (auto* set = left->dynCast<LocalSet>()) {
- if (auto* get = right->dynCast<LocalGet>()) {
- if (set->isTee() && get->index == set->index) {
- return true;
- }
- }
+ // TODO: We could probably consider fallthrough values for left, at least
+ // (since we fold into it).
+ if (areMatchingTeeAndGet(left, right)) {
+ return true;
}
+
// stronger property than we need - we can not only fold
// them but remove them entirely.
return areConsecutiveInputsEqualAndRemovable(left, right);
}
- // Similar to areConsecutiveInputsEqualAndFoldable, but only checks that they
- // are equal (and not that they are foldable).
- bool areConsecutiveInputsEqual(Expression* left, Expression* right) {
- // TODO: optimize cases that must be equal but are *not* foldable.
- return areConsecutiveInputsEqualAndFoldable(left, right);
- }
-
// Canonicalizing the order of a symmetric binary helps us
// write more concise pattern matching code elsewhere.
void canonicalize(Binary* binary) {
diff --git a/test/lit/passes/optimize-instructions-mvp.wast b/test/lit/passes/optimize-instructions-mvp.wast
index 1575e8d6b..b618cf200 100644
--- a/test/lit/passes/optimize-instructions-mvp.wast
+++ b/test/lit/passes/optimize-instructions-mvp.wast
@@ -10,6 +10,9 @@
;; CHECK: (import "a" "b" (func $get-f64 (result f64)))
(import "a" "b" (func $get-f64 (result f64)))
+ ;; CHECK: (import "a" "c" (func $set-i32 (param i32)))
+ (import "a" "c" (func $set-i32 (param i32)))
+
(memory 0)
;; CHECK: (func $and-and (param $i1 i32) (result i32)
@@ -14711,7 +14714,9 @@
(local.get $y0)
)
))
- ;; this one cannot be optimized as the runtime values may differ
+ ;; This one cannot be optimized as the runtime values may differ: the calls
+ ;; are "generative" in that identical syntactic calls may emit different
+ ;; results.
(drop (f64.abs
(f64.mul
(call $get-f64)
@@ -14746,6 +14751,248 @@
(f64.abs (f64.add (local.get $x0) (local.get $x0)))
))
)
+
+ ;; CHECK: (func $optimize-float-points-fallthrough (param $x f64) (param $xb f64) (param $y f32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (f32.mul
+ ;; CHECK-NEXT: (block (result f32)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result f32)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $optimize-float-points-fallthrough (param $x f64) (param $xb f64) (param $y f32)
+ ;; abs(x * x) ==> x * x , as in the previous function.
+ ;;
+ ;; The fallthrough values here are identical, so we can optimize away the
+ ;; f32.abs despite the effects in both (and even different-looking effects).
+ (drop (f32.abs
+ (f32.mul
+ (block (result f32)
+ (call $set-i32
+ (i32.const 42)
+ )
+ (local.get $y)
+ )
+ (block (result f32)
+ (call $set-i32
+ (i32.const 1337)
+ )
+ (local.get $y)
+ )
+ )
+ ))
+ )
+ ;; CHECK: (func $optimize-float-points-fallthrough-b (param $x f64) (param $xb f64) (param $y f32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (f64.abs
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (call $get-f64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (call $get-f64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $optimize-float-points-fallthrough-b (param $x f64) (param $xb f64) (param $y f32)
+ ;; But generative effects in the fallthrough values themselves block us.
+ (drop (f64.abs
+ (f64.mul
+ (block (result f64)
+ (call $set-i32
+ (i32.const 42)
+ )
+ (call $get-f64) ;; this changed
+ )
+ (block (result f64)
+ (call $set-i32
+ (i32.const 1337)
+ )
+ (call $get-f64) ;; this changed
+ )
+ )
+ ))
+ )
+ ;; CHECK: (func $optimize-float-points-fallthrough-c (param $x f64) (param $xb f64) (param $y f32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (f64.abs
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (f64.const 12.34)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $optimize-float-points-fallthrough-c (param $x f64) (param $xb f64) (param $y f32)
+ ;; local.tee/get pairs are ok, but atm we don't look at the fallthrough of
+ ;; the right side (we'd need to consider effects). TODO
+ (drop (f64.abs
+ (f64.mul
+ (block (result f64)
+ (call $set-i32
+ (i32.const 42)
+ )
+ (local.tee $x ;; this changed
+ (f64.const 12.34)
+ )
+ )
+ (block (result f64)
+ (call $set-i32
+ (i32.const 1337)
+ )
+ (local.get $x) ;; this changed
+ )
+ )
+ ))
+ )
+ ;; CHECK: (func $optimize-float-points-fallthrough-cb (param $x f64) (param $xb f64) (param $y f32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (f64.abs
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (f64.const 12.34)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (f64.const 13.37)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $optimize-float-points-fallthrough-cb (param $x f64) (param $xb f64) (param $y f32)
+ ;; A conflicting set in the middle is a problem: here we cannot optimize.
+ (drop (f64.abs
+ (f64.mul
+ (block (result f64)
+ (call $set-i32
+ (i32.const 42)
+ )
+ (local.tee $x
+ (f64.const 12.34)
+ )
+ )
+ (block (result f64)
+ (local.set $x ;; this changed
+ (f64.const 13.37)
+ )
+ (local.get $x)
+ )
+ )
+ ))
+ )
+ ;; CHECK: (func $optimize-float-points-fallthrough-cc (param $x f64) (param $xb f64) (param $y f32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (f64.const 12.34)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $optimize-float-points-fallthrough-cc (param $x f64) (param $xb f64) (param $y f32)
+ ;; Removing the local.set and the block on the right lets us optimize using
+ ;; the tee/get pair.
+ (drop (f64.abs
+ (f64.mul
+ (block (result f64)
+ (call $set-i32
+ (i32.const 42)
+ )
+ (local.tee $x
+ (f64.const 12.34)
+ )
+ )
+ (local.get $x) ;; this moved out
+ )
+ ))
+ )
+ ;; CHECK: (func $optimize-float-points-fallthrough-d (param $x f64) (param $xb f64) (param $y f32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (f64.abs
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (f64.const 12.34)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (block (result f64)
+ ;; CHECK-NEXT: (call $set-i32
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $xb)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $optimize-float-points-fallthrough-d (param $x f64) (param $xb f64) (param $y f32)
+ ;; The wrong local index means we fail again.
+ (drop (f64.abs
+ (f64.mul
+ (block (result f64)
+ (call $set-i32
+ (i32.const 42)
+ )
+ (local.tee $x
+ (f64.const 12.34)
+ )
+ )
+ (block (result f64)
+ (call $set-i32
+ (i32.const 1337)
+ )
+ (local.get $xb) ;; this changed
+ )
+ )
+ ))
+ )
;; CHECK: (func $ternary (param $x i32) (param $y i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (i32.eqz
@@ -15113,6 +15360,88 @@
)
)
)
+ ;; CHECK: (func $ternary-identical-arms-tee (param $param i32)
+ ;; CHECK-NEXT: (local $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (call $send-i32
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (block (result i32)
+ ;; CHECK-NEXT: (call $send-i32
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $x
+ ;; CHECK-NEXT: (local.get $param)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $ternary-identical-arms-tee (param $param i32)
+ (local $x i32)
+ ;; The select's ifTrue and condition are equal (as a tee/get pair with
+ ;; only a const in between), but there is a side effect too, that prevents
+ ;; optimization atm TODO
+ (drop
+ (select
+ (local.tee $x
+ (local.get $param)
+ )
+ (i32.const 0)
+ (block (result i32)
+ (call $send-i32
+ (i32.const 42)
+ )
+ (local.get $x)
+ )
+ )
+ )
+ ;; Side effect on the ifTrue - same outcome, we cannot optimize yet.
+ (drop
+ (select
+ (block (result i32)
+ (call $send-i32
+ (i32.const 1337)
+ )
+ (local.tee $x
+ (local.get $param)
+ )
+ )
+ (i32.const 0)
+ (local.get $x)
+ )
+ )
+ ;; When there are no blocks or things and just a local.tee/get, we can
+ ;; optimize.
+ (drop
+ (select
+ (local.tee $x
+ (local.get $param)
+ )
+ (i32.const 0)
+ (local.get $x)
+ )
+ )
+ )
;; CHECK: (func $ternary-identical-arms-and-type-is-none (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (i32.eqz