diff options
-rw-r--r-- | src/ir/properties.cpp | 16 | ||||
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 80 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions-mvp.wast | 331 |
3 files changed, 392 insertions, 35 deletions
diff --git a/src/ir/properties.cpp b/src/ir/properties.cpp index 63b7cedd1..05dae897e 100644 --- a/src/ir/properties.cpp +++ b/src/ir/properties.cpp @@ -20,14 +20,18 @@ namespace wasm::Properties { bool isGenerative(Expression* curr, FeatureSet features) { - // Practically no wasm instructions are generative. Exceptions occur only in - // GC atm. - if (!features.hasGC()) { - return false; - } - struct Scanner : public PostWalker<Scanner> { bool generative = false; + + void visitCall(Call* curr) { + // TODO: We could in principle look at the called function to see if it is + // generative. To do that we'd need to compute generativity like we + // compute global effects (we can't just peek from here, as the + // other function might be modified in parallel). + generative = true; + } + void visitCallIndirect(CallIndirect* curr) { generative = true; } + void visitCallRef(CallRef* curr) { generative = true; } void visitStructNew(StructNew* curr) { generative = true; } void visitArrayNew(ArrayNew* curr) { generative = true; } void visitArrayNewFixed(ArrayNewFixed* curr) { generative = true; } diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 28eaf0fba..7c14c4172 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -2294,14 +2294,57 @@ private: // Information about our locals std::vector<LocalInfo> localInfo; + // Checks if the first is a local.tee and the second a local.get of that same + // index. This is useful in the methods right below us, as it is a common + // pattern where two consecutive inputs are equal despite being syntactically + // different. + bool areMatchingTeeAndGet(Expression* left, Expression* right) { + if (auto* set = left->dynCast<LocalSet>()) { + if (auto* get = right->dynCast<LocalGet>()) { + if (set->isTee() && get->index == set->index) { + return true; + } + } + } + return false; + } + // Check if two consecutive inputs to an instruction are equal. As they are // consecutive, no code can execeute in between them, which simplies the // problem here (and which is the case we care about in this pass, which does // simple peephole optimizations - all we care about is a single instruction // at a time, and its inputs). - // - // This also checks that the inputs are removable (but we do not assume the - // caller will always remove them). + bool areConsecutiveInputsEqual(Expression* left, Expression* right) { + // When we look for a tee/get pair, we can consider the fallthrough values + // for the first, as the fallthrough happens last (however, we must use + // NoTeeBrIf as we do not want to look through the tee). We cannot do this + // on the second, however, as there could be effects in the middle. + // TODO: Use effects here perhaps. + auto& passOptions = getPassOptions(); + left = + Properties::getFallthrough(left, + passOptions, + *getModule(), + Properties::FallthroughBehavior::NoTeeBrIf); + if (areMatchingTeeAndGet(left, right)) { + return true; + } + + // Ignore extraneous things and compare them syntactically. We can also + // look at the full fallthrough for both sides now. + left = Properties::getFallthrough(left, passOptions, *getModule()); + right = Properties::getFallthrough(right, passOptions, *getModule()); + if (!ExpressionAnalyzer::equal(left, right)) { + return false; + } + + // To be equal, they must also be known to return the same result + // deterministically. + return !Properties::isGenerative(left, getModule()->features); + } + + // Similar to areConsecutiveInputsEqual() but also checks if we can remove + // them (but we do not assume the caller will always remove them). bool areConsecutiveInputsEqualAndRemovable(Expression* left, Expression* right) { // First, check for side effects. If there are any, then we can't even @@ -2316,18 +2359,7 @@ private: return false; } - // Ignore extraneous things and compare them structurally. - left = Properties::getFallthrough(left, passOptions, *getModule()); - right = Properties::getFallthrough(right, passOptions, *getModule()); - if (!ExpressionAnalyzer::equal(left, right)) { - return false; - } - // To be equal, they must also be known to return the same result - // deterministically. - if (Properties::isGenerative(left, getModule()->features)) { - return false; - } - return true; + return areConsecutiveInputsEqual(left, right); } // Check if two consecutive inputs to an instruction are equal and can also be @@ -2341,25 +2373,17 @@ private: // side effects at all in the middle. For example, a Const in between is ok. bool areConsecutiveInputsEqualAndFoldable(Expression* left, Expression* right) { - if (auto* set = left->dynCast<LocalSet>()) { - if (auto* get = right->dynCast<LocalGet>()) { - if (set->isTee() && get->index == set->index) { - return true; - } - } + // TODO: We could probably consider fallthrough values for left, at least + // (since we fold into it). + if (areMatchingTeeAndGet(left, right)) { + return true; } + // stronger property than we need - we can not only fold // them but remove them entirely. return areConsecutiveInputsEqualAndRemovable(left, right); } - // Similar to areConsecutiveInputsEqualAndFoldable, but only checks that they - // are equal (and not that they are foldable). - bool areConsecutiveInputsEqual(Expression* left, Expression* right) { - // TODO: optimize cases that must be equal but are *not* foldable. - return areConsecutiveInputsEqualAndFoldable(left, right); - } - // Canonicalizing the order of a symmetric binary helps us // write more concise pattern matching code elsewhere. void canonicalize(Binary* binary) { diff --git a/test/lit/passes/optimize-instructions-mvp.wast b/test/lit/passes/optimize-instructions-mvp.wast index 1575e8d6b..b618cf200 100644 --- a/test/lit/passes/optimize-instructions-mvp.wast +++ b/test/lit/passes/optimize-instructions-mvp.wast @@ -10,6 +10,9 @@ ;; CHECK: (import "a" "b" (func $get-f64 (result f64))) (import "a" "b" (func $get-f64 (result f64))) + ;; CHECK: (import "a" "c" (func $set-i32 (param i32))) + (import "a" "c" (func $set-i32 (param i32))) + (memory 0) ;; CHECK: (func $and-and (param $i1 i32) (result i32) @@ -14711,7 +14714,9 @@ (local.get $y0) ) )) - ;; this one cannot be optimized as the runtime values may differ + ;; This one cannot be optimized as the runtime values may differ: the calls + ;; are "generative" in that identical syntactic calls may emit different + ;; results. (drop (f64.abs (f64.mul (call $get-f64) @@ -14746,6 +14751,248 @@ (f64.abs (f64.add (local.get $x0) (local.get $x0))) )) ) + + ;; CHECK: (func $optimize-float-points-fallthrough (param $x f64) (param $xb f64) (param $y f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.mul + ;; CHECK-NEXT: (block (result f32) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result f32) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $optimize-float-points-fallthrough (param $x f64) (param $xb f64) (param $y f32) + ;; abs(x * x) ==> x * x , as in the previous function. + ;; + ;; The fallthrough values here are identical, so we can optimize away the + ;; f32.abs despite the effects in both (and even different-looking effects). + (drop (f32.abs + (f32.mul + (block (result f32) + (call $set-i32 + (i32.const 42) + ) + (local.get $y) + ) + (block (result f32) + (call $set-i32 + (i32.const 1337) + ) + (local.get $y) + ) + ) + )) + ) + ;; CHECK: (func $optimize-float-points-fallthrough-b (param $x f64) (param $xb f64) (param $y f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.abs + ;; CHECK-NEXT: (f64.mul + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $get-f64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $get-f64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $optimize-float-points-fallthrough-b (param $x f64) (param $xb f64) (param $y f32) + ;; But generative effects in the fallthrough values themselves block us. + (drop (f64.abs + (f64.mul + (block (result f64) + (call $set-i32 + (i32.const 42) + ) + (call $get-f64) ;; this changed + ) + (block (result f64) + (call $set-i32 + (i32.const 1337) + ) + (call $get-f64) ;; this changed + ) + ) + )) + ) + ;; CHECK: (func $optimize-float-points-fallthrough-c (param $x f64) (param $xb f64) (param $y f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.abs + ;; CHECK-NEXT: (f64.mul + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (f64.const 12.34) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $optimize-float-points-fallthrough-c (param $x f64) (param $xb f64) (param $y f32) + ;; local.tee/get pairs are ok, but atm we don't look at the fallthrough of + ;; the right side (we'd need to consider effects). TODO + (drop (f64.abs + (f64.mul + (block (result f64) + (call $set-i32 + (i32.const 42) + ) + (local.tee $x ;; this changed + (f64.const 12.34) + ) + ) + (block (result f64) + (call $set-i32 + (i32.const 1337) + ) + (local.get $x) ;; this changed + ) + ) + )) + ) + ;; CHECK: (func $optimize-float-points-fallthrough-cb (param $x f64) (param $xb f64) (param $y f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.abs + ;; CHECK-NEXT: (f64.mul + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (f64.const 12.34) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (f64.const 13.37) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $optimize-float-points-fallthrough-cb (param $x f64) (param $xb f64) (param $y f32) + ;; A conflicting set in the middle is a problem: here we cannot optimize. + (drop (f64.abs + (f64.mul + (block (result f64) + (call $set-i32 + (i32.const 42) + ) + (local.tee $x + (f64.const 12.34) + ) + ) + (block (result f64) + (local.set $x ;; this changed + (f64.const 13.37) + ) + (local.get $x) + ) + ) + )) + ) + ;; CHECK: (func $optimize-float-points-fallthrough-cc (param $x f64) (param $xb f64) (param $y f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.mul + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (f64.const 12.34) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $optimize-float-points-fallthrough-cc (param $x f64) (param $xb f64) (param $y f32) + ;; Removing the local.set and the block on the right lets us optimize using + ;; the tee/get pair. + (drop (f64.abs + (f64.mul + (block (result f64) + (call $set-i32 + (i32.const 42) + ) + (local.tee $x + (f64.const 12.34) + ) + ) + (local.get $x) ;; this moved out + ) + )) + ) + ;; CHECK: (func $optimize-float-points-fallthrough-d (param $x f64) (param $xb f64) (param $y f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.abs + ;; CHECK-NEXT: (f64.mul + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (f64.const 12.34) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (call $set-i32 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $xb) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $optimize-float-points-fallthrough-d (param $x f64) (param $xb f64) (param $y f32) + ;; The wrong local index means we fail again. + (drop (f64.abs + (f64.mul + (block (result f64) + (call $set-i32 + (i32.const 42) + ) + (local.tee $x + (f64.const 12.34) + ) + ) + (block (result f64) + (call $set-i32 + (i32.const 1337) + ) + (local.get $xb) ;; this changed + ) + ) + )) + ) ;; CHECK: (func $ternary (param $x i32) (param $y i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.eqz @@ -15113,6 +15360,88 @@ ) ) ) + ;; CHECK: (func $ternary-identical-arms-tee (param $param i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (call $send-i32 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (call $send-i32 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $ternary-identical-arms-tee (param $param i32) + (local $x i32) + ;; The select's ifTrue and condition are equal (as a tee/get pair with + ;; only a const in between), but there is a side effect too, that prevents + ;; optimization atm TODO + (drop + (select + (local.tee $x + (local.get $param) + ) + (i32.const 0) + (block (result i32) + (call $send-i32 + (i32.const 42) + ) + (local.get $x) + ) + ) + ) + ;; Side effect on the ifTrue - same outcome, we cannot optimize yet. + (drop + (select + (block (result i32) + (call $send-i32 + (i32.const 1337) + ) + (local.tee $x + (local.get $param) + ) + ) + (i32.const 0) + (local.get $x) + ) + ) + ;; When there are no blocks or things and just a local.tee/get, we can + ;; optimize. + (drop + (select + (local.tee $x + (local.get $param) + ) + (i32.const 0) + (local.get $x) + ) + ) + ) ;; CHECK: (func $ternary-identical-arms-and-type-is-none (param $x i32) (param $y i32) (param $z i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.eqz |