diff options
author | Alon Zakai <azakai@google.com> | 2022-11-01 16:20:06 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-01 16:20:06 -0700 |
commit | a030cc7f69a00dc0e699b8b580224126650c738b (patch) | |
tree | 6e3bee6e6cdf0aca66ecf26406ef66760ba908a6 | |
parent | 0db7b571b23a09f96810c9d65d5f88d8eae4ba52 (diff) | |
download | binaryen-a030cc7f69a00dc0e699b8b580224126650c738b.tar.gz binaryen-a030cc7f69a00dc0e699b8b580224126650c738b.tar.bz2 binaryen-a030cc7f69a00dc0e699b8b580224126650c738b.zip |
[Wasm GC] SimplifyLocals: Switch local.get to use a more refined type when possible (#5194)
(local.set $refined (cast (local.get $plain)))
..
.. (local.get $plain) .. ;; we can change this to read from $refined
By using the more refined type we may be able to eliminate casts later.
To do this, look at the fallthrough value (so we can look through a cast or a block
value - this is the reason for the small wasm2js improvements in tests), and also
extend the code that picks which local index to read to look at types (previously
we just ignored any pairs of locals with different types).
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 60 | ||||
-rw-r--r-- | test/lit/passes/simplify-locals-gc.wast | 164 | ||||
-rw-r--r-- | test/wasm2js/conversions-modified.2asm.js.opt | 3 | ||||
-rw-r--r-- | test/wasm2js/float_literals-modified.2asm.js.opt | 54 | ||||
-rw-r--r-- | test/wasm2js/i64-ctz.2asm.js.opt | 5 | ||||
-rw-r--r-- | test/wasm2js/stack-modified.2asm.js.opt | 19 | ||||
-rw-r--r-- | test/wasm2js/unary-ops.2asm.js.opt | 2 |
7 files changed, 238 insertions, 69 deletions
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index dfbc7551a..c368740f1 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -1000,8 +1000,10 @@ struct SimplifyLocals std::vector<Index>* numLocalGets; bool removeEquivalentSets; Module* module; + PassOptions passOptions; bool anotherCycle = false; + bool refinalize = false; // We track locals containing the same value. EquivalentSets equivalences; @@ -1015,12 +1017,9 @@ struct SimplifyLocals void visitLocalSet(LocalSet* curr) { // Remove trivial copies, even through a tee - auto* value = curr->value; - Function* func = this->getFunction(); - while (auto* subSet = value->dynCast<LocalSet>()) { - value = subSet->value; - } - if (auto* get = value->dynCast<LocalGet>()) { + auto* value = + Properties::getFallthrough(curr->value, passOptions, *module); + if (auto* get = value->template dynCast<LocalGet>()) { if (equivalences.check(curr->index, get->index)) { // This is an unnecessary copy! if (removeEquivalentSets) { @@ -1033,13 +1032,9 @@ struct SimplifyLocals } // Nothing more to do, ignore the copy. return; - } else if (func->getLocalType(curr->index) == - func->getLocalType(get->index)) { + } else { // There is a new equivalence now. Remove all the old ones, and add // the new one. - // Note that we ignore the case of subtyping here, to keep this - // optimization simple by assuming all equivalent indexes also have - // the same type. TODO: consider optimizing this. equivalences.reset(curr->index); equivalences.add(curr->index, get->index); return; @@ -1054,8 +1049,6 @@ struct SimplifyLocals // Canonicalize gets: if some are equivalent, then we can pick more // then one, and other passes may benefit from having more uniformity. if (auto* set = equivalences.getEquivalents(curr->index)) { - // Pick the index with the most uses - maximizing the chance to - // lower one's uses to zero. // Helper method that returns the # of gets *ignoring the current // get*, as we want to see what is best overall, treating this one as // to be decided upon. @@ -1067,9 +1060,31 @@ struct SimplifyLocals } return ret; }; + + // Pick the index with the most uses - maximizing the chance to + // lower one's uses to zero. If types differ though then we prefer to + // switch to a more refined type even if there are fewer uses, as that + // may have significant benefits to later optimizations (we may be + // able to use it to remove casts, etc.). + auto* func = this->getFunction(); Index best = -1; for (auto index : *set) { - if (best == Index(-1) || + if (best == Index(-1)) { + // This is the first possible option we've seen. + best = index; + continue; + } + + auto bestType = func->getLocalType(best); + auto indexType = func->getLocalType(index); + if (!Type::isSubType(indexType, bestType)) { + // This is less refined than the current best; ignore. + continue; + } + + // This is better if it has a more refined type, or if it has more + // uses. + if (indexType != bestType || getNumGetsIgnoringCurr(index) > getNumGetsIgnoringCurr(best)) { best = index; } @@ -1077,8 +1092,11 @@ struct SimplifyLocals assert(best != Index(-1)); // Due to ordering, the best index may be different from us but have // the same # of locals - make sure we actually improve. - if (best != curr->index && getNumGetsIgnoringCurr(best) > - getNumGetsIgnoringCurr(curr->index)) { + auto bestType = func->getLocalType(best); + auto oldType = func->getLocalType(curr->index); + if (best != curr->index && (getNumGetsIgnoringCurr(best) > + getNumGetsIgnoringCurr(curr->index) || + bestType != oldType)) { // Update the get counts. (*numLocalGets)[best]++; assert((*numLocalGets)[curr->index] >= 1); @@ -1086,6 +1104,12 @@ struct SimplifyLocals // Make the change. curr->index = best; anotherCycle = true; + if (bestType != oldType) { + curr->type = func->getLocalType(best); + // We are switching to a more refined type, which might require + // changes in the user of the local.get. + refinalize = true; + } } } } @@ -1093,9 +1117,13 @@ struct SimplifyLocals EquivalentOptimizer eqOpter; eqOpter.module = this->getModule(); + eqOpter.passOptions = this->getPassOptions(); eqOpter.numLocalGets = &getCounter.num; eqOpter.removeEquivalentSets = allowStructure; eqOpter.walkFunction(func); + if (eqOpter.refinalize) { + ReFinalize().walkFunctionInModule(func, this->getModule()); + } // We may have already had a local with no uses, or we may have just // gotten there thanks to the EquivalentOptimizer. If there are such diff --git a/test/lit/passes/simplify-locals-gc.wast b/test/lit/passes/simplify-locals-gc.wast index 75be02cf2..951116a08 100644 --- a/test/lit/passes/simplify-locals-gc.wast +++ b/test/lit/passes/simplify-locals-gc.wast @@ -445,4 +445,168 @@ ;; Helper function for the above. (unreachable) ) + + ;; CHECK: (func $pick-refined (param $nn-any (ref any)) (result anyref) + ;; CHECK-NEXT: (local $any anyref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $use-any + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $use-nn-any + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $pick-refined (type $ref|any|_=>_anyref) (param $nn-any (ref any)) (result anyref) + ;; NOMNL-NEXT: (local $any anyref) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (call $use-any + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (call $use-nn-any + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + (func $pick-refined (param $nn-any (ref any)) (result anyref) + (local $any anyref) + (local.set $any + (local.get $nn-any) + ) + ;; Use the locals so neither is trivially removed. + (call $use-any + (local.get $any) + ) + (call $use-nn-any + (local.get $nn-any) + ) + ;; This copy is not needed, as they hold the same value. + (local.set $any + (local.get $nn-any) + ) + ;; This local.get might as well use the non-nullable local, which is more + ;; refined. In fact, all uses of locals can be switched to that one in the + ;; entire function (and the other local would be removed by other passes). + (local.get $any) + ) + + ;; CHECK: (func $pick-casted (param $any anyref) (result anyref) + ;; CHECK-NEXT: (local $nn-any (ref any)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $use-any + ;; CHECK-NEXT: (local.tee $nn-any + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $use-nn-any + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $pick-casted (type $anyref_=>_anyref) (param $any anyref) (result anyref) + ;; NOMNL-NEXT: (local $nn-any (ref any)) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (call $use-any + ;; NOMNL-NEXT: (local.tee $nn-any + ;; NOMNL-NEXT: (ref.as_non_null + ;; NOMNL-NEXT: (local.get $any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (call $use-nn-any + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + (func $pick-casted (param $any anyref) (result anyref) + (local $nn-any (ref any)) + (local.set $nn-any + (ref.as_non_null + (local.get $any) + ) + ) + ;; Use the locals so neither is trivially removed. + (call $use-any + (local.get $any) + ) + (call $use-nn-any + (local.get $nn-any) + ) + ;; This copy is not needed, as they hold the same value. + (local.set $any + (local.get $nn-any) + ) + ;; This local.get might as well use the non-nullable local. + (local.get $any) + ) + + ;; CHECK: (func $pick-fallthrough (param $x i32) + ;; CHECK-NEXT: (local $t i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $pick-fallthrough (type $i32_=>_none) (param $x i32) + ;; NOMNL-NEXT: (local $t i32) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (drop + ;; NOMNL-NEXT: (local.get $x) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (drop + ;; NOMNL-NEXT: (block (result i32) + ;; NOMNL-NEXT: (local.get $x) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + (func $pick-fallthrough (param $x i32) + (local $t i32) + ;; Similar to the above test wth looking through a cast, but using a non-gc + ;; type of fallthrough value. + (local.set $t + (block (result i32) + (local.get $x) + ) + ) + ;; The locals are identical, as we set $t = $x (we can look through to the + ;; block value). Both these gets can go to $x, and we do not need to set $t + ;; as it will have 0 uses. + (drop + (local.get $x) + ) + (drop + (local.get $t) + ) + ) + + ;; CHECK: (func $use-nn-any (param $nn-any (ref any)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $use-nn-any (type $ref|any|_=>_none) (param $nn-any (ref any)) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: ) + (func $use-nn-any (param $nn-any (ref any)) + ;; Helper function for the above. + ) + + ;; CHECK: (func $use-any (param $any anyref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $use-any (type $anyref_=>_none) (param $any anyref) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: ) + (func $use-any (param $any anyref) + ;; Helper function for the above. + ) ) diff --git a/test/wasm2js/conversions-modified.2asm.js.opt b/test/wasm2js/conversions-modified.2asm.js.opt index 36bfda8cb..4c2b6d1ae 100644 --- a/test/wasm2js/conversions-modified.2asm.js.opt +++ b/test/wasm2js/conversions-modified.2asm.js.opt @@ -162,9 +162,8 @@ function asmFunc(imports) { $1 = wasm2js_scratch_load_i32(1) | 0; $2 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $1; - $1 = $2; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $1; + return $2; } return { diff --git a/test/wasm2js/float_literals-modified.2asm.js.opt b/test/wasm2js/float_literals-modified.2asm.js.opt index 1e69b6304..613e52a40 100644 --- a/test/wasm2js/float_literals-modified.2asm.js.opt +++ b/test/wasm2js/float_literals-modified.2asm.js.opt @@ -108,9 +108,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$32() { @@ -119,9 +118,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$34() { @@ -130,9 +128,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$35() { @@ -141,9 +138,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$36() { @@ -152,9 +148,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$37() { @@ -163,9 +158,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$38() { @@ -174,9 +168,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$39() { @@ -185,9 +178,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$41() { @@ -196,9 +188,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$42() { @@ -207,9 +198,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$44() { @@ -218,9 +208,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$45() { @@ -229,9 +218,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$46() { @@ -240,9 +228,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$47() { @@ -251,9 +238,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$48() { @@ -262,9 +248,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$49() { @@ -273,9 +258,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$50() { @@ -284,9 +268,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$59() { @@ -295,9 +278,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } return { diff --git a/test/wasm2js/i64-ctz.2asm.js.opt b/test/wasm2js/i64-ctz.2asm.js.opt index c232bef7c..0d6ffd697 100644 --- a/test/wasm2js/i64-ctz.2asm.js.opt +++ b/test/wasm2js/i64-ctz.2asm.js.opt @@ -22,7 +22,7 @@ function asmFunc(imports) { while (1) { if ($0 | $2) { $1 = $0; - $0 = $0 - 1 & $0; + $0 = $1 & $1 - 1; $2 = $2 - !$1 & $2; $3 = $3 + 1 | 0; $4 = $3 ? $4 : $4 + 1 | 0; @@ -31,9 +31,8 @@ function asmFunc(imports) { break; }; i64toi32_i32$HIGH_BITS = $4; - $0 = $3; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0; + return $3; } function legalstub$ctz64($0, $1) { diff --git a/test/wasm2js/stack-modified.2asm.js.opt b/test/wasm2js/stack-modified.2asm.js.opt index c3d9d7d57..45203a50e 100644 --- a/test/wasm2js/stack-modified.2asm.js.opt +++ b/test/wasm2js/stack-modified.2asm.js.opt @@ -17,24 +17,21 @@ function asmFunc(imports) { var setTempRet0 = env.setTempRet0; var i64toi32_i32$HIGH_BITS = 0; function legalstub$0($0, $1) { - var $2 = 0, $3 = 0, $4 = 0; - $2 = $1; - $3 = 1; + var $2 = 0, $3 = 0; + $2 = 1; while (1) { - if ($0 | $2) { - $3 = __wasm_i64_mul($0, $2, $3, $4); - $4 = i64toi32_i32$HIGH_BITS; - $1 = $0; + if ($0 | $1) { + $2 = __wasm_i64_mul($0, $1, $2, $3); + $3 = i64toi32_i32$HIGH_BITS; + $1 = $1 - !$0 | 0; $0 = $0 - 1 | 0; - $2 = $2 - !$1 | 0; continue; } break; }; - i64toi32_i32$HIGH_BITS = $4; - $0 = $3; + i64toi32_i32$HIGH_BITS = $3; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0; + return $2; } function __wasm_i64_mul($0, $1, $2, $3) { diff --git a/test/wasm2js/unary-ops.2asm.js.opt b/test/wasm2js/unary-ops.2asm.js.opt index 4da12af66..bca65089b 100644 --- a/test/wasm2js/unary-ops.2asm.js.opt +++ b/test/wasm2js/unary-ops.2asm.js.opt @@ -48,7 +48,7 @@ function asmFunc(imports) { while (1) { if ($1_1 | $4) { $0 = $4; - $4 = $4 - 1 & $4; + $4 = $0 & $0 - 1; $1_1 = $1_1 - !$0 & $1_1; $5 = $5 + 1 | 0; $6_1 = $5 ? $6_1 : $6_1 + 1 | 0; |