diff options
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 60 | ||||
-rw-r--r-- | test/lit/passes/simplify-locals-gc.wast | 164 | ||||
-rw-r--r-- | test/wasm2js/conversions-modified.2asm.js.opt | 3 | ||||
-rw-r--r-- | test/wasm2js/float_literals-modified.2asm.js.opt | 54 | ||||
-rw-r--r-- | test/wasm2js/i64-ctz.2asm.js.opt | 5 | ||||
-rw-r--r-- | test/wasm2js/stack-modified.2asm.js.opt | 19 | ||||
-rw-r--r-- | test/wasm2js/unary-ops.2asm.js.opt | 2 |
7 files changed, 238 insertions, 69 deletions
diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index dfbc7551a..c368740f1 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -1000,8 +1000,10 @@ struct SimplifyLocals std::vector<Index>* numLocalGets; bool removeEquivalentSets; Module* module; + PassOptions passOptions; bool anotherCycle = false; + bool refinalize = false; // We track locals containing the same value. EquivalentSets equivalences; @@ -1015,12 +1017,9 @@ struct SimplifyLocals void visitLocalSet(LocalSet* curr) { // Remove trivial copies, even through a tee - auto* value = curr->value; - Function* func = this->getFunction(); - while (auto* subSet = value->dynCast<LocalSet>()) { - value = subSet->value; - } - if (auto* get = value->dynCast<LocalGet>()) { + auto* value = + Properties::getFallthrough(curr->value, passOptions, *module); + if (auto* get = value->template dynCast<LocalGet>()) { if (equivalences.check(curr->index, get->index)) { // This is an unnecessary copy! if (removeEquivalentSets) { @@ -1033,13 +1032,9 @@ struct SimplifyLocals } // Nothing more to do, ignore the copy. return; - } else if (func->getLocalType(curr->index) == - func->getLocalType(get->index)) { + } else { // There is a new equivalence now. Remove all the old ones, and add // the new one. - // Note that we ignore the case of subtyping here, to keep this - // optimization simple by assuming all equivalent indexes also have - // the same type. TODO: consider optimizing this. equivalences.reset(curr->index); equivalences.add(curr->index, get->index); return; @@ -1054,8 +1049,6 @@ struct SimplifyLocals // Canonicalize gets: if some are equivalent, then we can pick more // then one, and other passes may benefit from having more uniformity. if (auto* set = equivalences.getEquivalents(curr->index)) { - // Pick the index with the most uses - maximizing the chance to - // lower one's uses to zero. // Helper method that returns the # of gets *ignoring the current // get*, as we want to see what is best overall, treating this one as // to be decided upon. @@ -1067,9 +1060,31 @@ struct SimplifyLocals } return ret; }; + + // Pick the index with the most uses - maximizing the chance to + // lower one's uses to zero. If types differ though then we prefer to + // switch to a more refined type even if there are fewer uses, as that + // may have significant benefits to later optimizations (we may be + // able to use it to remove casts, etc.). + auto* func = this->getFunction(); Index best = -1; for (auto index : *set) { - if (best == Index(-1) || + if (best == Index(-1)) { + // This is the first possible option we've seen. + best = index; + continue; + } + + auto bestType = func->getLocalType(best); + auto indexType = func->getLocalType(index); + if (!Type::isSubType(indexType, bestType)) { + // This is less refined than the current best; ignore. + continue; + } + + // This is better if it has a more refined type, or if it has more + // uses. + if (indexType != bestType || getNumGetsIgnoringCurr(index) > getNumGetsIgnoringCurr(best)) { best = index; } @@ -1077,8 +1092,11 @@ struct SimplifyLocals assert(best != Index(-1)); // Due to ordering, the best index may be different from us but have // the same # of locals - make sure we actually improve. - if (best != curr->index && getNumGetsIgnoringCurr(best) > - getNumGetsIgnoringCurr(curr->index)) { + auto bestType = func->getLocalType(best); + auto oldType = func->getLocalType(curr->index); + if (best != curr->index && (getNumGetsIgnoringCurr(best) > + getNumGetsIgnoringCurr(curr->index) || + bestType != oldType)) { // Update the get counts. (*numLocalGets)[best]++; assert((*numLocalGets)[curr->index] >= 1); @@ -1086,6 +1104,12 @@ struct SimplifyLocals // Make the change. curr->index = best; anotherCycle = true; + if (bestType != oldType) { + curr->type = func->getLocalType(best); + // We are switching to a more refined type, which might require + // changes in the user of the local.get. + refinalize = true; + } } } } @@ -1093,9 +1117,13 @@ struct SimplifyLocals EquivalentOptimizer eqOpter; eqOpter.module = this->getModule(); + eqOpter.passOptions = this->getPassOptions(); eqOpter.numLocalGets = &getCounter.num; eqOpter.removeEquivalentSets = allowStructure; eqOpter.walkFunction(func); + if (eqOpter.refinalize) { + ReFinalize().walkFunctionInModule(func, this->getModule()); + } // We may have already had a local with no uses, or we may have just // gotten there thanks to the EquivalentOptimizer. If there are such diff --git a/test/lit/passes/simplify-locals-gc.wast b/test/lit/passes/simplify-locals-gc.wast index 75be02cf2..951116a08 100644 --- a/test/lit/passes/simplify-locals-gc.wast +++ b/test/lit/passes/simplify-locals-gc.wast @@ -445,4 +445,168 @@ ;; Helper function for the above. (unreachable) ) + + ;; CHECK: (func $pick-refined (param $nn-any (ref any)) (result anyref) + ;; CHECK-NEXT: (local $any anyref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $use-any + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $use-nn-any + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $pick-refined (type $ref|any|_=>_anyref) (param $nn-any (ref any)) (result anyref) + ;; NOMNL-NEXT: (local $any anyref) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (call $use-any + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (call $use-nn-any + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + (func $pick-refined (param $nn-any (ref any)) (result anyref) + (local $any anyref) + (local.set $any + (local.get $nn-any) + ) + ;; Use the locals so neither is trivially removed. + (call $use-any + (local.get $any) + ) + (call $use-nn-any + (local.get $nn-any) + ) + ;; This copy is not needed, as they hold the same value. + (local.set $any + (local.get $nn-any) + ) + ;; This local.get might as well use the non-nullable local, which is more + ;; refined. In fact, all uses of locals can be switched to that one in the + ;; entire function (and the other local would be removed by other passes). + (local.get $any) + ) + + ;; CHECK: (func $pick-casted (param $any anyref) (result anyref) + ;; CHECK-NEXT: (local $nn-any (ref any)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $use-any + ;; CHECK-NEXT: (local.tee $nn-any + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $use-nn-any + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (local.get $nn-any) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $pick-casted (type $anyref_=>_anyref) (param $any anyref) (result anyref) + ;; NOMNL-NEXT: (local $nn-any (ref any)) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (call $use-any + ;; NOMNL-NEXT: (local.tee $nn-any + ;; NOMNL-NEXT: (ref.as_non_null + ;; NOMNL-NEXT: (local.get $any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (call $use-nn-any + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (local.get $nn-any) + ;; NOMNL-NEXT: ) + (func $pick-casted (param $any anyref) (result anyref) + (local $nn-any (ref any)) + (local.set $nn-any + (ref.as_non_null + (local.get $any) + ) + ) + ;; Use the locals so neither is trivially removed. + (call $use-any + (local.get $any) + ) + (call $use-nn-any + (local.get $nn-any) + ) + ;; This copy is not needed, as they hold the same value. + (local.set $any + (local.get $nn-any) + ) + ;; This local.get might as well use the non-nullable local. + (local.get $any) + ) + + ;; CHECK: (func $pick-fallthrough (param $x i32) + ;; CHECK-NEXT: (local $t i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $pick-fallthrough (type $i32_=>_none) (param $x i32) + ;; NOMNL-NEXT: (local $t i32) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: (drop + ;; NOMNL-NEXT: (local.get $x) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: (drop + ;; NOMNL-NEXT: (block (result i32) + ;; NOMNL-NEXT: (local.get $x) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + ;; NOMNL-NEXT: ) + (func $pick-fallthrough (param $x i32) + (local $t i32) + ;; Similar to the above test wth looking through a cast, but using a non-gc + ;; type of fallthrough value. + (local.set $t + (block (result i32) + (local.get $x) + ) + ) + ;; The locals are identical, as we set $t = $x (we can look through to the + ;; block value). Both these gets can go to $x, and we do not need to set $t + ;; as it will have 0 uses. + (drop + (local.get $x) + ) + (drop + (local.get $t) + ) + ) + + ;; CHECK: (func $use-nn-any (param $nn-any (ref any)) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $use-nn-any (type $ref|any|_=>_none) (param $nn-any (ref any)) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: ) + (func $use-nn-any (param $nn-any (ref any)) + ;; Helper function for the above. + ) + + ;; CHECK: (func $use-any (param $any anyref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; NOMNL: (func $use-any (type $anyref_=>_none) (param $any anyref) + ;; NOMNL-NEXT: (nop) + ;; NOMNL-NEXT: ) + (func $use-any (param $any anyref) + ;; Helper function for the above. + ) ) diff --git a/test/wasm2js/conversions-modified.2asm.js.opt b/test/wasm2js/conversions-modified.2asm.js.opt index 36bfda8cb..4c2b6d1ae 100644 --- a/test/wasm2js/conversions-modified.2asm.js.opt +++ b/test/wasm2js/conversions-modified.2asm.js.opt @@ -162,9 +162,8 @@ function asmFunc(imports) { $1 = wasm2js_scratch_load_i32(1) | 0; $2 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $1; - $1 = $2; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $1; + return $2; } return { diff --git a/test/wasm2js/float_literals-modified.2asm.js.opt b/test/wasm2js/float_literals-modified.2asm.js.opt index 1e69b6304..613e52a40 100644 --- a/test/wasm2js/float_literals-modified.2asm.js.opt +++ b/test/wasm2js/float_literals-modified.2asm.js.opt @@ -108,9 +108,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$32() { @@ -119,9 +118,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$34() { @@ -130,9 +128,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$35() { @@ -141,9 +138,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$36() { @@ -152,9 +148,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$37() { @@ -163,9 +158,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$38() { @@ -174,9 +168,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$39() { @@ -185,9 +178,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$41() { @@ -196,9 +188,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$42() { @@ -207,9 +198,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$44() { @@ -218,9 +208,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$45() { @@ -229,9 +218,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$46() { @@ -240,9 +228,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$47() { @@ -251,9 +238,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$48() { @@ -262,9 +248,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$49() { @@ -273,9 +258,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$50() { @@ -284,9 +268,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } function legalstub$59() { @@ -295,9 +278,8 @@ function asmFunc(imports) { $0_1 = wasm2js_scratch_load_i32(1) | 0; $1 = wasm2js_scratch_load_i32(0) | 0; i64toi32_i32$HIGH_BITS = $0_1; - $0_1 = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0_1; + return $1; } return { diff --git a/test/wasm2js/i64-ctz.2asm.js.opt b/test/wasm2js/i64-ctz.2asm.js.opt index c232bef7c..0d6ffd697 100644 --- a/test/wasm2js/i64-ctz.2asm.js.opt +++ b/test/wasm2js/i64-ctz.2asm.js.opt @@ -22,7 +22,7 @@ function asmFunc(imports) { while (1) { if ($0 | $2) { $1 = $0; - $0 = $0 - 1 & $0; + $0 = $1 & $1 - 1; $2 = $2 - !$1 & $2; $3 = $3 + 1 | 0; $4 = $3 ? $4 : $4 + 1 | 0; @@ -31,9 +31,8 @@ function asmFunc(imports) { break; }; i64toi32_i32$HIGH_BITS = $4; - $0 = $3; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0; + return $3; } function legalstub$ctz64($0, $1) { diff --git a/test/wasm2js/stack-modified.2asm.js.opt b/test/wasm2js/stack-modified.2asm.js.opt index c3d9d7d57..45203a50e 100644 --- a/test/wasm2js/stack-modified.2asm.js.opt +++ b/test/wasm2js/stack-modified.2asm.js.opt @@ -17,24 +17,21 @@ function asmFunc(imports) { var setTempRet0 = env.setTempRet0; var i64toi32_i32$HIGH_BITS = 0; function legalstub$0($0, $1) { - var $2 = 0, $3 = 0, $4 = 0; - $2 = $1; - $3 = 1; + var $2 = 0, $3 = 0; + $2 = 1; while (1) { - if ($0 | $2) { - $3 = __wasm_i64_mul($0, $2, $3, $4); - $4 = i64toi32_i32$HIGH_BITS; - $1 = $0; + if ($0 | $1) { + $2 = __wasm_i64_mul($0, $1, $2, $3); + $3 = i64toi32_i32$HIGH_BITS; + $1 = $1 - !$0 | 0; $0 = $0 - 1 | 0; - $2 = $2 - !$1 | 0; continue; } break; }; - i64toi32_i32$HIGH_BITS = $4; - $0 = $3; + i64toi32_i32$HIGH_BITS = $3; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return $0; + return $2; } function __wasm_i64_mul($0, $1, $2, $3) { diff --git a/test/wasm2js/unary-ops.2asm.js.opt b/test/wasm2js/unary-ops.2asm.js.opt index 4da12af66..bca65089b 100644 --- a/test/wasm2js/unary-ops.2asm.js.opt +++ b/test/wasm2js/unary-ops.2asm.js.opt @@ -48,7 +48,7 @@ function asmFunc(imports) { while (1) { if ($1_1 | $4) { $0 = $4; - $4 = $4 - 1 & $4; + $4 = $0 & $0 - 1; $1_1 = $1_1 - !$0 & $1_1; $5 = $5 + 1 | 0; $6_1 = $5 ? $6_1 : $6_1 + 1 | 0; |