From b2e096d79c36daa2cbfb7dc3db31af76e9f45cc8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 3 Oct 2023 16:39:12 -0700 Subject: RemoveUnusedBrs: Allow less unconditional work and in particular division (#5989) Fixes #5983: The testcase from there is used here in a new testcase remove-unused-brs_levels in which we check if we are willing to unconditionally do a division operation. Turning an if with an arm that does a division into a select, which always does the division, is almost 5x slower, so we should probably be extremely careful about doing that. I took some measurements and have some suggestions for changes in this PR: * Raise the cost of div/rem to what I measure on my machine, which is 5x slower than an add, or worse. * For some reason we added the if arms rather than take the max of them, so fix that. This does not help the issue, but was confusing. * Adjust TooCostlyToRunUnconditionally in the pass from 9 to 8 (this helps balance the last point). * Use half that value when not optimizing for size. That is, we allow only 4 extra unconditional work normally, and 8 in -Os, and when -Oz then we allow any extra amount. Aside from the new testcases, some existing ones changed. They all appear to change in a reasonable way, to me. We should perhaps go even further than this, and not even run a division unconditionally in -Os, but I wasn't sure it makes sense to go that far as other benchmarks may be affected. For now, this makes the benchmark in #5983 run at full speed in -O3 or -Os, and it remains slow in -Oz. The modified version of the benchmark that only divides in the if (no other operations) is still fast in -O3, but it become slow in -Os as we do turn that if into a select (but again, I didn't want to go that far as to overfit on that one benchmark). --- test/wasm2js/conversions-modified.2asm.js.opt | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'test/wasm2js/conversions-modified.2asm.js.opt') diff --git a/test/wasm2js/conversions-modified.2asm.js.opt b/test/wasm2js/conversions-modified.2asm.js.opt index 811845f9d..5eb7435ee 100644 --- a/test/wasm2js/conversions-modified.2asm.js.opt +++ b/test/wasm2js/conversions-modified.2asm.js.opt @@ -121,15 +121,29 @@ function asmFunc(imports) { } function legalstub$7($0) { - i64toi32_i32$HIGH_BITS = Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ~~($0 > Math_fround(0.0) ? Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 * Math_fround(2.3283064365386963e-10)))), Math_fround(4294967296.0))) : Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) * Math_fround(2.3283064365386963e-10))))) >>> 0 : 0; + var $1 = 0, $2 = 0; + $2 = ~~$0 >>> 0; + if (Math_fround(Math_abs($0)) >= Math_fround(1.0)) { + $1 = ~~($0 > Math_fround(0.0) ? Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 * Math_fround(2.3283064365386963e-10)))), Math_fround(4294967296.0))) : Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) * Math_fround(2.3283064365386963e-10))))) >>> 0 + } else { + $1 = 0 + } + i64toi32_i32$HIGH_BITS = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return ~~$0 >>> 0; + return $2; } function legalstub$9($0) { - i64toi32_i32$HIGH_BITS = Math_abs($0) >= 1.0 ? ~~($0 > 0.0 ? Math_min(Math_floor($0 * 2.3283064365386963e-10), 4294967295.0) : Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) * 2.3283064365386963e-10)) >>> 0 : 0; + var $1 = 0, $2 = 0; + $2 = ~~$0 >>> 0; + if (Math_abs($0) >= 1.0) { + $1 = ~~($0 > 0.0 ? Math_min(Math_floor($0 * 2.3283064365386963e-10), 4294967295.0) : Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) * 2.3283064365386963e-10)) >>> 0 + } else { + $1 = 0 + } + i64toi32_i32$HIGH_BITS = $1; setTempRet0(i64toi32_i32$HIGH_BITS | 0); - return ~~$0 >>> 0; + return $2; } function legalstub$12($0, $1) { -- cgit v1.2.3