diff options
author | Max Graey <maxgraey@gmail.com> | 2020-10-14 03:49:46 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-13 17:49:46 -0700 |
commit | 1a1b547aee688a0d96251e8afd565999acfb1922 (patch) | |
tree | 7aa1794b4dfc8db996f41b3203848e20b245d768 | |
parent | 8943fcc31d12545043043c06c037cf6dfee14722 (diff) | |
download | binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.tar.gz binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.tar.bz2 binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.zip |
Optimize power of two float divisions (#3018)
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 49 | ||||
-rw-r--r-- | src/support/bits.cpp | 24 | ||||
-rw-r--r-- | src/support/bits.h | 3 | ||||
-rw-r--r-- | test/passes/optimize-instructions_all-features.txt | 178 | ||||
-rw-r--r-- | test/passes/optimize-instructions_all-features.wast | 118 | ||||
-rw-r--r-- | test/wasm2js/conversions-modified.2asm.js.opt | 4 | ||||
-rw-r--r-- | test/wasm2js/float-ops.2asm.js.opt | 4 |
7 files changed, 368 insertions, 12 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 1e51adea9..85ab2a168 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -606,6 +606,18 @@ struct OptimizeInstructions } } } + if (binary->op == DivFloat32) { + float c = right->value.getf32(); + if (Bits::isPowerOf2Float(c)) { + return optimizePowerOf2FDiv(binary, c); + } + } + if (binary->op == DivFloat64) { + double c = right->value.getf64(); + if (Bits::isPowerOf2Float(c)) { + return optimizePowerOf2FDiv(binary, c); + } + } } // a bunch of operations on a constant left side can be simplified if (binary->left->is<Const>()) { @@ -1260,6 +1272,15 @@ private: // but it's still worth doing since // * Usually ands are more common than urems. // * The constant is slightly smaller. + template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) { + static_assert(std::is_same<T, uint32_t>::value || + std::is_same<T, uint64_t>::value, + "type mismatch"); + binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64; + binary->right->cast<Const>()->value = Literal(c - 1); + return binary; + } + template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) { static_assert(std::is_same<T, uint32_t>::value || std::is_same<T, uint64_t>::value, @@ -1270,12 +1291,30 @@ private: return binary; } - template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) { - static_assert(std::is_same<T, uint32_t>::value || - std::is_same<T, uint64_t>::value, + template<typename T> Expression* optimizePowerOf2FDiv(Binary* binary, T c) { + // + // x / C_pot => x * (C_pot ^ -1) + // + // Explanation: + // Floating point numbers are represented as: + // ((-1) ^ sign) * (2 ^ (exp - bias)) * (1 + significand) + // + // If we have power of two numbers, then the mantissa (significand) + // is all zeros. Let's focus on the exponent, ignoring the sign part: + // (2 ^ (exp - bias)) + // + // and for inverted power of two floating point: + // 1.0 / (2 ^ (exp - bias)) -> 2 ^ -(exp - bias) + // + // So inversion of C_pot is valid because it changes only the sign + // of the exponent part and doesn't touch the significand part, + // which remains the same (zeros). + static_assert(std::is_same<T, float>::value || + std::is_same<T, double>::value, "type mismatch"); - binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64; - binary->right->cast<Const>()->value = Literal(c - 1); + double invDivisor = 1.0 / (double)c; + binary->op = std::is_same<T, float>::value ? MulFloat32 : MulFloat64; + binary->right->cast<Const>()->value = Literal(static_cast<T>(invDivisor)); return binary; } diff --git a/src/support/bits.cpp b/src/support/bits.cpp index 03fb66252..8a81a7e83 100644 --- a/src/support/bits.cpp +++ b/src/support/bits.cpp @@ -157,6 +157,30 @@ int ceilLog2(uint32_t v) { return 32 - countLeadingZeroes(v - 1); } int ceilLog2(uint64_t v) { return 64 - countLeadingZeroes(v - 1); } +bool isPowerOf2Float(float v) { + // Power of two floating points should have zero as their significands, + // so here we just mask the exponent range of "v" and compare it with the + // unmasked input value. If they are equal, our value is a power of + // two. Also, we reject all values which are less than the minimal possible + // power of two or greater than the maximum possible power of two. + const uint32_t MIN_POT = 0x01U << 23; // 0x1p-126 + const uint32_t MAX_POT = 0xFDU << 23; // 0x1p+126 + const uint32_t EXP_MASK = 0xFFU << 23; // mask only exponent + const uint32_t SIGN_MASK = ~0U >> 1; // mask everything except sign + auto u = bit_cast<uint32_t>(v) & SIGN_MASK; + return u >= MIN_POT && u <= MAX_POT && (u & EXP_MASK) == u; +} + +bool isPowerOf2Float(double v) { + // See isPowerOf2Float(float) + const uint64_t MIN_POT = 0x001ULL << 52; // 0x1p-1022 + const uint64_t MAX_POT = 0x7FDULL << 52; // 0x1p+1022 + const uint64_t EXP_MASK = 0x7FFULL << 52; // mask only exponent + const uint64_t SIGN_MASK = ~0ULL >> 1; // mask everything except sign + auto u = bit_cast<uint64_t>(v) & SIGN_MASK; + return u >= MIN_POT && u <= MAX_POT && (u & EXP_MASK) == u; +} + uint32_t log2(uint32_t v) { if (!isPowerOf2(v)) { WASM_UNREACHABLE("value should be a power of two"); diff --git a/src/support/bits.h b/src/support/bits.h index 3231e1c96..333eb5cc2 100644 --- a/src/support/bits.h +++ b/src/support/bits.h @@ -77,6 +77,9 @@ template<typename T> bool isPowerOf2(T v) { return v != 0 && (v & (v - 1)) == 0; } +bool isPowerOf2Float(float); +bool isPowerOf2Float(double); + template<typename T, typename U> inline static T rotateLeft(T val, U count) { auto value = typename std::make_unsigned<T>::type(val); U mask = sizeof(T) * CHAR_BIT - 1; diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt index 64b2d787b..93f4c3087 100644 --- a/test/passes/optimize-instructions_all-features.txt +++ b/test/passes/optimize-instructions_all-features.txt @@ -16,7 +16,9 @@ (type $i32_i32_i32_f64_=>_none (func (param i32 i32 i32 f64))) (type $i32_i32_f64_f64_=>_none (func (param i32 i32 f64 f64))) (type $i32_i64_f64_i32_=>_none (func (param i32 i64 f64 i32))) + (type $f32_=>_none (func (param f32))) (type $f32_f64_=>_none (func (param f32 f64))) + (type $f64_=>_none (func (param f64))) (type $none_=>_f64 (func (result f64))) (memory $0 0) (export "load-off-2" (func $load-off-2)) @@ -2728,6 +2730,178 @@ ) (unreachable) ) + (func $fdiv-32-power-2 (param $x f32) + (drop + (f32.mul + (local.get $x) + (f32.const 0.5) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const -0.5) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const 2.3283064365386963e-10) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const 5.421010862427522e-20) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const 8507059173023461586584365e13) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const 1.1754943508222875e-38) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const -8507059173023461586584365e13) + ) + ) + (drop + (f32.mul + (local.get $x) + (f32.const -1.1754943508222875e-38) + ) + ) + (drop + (f32.div + (local.get $x) + (f32.const 5.877471754111438e-39) + ) + ) + (drop + (f32.div + (local.get $x) + (f32.const 5.877471754111438e-39) + ) + ) + (drop + (f32.div + (local.get $x) + (f32.const 0) + ) + ) + (drop + (f32.div + (local.get $x) + (f32.const nan:0x400000) + ) + ) + (drop + (f32.div + (local.get $x) + (f32.const inf) + ) + ) + (drop + (f32.div + (local.get $x) + (f32.const -inf) + ) + ) + ) + (func $fdiv-64-power-2 (param $x f64) + (drop + (f64.mul + (local.get $x) + (f64.const 0.5) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const -0.5) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const 2.3283064365386963e-10) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const 5.421010862427522e-20) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const 4494232837155789769323262e283) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const 2.2250738585072014e-308) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const -4494232837155789769323262e283) + ) + ) + (drop + (f64.mul + (local.get $x) + (f64.const -2.2250738585072014e-308) + ) + ) + (drop + (f64.div + (local.get $x) + (f64.const 1.1125369292536007e-308) + ) + ) + (drop + (f64.div + (local.get $x) + (f64.const 8988465674311579538646525e283) + ) + ) + (drop + (f64.div + (local.get $x) + (f64.const 0) + ) + ) + (drop + (f64.div + (local.get $x) + (f64.const nan:0x8000000000000) + ) + ) + (drop + (f64.div + (local.get $x) + (f64.const inf) + ) + ) + (drop + (f64.div + (local.get $x) + (f64.const -inf) + ) + ) + ) (func $srem-by-const (param $x i32) (param $y i64) (drop (i32.const 0) @@ -2995,13 +3169,13 @@ (local.get $x64) ) (drop - (f32.div + (f32.mul (local.get $y32) (f32.const 1) ) ) (drop - (f64.div + (f64.mul (local.get $y64) (f64.const 1) ) diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast index e84778fde..80f440a59 100644 --- a/test/passes/optimize-instructions_all-features.wast +++ b/test/passes/optimize-instructions_all-features.wast @@ -3093,6 +3093,122 @@ ) (unreachable) ) + (func $fdiv-32-power-2 (param $x f32) + (drop (f32.div + (local.get $x) + (f32.const 2) + )) + (drop (f32.div + (local.get $x) + (f32.const -2) + )) + (drop (f32.div + (local.get $x) + (f32.const 4294967296) + )) + (drop (f32.div + (local.get $x) + (f32.const 18446744073709551616) + )) + (drop (f32.div + (local.get $x) + (f32.const 0x1p-126) + )) + (drop (f32.div + (local.get $x) + (f32.const 0x1p+126) + )) + (drop (f32.div + (local.get $x) + (f32.const -0x1p-126) + )) + (drop (f32.div + (local.get $x) + (f32.const -0x1p+126) + )) + (drop (f32.div + (local.get $x) + (f32.const 0x1p-127) ;; skip + )) + (drop (f32.div + (local.get $x) + (f32.const 0x1p-127) ;; skip + )) + (drop (f32.div + (local.get $x) + (f32.const 0) ;; skip + )) + (drop (f32.div + (local.get $x) + (f32.const nan) ;; skip + )) + (drop (f32.div + (local.get $x) + (f32.const inf) ;; skip + )) + (drop (f32.div + (local.get $x) + (f32.const -inf) ;; skip + )) + ) + (func $fdiv-64-power-2 (param $x f64) + (drop (f64.div + (local.get $x) + (f64.const 2) + )) + (drop (f64.div + (local.get $x) + (f64.const -2) + )) + (drop (f64.div + (local.get $x) + (f64.const 4294967296) + )) + (drop (f64.div + (local.get $x) + (f64.const 18446744073709551616) + )) + (drop (f64.div + (local.get $x) + (f64.const 0x1p-1022) + )) + (drop (f64.div + (local.get $x) + (f64.const 0x1p+1022) + )) + (drop (f64.div + (local.get $x) + (f64.const -0x1p-1022) + )) + (drop (f64.div + (local.get $x) + (f64.const -0x1p+1022) + )) + (drop (f64.div + (local.get $x) + (f64.const 0x1p-1023) ;; skip + )) + (drop (f64.div + (local.get $x) + (f64.const 0x1p+1023) ;; skip + )) + (drop (f64.div + (local.get $x) + (f64.const 0) ;; skip + )) + (drop (f64.div + (local.get $x) + (f64.const nan) ;; skip + )) + (drop (f64.div + (local.get $x) + (f64.const inf) ;; skip + )) + (drop (f64.div + (local.get $x) + (f64.const -inf) ;; skip + )) + ) (func $srem-by-const (param $x i32) (param $y i64) ;; (signed)x % 1 (drop (i32.rem_s @@ -4908,7 +5024,7 @@ (local.get $x) (local.get $y) ) - )) + )) ;; x | (y | x) where x and y cannot be reordered - skip (drop (i32.or diff --git a/test/wasm2js/conversions-modified.2asm.js.opt b/test/wasm2js/conversions-modified.2asm.js.opt index befce2489..072a3bbc6 100644 --- a/test/wasm2js/conversions-modified.2asm.js.opt +++ b/test/wasm2js/conversions-modified.2asm.js.opt @@ -67,13 +67,13 @@ function asmFunc(global, env) { function $7($0) { $0 = Math_fround($0); - i64toi32_i32$HIGH_BITS = Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 / Math_fround(4294967296.0)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) / Math_fround(4294967296.0)))) >>> 0) : 0; + i64toi32_i32$HIGH_BITS = Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 * Math_fround(2.3283064365386963e-10)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) * Math_fround(2.3283064365386963e-10)))) >>> 0) : 0; return ~~$0 >>> 0 | 0; } function $9($0) { $0 = +$0; - i64toi32_i32$HIGH_BITS = Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 / 4294967296.0), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) / 4294967296.0) >>> 0) : 0; + i64toi32_i32$HIGH_BITS = Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 * 2.3283064365386963e-10), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) * 2.3283064365386963e-10) >>> 0) : 0; return ~~$0 >>> 0 | 0; } diff --git a/test/wasm2js/float-ops.2asm.js.opt b/test/wasm2js/float-ops.2asm.js.opt index 1635dbc9a..fd13fe56d 100644 --- a/test/wasm2js/float-ops.2asm.js.opt +++ b/test/wasm2js/float-ops.2asm.js.opt @@ -238,12 +238,12 @@ function asmFunc(global, env) { function $47($0) { $0 = Math_fround($0); - return !(~~$0 >>> 0 | (Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 / Math_fround(4294967296.0)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) / Math_fround(4294967296.0)))) >>> 0) : 0)) | 0; + return !(~~$0 >>> 0 | (Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 * Math_fround(2.3283064365386963e-10)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) * Math_fround(2.3283064365386963e-10)))) >>> 0) : 0)) | 0; } function $48($0) { $0 = +$0; - return !(~~$0 >>> 0 | (Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 / 4294967296.0), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) / 4294967296.0) >>> 0) : 0)) | 0; + return !(~~$0 >>> 0 | (Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 * 2.3283064365386963e-10), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) * 2.3283064365386963e-10) >>> 0) : 0)) | 0; } function legalstub$43($0, $1_1) { |