Optimize power of two float divisions (#3018)

author: Max Graey <maxgraey@gmail.com> 2020-10-14 03:49:46 +0300
committer: GitHub <noreply@github.com> 2020-10-13 17:49:46 -0700
commit: 1a1b547aee688a0d96251e8afd565999acfb1922 (patch)
tree: 7aa1794b4dfc8db996f41b3203848e20b245d768
parent: 8943fcc31d12545043043c06c037cf6dfee14722 (diff)
download: binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.tar.gz
binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.tar.bz2
binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.zip
7 files changed, 368 insertions, 12 deletions
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 1e51adea9..85ab2a168 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -606,6 +606,18 @@ struct OptimizeInstructions
             }
           }
         }
+        if (binary->op == DivFloat32) {
+          float c = right->value.getf32();
+          if (Bits::isPowerOf2Float(c)) {
+            return optimizePowerOf2FDiv(binary, c);
+          }
+        }
+        if (binary->op == DivFloat64) {
+          double c = right->value.getf64();
+          if (Bits::isPowerOf2Float(c)) {
+            return optimizePowerOf2FDiv(binary, c);
+          }
+        }
       }
       // a bunch of operations on a constant left side can be simplified
       if (binary->left->is<Const>()) {
@@ -1260,6 +1272,15 @@ private:
   // but it's still worth doing since
   //  * Usually ands are more common than urems.
   //  * The constant is slightly smaller.
+  template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) {
+    static_assert(std::is_same<T, uint32_t>::value ||
+                    std::is_same<T, uint64_t>::value,
+                  "type mismatch");
+    binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64;
+    binary->right->cast<Const>()->value = Literal(c - 1);
+    return binary;
+  }
+
   template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) {
     static_assert(std::is_same<T, uint32_t>::value ||
                     std::is_same<T, uint64_t>::value,
@@ -1270,12 +1291,30 @@ private:
     return binary;
   }
 
-  template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) {
-    static_assert(std::is_same<T, uint32_t>::value ||
-                    std::is_same<T, uint64_t>::value,
+  template<typename T> Expression* optimizePowerOf2FDiv(Binary* binary, T c) {
+    //
+    // x / C_pot    =>   x * (C_pot ^ -1)
+    //
+    // Explanation:
+    // Floating point numbers are represented as:
+    //    ((-1) ^ sign) * (2 ^ (exp - bias)) * (1 + significand)
+    //
+    // If we have power of two numbers, then the mantissa (significand)
+    // is all zeros. Let's focus on the exponent, ignoring the sign part:
+    //    (2 ^ (exp - bias))
+    //
+    // and for inverted power of two floating point:
+    //     1.0 / (2 ^ (exp - bias))   ->   2 ^ -(exp - bias)
+    //
+    // So inversion of C_pot is valid because it changes only the sign
+    // of the exponent part and doesn't touch the significand part,
+    // which remains the same (zeros).
+    static_assert(std::is_same<T, float>::value ||
+                    std::is_same<T, double>::value,
                   "type mismatch");
-    binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64;
-    binary->right->cast<Const>()->value = Literal(c - 1);
+    double invDivisor = 1.0 / (double)c;
+    binary->op = std::is_same<T, float>::value ? MulFloat32 : MulFloat64;
+    binary->right->cast<Const>()->value = Literal(static_cast<T>(invDivisor));
     return binary;
   }
 
diff --git a/src/support/bits.cpp b/src/support/bits.cpp
index 03fb66252..8a81a7e83 100644
--- a/src/support/bits.cpp
+++ b/src/support/bits.cpp
@@ -157,6 +157,30 @@ int ceilLog2(uint32_t v) { return 32 - countLeadingZeroes(v - 1); }
 
 int ceilLog2(uint64_t v) { return 64 - countLeadingZeroes(v - 1); }
 
+bool isPowerOf2Float(float v) {
+  // Power of two floating points should have zero as their significands,
+  // so here we just mask the exponent range of "v" and compare it with the
+  // unmasked input value. If they are equal, our value is a power of
+  // two. Also, we reject all values which are less than the minimal possible
+  // power of two or greater than the maximum possible power of two.
+  const uint32_t MIN_POT = 0x01U << 23;  // 0x1p-126
+  const uint32_t MAX_POT = 0xFDU << 23;  // 0x1p+126
+  const uint32_t EXP_MASK = 0xFFU << 23; // mask only exponent
+  const uint32_t SIGN_MASK = ~0U >> 1;   // mask everything except sign
+  auto u = bit_cast<uint32_t>(v) & SIGN_MASK;
+  return u >= MIN_POT && u <= MAX_POT && (u & EXP_MASK) == u;
+}
+
+bool isPowerOf2Float(double v) {
+  // See isPowerOf2Float(float)
+  const uint64_t MIN_POT = 0x001ULL << 52;  // 0x1p-1022
+  const uint64_t MAX_POT = 0x7FDULL << 52;  // 0x1p+1022
+  const uint64_t EXP_MASK = 0x7FFULL << 52; // mask only exponent
+  const uint64_t SIGN_MASK = ~0ULL >> 1;    // mask everything except sign
+  auto u = bit_cast<uint64_t>(v) & SIGN_MASK;
+  return u >= MIN_POT && u <= MAX_POT && (u & EXP_MASK) == u;
+}
+
 uint32_t log2(uint32_t v) {
   if (!isPowerOf2(v)) {
     WASM_UNREACHABLE("value should be a power of two");
diff --git a/src/support/bits.h b/src/support/bits.h
index 3231e1c96..333eb5cc2 100644
--- a/src/support/bits.h
+++ b/src/support/bits.h
@@ -77,6 +77,9 @@ template<typename T> bool isPowerOf2(T v) {
   return v != 0 && (v & (v - 1)) == 0;
 }
 
+bool isPowerOf2Float(float);
+bool isPowerOf2Float(double);
+
 template<typename T, typename U> inline static T rotateLeft(T val, U count) {
   auto value = typename std::make_unsigned<T>::type(val);
   U mask = sizeof(T) * CHAR_BIT - 1;
diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt
index 64b2d787b..93f4c3087 100644
--- a/test/passes/optimize-instructions_all-features.txt
+++ b/test/passes/optimize-instructions_all-features.txt
@@ -16,7 +16,9 @@
  (type $i32_i32_i32_f64_=>_none (func (param i32 i32 i32 f64)))
  (type $i32_i32_f64_f64_=>_none (func (param i32 i32 f64 f64)))
  (type $i32_i64_f64_i32_=>_none (func (param i32 i64 f64 i32)))
+ (type $f32_=>_none (func (param f32)))
  (type $f32_f64_=>_none (func (param f32 f64)))
+ (type $f64_=>_none (func (param f64)))
  (type $none_=>_f64 (func (result f64)))
  (memory $0 0)
  (export "load-off-2" (func $load-off-2))
@@ -2728,6 +2730,178 @@
   )
   (unreachable)
  )
+ (func $fdiv-32-power-2 (param $x f32)
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const 0.5)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const -0.5)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const 2.3283064365386963e-10)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const 5.421010862427522e-20)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const 8507059173023461586584365e13)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const 1.1754943508222875e-38)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const -8507059173023461586584365e13)
+   )
+  )
+  (drop
+   (f32.mul
+    (local.get $x)
+    (f32.const -1.1754943508222875e-38)
+   )
+  )
+  (drop
+   (f32.div
+    (local.get $x)
+    (f32.const 5.877471754111438e-39)
+   )
+  )
+  (drop
+   (f32.div
+    (local.get $x)
+    (f32.const 5.877471754111438e-39)
+   )
+  )
+  (drop
+   (f32.div
+    (local.get $x)
+    (f32.const 0)
+   )
+  )
+  (drop
+   (f32.div
+    (local.get $x)
+    (f32.const nan:0x400000)
+   )
+  )
+  (drop
+   (f32.div
+    (local.get $x)
+    (f32.const inf)
+   )
+  )
+  (drop
+   (f32.div
+    (local.get $x)
+    (f32.const -inf)
+   )
+  )
+ )
+ (func $fdiv-64-power-2 (param $x f64)
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const 0.5)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const -0.5)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const 2.3283064365386963e-10)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const 5.421010862427522e-20)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const 4494232837155789769323262e283)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const 2.2250738585072014e-308)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const -4494232837155789769323262e283)
+   )
+  )
+  (drop
+   (f64.mul
+    (local.get $x)
+    (f64.const -2.2250738585072014e-308)
+   )
+  )
+  (drop
+   (f64.div
+    (local.get $x)
+    (f64.const 1.1125369292536007e-308)
+   )
+  )
+  (drop
+   (f64.div
+    (local.get $x)
+    (f64.const 8988465674311579538646525e283)
+   )
+  )
+  (drop
+   (f64.div
+    (local.get $x)
+    (f64.const 0)
+   )
+  )
+  (drop
+   (f64.div
+    (local.get $x)
+    (f64.const nan:0x8000000000000)
+   )
+  )
+  (drop
+   (f64.div
+    (local.get $x)
+    (f64.const inf)
+   )
+  )
+  (drop
+   (f64.div
+    (local.get $x)
+    (f64.const -inf)
+   )
+  )
+ )
  (func $srem-by-const (param $x i32) (param $y i64)
   (drop
    (i32.const 0)
@@ -2995,13 +3169,13 @@
    (local.get $x64)
   )
   (drop
-   (f32.div
+   (f32.mul
     (local.get $y32)
     (f32.const 1)
    )
   )
   (drop
-   (f64.div
+   (f64.mul
     (local.get $y64)
     (f64.const 1)
    )
diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast
index e84778fde..80f440a59 100644
--- a/test/passes/optimize-instructions_all-features.wast
+++ b/test/passes/optimize-instructions_all-features.wast
@@ -3093,6 +3093,122 @@
     )
     (unreachable)
   )
+  (func $fdiv-32-power-2 (param $x f32)
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 2)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const -2)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 4294967296)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 18446744073709551616)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 0x1p-126)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 0x1p+126)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const -0x1p-126)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const -0x1p+126)
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 0x1p-127) ;; skip
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 0x1p-127) ;; skip
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const 0) ;; skip
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const nan) ;; skip
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const inf) ;; skip
+    ))
+    (drop (f32.div
+      (local.get $x)
+      (f32.const -inf) ;; skip
+    ))
+  )
+  (func $fdiv-64-power-2 (param $x f64)
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 2)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const -2)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 4294967296)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 18446744073709551616)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 0x1p-1022)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 0x1p+1022)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const -0x1p-1022)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const -0x1p+1022)
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 0x1p-1023) ;; skip
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 0x1p+1023) ;; skip
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const 0) ;; skip
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const nan) ;; skip
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const inf) ;; skip
+    ))
+    (drop (f64.div
+      (local.get $x)
+      (f64.const -inf) ;; skip
+    ))
+  )
   (func $srem-by-const (param $x i32) (param $y i64)
     ;; (signed)x % 1
     (drop (i32.rem_s
@@ -4908,7 +5024,7 @@
         (local.get $x)
         (local.get $y)
       )
-     ))
+    ))
     ;; x | (y | x)   where x and y cannot be reordered  -  skip
     (drop
       (i32.or
diff --git a/test/wasm2js/conversions-modified.2asm.js.opt b/test/wasm2js/conversions-modified.2asm.js.opt
index befce2489..072a3bbc6 100644
--- a/test/wasm2js/conversions-modified.2asm.js.opt
+++ b/test/wasm2js/conversions-modified.2asm.js.opt
@@ -67,13 +67,13 @@ function asmFunc(global, env) {
  
  function $7($0) {
   $0 = Math_fround($0);
-  i64toi32_i32$HIGH_BITS = Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 / Math_fround(4294967296.0)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) / Math_fround(4294967296.0)))) >>> 0) : 0;
+  i64toi32_i32$HIGH_BITS = Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 * Math_fround(2.3283064365386963e-10)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) * Math_fround(2.3283064365386963e-10)))) >>> 0) : 0;
   return ~~$0 >>> 0 | 0;
  }
  
  function $9($0) {
   $0 = +$0;
-  i64toi32_i32$HIGH_BITS = Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 / 4294967296.0), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) / 4294967296.0) >>> 0) : 0;
+  i64toi32_i32$HIGH_BITS = Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 * 2.3283064365386963e-10), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) * 2.3283064365386963e-10) >>> 0) : 0;
   return ~~$0 >>> 0 | 0;
  }
  
diff --git a/test/wasm2js/float-ops.2asm.js.opt b/test/wasm2js/float-ops.2asm.js.opt
index 1635dbc9a..fd13fe56d 100644
--- a/test/wasm2js/float-ops.2asm.js.opt
+++ b/test/wasm2js/float-ops.2asm.js.opt
@@ -238,12 +238,12 @@ function asmFunc(global, env) {
  
  function $47($0) {
   $0 = Math_fround($0);
-  return !(~~$0 >>> 0 | (Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 / Math_fround(4294967296.0)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) / Math_fround(4294967296.0)))) >>> 0) : 0)) | 0;
+  return !(~~$0 >>> 0 | (Math_fround(Math_abs($0)) >= Math_fround(1.0) ? ($0 > Math_fround(0.0) ? ~~Math_fround(Math_min(Math_fround(Math_floor(Math_fround($0 * Math_fround(2.3283064365386963e-10)))), Math_fround(4294967296.0))) >>> 0 : ~~Math_fround(Math_ceil(Math_fround(Math_fround($0 - Math_fround(~~$0 >>> 0 >>> 0)) * Math_fround(2.3283064365386963e-10)))) >>> 0) : 0)) | 0;
  }
  
  function $48($0) {
   $0 = +$0;
-  return !(~~$0 >>> 0 | (Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 / 4294967296.0), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) / 4294967296.0) >>> 0) : 0)) | 0;
+  return !(~~$0 >>> 0 | (Math_abs($0) >= 1.0 ? ($0 > 0.0 ? ~~Math_min(Math_floor($0 * 2.3283064365386963e-10), 4294967295.0) >>> 0 : ~~Math_ceil(($0 - +(~~$0 >>> 0 >>> 0)) * 2.3283064365386963e-10) >>> 0) : 0)) | 0;
  }
  
  function legalstub$43($0, $1_1) {
author	Max Graey <maxgraey@gmail.com>	2020-10-14 03:49:46 +0300
committer	GitHub <noreply@github.com>	2020-10-13 17:49:46 -0700
commit	1a1b547aee688a0d96251e8afd565999acfb1922 (patch)
tree	7aa1794b4dfc8db996f41b3203848e20b245d768
parent	8943fcc31d12545043043c06c037cf6dfee14722 (diff)
download	binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.tar.gz binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.tar.bz2 binaryen-1a1b547aee688a0d96251e8afd565999acfb1922.zip