Add --fast-math mode (#3155)

Similar to clang and gcc, --fast-math makes us ignore corner cases of floating-point math, such as changes to NaN bits and (not done yet) the lack of associativity, and so forth. In the future we may want to have separate fast-math flags for each specific relaxation, as gcc and clang do. This undoes some changes (#2958 and #3096) in which we assumed it was OK to leave NaN bits unchanged, but @binji corrected us: we can only do such things in fast-math mode. This commit puts those optimizations behind that flag, adds tests for it, and restores the interpreter to the simpler code from before, with no special cases.
author: Alon Zakai <azakai@google.com> 2020-09-30 12:39:05 -0700
committer: GitHub <noreply@github.com> 2020-09-30 12:39:05 -0700
commit: 07047103a26e1c17ee995ef3e1358ddb26d8e8c8 (patch)
tree: edaf22d19ab7c22c3cff58f9e02d8f41abcb5b3e /test
parent: 11de8894505d37b7b970a2103bc5b1cfd094b115 (diff)
download: binaryen-07047103a26e1c17ee995ef3e1358ddb26d8e8c8.tar.gz
binaryen-07047103a26e1c17ee995ef3e1358ddb26d8e8c8.tar.bz2
binaryen-07047103a26e1c17ee995ef3e1358ddb26d8e8c8.zip
7 files changed, 212 insertions, 33 deletions
diff --git a/test/passes/O_fast-math.txt b/test/passes/O_fast-math.txt
new file mode 100644
index 000000000..1b454c68e
--- /dev/null
+++ b/test/passes/O_fast-math.txt
@@ -0,0 +1,21 @@
+(module
+ (type $none_=>_f32 (func (result f32)))
+ (export "div" (func $0))
+ (export "mul1" (func $1))
+ (export "mul2" (func $2))
+ (export "add1" (func $1))
+ (export "add2" (func $2))
+ (export "add3" (func $2))
+ (export "add4" (func $2))
+ (export "sub1" (func $1))
+ (export "sub2" (func $2))
+ (func $0 (; has Stack IR ;) (result f32)
+  (f32.const -nan:0x23017a)
+ )
+ (func $1 (; has Stack IR ;) (result f32)
+  (f32.const -nan:0x34546d)
+ )
+ (func $2 (; has Stack IR ;) (result f32)
+  (f32.const -nan:0x74546d)
+ )
+)
diff --git a/test/passes/O_fast-math.wast b/test/passes/O_fast-math.wast
new file mode 100644
index 000000000..2317f782d
--- /dev/null
+++ b/test/passes/O_fast-math.wast
@@ -0,0 +1,57 @@
+;; with fast-math we can optimize some of these patterns
+(module ;; each export returns one f32 op applied to a constant NaN and an identity-like constant (1, 0 or -0)
+ (func "div" (result f32) ;; NaN / 1: O_fast-math.txt expects the input NaN back unchanged (-nan:0x23017a)
+  (f32.div
+   (f32.const -nan:0x23017a)
+   (f32.const 1)
+  )
+ )
+ (func "mul1" (result f32) ;; NaN * 1: O_fast-math.txt expects the input NaN back unchanged (-nan:0x34546d)
+  (f32.mul
+   (f32.const -nan:0x34546d)
+   (f32.const 1)
+  )
+ )
+ (func "mul2" (result f32) ;; 1 * NaN (constant on the left): O_fast-math.txt expects -nan:0x74546d, not the input bits
+  (f32.mul
+   (f32.const 1)
+   (f32.const -nan:0x34546d)
+  )
+ )
+ (func "add1" (result f32) ;; NaN + -0: O_fast-math.txt expects the input NaN back unchanged (-nan:0x34546d)
+  (f32.add
+   (f32.const -nan:0x34546d)
+   (f32.const -0)
+  )
+ )
+ (func "add2" (result f32) ;; -0 + NaN (constant on the left): O_fast-math.txt expects -nan:0x74546d
+  (f32.add
+   (f32.const -0)
+   (f32.const -nan:0x34546d)
+  )
+ )
+ (func "add3" (result f32) ;; NaN + 0 (positive zero): O_fast-math.txt expects -nan:0x74546d
+  (f32.add
+   (f32.const -nan:0x34546d)
+   (f32.const 0)
+  )
+ )
+ (func "add4" (result f32) ;; 0 + NaN: O_fast-math.txt expects -nan:0x74546d
+  (f32.add
+   (f32.const 0)
+   (f32.const -nan:0x34546d)
+  )
+ )
+ (func "sub1" (result f32) ;; NaN - 0: O_fast-math.txt expects the input NaN back unchanged (-nan:0x34546d)
+  (f32.sub
+   (f32.const -nan:0x34546d)
+   (f32.const 0)
+  )
+ )
+ (func "sub2" (result f32) ;; NaN - (-0): O_fast-math.txt expects -nan:0x74546d
+  (f32.sub
+   (f32.const -nan:0x34546d)
+   (f32.const -0)
+  )
+ )
+)
diff --git a/test/passes/fuzz-exec_O.txt b/test/passes/fuzz-exec_O.txt
index ef8e165bb..f17b04650 100644
--- a/test/passes/fuzz-exec_O.txt
+++ b/test/passes/fuzz-exec_O.txt
@@ -31,29 +31,65 @@
 [fuzz-exec] comparing func_0
 [fuzz-exec] comparing func_1
 [fuzz-exec] calling div
-[fuzz-exec] note result: div => -nan:0x23017a
+[fuzz-exec] note result: div => -nan:0x63017a
 [fuzz-exec] calling mul1
-[fuzz-exec] note result: mul1 => -nan:0x34546d
+[fuzz-exec] note result: mul1 => -nan:0x74546d
 [fuzz-exec] calling mul2
-[fuzz-exec] note result: mul2 => -nan:0x34546d
+[fuzz-exec] note result: mul2 => -nan:0x74546d
+[fuzz-exec] calling add1
+[fuzz-exec] note result: add1 => -nan:0x74546d
+[fuzz-exec] calling add2
+[fuzz-exec] note result: add2 => -nan:0x74546d
+[fuzz-exec] calling add3
+[fuzz-exec] note result: add3 => -nan:0x74546d
+[fuzz-exec] calling add4
+[fuzz-exec] note result: add4 => -nan:0x74546d
+[fuzz-exec] calling sub1
+[fuzz-exec] note result: sub1 => -nan:0x74546d
+[fuzz-exec] calling sub2
+[fuzz-exec] note result: sub2 => -nan:0x74546d
 (module
  (type $none_=>_f32 (func (result f32)))
  (export "div" (func $0))
  (export "mul1" (func $1))
  (export "mul2" (func $1))
+ (export "add1" (func $1))
+ (export "add2" (func $1))
+ (export "add3" (func $1))
+ (export "add4" (func $1))
+ (export "sub1" (func $1))
+ (export "sub2" (func $1))
  (func $0 (; has Stack IR ;) (result f32)
-  (f32.const -nan:0x23017a)
+  (f32.const -nan:0x63017a)
  )
  (func $1 (; has Stack IR ;) (result f32)
-  (f32.const -nan:0x34546d)
+  (f32.const -nan:0x74546d)
  )
 )
 [fuzz-exec] calling div
-[fuzz-exec] note result: div => -nan:0x23017a
+[fuzz-exec] note result: div => -nan:0x63017a
 [fuzz-exec] calling mul1
-[fuzz-exec] note result: mul1 => -nan:0x34546d
+[fuzz-exec] note result: mul1 => -nan:0x74546d
 [fuzz-exec] calling mul2
-[fuzz-exec] note result: mul2 => -nan:0x34546d
+[fuzz-exec] note result: mul2 => -nan:0x74546d
+[fuzz-exec] calling add1
+[fuzz-exec] note result: add1 => -nan:0x74546d
+[fuzz-exec] calling add2
+[fuzz-exec] note result: add2 => -nan:0x74546d
+[fuzz-exec] calling add3
+[fuzz-exec] note result: add3 => -nan:0x74546d
+[fuzz-exec] calling add4
+[fuzz-exec] note result: add4 => -nan:0x74546d
+[fuzz-exec] calling sub1
+[fuzz-exec] note result: sub1 => -nan:0x74546d
+[fuzz-exec] calling sub2
+[fuzz-exec] note result: sub2 => -nan:0x74546d
+[fuzz-exec] comparing add1
+[fuzz-exec] comparing add2
+[fuzz-exec] comparing add3
+[fuzz-exec] comparing add4
 [fuzz-exec] comparing div
 [fuzz-exec] comparing mul1
 [fuzz-exec] comparing mul2
+[fuzz-exec] comparing sub1
+[fuzz-exec] comparing sub2
diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast
index 5c739c548..b34dc2e8f 100644
--- a/test/passes/fuzz-exec_O.wast
+++ b/test/passes/fuzz-exec_O.wast
@@ -22,10 +22,10 @@
 )
 (module
  (func "div" (result f32)
-  (f32.div                   ;; div by 1 can be removed, leaving this nan
-   (f32.const -nan:0x23017a) ;; as it is. wasm semantics allow nan bits to
-   (f32.const 1)             ;; change, but the interpreter should not do so,
-  )                          ;; so that it does not fail on that opt.
+  (f32.div
+   (f32.const -nan:0x23017a)
+   (f32.const 1)
+  )
  )
  (func "mul1" (result f32)
   (f32.mul
@@ -39,5 +39,40 @@
    (f32.const -nan:0x34546d)
   )
  )
+ (func "add1" (result f32)
+  (f32.add
+   (f32.const -nan:0x34546d)
+   (f32.const -0)
+  )
+ )
+ (func "add2" (result f32)
+  (f32.add
+   (f32.const -0)
+   (f32.const -nan:0x34546d)
+  )
+ )
+ (func "add3" (result f32)
+  (f32.add
+   (f32.const -nan:0x34546d)
+   (f32.const 0)
+  )
+ )
+ (func "add4" (result f32)
+  (f32.add
+   (f32.const 0)
+   (f32.const -nan:0x34546d)
+  )
+ )
+ (func "sub1" (result f32)
+  (f32.sub
+   (f32.const -nan:0x34546d)
+   (f32.const 0)
+  )
+ )
+ (func "sub2" (result f32)
+  (f32.sub
+   (f32.const -nan:0x34546d)
+   (f32.const -0)
+  )
+ )
 )
-
diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt
index 873d550d7..5babd75de 100644
--- a/test/passes/optimize-instructions_all-features.txt
+++ b/test/passes/optimize-instructions_all-features.txt
@@ -2886,10 +2886,16 @@
    (local.get $x64)
   )
   (drop
-   (local.get $y32)
+   (f32.mul
+    (local.get $y32)
+    (f32.const 1)
+   )
   )
   (drop
-   (local.get $y64)
+   (f64.mul
+    (local.get $y64)
+    (f64.const 1)
+   )
   )
   (drop
    (i32.const 0)
@@ -2922,10 +2928,16 @@
    (local.get $x64)
   )
   (drop
-   (local.get $y32)
+   (f32.div
+    (local.get $y32)
+    (f32.const 1)
+   )
   )
   (drop
-   (local.get $y64)
+   (f64.div
+    (local.get $y64)
+    (f64.const 1)
+   )
   )
   (drop
    (f32.div
@@ -3703,27 +3715,39 @@
  )
  (func $const-float-zero (param $fx f32) (param $fy f64)
   (drop
-   (local.get $fx)
+   (f32.sub
+    (local.get $fx)
+    (f32.const 0)
+   )
   )
   (drop
-   (local.get $fy)
+   (f64.sub
+    (local.get $fy)
+    (f64.const 0)
+   )
   )
   (drop
-   (local.get $fx)
+   (f32.add
+    (local.get $fx)
+    (f32.const -0)
+   )
   )
   (drop
-   (local.get $fy)
+   (f64.add
+    (local.get $fy)
+    (f64.const -0)
+   )
   )
   (drop
-   (f32.add
+   (f32.sub
     (local.get $fx)
-    (f32.const 0)
+    (f32.const -0)
    )
   )
   (drop
-   (f64.add
+   (f64.sub
     (local.get $fy)
-    (f64.const 0)
+    (f64.const -0)
    )
   )
   (drop
@@ -3750,6 +3774,12 @@
     (f64.const 0)
    )
   )
+  (drop
+   (f32.sub
+    (f32.const -nan:0x34546d)
+    (f32.const 0)
+   )
+  )
  )
  (func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64)
   (drop
diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast
index 59e7e21d8..246fd41a2 100644
--- a/test/passes/optimize-instructions_all-features.wast
+++ b/test/passes/optimize-instructions_all-features.wast
@@ -4284,6 +4284,10 @@
       (local.get $fy) ;; skip
       (f64.const 0)
     ))
+    (drop (f32.sub
+      (f32.const -nan:0x34546d) ;; skip
+      (f32.const 0)
+    ))
   )
   (func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64)
     (drop (i32.sub
diff --git a/test/spec/old_float_exprs.wast b/test/spec/old_float_exprs.wast
index ca031114f..7900832b0 100644
--- a/test/spec/old_float_exprs.wast
+++ b/test/spec/old_float_exprs.wast
@@ -103,10 +103,8 @@
     (f64.mul (local.get $x) (f64.const 1.0)))
 )
 
-;; XXX BINARYEN: disable this test, as we have testing for the more strict property
-;;               of not changing the bits at all in our interpreter
-;; (assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
-;; (assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
+(assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
+(assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
 
 ;; Test that 0.0/x is not folded to 0.0.
 
@@ -135,10 +133,8 @@
     (f64.div (local.get $x) (f64.const 1.0)))
 )
 
-;; XXX BINARYEN: disable this test, as we have testing for the more strict property
-;;               of not changing the bits at all in our interpreter
-;; (assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:arithmetic))
-;; (assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:arithmetic))
+(assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
+(assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
 
 ;; Test that x/-1.0 is not folded to -x.
author	Alon Zakai <azakai@google.com>	2020-09-30 12:39:05 -0700
committer	GitHub <noreply@github.com>	2020-09-30 12:39:05 -0700
commit	07047103a26e1c17ee995ef3e1358ddb26d8e8c8 (patch)
tree	edaf22d19ab7c22c3cff58f9e02d8f41abcb5b3e /test
parent	11de8894505d37b7b970a2103bc5b1cfd094b115 (diff)
download	binaryen-07047103a26e1c17ee995ef3e1358ddb26d8e8c8.tar.gz binaryen-07047103a26e1c17ee995ef3e1358ddb26d8e8c8.tar.bz2 binaryen-07047103a26e1c17ee995ef3e1358ddb26d8e8c8.zip