-rw-r--r--  CHANGELOG.md                                         |  2
-rw-r--r--  src/pass.h                                           |  5
-rw-r--r--  src/passes/OptimizeInstructions.cpp                  | 19
-rw-r--r--  src/tools/optimization-options.h                     |  8
-rw-r--r--  src/wasm/literal.cpp                                 | 41
-rw-r--r--  test/passes/O_fast-math.txt                          | 21
-rw-r--r--  test/passes/O_fast-math.wast                         | 57
-rw-r--r--  test/passes/fuzz-exec_O.txt                          | 52
-rw-r--r--  test/passes/fuzz-exec_O.wast                         | 45
-rw-r--r--  test/passes/optimize-instructions_all-features.txt   | 54
-rw-r--r--  test/passes/optimize-instructions_all-features.wast  |  4
-rw-r--r--  test/spec/old_float_exprs.wast                       | 12
12 files changed, 241 insertions, 79 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 590d756fd..63d887b69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,8 @@ full changeset diff at the end of each section.
Current Trunk
-------------
+- Add `--fast-math` mode. (#3155)
+
v97
---
diff --git a/src/pass.h b/src/pass.h
index a3ee41d61..27e7ee37f 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -102,6 +102,11 @@ struct PassOptions {
// many cases.
bool lowMemoryUnused = false;
enum { LowMemoryBound = 1024 };
+ // Whether to allow "loose" math semantics, ignoring corner cases with NaNs
+ // and assuming math follows the algebraic rules for associativity and so
+ // forth (which IEEE floats do not, strictly speaking). This is inspired by
+ // gcc/clang's -ffast-math flag.
+ bool fastMath = false;
// Whether to try to preserve debug info through the passes; it is stored in special calls.
bool debugInfo = false;
// Arbitrary string arguments from the commandline, which we forward to
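The corner cases this flag waives are easy to demonstrate. Below is a minimal standalone C++ sketch (not part of this commit) showing that the algebraic identity x + 0.0 == x is not bit-exact in IEEE arithmetic: it flips the sign of a negative zero, which is exactly the kind of difference fastMath permits the optimizer to ignore:

    #include <cmath>
    #include <cstdio>

    int main() {
      float x = -0.0f;
      float y = x + 0.0f; // algebraically a no-op, but (-0.0) + (+0.0)
                          // yields +0.0 under round-to-nearest
      std::printf("signbit(x) = %d, signbit(x + 0.0f) = %d\n",
                  (int)std::signbit(x), (int)std::signbit(y));
      // prints: signbit(x) = 1, signbit(x + 0.0f) = 0
      return 0;
    }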
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 36d92e81f..55af9a34d 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -161,7 +161,10 @@ struct OptimizeInstructions
#endif
}
+ bool fastMath;
+
void doWalkFunction(Function* func) {
+ fastMath = getPassOptions().fastMath;
// first, scan locals
{
LocalScanner scanner(localInfo, getPassOptions());
@@ -1414,14 +1417,15 @@ private:
}
{
double value;
- if (matches(curr, binary(Abstract::Sub, any(), fval(&value))) &&
+ if (fastMath &&
+ matches(curr, binary(Abstract::Sub, any(), fval(&value))) &&
value == 0.0) {
// x - (-0.0) ==> x + 0.0
if (std::signbit(value)) {
curr->op = Abstract::getBinary(type, Abstract::Add);
right->value = right->value.neg();
return curr;
} else {
// x - 0.0 ==> x
return curr->left;
}
@@ -1430,19 +1434,18 @@ private:
{
// x + (-0.0) ==> x
double value;
- if (matches(curr, binary(Abstract::Add, any(), fval(&value))) &&
+ if (fastMath &&
+ matches(curr, binary(Abstract::Add, any(), fval(&value))) &&
value == 0.0 && std::signbit(value)) {
return curr->left;
}
}
- // Note that this is correct even on floats with a NaN on the left,
- // as a NaN would skip the computation and just return the NaN,
- // and that is precisely what we do here. but, the same with -1
- // (change to a negation) would be incorrect for that reason.
if (matches(curr, binary(Abstract::Mul, any(&left), constant(1))) ||
matches(curr, binary(Abstract::DivS, any(&left), constant(1))) ||
matches(curr, binary(Abstract::DivU, any(&left), constant(1)))) {
- return left;
+ if (curr->type.isInteger() || fastMath) {
+ return left;
+ }
}
return nullptr;
}
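The last hunk above is the core behavioral change: folding x * 1, x /s 1, and x /u 1 down to x stays unconditional for integers but now requires fast math for floats, since the float operations can quiet a signaling NaN's payload. A hedged sketch of that decision rule (Ty and canFoldOpWithOne are illustrative names, not Binaryen's IR; the real pass checks curr->type.isInteger() and the cached fastMath member):

    // Illustrative stand-ins for wasm::Type and the pass's check.
    enum class Ty { I32, I64, F32, F64 };

    bool canFoldOpWithOne(Ty type, bool fastMath) {
      bool isInteger = (type == Ty::I32 || type == Ty::I64);
      // Integer x*1 and x/1 are bit-exact identities; the float versions
      // may change NaN bits, so they are reserved for --fast-math.
      return isInteger || fastMath;
    }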
diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h
index 72f478329..5b6a643e6 100644
--- a/src/tools/optimization-options.h
+++ b/src/tools/optimization-options.h
@@ -187,7 +187,13 @@ struct OptimizationOptions : public ToolOptions {
Options::Arguments::Zero,
[this](Options*, const std::string&) {
passOptions.lowMemoryUnused = true;
- });
+ })
+ .add(
+ "--fast-math",
+ "-ffm",
+ "Optimize floats without handling corner cases of NaNs and rounding",
+ Options::Arguments::Zero,
+ [this](Options*, const std::string&) { passOptions.fastMath = true; });
// add passes in registry
for (const auto& p : PassRegistry::get()->getRegisteredNames()) {
(*this).add(
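Once registered here, any tool built on OptimizationOptions accepts the flag, so an invocation such as `wasm-opt -O --fast-math in.wasm -o out.wasm` (or the short form `-ffm`) enables the relaxed rules gated above.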
diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp
index d309be308..54453b356 100644
--- a/src/wasm/literal.cpp
+++ b/src/wasm/literal.cpp
@@ -934,35 +934,10 @@ Literal Literal::mul(const Literal& other) const {
return Literal(uint32_t(i32) * uint32_t(other.i32));
case Type::i64:
return Literal(uint64_t(i64) * uint64_t(other.i64));
- case Type::f32: {
- // Special-case multiplication by 1. nan * 1 can change nan bits per the
- // wasm spec, but it is ok to just return that original nan, and we
- // do that here so that we are consistent with the optimization of
- // removing the * 1 and leaving just the nan. That is, if we just
- // do a normal multiply and the CPU decides to change the bits, we'd
- // give a different result on optimized code, which would look like
- // it was a bad optimization. So out of all the valid results to
- // return here, return the simplest one that is consistent with
- // our optimization for the case of 1.
- float lhs = getf32(), rhs = other.getf32();
- if (rhs == 1) {
- return Literal(lhs);
- }
- if (lhs == 1) {
- return Literal(rhs);
- }
- return Literal(lhs * rhs);
- }
- case Type::f64: {
- double lhs = getf64(), rhs = other.getf64();
- if (rhs == 1) {
- return Literal(lhs);
- }
- if (lhs == 1) {
- return Literal(rhs);
- }
- return Literal(lhs * rhs);
- }
+ case Type::f32:
+ return Literal(getf32() * other.getf32());
+ case Type::f64:
+ return Literal(getf64() * other.getf64());
case Type::v128:
case Type::funcref:
case Type::externref:
@@ -1002,10 +977,6 @@ Literal Literal::div(const Literal& other) const {
case FP_INFINITE: // fallthrough
case FP_NORMAL: // fallthrough
case FP_SUBNORMAL:
- // Special-case division by 1, similar to multiply from earlier.
- if (rhs == 1) {
- return Literal(lhs);
- }
return Literal(lhs / rhs);
default:
WASM_UNREACHABLE("invalid fp classification");
@@ -1034,10 +1005,6 @@ Literal Literal::div(const Literal& other) const {
case FP_INFINITE: // fallthrough
case FP_NORMAL: // fallthrough
case FP_SUBNORMAL:
- // See above comment on f32.
- if (rhs == 1) {
- return Literal(lhs);
- }
return Literal(lhs / rhs);
default:
WASM_UNREACHABLE("invalid fp classification");
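With the special cases removed, the interpreter now performs the real multiply and divide, and on typical hardware an arithmetic operation on a signaling NaN sets the quiet bit (the most significant mantissa bit). A small sketch of that payload arithmetic, which accounts for every changed NaN constant in the test expectations below:

    #include <cstdint>
    #include <cstdio>

    // Quieting an f32 NaN sets the top of its 23 mantissa bits.
    uint32_t quietF32Payload(uint32_t payload) { return payload | 0x400000u; }

    int main() {
      // -nan:0x23017a -> -nan:0x63017a (the "div" results below)
      std::printf("0x%06x -> 0x%06x\n", 0x23017au, quietF32Payload(0x23017au));
      // -nan:0x34546d -> -nan:0x74546d (the mul/add/sub results below)
      std::printf("0x%06x -> 0x%06x\n", 0x34546du, quietF32Payload(0x34546du));
      return 0;
    }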
diff --git a/test/passes/O_fast-math.txt b/test/passes/O_fast-math.txt
new file mode 100644
index 000000000..1b454c68e
--- /dev/null
+++ b/test/passes/O_fast-math.txt
@@ -0,0 +1,21 @@
+(module
+ (type $none_=>_f32 (func (result f32)))
+ (export "div" (func $0))
+ (export "mul1" (func $1))
+ (export "mul2" (func $2))
+ (export "add1" (func $1))
+ (export "add2" (func $2))
+ (export "add3" (func $2))
+ (export "add4" (func $2))
+ (export "sub1" (func $1))
+ (export "sub2" (func $2))
+ (func $0 (; has Stack IR ;) (result f32)
+ (f32.const -nan:0x23017a)
+ )
+ (func $1 (; has Stack IR ;) (result f32)
+ (f32.const -nan:0x34546d)
+ )
+ (func $2 (; has Stack IR ;) (result f32)
+ (f32.const -nan:0x74546d)
+ )
+)
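Note how nine exports share three bodies here: every function folds to a constant, and duplicate-function elimination merges the identical ones. "div", "mul1", "add1", and "sub1" keep their original payloads because the fast-math identities remove the operation before anything is evaluated; the remaining functions are still precomputed through the interpreter, which quiets the NaN to 0x74546d.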
diff --git a/test/passes/O_fast-math.wast b/test/passes/O_fast-math.wast
new file mode 100644
index 000000000..2317f782d
--- /dev/null
+++ b/test/passes/O_fast-math.wast
@@ -0,0 +1,57 @@
+;; with fast-math we can optimize some of these patterns
+(module
+ (func "div" (result f32)
+ (f32.div
+ (f32.const -nan:0x23017a)
+ (f32.const 1)
+ )
+ )
+ (func "mul1" (result f32)
+ (f32.mul
+ (f32.const -nan:0x34546d)
+ (f32.const 1)
+ )
+ )
+ (func "mul2" (result f32)
+ (f32.mul
+ (f32.const 1)
+ (f32.const -nan:0x34546d)
+ )
+ )
+ (func "add1" (result f32)
+ (f32.add
+ (f32.const -nan:0x34546d)
+ (f32.const -0)
+ )
+ )
+ (func "add2" (result f32)
+ (f32.add
+ (f32.const -0)
+ (f32.const -nan:0x34546d)
+ )
+ )
+ (func "add3" (result f32)
+ (f32.add
+ (f32.const -nan:0x34546d)
+ (f32.const 0)
+ )
+ )
+ (func "add4" (result f32)
+ (f32.add
+ (f32.const 0)
+ (f32.const -nan:0x34546d)
+ )
+ )
+ (func "sub1" (result f32)
+ (f32.sub
+ (f32.const -nan:0x34546d)
+ (f32.const 0)
+ )
+ )
+ (func "sub2" (result f32)
+ (f32.sub
+ (f32.const -nan:0x34546d)
+ (f32.const -0)
+ )
+ )
+)
diff --git a/test/passes/fuzz-exec_O.txt b/test/passes/fuzz-exec_O.txt
index ef8e165bb..f17b04650 100644
--- a/test/passes/fuzz-exec_O.txt
+++ b/test/passes/fuzz-exec_O.txt
@@ -31,29 +31,65 @@
[fuzz-exec] comparing func_0
[fuzz-exec] comparing func_1
[fuzz-exec] calling div
-[fuzz-exec] note result: div => -nan:0x23017a
+[fuzz-exec] note result: div => -nan:0x63017a
[fuzz-exec] calling mul1
-[fuzz-exec] note result: mul1 => -nan:0x34546d
+[fuzz-exec] note result: mul1 => -nan:0x74546d
[fuzz-exec] calling mul2
-[fuzz-exec] note result: mul2 => -nan:0x34546d
+[fuzz-exec] note result: mul2 => -nan:0x74546d
+[fuzz-exec] calling add1
+[fuzz-exec] note result: add1 => -nan:0x74546d
+[fuzz-exec] calling add2
+[fuzz-exec] note result: add2 => -nan:0x74546d
+[fuzz-exec] calling add3
+[fuzz-exec] note result: add3 => -nan:0x74546d
+[fuzz-exec] calling add4
+[fuzz-exec] note result: add4 => -nan:0x74546d
+[fuzz-exec] calling sub1
+[fuzz-exec] note result: sub1 => -nan:0x74546d
+[fuzz-exec] calling sub2
+[fuzz-exec] note result: sub2 => -nan:0x74546d
(module
(type $none_=>_f32 (func (result f32)))
(export "div" (func $0))
(export "mul1" (func $1))
(export "mul2" (func $1))
+ (export "add1" (func $1))
+ (export "add2" (func $1))
+ (export "add3" (func $1))
+ (export "add4" (func $1))
+ (export "sub1" (func $1))
+ (export "sub2" (func $1))
(func $0 (; has Stack IR ;) (result f32)
- (f32.const -nan:0x23017a)
+ (f32.const -nan:0x63017a)
)
(func $1 (; has Stack IR ;) (result f32)
- (f32.const -nan:0x34546d)
+ (f32.const -nan:0x74546d)
)
)
[fuzz-exec] calling div
-[fuzz-exec] note result: div => -nan:0x23017a
+[fuzz-exec] note result: div => -nan:0x63017a
[fuzz-exec] calling mul1
-[fuzz-exec] note result: mul1 => -nan:0x34546d
+[fuzz-exec] note result: mul1 => -nan:0x74546d
[fuzz-exec] calling mul2
-[fuzz-exec] note result: mul2 => -nan:0x34546d
+[fuzz-exec] note result: mul2 => -nan:0x74546d
+[fuzz-exec] calling add1
+[fuzz-exec] note result: add1 => -nan:0x74546d
+[fuzz-exec] calling add2
+[fuzz-exec] note result: add2 => -nan:0x74546d
+[fuzz-exec] calling add3
+[fuzz-exec] note result: add3 => -nan:0x74546d
+[fuzz-exec] calling add4
+[fuzz-exec] note result: add4 => -nan:0x74546d
+[fuzz-exec] calling sub1
+[fuzz-exec] note result: sub1 => -nan:0x74546d
+[fuzz-exec] calling sub2
+[fuzz-exec] note result: sub2 => -nan:0x74546d
+[fuzz-exec] comparing add1
+[fuzz-exec] comparing add2
+[fuzz-exec] comparing add3
+[fuzz-exec] comparing add4
[fuzz-exec] comparing div
[fuzz-exec] comparing mul1
[fuzz-exec] comparing mul2
+[fuzz-exec] comparing sub1
+[fuzz-exec] comparing sub2
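fuzz-exec runs the module in the interpreter both before and after optimization and compares the results. Since the interpreter itself now quiets NaNs in mul and div, and the *1 and /1 foldings are disabled without --fast-math, both runs agree on the quieted payloads (0x63017a, 0x74546d) and the comparison passes without the optimizer having to preserve exact NaN bits.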
diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast
index 5c739c548..b34dc2e8f 100644
--- a/test/passes/fuzz-exec_O.wast
+++ b/test/passes/fuzz-exec_O.wast
@@ -22,10 +22,10 @@
)
(module
(func "div" (result f32)
- (f32.div ;; div by 1 can be removed, leaving this nan
- (f32.const -nan:0x23017a) ;; as it is. wasm semantics allow nan bits to
- (f32.const 1) ;; change, but the interpreter should not do so,
- ) ;; so that it does not fail on that opt.
+ (f32.div
+ (f32.const -nan:0x23017a)
+ (f32.const 1)
+ )
)
(func "mul1" (result f32)
(f32.mul
@@ -39,5 +39,40 @@
(f32.const -nan:0x34546d)
)
)
+ (func "add1" (result f32)
+ (f32.add
+ (f32.const -nan:0x34546d)
+ (f32.const -0)
+ )
+ )
+ (func "add2" (result f32)
+ (f32.add
+ (f32.const -0)
+ (f32.const -nan:0x34546d)
+ )
+ )
+ (func "add3" (result f32)
+ (f32.add
+ (f32.const -nan:0x34546d)
+ (f32.const 0)
+ )
+ )
+ (func "add4" (result f32)
+ (f32.add
+ (f32.const 0)
+ (f32.const -nan:0x34546d)
+ )
+ )
+ (func "sub1" (result f32)
+ (f32.sub
+ (f32.const -nan:0x34546d)
+ (f32.const 0)
+ )
+ )
+ (func "sub2" (result f32)
+ (f32.sub
+ (f32.const -nan:0x34546d)
+ (f32.const -0)
+ )
+ )
)
-
diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt
index 873d550d7..5babd75de 100644
--- a/test/passes/optimize-instructions_all-features.txt
+++ b/test/passes/optimize-instructions_all-features.txt
@@ -2886,10 +2886,16 @@
(local.get $x64)
)
(drop
- (local.get $y32)
+ (f32.mul
+ (local.get $y32)
+ (f32.const 1)
+ )
)
(drop
- (local.get $y64)
+ (f64.mul
+ (local.get $y64)
+ (f64.const 1)
+ )
)
(drop
(i32.const 0)
@@ -2922,10 +2928,16 @@
(local.get $x64)
)
(drop
- (local.get $y32)
+ (f32.div
+ (local.get $y32)
+ (f32.const 1)
+ )
)
(drop
- (local.get $y64)
+ (f64.div
+ (local.get $y64)
+ (f64.const 1)
+ )
)
(drop
(f32.div
@@ -3703,27 +3715,39 @@
)
(func $const-float-zero (param $fx f32) (param $fy f64)
(drop
- (local.get $fx)
+ (f32.sub
+ (local.get $fx)
+ (f32.const 0)
+ )
)
(drop
- (local.get $fy)
+ (f64.sub
+ (local.get $fy)
+ (f64.const 0)
+ )
)
(drop
- (local.get $fx)
+ (f32.add
+ (local.get $fx)
+ (f32.const -0)
+ )
)
(drop
- (local.get $fy)
+ (f64.add
+ (local.get $fy)
+ (f64.const -0)
+ )
)
(drop
- (f32.add
+ (f32.sub
(local.get $fx)
- (f32.const 0)
+ (f32.const -0)
)
)
(drop
- (f64.add
+ (f64.sub
(local.get $fy)
- (f64.const 0)
+ (f64.const -0)
)
)
(drop
@@ -3750,6 +3774,12 @@
(f64.const 0)
)
)
+ (drop
+ (f32.sub
+ (f32.const -nan:0x34546d)
+ (f32.const 0)
+ )
+ )
)
(func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64)
(drop
diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast
index 59e7e21d8..246fd41a2 100644
--- a/test/passes/optimize-instructions_all-features.wast
+++ b/test/passes/optimize-instructions_all-features.wast
@@ -4284,6 +4284,10 @@
(local.get $fy) ;; skip
(f64.const 0)
))
+ (drop (f32.sub
+ (f32.const -nan:0x34546d) ;; skip
+ (f32.const 0)
+ ))
)
(func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64)
(drop (i32.sub
diff --git a/test/spec/old_float_exprs.wast b/test/spec/old_float_exprs.wast
index ca031114f..7900832b0 100644
--- a/test/spec/old_float_exprs.wast
+++ b/test/spec/old_float_exprs.wast
@@ -103,10 +103,8 @@
(f64.mul (local.get $x) (f64.const 1.0)))
)
-;; XXX BINARYEN: disable this test, as we have testing for the more strict property
-;; of not changing the bits at all in our interpreter
-;; (assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
-;; (assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
+(assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
+(assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
;; Test that 0.0/x is not folded to 0.0.
@@ -135,10 +133,8 @@
(f64.div (local.get $x) (f64.const 1.0)))
)
-;; XXX BINARYEN: disable this test, as we have testing for the more strict property
-;; of not changing the bits at all in our interpreter
-;; (assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:arithmetic))
-;; (assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:arithmetic))
+(assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
+(assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
;; Test that x/-1.0 is not folded to -x.
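These spec assertions can be re-enabled because the interpreter now produces the arithmetically quieted payloads the upstream testsuite expects: 0x200000 | 0x400000 = 0x600000 for f32, and 0x4000000000000 | 0x8000000000000 = 0xc000000000000 for f64 (the f64 quiet bit is bit 51).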