-rw-r--r--   CHANGELOG.md                                         |  2
-rw-r--r--   src/pass.h                                           |  5
-rw-r--r--   src/passes/OptimizeInstructions.cpp                  | 19
-rw-r--r--   src/tools/optimization-options.h                     |  8
-rw-r--r--   src/wasm/literal.cpp                                 | 41
-rw-r--r--   test/passes/O_fast-math.txt                          | 21
-rw-r--r--   test/passes/O_fast-math.wast                         | 57
-rw-r--r--   test/passes/fuzz-exec_O.txt                          | 52
-rw-r--r--   test/passes/fuzz-exec_O.wast                         | 45
-rw-r--r--   test/passes/optimize-instructions_all-features.txt   | 54
-rw-r--r--   test/passes/optimize-instructions_all-features.wast  |  4
-rw-r--r--   test/spec/old_float_exprs.wast                       | 12
12 files changed, 241 insertions, 79 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 590d756fd..63d887b69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,8 @@ full changeset diff at the end of each section.
 Current Trunk
 -------------
 
+- Add `--fast-math` mode. (#3155)
+
 v97
 ---
 
diff --git a/src/pass.h b/src/pass.h
index a3ee41d61..27e7ee37f 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -102,6 +102,11 @@ struct PassOptions {
   // many cases.
   bool lowMemoryUnused = false;
   enum { LowMemoryBound = 1024 };
+  // Whether to allow "loose" math semantics, ignoring corner cases with NaNs
+  // and assuming math follows the algebraic rules for associativity and so
+  // forth (which IEEE floats do not, strictly speaking). This is inspired by
+  // gcc/clang's -ffast-math flag.
+  bool fastMath = false;
   // Whether to try to preserve debug info through, which are special calls.
   bool debugInfo = false;
   // Arbitrary string arguments from the commandline, which we forward to
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 36d92e81f..55af9a34d 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -161,7 +161,10 @@ struct OptimizeInstructions
 #endif
   }
 
+  bool fastMath;
+
   void doWalkFunction(Function* func) {
+    fastMath = getPassOptions().fastMath;
     // first, scan locals
     {
       LocalScanner scanner(localInfo, getPassOptions());
@@ -1414,14 +1417,15 @@ private:
     }
     {
       double value;
-      if (matches(curr, binary(Abstract::Sub, any(), fval(&value))) &&
+      if (fastMath &&
+          matches(curr, binary(Abstract::Sub, any(), fval(&value))) &&
           value == 0.0) {
         // x - (-0.0)   ==>   x + 0.0
         if (std::signbit(value)) {
           curr->op = Abstract::getBinary(type, Abstract::Add);
           right->value = right->value.neg();
           return curr;
-        } else {
+        } else if (fastMath) {
           // x - 0.0   ==>   x
           return curr->left;
         }
@@ -1430,19 +1434,18 @@ private:
     {
       // x + (-0.0)   ==>   x
       double value;
-      if (matches(curr, binary(Abstract::Add, any(), fval(&value))) &&
+      if (fastMath &&
+          matches(curr, binary(Abstract::Add, any(), fval(&value))) &&
           value == 0.0 && std::signbit(value)) {
         return curr->left;
       }
     }
-    // Note that this is correct even on floats with a NaN on the left,
-    // as a NaN would skip the computation and just return the NaN,
-    // and that is precisely what we do here. but, the same with -1
-    // (change to a negation) would be incorrect for that reason.
     if (matches(curr, binary(Abstract::Mul, any(&left), constant(1))) ||
         matches(curr, binary(Abstract::DivS, any(&left), constant(1))) ||
         matches(curr, binary(Abstract::DivU, any(&left), constant(1)))) {
-      return left;
+      if (curr->type.isInteger() || fastMath) {
+        return left;
+      }
    }
    return nullptr;
  }
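The gating above exists because wasm lets a floating-point arithmetic instruction return a NaN whose payload bits differ from its input's (in practice, engines set the quiet bit), so folding away an f32.sub/f32.mul/f32.div that might receive a NaN can change observable bits. Here is a minimal standalone C++ sketch of the effect, assuming typical IEEE hardware that quiets signaling NaNs; the constant mirrors the -nan:0x34546d used in the tests below, and none of this code is part of the patch:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // -nan:0x34546d as raw f32 bits: sign bit, all-ones exponent,
      // payload 0x34546d (the quiet bit 0x400000 is clear: signaling).
      uint32_t inBits = 0x80000000u | 0x7f800000u | 0x34546d;
      float x;
      std::memcpy(&x, &inBits, sizeof x);

      float folded = x;          // what the fold "x - 0.0 ==> x" yields
      float computed = x - 0.0f; // what executing the f32.sub may yield

      uint32_t foldedBits, computedBits;
      std::memcpy(&foldedBits, &folded, sizeof foldedBits);
      std::memcpy(&computedBits, &computed, sizeof computedBits);
      // Typically prints ffb4546d vs fff4546d: performing the subtraction
      // sets the quiet bit, turning -nan:0x34546d into -nan:0x74546d.
      std::printf("folded=%08x computed=%08x\n", foldedBits, computedBits);
    }

Only under --fast-math may the optimizer assume the two results agree; integer x * 1 and x / 1 need no such assumption, hence the curr->type.isInteger() escape hatch.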
diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h
index 72f478329..5b6a643e6 100644
--- a/src/tools/optimization-options.h
+++ b/src/tools/optimization-options.h
@@ -187,7 +187,13 @@ struct OptimizationOptions : public ToolOptions {
         Options::Arguments::Zero,
         [this](Options*, const std::string&) {
           passOptions.lowMemoryUnused = true;
-        });
+        })
+      .add(
+        "--fast-math",
+        "-ffm",
+        "Optimize floats without handling corner cases of NaNs and rounding",
+        Options::Arguments::Zero,
+        [this](Options*, const std::string&) { passOptions.fastMath = true; });
     // add passes in registry
     for (const auto& p : PassRegistry::get()->getRegisteredNames()) {
       (*this).add(
diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp
index d309be308..54453b356 100644
--- a/src/wasm/literal.cpp
+++ b/src/wasm/literal.cpp
@@ -934,35 +934,10 @@ Literal Literal::mul(const Literal& other) const {
       return Literal(uint32_t(i32) * uint32_t(other.i32));
     case Type::i64:
       return Literal(uint64_t(i64) * uint64_t(other.i64));
-    case Type::f32: {
-      // Special-case multiplication by 1. nan * 1 can change nan bits per the
-      // wasm spec, but it is ok to just return that original nan, and we
-      // do that here so that we are consistent with the optimization of
-      // removing the * 1 and leaving just the nan. That is, if we just
-      // do a normal multiply and the CPU decides to change the bits, we'd
-      // give a different result on optimized code, which would look like
-      // it was a bad optimization. So out of all the valid results to
-      // return here, return the simplest one that is consistent with
-      // our optimization for the case of 1.
-      float lhs = getf32(), rhs = other.getf32();
-      if (rhs == 1) {
-        return Literal(lhs);
-      }
-      if (lhs == 1) {
-        return Literal(rhs);
-      }
-      return Literal(lhs * rhs);
-    }
-    case Type::f64: {
-      double lhs = getf64(), rhs = other.getf64();
-      if (rhs == 1) {
-        return Literal(lhs);
-      }
-      if (lhs == 1) {
-        return Literal(rhs);
-      }
-      return Literal(lhs * rhs);
-    }
+    case Type::f32:
+      return Literal(getf32() * other.getf32());
+    case Type::f64:
+      return Literal(getf64() * other.getf64());
     case Type::v128:
     case Type::funcref:
     case Type::externref:
@@ -1002,10 +977,6 @@ Literal Literal::div(const Literal& other) const {
         case FP_INFINITE: // fallthrough
         case FP_NORMAL:   // fallthrough
         case FP_SUBNORMAL:
-          // Special-case division by 1, similar to multiply from earlier.
-          if (rhs == 1) {
-            return Literal(lhs);
-          }
           return Literal(lhs / rhs);
         default:
           WASM_UNREACHABLE("invalid fp classification");
@@ -1034,10 +1005,6 @@ Literal Literal::div(const Literal& other) const {
        case FP_INFINITE: // fallthrough
        case FP_NORMAL:   // fallthrough
        case FP_SUBNORMAL:
-          // See above comment on f32.
-          if (rhs == 1) {
-            return Literal(lhs);
-          }
          return Literal(lhs / rhs);
        default:
          WASM_UNREACHABLE("invalid fp classification");
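With the flag wired into the shared options header above, tools built on OptimizationOptions (wasm-opt, for example) accept it on the command line. A typical invocation would look something like the following, where the file names are placeholders:

    wasm-opt -O --fast-math input.wasm -o output.wasm

The short form -ffm echoes gcc/clang's -ffast-math, which the pass.h comment cites as the inspiration.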
diff --git a/test/passes/O_fast-math.txt b/test/passes/O_fast-math.txt
new file mode 100644
index 000000000..1b454c68e
--- /dev/null
+++ b/test/passes/O_fast-math.txt
@@ -0,0 +1,21 @@
+(module
+ (type $none_=>_f32 (func (result f32)))
+ (export "div" (func $0))
+ (export "mul1" (func $1))
+ (export "mul2" (func $2))
+ (export "add1" (func $1))
+ (export "add2" (func $2))
+ (export "add3" (func $2))
+ (export "add4" (func $2))
+ (export "sub1" (func $1))
+ (export "sub2" (func $2))
+ (func $0 (; has Stack IR ;) (result f32)
+  (f32.const -nan:0x23017a)
+ )
+ (func $1 (; has Stack IR ;) (result f32)
+  (f32.const -nan:0x34546d)
+ )
+ (func $2 (; has Stack IR ;) (result f32)
+  (f32.const -nan:0x74546d)
+ )
+)
diff --git a/test/passes/O_fast-math.wast b/test/passes/O_fast-math.wast
new file mode 100644
index 000000000..2317f782d
--- /dev/null
+++ b/test/passes/O_fast-math.wast
@@ -0,0 +1,57 @@
+;; with fast-math we can optimize some of these patterns
+(module
+  (func "div" (result f32)
+    (f32.div
+      (f32.const -nan:0x23017a)
+      (f32.const 1)
+    )
+  )
+  (func "mul1" (result f32)
+    (f32.mul
+      (f32.const -nan:0x34546d)
+      (f32.const 1)
+    )
+  )
+  (func "mul2" (result f32)
+    (f32.mul
+      (f32.const 1)
+      (f32.const -nan:0x34546d)
+    )
+  )
+  (func "add1" (result f32)
+    (f32.add
+      (f32.const -nan:0x34546d)
+      (f32.const -0)
+    )
+  )
+  (func "add2" (result f32)
+    (f32.add
+      (f32.const -0)
+      (f32.const -nan:0x34546d)
+    )
+  )
+  (func "add3" (result f32)
+    (f32.add
+      (f32.const -nan:0x34546d)
+      (f32.const 0)
+    )
+  )
+  (func "add4" (result f32)
+    (f32.add
+      (f32.const 0)
+      (f32.const -nan:0x34546d)
+    )
+  )
+  (func "sub1" (result f32)
+    (f32.sub
+      (f32.const -nan:0x34546d)
+      (f32.const 0)
+    )
+  )
+  (func "sub2" (result f32)
+    (f32.sub
+      (f32.const -nan:0x34546d)
+      (f32.const -0)
+    )
+  )
+)
diff --git a/test/passes/fuzz-exec_O.txt b/test/passes/fuzz-exec_O.txt
index ef8e165bb..f17b04650 100644
--- a/test/passes/fuzz-exec_O.txt
+++ b/test/passes/fuzz-exec_O.txt
@@ -31,29 +31,65 @@
 [fuzz-exec] comparing func_0
 [fuzz-exec] comparing func_1
 [fuzz-exec] calling div
-[fuzz-exec] note result: div => -nan:0x23017a
+[fuzz-exec] note result: div => -nan:0x63017a
 [fuzz-exec] calling mul1
-[fuzz-exec] note result: mul1 => -nan:0x34546d
+[fuzz-exec] note result: mul1 => -nan:0x74546d
 [fuzz-exec] calling mul2
-[fuzz-exec] note result: mul2 => -nan:0x34546d
+[fuzz-exec] note result: mul2 => -nan:0x74546d
+[fuzz-exec] calling add1
+[fuzz-exec] note result: add1 => -nan:0x74546d
+[fuzz-exec] calling add2
+[fuzz-exec] note result: add2 => -nan:0x74546d
+[fuzz-exec] calling add3
+[fuzz-exec] note result: add3 => -nan:0x74546d
+[fuzz-exec] calling add4
+[fuzz-exec] note result: add4 => -nan:0x74546d
+[fuzz-exec] calling sub1
+[fuzz-exec] note result: sub1 => -nan:0x74546d
+[fuzz-exec] calling sub2
+[fuzz-exec] note result: sub2 => -nan:0x74546d
 (module
  (type $none_=>_f32 (func (result f32)))
  (export "div" (func $0))
  (export "mul1" (func $1))
  (export "mul2" (func $1))
+ (export "add1" (func $1))
+ (export "add2" (func $1))
+ (export "add3" (func $1))
+ (export "add4" (func $1))
+ (export "sub1" (func $1))
+ (export "sub2" (func $1))
  (func $0 (; has Stack IR ;) (result f32)
-  (f32.const -nan:0x23017a)
+  (f32.const -nan:0x63017a)
  )
  (func $1 (; has Stack IR ;) (result f32)
-  (f32.const -nan:0x34546d)
+  (f32.const -nan:0x74546d)
  )
 )
 [fuzz-exec] calling div
-[fuzz-exec] note result: div => -nan:0x23017a
+[fuzz-exec] note result: div => -nan:0x63017a
 [fuzz-exec] calling mul1
-[fuzz-exec] note result: mul1 => -nan:0x34546d
+[fuzz-exec] note result: mul1 => -nan:0x74546d
 [fuzz-exec] calling mul2
-[fuzz-exec] note result: mul2 => -nan:0x34546d
+[fuzz-exec] note result: mul2 => -nan:0x74546d
+[fuzz-exec] calling add1
+[fuzz-exec] note result: add1 => -nan:0x74546d
+[fuzz-exec] calling add2
+[fuzz-exec] note result: add2 => -nan:0x74546d
+[fuzz-exec] calling add3
+[fuzz-exec] note result: add3 => -nan:0x74546d
+[fuzz-exec] calling add4
+[fuzz-exec] note result: add4 => -nan:0x74546d
+[fuzz-exec] calling sub1
+[fuzz-exec] note result: sub1 => -nan:0x74546d
+[fuzz-exec] calling sub2
+[fuzz-exec] note result: sub2 => -nan:0x74546d
+[fuzz-exec] comparing add1
+[fuzz-exec] comparing add2
+[fuzz-exec] comparing add3
+[fuzz-exec] comparing add4
 [fuzz-exec] comparing div
 [fuzz-exec] comparing mul1
 [fuzz-exec] comparing mul2
+[fuzz-exec] comparing sub1
+[fuzz-exec] comparing sub2
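A note on the expected values above: the input NaNs are signaling (payloads 0x23017a and 0x34546d both have the f32 quiet bit 0x400000 clear). Now that the interpreter no longer special-cases *1 and /1, actually performing the arithmetic quiets them: 0x23017a | 0x400000 = 0x63017a, and 0x34546d | 0x400000 = 0x74546d. Both the old and new payloads are permitted by wasm's nondeterministic NaN semantics; what fuzz-exec checks is that optimized and unoptimized runs agree, and the matching "comparing" lines show that they now do.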
diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast
index 5c739c548..b34dc2e8f 100644
--- a/test/passes/fuzz-exec_O.wast
+++ b/test/passes/fuzz-exec_O.wast
@@ -22,10 +22,10 @@
 )
 (module
   (func "div" (result f32)
-    (f32.div ;; div by 1 can be removed, leaving this nan
-      (f32.const -nan:0x23017a) ;; as it is. wasm semantics allow nan bits to
-      (f32.const 1) ;; change, but the interpreter should not do so,
-    ) ;; so that it does not fail on that opt.
+    (f32.div
+      (f32.const -nan:0x23017a)
+      (f32.const 1)
+    )
   )
   (func "mul1" (result f32)
     (f32.mul
@@ -39,5 +39,40 @@
       (f32.const -nan:0x34546d)
     )
   )
+  (func "add1" (result f32)
+    (f32.add
+      (f32.const -nan:0x34546d)
+      (f32.const -0)
+    )
+  )
+  (func "add2" (result f32)
+    (f32.add
+      (f32.const -0)
+      (f32.const -nan:0x34546d)
+    )
+  )
+  (func "add3" (result f32)
+    (f32.add
+      (f32.const -nan:0x34546d)
+      (f32.const 0)
+    )
+  )
+  (func "add4" (result f32)
+    (f32.add
+      (f32.const 0)
+      (f32.const -nan:0x34546d)
+    )
+  )
+  (func "sub1" (result f32)
+    (f32.sub
+      (f32.const -nan:0x34546d)
+      (f32.const 0)
+    )
+  )
+  (func "sub2" (result f32)
+    (f32.sub
+      (f32.const -nan:0x34546d)
+      (f32.const -0)
+    )
+  )
 )
-
diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt
index 873d550d7..5babd75de 100644
--- a/test/passes/optimize-instructions_all-features.txt
+++ b/test/passes/optimize-instructions_all-features.txt
@@ -2886,10 +2886,16 @@
   (local.get $x64)
  )
  (drop
-  (local.get $y32)
+  (f32.mul
+   (local.get $y32)
+   (f32.const 1)
+  )
 )
 (drop
-  (local.get $y64)
+  (f64.mul
+   (local.get $y64)
+   (f64.const 1)
+  )
 )
 (drop
  (i32.const 0)
@@ -2922,10 +2928,16 @@
   (local.get $x64)
  )
  (drop
-  (local.get $y32)
+  (f32.div
+   (local.get $y32)
+   (f32.const 1)
+  )
 )
 (drop
-  (local.get $y64)
+  (f64.div
+   (local.get $y64)
+   (f64.const 1)
+  )
 )
 (drop
  (f32.div
@@ -3703,27 +3715,39 @@
 )
 (func $const-float-zero (param $fx f32) (param $fy f64)
  (drop
-  (local.get $fx)
+  (f32.sub
+   (local.get $fx)
+   (f32.const 0)
+  )
 )
 (drop
-  (local.get $fy)
+  (f64.sub
+   (local.get $fy)
+   (f64.const 0)
+  )
 )
 (drop
-  (local.get $fx)
+  (f32.add
+   (local.get $fx)
+   (f32.const -0)
+  )
 )
 (drop
-  (local.get $fy)
+  (f64.add
+   (local.get $fy)
+   (f64.const -0)
+  )
 )
 (drop
-  (f32.add
+  (f32.sub
    (local.get $fx)
-   (f32.const 0)
+   (f32.const -0)
  )
 )
 (drop
-  (f64.add
+  (f64.sub
    (local.get $fy)
-   (f64.const 0)
+   (f64.const -0)
  )
 )
 (drop
@@ -3750,6 +3774,12 @@
    (f64.const 0)
   )
  )
+ (drop
+  (f32.sub
+   (f32.const -nan:0x34546d)
+   (f32.const 0)
+  )
+ )
 )
 (func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64)
  (drop
diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast
index 59e7e21d8..246fd41a2 100644
--- a/test/passes/optimize-instructions_all-features.wast
+++ b/test/passes/optimize-instructions_all-features.wast
@@ -4284,6 +4284,10 @@
       (local.get $fy) ;; skip
       (f64.const 0)
     ))
+    (drop (f32.sub
+      (f32.const -nan:0x34546d) ;; skip
+      (f32.const 0)
+    ))
   )
   (func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64)
     (drop (i32.sub
diff --git a/test/spec/old_float_exprs.wast b/test/spec/old_float_exprs.wast
index ca031114f..7900832b0 100644
--- a/test/spec/old_float_exprs.wast
+++ b/test/spec/old_float_exprs.wast
@@ -103,10 +103,8 @@
     (f64.mul (local.get $x) (f64.const 1.0)))
 )
 
-;; XXX BINARYEN: disable this test, as we have testing for the more strict property
-;; of not changing the bits at all in our interpreter
-;; (assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
-;; (assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
+(assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
+(assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
 
 ;; Test that 0.0/x is not folded to 0.0.
 
@@ -135,10 +133,8 @@
     (f64.div (local.get $x) (f64.const 1.0)))
 )
 
-;; XXX BINARYEN: disable this test, as we have testing for the more strict property
-;; of not changing the bits at all in our interpreter
-;; (assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:arithmetic))
-;; (assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:arithmetic))
+(assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:0x600000))
+(assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000))
 
 ;; Test that x/-1.0 is not folded to -x.
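The re-enabled asserts follow the same quieting arithmetic: the f32 quiet bit is 0x400000, so nan:0x200000 comes back as nan:0x600000, and the f64 quiet bit is 0x8000000000000, so nan:0x4000000000000 comes back as nan:0xc000000000000. These lines had been commented out because Binaryen's interpreter used to guarantee bit-identical NaNs through *1.0 and /1.0; with that special-casing removed, the asserts now pin down the quieted payloads the interpreter produces (for division by one, the upstream spec test only required an arithmetic NaN).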