diff options
-rw-r--r-- | src/ir/abstract.h | 6 | ||||
-rw-r--r-- | src/literal.h | 23 | ||||
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 34 | ||||
-rw-r--r-- | src/wasm/literal.cpp | 36 | ||||
-rw-r--r-- | test/lit/passes/optimize-instructions.wast | 214 | ||||
-rw-r--r-- | test/passes/optimize-instructions_fuzz-exec.txt | 10 |
6 files changed, 271 insertions, 52 deletions
diff --git a/src/ir/abstract.h b/src/ir/abstract.h index 0cfeeea77..04b04e342 100644 --- a/src/ir/abstract.h +++ b/src/ir/abstract.h @@ -34,7 +34,6 @@ enum Op { Mul, DivU, DivS, - Rem, RemU, RemS, Shl, @@ -45,6 +44,7 @@ enum Op { And, Or, Xor, + CopySign, // Relational EqZ, Eq, @@ -261,6 +261,8 @@ inline BinaryOp getBinary(Type type, Op op) { return DivFloat32; case DivS: return DivFloat32; + case CopySign: + return CopySignFloat32; case Eq: return EqFloat32; case Ne: @@ -282,6 +284,8 @@ inline BinaryOp getBinary(Type type, Op op) { return DivFloat64; case DivS: return DivFloat64; + case CopySign: + return CopySignFloat64; case Eq: return EqFloat64; case Ne: diff --git a/src/literal.h b/src/literal.h index ffd565c71..9d9630ec4 100644 --- a/src/literal.h +++ b/src/literal.h @@ -706,6 +706,29 @@ struct GCData { GCData(HeapType type, Literals values) : type(type), values(values) {} }; +// Wasm has nondeterministic rules for NaN propagation in some operations. For +// example. f32.neg is deterministic and just flips the sign, even of a NaN, but +// f32.add is nondeterministic, and if one or more of the inputs is a NaN, then +// +// * if all NaNs are canonical NaNs, the output is some arbitrary canonical NaN +// * otherwise the output is some arbitrary arithmetic NaN +// +// (canonical = NaN payload is 1000..000; arithmetic: 1???..???, that is, the +// high bit is 1 and all others can be 0 or 1) +// +// For many things we don't need to care, and can just do a normal C++ add for +// an f32.add, for example - the wasm rules are specified so that things like +// that just work (in order for such math to be fast). However, for our +// optimizer, it is useful to "standardize" NaNs when there is nondeterminism. +// That is, when there are multiple valid outputs, it's nice to emit the same +// one consistently, so that it doesn't look like the optimization changed +// something. In other words, if the valid output of an expression is a set of +// valid NaNs, and after optimization the output is still that same set, then +// the optimization is valid. And if the interpreter picks the same NaN in both +// cases from that identical set then nothing looks wrong to the fuzzer. +Literal standardizeNaN(float result); +Literal standardizeNaN(double result); + } // namespace wasm namespace std { diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 7f0b28574..9024d0cb6 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3597,6 +3597,40 @@ private: } } } + { + // x != NaN ==> 1 + // x <=> NaN ==> 0 + // x op NaN' ==> NaN', iff `op` != `copysign` and `x` != C + Const* c; + Binary* bin; + Expression* x; + if (matches(curr, binary(&bin, pure(&x), fval(&c))) && + std::isnan(c->value.getFloat()) && + bin->op != getBinary(x->type, CopySign)) { + if (bin->isRelational()) { + // reuse "c" (nan) constant + c->type = Type::i32; + if (bin->op == getBinary(x->type, Ne)) { + // x != NaN ==> 1 + c->value = Literal::makeOne(Type::i32); + } else { + // x == NaN, + // x > NaN, + // x <= NaN + // x .. NaN ==> 0 + c->value = Literal::makeZero(Type::i32); + } + return c; + } + // propagate NaN of RHS but canonicalize it + if (c->type == Type::f32) { + c->value = standardizeNaN(c->value.getf32()); + } else { + c->value = standardizeNaN(c->value.getf64()); + } + return c; + } + } return nullptr; } diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 684108581..843f4607f 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -859,38 +859,20 @@ Literal Literal::demote() const { return Literal(float(getf64())); } -// Wasm has nondeterministic rules for NaN propagation in some operations. For -// example. f32.neg is deterministic and just flips the sign, even of a NaN, but -// f32.add is nondeterministic, and if one or more of the inputs is a NaN, then -// -// * if all NaNs are canonical NaNs, the output is some arbitrary canonical NaN -// * otherwise the output is some arbitrary arithmetic NaN -// -// (canonical = NaN payload is 1000..000; arithmetic: 1???..???, that is, the -// high bit is 1 and all others can be 0 or 1) -// -// For many things we don't need to care, and can just do a normal C++ add for -// an f32.add, for example - the wasm rules are specified so that things like -// that just work (in order for such math to be fast). However, for our -// optimizer, it is useful to "standardize" NaNs when there is nondeterminism. -// That is, when there are multiple valid outputs, it's nice to emit the same -// one consistently, so that it doesn't look like the optimization changed -// something. In other words, if the valid output of an expression is a set of -// valid NaNs, and after optimization the output is still that same set, then -// the optimization is valid. And if the interpreter picks the same NaN in both -// cases from that identical set then nothing looks wrong to the fuzzer. -template<typename T> static Literal standardizeNaN(T result) { +Literal standardizeNaN(float result) { if (!std::isnan(result)) { return Literal(result); } // Pick a simple canonical payload, and positive. - if (sizeof(T) == 4) { - return Literal(Literal(uint32_t(0x7fc00000u)).reinterpretf32()); - } else if (sizeof(T) == 8) { - return Literal(Literal(uint64_t(0x7ff8000000000000ull)).reinterpretf64()); - } else { - WASM_UNREACHABLE("invalid float"); + return Literal(Literal(uint32_t(0x7fc00000u)).reinterpretf32()); +} + +Literal standardizeNaN(double result) { + if (!std::isnan(result)) { + return Literal(result); } + // Pick a simple canonical payload, and positive. + return Literal(Literal(uint64_t(0x7ff8000000000000ull)).reinterpretf64()); } Literal Literal::add(const Literal& other) const { diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast index 728b195de..c75040a9f 100644 --- a/test/lit/passes/optimize-instructions.wast +++ b/test/lit/passes/optimize-instructions.wast @@ -1937,10 +1937,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (f64.ge - ;; CHECK-NEXT: (local.get $fx) - ;; CHECK-NEXT: (f64.const nan:0x8000000000000) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (f64.ge @@ -7692,10 +7689,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (f32.div - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (f32.const nan:0x400000) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const nan:0x400000) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (f32.div @@ -7836,10 +7830,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (f64.div - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (f64.const nan:0x8000000000000) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (f64.div @@ -10853,10 +10844,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (f64.div - ;; CHECK-NEXT: (local.get $fy) - ;; CHECK-NEXT: (f64.const -nan:0x8000000000000) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (f64.div @@ -11093,6 +11081,200 @@ ) )) ) + ;; CHECK: (func $rhs-is-const-nan (param $x f32) (param $y f64) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.const nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.copysign + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (f32.const nan:0x400000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.copysign + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (f32.const nan:0x200000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f64.copysign + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (f64.const -nan:0x8000000000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rhs-is-const-nan (param $x f32) (param $y f64) + + ;; arithmetic ops + + ;; x + nan' => nan' + (drop (f32.add (local.get $x) (f32.const nan))) + (drop (f32.add (local.get $x) (f32.const nan:0x200000))) + (drop (f64.add (local.get $y) (f64.const -nan))) + ;; x - nan' => nan' + (drop (f32.sub (local.get $x) (f32.const nan))) + (drop (f32.sub (local.get $x) (f32.const nan:0x200000))) + (drop (f64.sub (local.get $y) (f64.const -nan))) + ;; x * nan' => nan' + (drop (f32.mul (local.get $x) (f32.const nan))) + (drop (f32.mul (local.get $x) (f32.const nan:0x200000))) + (drop (f64.mul (local.get $y) (f64.const -nan))) + ;; x / nan' => nan' + (drop (f32.div (local.get $x) (f32.const nan))) + (drop (f32.div (local.get $x) (f32.const nan:0x200000))) + (drop (f64.div (local.get $y) (f64.const -nan))) + + ;; min / max ops + + ;; min(x, nan') => nan' + (drop (f32.min (local.get $x) (f32.const nan))) + (drop (f32.min (local.get $x) (f32.const nan:0x200000))) + (drop (f64.min (local.get $y) (f64.const -nan))) + ;; max(x, nan') => nan' + (drop (f32.max (local.get $x) (f32.const nan))) + (drop (f32.max (local.get $x) (f32.const nan:0x200000))) + (drop (f64.max (local.get $y) (f64.const -nan))) + + ;; copysign ops (should be skipped) + + ;; copysign(x, nan) => skip + (drop (f32.copysign (local.get $x) (f32.const nan))) + (drop (f32.copysign (local.get $x) (f32.const nan:0x200000))) + (drop (f64.copysign (local.get $y) (f64.const -nan))) + + ;; relational ops + + ;; x != nan => 1 + (drop (f32.ne (local.get $x) (f32.const nan))) + (drop (f32.ne (local.get $x) (f32.const nan:0x200000))) + (drop (f64.ne (local.get $y) (f64.const -nan))) + ;; x == nan => 0 + (drop (f32.eq (local.get $x) (f32.const nan))) + (drop (f32.eq (local.get $x) (f32.const nan:0x200000))) + (drop (f64.eq (local.get $y) (f64.const -nan))) + ;; x > nan => 0 + (drop (f32.gt (local.get $x) (f32.const nan))) + (drop (f32.gt (local.get $x) (f32.const nan:0x200000))) + (drop (f64.gt (local.get $y) (f64.const -nan))) + ;; x >= nan => 0 + (drop (f32.ge (local.get $x) (f32.const nan))) + (drop (f32.ge (local.get $x) (f32.const nan:0x200000))) + (drop (f64.ge (local.get $y) (f64.const -nan))) + ;; x < nan => 0 + (drop (f32.lt (local.get $x) (f32.const nan))) + (drop (f32.lt (local.get $x) (f32.const nan:0x200000))) + (drop (f64.lt (local.get $y) (f64.const -nan))) + ;; x <= nan => 0 + (drop (f32.le (local.get $x) (f32.const nan))) + (drop (f32.le (local.get $x) (f32.const nan:0x200000))) + (drop (f64.le (local.get $y) (f64.const -nan))) + ) ;; CHECK: (func $lhs-is-neg-one (param $x i32) (param $y i64) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const -1) diff --git a/test/passes/optimize-instructions_fuzz-exec.txt b/test/passes/optimize-instructions_fuzz-exec.txt index ea32b8916..5a8ae7e5f 100644 --- a/test/passes/optimize-instructions_fuzz-exec.txt +++ b/test/passes/optimize-instructions_fuzz-exec.txt @@ -62,10 +62,7 @@ ) ) (call $logf32 - (f32.div - (f32.const nan:0x7fff82) - (f32.const -nan:0x7ff622) - ) + (f32.const nan:0x400000) ) (call $logf32 (f32.copysign @@ -118,10 +115,7 @@ ) ) (call $logf64 - (f64.div - (f64.const nan:0xfffffffffff82) - (f64.const -nan:0xfffffffffa622) - ) + (f64.const nan:0x8000000000000) ) (call $logf64 (f64.copysign |