diff options
author | Brendan Dahl <brendan.dahl@gmail.com> | 2024-09-03 12:08:50 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-03 12:08:50 -0700 |
commit | db9ee9434bd74ac8f1637ec109dc52e4b09794a7 (patch) | |
tree | 3a6067d3467a74730ee92914444169f6ae48e29c /test/spec | |
parent | b7cdb8c2110dff5a9b096d766dac04cd8ec04cc9 (diff) | |
download | binaryen-db9ee9434bd74ac8f1637ec109dc52e4b09794a7.tar.gz binaryen-db9ee9434bd74ac8f1637ec109dc52e4b09794a7.tar.bz2 binaryen-db9ee9434bd74ac8f1637ec109dc52e4b09794a7.zip |
[FP16] Implement madd and nmadd. (#6878)
Specified at
https://github.com/WebAssembly/half-precision/blob/main/proposals/half-precision/Overview.md
A few notes:
- The F32x4 and F64x2 versions of madd and nmadd are missing spec
tests.
- For madd, the implementation was incorrectly doing `(b*c)+a` where it
should be `(a*b)+c`.
- For nmadd, the implementation was incorrectly doing `(-b*c)+a` where
it should be `-(a*b)+c`.
- There doesn't appear to be a great way to actually implement a fused
nmadd, but the spec allows the double rounded version I added.
Diffstat (limited to 'test/spec')
-rw-r--r-- | test/spec/f16.wast | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/test/spec/f16.wast b/test/spec/f16.wast index 09ee9328b..d5de0c0e8 100644 --- a/test/spec/f16.wast +++ b/test/spec/f16.wast @@ -32,6 +32,8 @@ (func (export "f16x8.floor") (param $0 v128) (result v128) (f16x8.floor (local.get $0))) (func (export "f16x8.trunc") (param $0 v128) (result v128) (f16x8.trunc (local.get $0))) (func (export "f16x8.nearest") (param $0 v128) (result v128) (f16x8.nearest (local.get $0))) + (func (export "f16x8.relaxed_madd") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f16x8.relaxed_madd (local.get $0) (local.get $1) (local.get $2))) + (func (export "f16x8.relaxed_nmadd") (param $0 v128) (param $1 v128) (param $2 v128) (result v128) (f16x8.relaxed_nmadd (local.get $0) (local.get $1) (local.get $2))) ) (assert_return (invoke "f32.load_f16") (f32.const 42.0)) @@ -187,3 +189,30 @@ (v128.const i16x8 0x7e00 0 0x7c00 0xfc00 0xbc00 0x3c00 0x3e00 0x3ccd)) ;; nan 0 inf -inf -1 1 2 1 (v128.const i16x8 0x7e00 0 0x7c00 0xfc00 0xbc00 0x3c00 0x4000 0x3c00)) +;; ternary operations +(assert_return (invoke "f16x8.relaxed_madd" + ;; Lane 0 illustrates the difference between fused/unfused. e.g. + ;; fused: (positive overflow) + -inf = -inf + ;; unfused: (inf) + -inf = NaN + ;; + ;; 1e4 inf -1 0 1 1.5 -2 1 + (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0xc000 0x3c00) + ;; 1e4 inf -1 0 1 1.5 4 1 + (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0x4400 0x3c00) + ;; -inf inf -1 0 1 2 1 -1 + (v128.const i16x8 0xfc00 0x7c00 0xbc00 0 0x3c00 0x4000 0x3c00 0xbc00)) + ;; -inf inf 0 0 2 4.25 -7 0 + (v128.const i16x8 0xfc00 0x7c00 0 0 0x4000 0x4440 0xc700 0)) +(assert_return (invoke "f16x8.relaxed_nmadd" + ;; Lane 0 illustrates the difference between fused/unfused. e.g. 
+ ;; fused: -(positive overflow) + inf = inf + ;; unfused: (-inf) + inf = NaN + ;; + ;; 1e4 -inf -1 0 1 1.5 -2 1 + (v128.const i16x8 0x70e2 0xfc00 0xbc00 0 0x3c00 0x3e00 0xc000 0x3c00) + ;; 1e4 inf -1 0 1 1.5 4 1 + (v128.const i16x8 0x70e2 0x7c00 0xbc00 0 0x3c00 0x3e00 0x4400 0x3c00) + ;; inf inf -1 0 1 2 1 -1 + (v128.const i16x8 0x7c00 0x7c00 0xbc00 0 0x3c00 0x4000 0x3c00 0xbc00)) + ;; inf inf -2 0 0 -0.25 9 -2 + (v128.const i16x8 0x7c00 0x7c00 0xc000 0 0 0xb400 0x4880 0xc000)) |