diff options
author | ivarflakstad <69173633+ivarflakstad@users.noreply.github.com> | 2024-01-12 11:18:11 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-12 11:18:11 +0100 |
commit | e90bcdcc7c51dd85037055b59f22568100d801f0 (patch) | |
tree | 2c7e89df98f44192d92b185682d40b71295fd704 /candle-metal-kernels | |
parent | 8e06bfb4fd33f1229a03abee20cc1c07198408b5 (diff) | |
download | candle-e90bcdcc7c51dd85037055b59f22568100d801f0.tar.gz candle-e90bcdcc7c51dd85037055b59f22568100d801f0.tar.bz2 candle-e90bcdcc7c51dd85037055b59f22568100d801f0.zip |
Metal: f16 and bf16 where_cond + benchmark (#1545)
* Use cfg to separate benchmark results based on features
* Add metal where_cond for f16 and bf16. Add benchmark
* Remove allow pragma
* Avoid some unnecessary returns.
* Improve benchmarks layout
* Updated feature separated benchmarks
---------
Co-authored-by: Laurent <laurent.mazare@gmail.com>
Diffstat (limited to 'candle-metal-kernels')
-rw-r--r-- | candle-metal-kernels/src/ternary.metal | 66 |
1 files changed, 43 insertions, 23 deletions
diff --git a/candle-metal-kernels/src/ternary.metal b/candle-metal-kernels/src/ternary.metal index 40b4bcf4..7b3b8ca9 100644 --- a/candle-metal-kernels/src/ternary.metal +++ b/candle-metal-kernels/src/ternary.metal @@ -17,29 +17,45 @@ METAL_FUNC uint get_strided_index( return strided_i; } +template<typename T, typename ID> +METAL_FUNC void where_cond( + constant size_t &numel, + constant size_t &num_dims, + constant size_t *dims, + constant size_t *strides, + constant size_t *strides_t, + constant size_t *strides_f, + device const ID *ids, + device const T *t, + device const T *f, + device T *out, + uint i [[ thread_position_in_grid ]] +) { + if (i >= numel){ + return; + } + uint strided_i = get_strided_index(i, num_dims, dims, strides); + uint strided_i_t = get_strided_index(i, num_dims, dims, strides_t); + uint strided_i_f = get_strided_index(i, num_dims, dims, strides_f); + out[i] = ids[strided_i] ? t[strided_i_t] : f[strided_i_f]; +} -#define WHERE_OP(TYPENAME, ID_TYPENAME, FN_NAME) \ -kernel void FN_NAME( \ - constant size_t &numel, \ - constant size_t &num_dims, \ - constant size_t *dims, \ - constant size_t *strides, \ - constant size_t *strides_t, \ - constant size_t *strides_f, \ - device const ID_TYPENAME *ids, \ - device const TYPENAME *t, \ - device const TYPENAME *f, \ - device TYPENAME *out ,\ - uint i [[ thread_position_in_grid ]] \ -) { \ - if (i >= numel){ \ - return; \ - } \ - uint strided_i = get_strided_index(i, num_dims, dims, strides); \ - uint strided_i_t = get_strided_index(i, num_dims, dims, strides_t); \ - uint strided_i_f = get_strided_index(i, num_dims, dims, strides_f); \ - out[i] = ids[strided_i] ? 
t[strided_i_t] : f[strided_i_f]; \ -} \ +#define WHERE_OP(T, ID, FN_NAME) \ +kernel void FN_NAME( \ + constant size_t &numel, \ + constant size_t &num_dims, \ + constant size_t *dims, \ + constant size_t *strides, \ + constant size_t *strides_t, \ + constant size_t *strides_f, \ + device const ID *ids, \ + device const T *t, \ + device const T *f, \ + device T *out, \ + uint i [[ thread_position_in_grid ]] \ +) { \ + where_cond<T, ID>(numel, num_dims, dims, strides, strides_t, strides_f, ids, t, f, out, i); \ +} \ // WHERE_OP(float, int64_t, where_i64_f32) // WHERE_OP(double, int64_t, where_i64_f64) @@ -54,10 +70,14 @@ kernel void FN_NAME( \ // WHERE_OP(int64_t, uint32_t, where_u32_i64) WHERE_OP(float, uint8_t, where_u8_f32) -// WHERE_OP(double, uint8_t, where_u8_f64) +WHERE_OP(half, uint8_t, where_u8_f16) WHERE_OP(uint8_t, uint8_t, where_u8_u8) WHERE_OP(uint32_t, uint8_t, where_u8_u32) #if __METAL_VERSION__ >= 220 WHERE_OP(int64_t, uint8_t, where_u8_i64) #endif + +#if defined(__HAVE_BFLOAT__) +WHERE_OP(bfloat, uint8_t, where_u8_bf16) +#endif
\ No newline at end of file |