diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2023-09-08 21:58:56 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-08 21:58:56 +0100 |
commit | 057f7909bc5544b0dcb0e6d5360288633dbf443f (patch) | |
tree | e6aaea33d24479fe819d3a13bf6a7a4747cd8025 /candle-core | |
parent | acf8f10ae17d7f472dc1a634fbd7358a79d7b4d4 (diff) | |
download | candle-057f7909bc5544b0dcb0e6d5360288633dbf443f.tar.gz candle-057f7909bc5544b0dcb0e6d5360288633dbf443f.tar.bz2 candle-057f7909bc5544b0dcb0e6d5360288633dbf443f.zip |
Accelerate support for gelu. (#782)
Diffstat (limited to 'candle-core')
-rw-r--r-- | candle-core/src/accelerate.rs | 32 | ||||
-rw-r--r-- | candle-core/src/op.rs | 18 |
2 files changed, 50 insertions, 0 deletions
```diff
diff --git a/candle-core/src/accelerate.rs b/candle-core/src/accelerate.rs
index 87e0ee8d..1cb34e19 100644
--- a/candle-core/src/accelerate.rs
+++ b/candle-core/src/accelerate.rs
@@ -370,6 +370,38 @@ pub fn vd_sqr(a: &[f64], y: &mut [f64]) {
     y.iter_mut().zip(a.iter()).for_each(|(y, a)| *y = *a * *a)
 }
 
+#[inline]
+pub fn vs_tanh_inplace(y: &mut [f32]) {
+    unsafe { ffi::vvtanhf(y.as_mut_ptr(), y.as_ptr(), &(y.len() as i32)) }
+}
+
+#[inline]
+pub fn vd_tanh_inplace(y: &mut [f64]) {
+    unsafe { ffi::vvtanh(y.as_mut_ptr(), y.as_ptr(), &(y.len() as i32)) }
+}
+
+#[inline]
+pub fn vs_gelu(vs: &[f32], ys: &mut [f32]) {
+    for (&v, y) in vs.iter().zip(ys.iter_mut()) {
+        *y = (2.0f32 / std::f32::consts::PI).sqrt() * v * (1.0 + 0.044715 * v * v)
+    }
+    vs_tanh_inplace(ys);
+    for (&v, y) in vs.iter().zip(ys.iter_mut()) {
+        *y = 0.5 * v * (1.0 + *y)
+    }
+}
+
+#[inline]
+pub fn vd_gelu(vs: &[f64], ys: &mut [f64]) {
+    for (&v, y) in vs.iter().zip(ys.iter_mut()) {
+        *y = (2.0f64 / std::f64::consts::PI).sqrt() * v * (1.0 + 0.044715 * v * v)
+    }
+    vd_tanh_inplace(ys);
+    for (&v, y) in vs.iter().zip(ys.iter_mut()) {
+        *y = 0.5 * v * (1.0 + *y)
+    }
+}
+
 macro_rules! binary_op {
     ($fn_name:ident, $ty:ty, $accelerate_name:ident) => {
         #[inline]
diff --git a/candle-core/src/op.rs b/candle-core/src/op.rs
index fbfc9c1a..9382b217 100644
--- a/candle-core/src/op.rs
+++ b/candle-core/src/op.rs
@@ -600,6 +600,24 @@ impl UnaryOpT for Gelu {
     fn f64_vec(xs: &[f64], ys: &mut [f64]) {
         crate::mkl::vd_gelu(xs, ys)
     }
+
+    #[cfg(feature = "accelerate")]
+    const F32_VEC: bool = true;
+
+    #[cfg(feature = "accelerate")]
+    #[inline(always)]
+    fn f32_vec(xs: &[f32], ys: &mut [f32]) {
+        crate::accelerate::vs_gelu(xs, ys)
+    }
+
+    #[cfg(feature = "accelerate")]
+    const F64_VEC: bool = true;
+
+    #[cfg(feature = "accelerate")]
+    #[inline(always)]
+    fn f64_vec(xs: &[f64], ys: &mut [f64]) {
+        crate::accelerate::vd_gelu(xs, ys)
+    }
 }
 
 impl UnaryOpT for Relu {
```