summary | refs | log | tree | commit | diff
path: root/candle-core
diff options
context:
space:
mode:
author: Laurent Mazare <laurent.mazare@gmail.com> 2023-09-08 21:58:56 +0100
committer: GitHub <noreply@github.com> 2023-09-08 21:58:56 +0100
commit: 057f7909bc5544b0dcb0e6d5360288633dbf443f (patch)
tree: e6aaea33d24479fe819d3a13bf6a7a4747cd8025 /candle-core
parent: acf8f10ae17d7f472dc1a634fbd7358a79d7b4d4 (diff)
download: candle-057f7909bc5544b0dcb0e6d5360288633dbf443f.tar.gz
candle-057f7909bc5544b0dcb0e6d5360288633dbf443f.tar.bz2
candle-057f7909bc5544b0dcb0e6d5360288633dbf443f.zip
Accelerate support for gelu. (#782)
Diffstat (limited to 'candle-core')
-rw-r--r-- candle-core/src/accelerate.rs 32
-rw-r--r-- candle-core/src/op.rs 18
2 files changed, 50 insertions, 0 deletions
diff --git a/candle-core/src/accelerate.rs b/candle-core/src/accelerate.rs
index 87e0ee8d..1cb34e19 100644
--- a/candle-core/src/accelerate.rs
+++ b/candle-core/src/accelerate.rs
@@ -370,6 +370,38 @@ pub fn vd_sqr(a: &[f64], y: &mut [f64]) {
y.iter_mut().zip(a.iter()).for_each(|(y, a)| *y = *a * *a)
}
+/// Applies `tanh` to every element of `y`, overwriting the slice in place.
+///
+/// The work is handed to Accelerate's `vvtanhf`, which takes its element count as an `i32`.
+/// The slice is therefore processed in chunks of at most `i32::MAX` elements, so the length
+/// can never truncate or wrap when narrowed. An empty slice results in no FFI call at all.
+#[inline]
+pub fn vs_tanh_inplace(y: &mut [f32]) {
+    for chunk in y.chunks_mut(i32::MAX as usize) {
+        // Lossless: `chunks_mut` caps every chunk at `i32::MAX` elements.
+        let len = chunk.len() as i32;
+        // Derive both FFI pointers from one raw pointer instead of taking a mutable and a
+        // shared borrow of the same slice.
+        let ptr = chunk.as_mut_ptr();
+        // SAFETY: `ptr` comes from an exclusively borrowed slice of exactly `len` elements, so
+        // it is valid for reads and writes of `len` f32 values for the duration of the call.
+        // Input and output alias on purpose; this relies on vForce accepting in-place use.
+        unsafe { ffi::vvtanhf(ptr, ptr, &len) }
+    }
+}
+
+/// Applies `tanh` to every element of `y`, overwriting the slice in place.
+///
+/// The work is handed to Accelerate's `vvtanh`, which takes its element count as an `i32`.
+/// The slice is therefore processed in chunks of at most `i32::MAX` elements, so the length
+/// can never truncate or wrap when narrowed. An empty slice results in no FFI call at all.
+#[inline]
+pub fn vd_tanh_inplace(y: &mut [f64]) {
+    for chunk in y.chunks_mut(i32::MAX as usize) {
+        // Lossless: `chunks_mut` caps every chunk at `i32::MAX` elements.
+        let len = chunk.len() as i32;
+        // Derive both FFI pointers from one raw pointer instead of taking a mutable and a
+        // shared borrow of the same slice.
+        let ptr = chunk.as_mut_ptr();
+        // SAFETY: `ptr` comes from an exclusively borrowed slice of exactly `len` elements, so
+        // it is valid for reads and writes of `len` f64 values for the duration of the call.
+        // Input and output alias on purpose; this relies on vForce accepting in-place use.
+        unsafe { ffi::vvtanh(ptr, ptr, &len) }
+    }
+}
+
+/// Writes the tanh approximation of GELU for each element of `vs` into `ys`:
+/// `0.5 * v * (1 + tanh(sqrt(2 / pi) * v * (1 + 0.044715 * v * v)))`.
+///
+/// Runs in three passes over `ys`: fill in the tanh argument, apply `tanh` in place through
+/// `vs_tanh_inplace`, then combine with the input. Only the first `min(vs.len(), ys.len())`
+/// entries receive a GELU value; any further entries of `ys` are merely passed through `tanh`.
+#[inline]
+pub fn vs_gelu(vs: &[f32], ys: &mut [f32]) {
+    let sqrt_two_over_pi = (2.0f32 / std::f32::consts::PI).sqrt();
+    // Pass 1: the argument of tanh.
+    ys.iter_mut().zip(vs).for_each(|(out, &x)| {
+        *out = sqrt_two_over_pi * x * (1.0 + 0.044715 * x * x);
+    });
+    // Pass 2: tanh over the whole output buffer.
+    vs_tanh_inplace(ys);
+    // Pass 3: scale by the original input.
+    ys.iter_mut().zip(vs).for_each(|(out, &x)| {
+        *out = 0.5 * x * (1.0 + *out);
+    });
+}
+
+/// Writes the tanh approximation of GELU for each element of `vs` into `ys`:
+/// `0.5 * v * (1 + tanh(sqrt(2 / pi) * v * (1 + 0.044715 * v * v)))`.
+///
+/// Runs in three passes over `ys`: fill in the tanh argument, apply `tanh` in place through
+/// `vd_tanh_inplace`, then combine with the input. Only the first `min(vs.len(), ys.len())`
+/// entries receive a GELU value; any further entries of `ys` are merely passed through `tanh`.
+#[inline]
+pub fn vd_gelu(vs: &[f64], ys: &mut [f64]) {
+    let sqrt_two_over_pi = (2.0f64 / std::f64::consts::PI).sqrt();
+    // Pass 1: the argument of tanh.
+    ys.iter_mut().zip(vs).for_each(|(out, &x)| {
+        *out = sqrt_two_over_pi * x * (1.0 + 0.044715 * x * x);
+    });
+    // Pass 2: tanh over the whole output buffer.
+    vd_tanh_inplace(ys);
+    // Pass 3: scale by the original input.
+    ys.iter_mut().zip(vs).for_each(|(out, &x)| {
+        *out = 0.5 * x * (1.0 + *out);
+    });
+}
+
macro_rules! binary_op {
($fn_name:ident, $ty:ty, $accelerate_name:ident) => {
#[inline]
diff --git a/candle-core/src/op.rs b/candle-core/src/op.rs
index fbfc9c1a..9382b217 100644
--- a/candle-core/src/op.rs
+++ b/candle-core/src/op.rs
@@ -600,6 +600,24 @@ impl UnaryOpT for Gelu {
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
crate::mkl::vd_gelu(xs, ys)
}
+
+ #[cfg(feature = "accelerate")] // only compiled when the `accelerate` feature is enabled
+ const F32_VEC: bool = true; // NOTE(review): presumably advertises that `f32_vec` below is usable as a slice-level path; confirm against the `UnaryOpT` trait
+
+ #[cfg(feature = "accelerate")]
+ #[inline(always)]
+ fn f32_vec(xs: &[f32], ys: &mut [f32]) { // slice-level f32 GELU: reads `xs`, writes `ys`
+ crate::accelerate::vs_gelu(xs, ys) // forwards both slices unchanged to the Accelerate-backed implementation
+ }
+
+ #[cfg(feature = "accelerate")] // only compiled when the `accelerate` feature is enabled
+ const F64_VEC: bool = true; // NOTE(review): presumably advertises that `f64_vec` below is usable as a slice-level path; confirm against the `UnaryOpT` trait
+
+ #[cfg(feature = "accelerate")]
+ #[inline(always)]
+ fn f64_vec(xs: &[f64], ys: &mut [f64]) { // slice-level f64 GELU: reads `xs`, writes `ys`
+ crate::accelerate::vd_gelu(xs, ys) // forwards both slices unchanged to the Accelerate-backed implementation
+ }
}
impl UnaryOpT for Relu {