From 4fd00b890036ef67391a9cc03f896247d0a75711 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Wed, 28 Feb 2024 21:02:41 +0100
Subject: Add the StarCoder2 model. (#1779)

* Add the StarCoder2 model.

* Add the example code and get things to work.

* And also tweak the readme.
---
 candle-nn/src/activation.rs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'candle-nn')

diff --git a/candle-nn/src/activation.rs b/candle-nn/src/activation.rs
index 60a7a6d1..b9745375 100644
--- a/candle-nn/src/activation.rs
+++ b/candle-nn/src/activation.rs
@@ -5,6 +5,7 @@ use serde::Deserialize;
 #[serde(rename_all = "lowercase")]
 pub enum Activation {
     #[default]
+    #[serde(alias = "gelu")]
     Gelu,
     #[serde(alias = "gelu_new")]
     NewGelu,
@@ -19,6 +20,8 @@ pub enum Activation {
     HardSwish,
     Elu(f64),
     LeakyRelu(f64),
+    #[serde(alias = "gelu_pytorch_tanh")]
+    GeluPytorchTanh,
 }
 
 impl super::Module for Activation {
@@ -38,6 +41,7 @@ impl super::Module for Activation {
             Self::HardSwish => xs * crate::ops::hard_sigmoid(xs)?,
             &Self::Elu(alpha) => xs.elu(alpha),
             &Self::LeakyRelu(negative_slope) => crate::ops::leaky_relu(xs, negative_slope),
+            Self::GeluPytorchTanh => xs.gelu(),
         }
     }
 }
-- 
cgit v1.2.3