Diffstat (limited to 'candle-transformers/src')
-rw-r--r-- | candle-transformers/src/models/bert.rs | 4 |
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs
index 3f164a3a..8af34465 100644
--- a/candle-transformers/src/models/bert.rs
+++ b/candle-transformers/src/models/bert.rs
@@ -25,10 +25,8 @@ impl HiddenActLayer {
     fn forward(&self, xs: &Tensor) -> candle::Result<Tensor> {
         let _enter = self.span.enter();
         match self.act {
-            // TODO: The all-MiniLM-L6-v2 model uses "gelu" whereas this is "gelu_new", this explains some
-            // small numerical difference.
             // https://github.com/huggingface/transformers/blob/cd4584e3c809bb9e1392ccd3fe38b40daba5519a/src/transformers/activations.py#L213
-            HiddenAct::Gelu => xs.gelu(),
+            HiddenAct::Gelu => xs.gelu_erf(),
             HiddenAct::Relu => xs.relu(),
         }
     }
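For context: the commit switches BERT's GELU from candle's tanh-based approximation (Tensor::gelu, which matches the "gelu_new" activation in HF transformers) to the exact erf-based form (Tensor::gelu_erf, matching HF's "gelu"), removing the small numerical discrepancy noted in the deleted TODO. Below is a minimal standalone sketch of the two scalar formulas, assuming the libm crate for erf; the function names and loop are illustrative only, not candle's implementation, which operates on Tensors.

// Compare the exact (erf) GELU against the tanh approximation.
// Requires the `libm` crate (std has no erf).

fn gelu_erf(x: f64) -> f64 {
    // Exact GELU: x * Phi(x), where Phi is the standard normal CDF.
    0.5 * x * (1.0 + libm::erf(x / std::f64::consts::SQRT_2))
}

fn gelu_tanh(x: f64) -> f64 {
    // "gelu_new" tanh approximation used by HF transformers:
    // 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    let inner = (2.0 / std::f64::consts::PI).sqrt() * (x + 0.044715 * x.powi(3));
    0.5 * x * (1.0 + inner.tanh())
}

fn main() {
    // The two variants agree closely but not exactly; this gap is the
    // "small numerical difference" the removed comment referred to.
    for &x in &[-2.0, -0.5, 0.0, 0.5, 2.0] {
        println!("x = {x:+.1}  erf: {:.6}  tanh: {:.6}", gelu_erf(x), gelu_tanh(x));
    }
}

Since all-MiniLM-L6-v2 (and most BERT checkpoints) were trained with the exact "gelu" activation, the erf form is the faithful choice here, at the cost of evaluating erf instead of tanh.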