author     Laurent Mazare <laurent.mazare@gmail.com>  2023-07-07 17:55:21 +0100
committer  GitHub <noreply@github.com>                2023-07-07 17:55:21 +0100
commit     03dffe9ecc6a857d7ad449b1b0d69dc4f82c5b32
tree       1a17286ec0b1d167967ce208e40f466b6335e194
parent     e923b3adc2ff786614a1cba7e26a8bcab91dd0ed
Use F32 for the reduce ops. (#105)
Diffstat (limited to 'candle-examples/examples/falcon')
-rw-r--r--  candle-examples/examples/falcon/model.rs  |  7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/candle-examples/examples/falcon/model.rs b/candle-examples/examples/falcon/model.rs
index a877bd69..e7c53e50 100644
--- a/candle-examples/examples/falcon/model.rs
+++ b/candle-examples/examples/falcon/model.rs
@@ -122,12 +122,15 @@ impl LayerNorm {
     }

     fn forward(&self, x: &Tensor) -> Result<Tensor> {
+        let dtype = x.dtype();
         let (_bsize, _seq_len, hidden_size) = x.shape().r3()?;
+        let x = x.to_dtype(DType::F32)?;
         let mean_x = (x.sum(&[2])? / hidden_size as f64)?;
         let x = x.broadcast_sub(&mean_x)?;
         let norm_x = ((&x * &x)?.sum(&[2])? / hidden_size as f64)?;
         let x_normed = x.broadcast_div(&(norm_x + self.eps)?.sqrt()?)?;
         let x = x_normed
+            .to_dtype(dtype)?
             .broadcast_mul(&self.weight)?
             .broadcast_add(&self.bias)?;
         Ok(x)
@@ -470,7 +473,9 @@ impl FalconAttention {
         let attention_scores = (query.matmul(&key.t()?)? * self.inv_norm_factor)?;
         let attention_scores = attention_scores
             .broadcast_add(&mask.squeeze(1)?)?
-            .softmax(D::Minus1)?;
+            .to_dtype(DType::F32)?
+            .softmax(D::Minus1)?
+            .to_dtype(x.dtype())?;
         let attn_output = attention_scores
             .matmul(&value)?
             .reshape((b_sz, self.num_heads, seq_len, head_dim))?
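
Note: both hunks apply the same up-cast/reduce/down-cast pattern. Sums and softmax normalizers lose accuracy when accumulated in a half-precision dtype (F16/BF16), so the tensor is promoted to F32 for the reduce op and only the result is cast back. A minimal sketch of the pattern as a reusable helper, using only calls that appear in the diff above (`dtype`, `to_dtype`, `softmax(D::Minus1)`); the `in_f32` name and the `candle` import path are assumptions for illustration, not part of this commit:

use candle::{DType, Result, Tensor};

// Hypothetical helper: run a precision-sensitive reduce op in F32,
// then cast the result back to the tensor's original dtype.
fn in_f32<F>(x: &Tensor, f: F) -> Result<Tensor>
where
    F: FnOnce(&Tensor) -> Result<Tensor>,
{
    let dtype = x.dtype();                // original dtype, e.g. F16 or BF16
    let y = f(&x.to_dtype(DType::F32)?)?; // the reduce accumulates in F32
    y.to_dtype(dtype)                     // back to the compute dtype
}

// The attention softmax above could then be written as:
// let attention_scores = in_f32(&attention_scores, |s| s.softmax(D::Minus1))?;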