summary | refs | log | tree | commit | diff
path: root/candle-transformers/src
diff options
context:
space:
mode:
author: Laurent Mazare <laurent.mazare@gmail.com> 2024-05-23 21:24:55 +0200
committer: GitHub <noreply@github.com> 2024-05-23 21:24:55 +0200
commit: d54e02d73de3391b34d4511aa7add32f9cffd4f0 (patch)
tree: 7391d5b7693e2ee0ac8bea7c573f018d6647b203 /candle-transformers/src
parent: 45e235a7473d473df5c1e50f55504a97e28be822 (diff)
download: candle-d54e02d73de3391b34d4511aa7add32f9cffd4f0.tar.gz
candle-d54e02d73de3391b34d4511aa7add32f9cffd4f0.tar.bz2
candle-d54e02d73de3391b34d4511aa7add32f9cffd4f0.zip
Avoid a contiguous call in the quantized phi 3 model. (#2209)
* Simplify the KvCache api.
* Avoid a contiguous call in the quantized phi3 model.
Diffstat (limited to 'candle-transformers/src')
-rw-r--r-- candle-transformers/src/models/quantized_phi3.rs | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/candle-transformers/src/models/quantized_phi3.rs b/candle-transformers/src/models/quantized_phi3.rs
index f9b55d9d..257ad983 100644
--- a/candle-transformers/src/models/quantized_phi3.rs
+++ b/candle-transformers/src/models/quantized_phi3.rs
@@ -146,7 +146,7 @@ impl LayerWeights {
};
let att = candle_nn::ops::softmax_last_dim(&att)?;
// Convert to contiguous as matmul doesn't support strided vs for now.
- att.matmul(&v.contiguous()?)?
+ att.matmul(&v)?
};
let y = y.transpose(1, 2)?.reshape(&[b_sz, seq_len, n_embd])?;
let y = self.attn_output.forward(&y)?;