summary | refs | log | tree | commit | diff
path: root/candle-transformers
diff options
context:
space:
mode:
author Jani Monoses <jani.monoses@gmail.com> 2025-01-13 15:35:33 +0200
committer GitHub <noreply@github.com> 2025-01-13 14:35:33 +0100
commit ab7ff7081eab36958b82b98b89cee3eacf877111 (patch)
tree a6768826a260a190bfe774fbbf954d6e85b1c5ae /candle-transformers
parent 461e8c1685e003bdddfd1e7d1aa5092786ca9df5 (diff)
download candle-ab7ff7081eab36958b82b98b89cee3eacf877111.tar.gz
candle-ab7ff7081eab36958b82b98b89cee3eacf877111.tar.bz2
candle-ab7ff7081eab36958b82b98b89cee3eacf877111.zip
Fixes for running Phi-4 quantized. (#2714)
Diffstat (limited to 'candle-transformers')
-rw-r--r-- candle-transformers/src/models/quantized_phi3.rs 2
1 files changed, 1 insertions, 1 deletions
diff --git a/candle-transformers/src/models/quantized_phi3.rs b/candle-transformers/src/models/quantized_phi3.rs
index 51a75f38..1ceb48d1 100644
--- a/candle-transformers/src/models/quantized_phi3.rs
+++ b/candle-transformers/src/models/quantized_phi3.rs
@@ -127,7 +127,7 @@ impl LayerWeights {
.reshape((b_sz, seq_len, self.n_head, self.head_dim))?
.transpose(1, 2)?;
let k = k
- .reshape((b_sz, seq_len, self.n_head, self.head_dim))?
+ .reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
.transpose(1, 2)?;
let v = v
.reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?