summary | refs | log | tree | commit | diff
path: root/candle-core/src
diff options
context:
space:
mode:
author Laurent Mazare <laurent.mazare@gmail.com> 2023-08-25 09:04:05 +0100
committer GitHub <noreply@github.com> 2023-08-25 09:04:05 +0100
commit c093b03d51ff9d080c29c44dbeaf9bfc7b755541 (patch)
tree ed8f3299d6a85985a20f724e63c6358bc81cbe9a /candle-core/src
parent d8ba0452dcb6365a24b705952d86b23f72436c11 (diff)
download candle-c093b03d51ff9d080c29c44dbeaf9bfc7b755541.tar.gz
candle-c093b03d51ff9d080c29c44dbeaf9bfc7b755541.tar.bz2
candle-c093b03d51ff9d080c29c44dbeaf9bfc7b755541.zip
Generic implementation of vecdot for q80. (#596)
* Generic implementation of vecdot for q80.
* Add support for code-llama 7b.
* Support more code-llama.
Diffstat (limited to 'candle-core/src')
-rw-r--r-- candle-core/src/quantized/k_quants.rs 20
1 files changed, 18 insertions, 2 deletions
diff --git a/candle-core/src/quantized/k_quants.rs b/candle-core/src/quantized/k_quants.rs
index e6db08d6..36efe2f2 100644
--- a/candle-core/src/quantized/k_quants.rs
+++ b/candle-core/src/quantized/k_quants.rs
@@ -421,8 +421,24 @@ impl GgmlType for BlockQ8_0 {
Ok(())
}
- fn vec_dot(_: usize, _: &[Self], _: &[Self::VecDotType]) -> Result<f32> {
- todo!()
+ fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
+ let qk = QK8_0;
+ if n % QK8_0 != 0 {
+ crate::bail!("vec_dot_q8_0_q8_0: {n} is not divisible by {qk}")
+ }
+
+ // Generic implementation.
+ let mut sumf = 0f32;
+ for (xs, ys) in xs.iter().zip(ys.iter()) {
+ let sum_i = xs
+ .qs
+ .iter()
+ .zip(ys.qs.iter())
+ .map(|(&x, &y)| x as i32 * y as i32)
+ .sum::<i32>();
+ sumf += sum_i as f32 * f16::to_f32(xs.d) * f16::to_f32(ys.d)
+ }
+ Ok(sumf)
}
}