diff options
Diffstat (limited to 'candle-core/src/quantized/k_quants.rs')
-rw-r--r-- | candle-core/src/quantized/k_quants.rs | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/candle-core/src/quantized/k_quants.rs b/candle-core/src/quantized/k_quants.rs
index e6db08d6..36efe2f2 100644
--- a/candle-core/src/quantized/k_quants.rs
+++ b/candle-core/src/quantized/k_quants.rs
@@ -421,8 +421,24 @@ impl GgmlType for BlockQ8_0 {
         Ok(())
     }
 
-    fn vec_dot(_: usize, _: &[Self], _: &[Self::VecDotType]) -> Result<f32> {
-        todo!()
+    fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
+        let qk = QK8_0;
+        if n % QK8_0 != 0 {
+            crate::bail!("vec_dot_q8_0_q8_0: {n} is not divisible by {qk}")
+        }
+
+        // Generic implementation.
+        let mut sumf = 0f32;
+        for (xs, ys) in xs.iter().zip(ys.iter()) {
+            let sum_i = xs
+                .qs
+                .iter()
+                .zip(ys.qs.iter())
+                .map(|(&x, &y)| x as i32 * y as i32)
+                .sum::<i32>();
+            sumf += sum_i as f32 * f16::to_f32(xs.d) * f16::to_f32(ys.d)
+        }
+        Ok(sumf)
     }
 }
 