diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2023-08-16 12:41:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-16 12:41:07 +0100 |
commit | 3071134788334c972d9e356f53887d2b2ff026b7 (patch) | |
tree | adbb58e3babee5d62fa6150bde4f9bb03770607c /candle-core/src/quantized/k_quants.rs | |
parent | fec87e86f50da78656a0fb28fc254390435fb3fd (diff) | |
download | candle-3071134788334c972d9e356f53887d2b2ff026b7.tar.gz candle-3071134788334c972d9e356f53887d2b2ff026b7.tar.bz2 candle-3071134788334c972d9e356f53887d2b2ff026b7.zip |
Get the ggml based llama to generate some text. (#464)
* Add more stats to the ggml example.
* Build a quantized model from the file content.
* Move the tensor retrieval in the main crate.
* Start adding the forward pass.
* Add more to the forward pass of the quantized llama.
* Apply the attention layers.
* Add the sampling loop.
* Get the sampling loop to work.
* Minor tweak.
* Add a quantize/dequantize test.
* Bugfix.
* Add a comment + swap the order.
* Bugfixes.
Diffstat (limited to 'candle-core/src/quantized/k_quants.rs')
-rw-r--r-- | candle-core/src/quantized/k_quants.rs | 13 |
1 files changed, 7 insertions, 6 deletions
```diff
diff --git a/candle-core/src/quantized/k_quants.rs b/candle-core/src/quantized/k_quants.rs
index 53f3dc65..f7611897 100644
--- a/candle-core/src/quantized/k_quants.rs
+++ b/candle-core/src/quantized/k_quants.rs
@@ -531,20 +531,21 @@ impl GgmlType for BlockQ4_0 {
     // https://github.com/ggerganov/llama.cpp/blob/468ea24fb4633a0d681f7ac84089566c1c6190cb/ggml.c#L1525
     fn to_float(xs: &[Self], ys: &mut [f32]) -> Result<()> {
         let k = ys.len();
-        if k % QK4_0 != 0 {
-            crate::bail!("dequantize_row_q4_0: {k} is not divisible by {QK4_0}")
+        let qk = Self::BLCK_SIZE;
+        if k % qk != 0 {
+            crate::bail!("dequantize_row_q4_0: {k} is not divisible by {qk}")
         }
 
-        let nb = k / QK4_0;
+        let nb = k / qk;
         for i in 0..nb {
             let d = xs[i].d.to_f32();
 
-            for j in 0..(QK4_0 / 2) {
+            for j in 0..(qk / 2) {
                 let x0 = (xs[i].qs[j] & 0x0F) as i16 - 8;
                 let x1 = (xs[i].qs[j] >> 4) as i16 - 8;
 
-                ys[i * QK4_0 + j] = (x0 as f32) * d;
-                ys[i * QK4_0 + j + QK4_0 / 2] = (x1 as f32) * d;
+                ys[i * qk + j] = (x0 as f32) * d;
+                ys[i * qk + j + qk / 2] = (x1 as f32) * d;
             }
         }
         Ok(())
```