author     Laurent Mazare <laurent.mazare@gmail.com>   2023-09-22 21:36:56 +0100
committer  GitHub <noreply@github.com>                 2023-09-22 21:36:56 +0100
commit     912a3d63b049958d8b3bbaf0535a3179c17fd333 (patch)
tree       95cdbf0dce98bb23a70b1af45932914e7af2a6db /candle-core/examples
parent     3ef328c53db164af5e30248c95d37e95c397dc1a (diff)
download   candle-912a3d63b049958d8b3bbaf0535a3179c17fd333.tar.gz
           candle-912a3d63b049958d8b3bbaf0535a3179c17fd333.tar.bz2
           candle-912a3d63b049958d8b3bbaf0535a3179c17fd333.zip
Use the proper block size for quantizing models. (#933)
* Use the proper block size for quantizing models.
* Use the proper dimension.
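Background on the first half of the fix (an explanatory note, not part of the commit itself): the quantized formats store each weight row as fixed-size blocks, 32 elements for the legacy formats (Q4_0 through Q8_1) and 256-element super-blocks for the k-quants (Q2K through Q8K), so a row can only be quantized when its length is a whole number of blocks. A rough standalone arithmetic sketch, with the block-size values assumed from the ggml conventions rather than read out of this diff:

    fn main() {
        // Assumed block sizes: 32 for the legacy formats, 256 for k-quants.
        let (qk8_0, qk_k) = (32usize, 256usize);
        assert_eq!(4096 % qk8_0, 0); // a 4096-wide row = 128 blocks of 32
        assert_eq!(4096 % qk_k, 0);  // or 16 super-blocks of 256
        assert_ne!(100 % qk_k, 0);   // a 100-wide row leaves a partial block
        println!("block-size divisibility checks hold");
    }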
Diffstat (limited to 'candle-core/examples')
-rw-r--r--  candle-core/examples/tensor-tools.rs  19
1 file changed, 17 insertions, 2 deletions
diff --git a/candle-core/examples/tensor-tools.rs b/candle-core/examples/tensor-tools.rs
index c3459004..c0d5a334 100644
--- a/candle-core/examples/tensor-tools.rs
+++ b/candle-core/examples/tensor-tools.rs
@@ -243,12 +243,27 @@ fn run_quantize_safetensors(
        Quantization::F16 => QTensor::quantize::<half::f16>,
        Quantization::F32 => QTensor::quantize::<f32>,
    };
+    let block_size = match q {
+        Quantization::Q4_0 => k_quants::QK4_0,
+        Quantization::Q4_1 => k_quants::QK4_1,
+        Quantization::Q5_0 => k_quants::QK5_0,
+        Quantization::Q5_1 => k_quants::QK5_1,
+        Quantization::Q8_0 => k_quants::QK8_0,
+        Quantization::Q8_1 => k_quants::QK8_1,
+        Quantization::Q2k
+        | Quantization::Q3k
+        | Quantization::Q4k
+        | Quantization::Q5k
+        | Quantization::Q6k
+        | Quantization::Q8k => k_quants::QK_K,
+        Quantization::F16 | Quantization::F32 => 1,
+    };
    let qtensors = tensors
        .into_par_iter()
        .map(|(name, tensor)| {
-            println!(" quantizing {name} {tensor:?}");
-            let should_quantize = tensor.rank() == 2 && tensor.dim(0)? % 256 == 0;
+            let should_quantize = tensor.rank() == 2 && tensor.dim(1)? % block_size == 0;
+            println!(" quantizing {name} {tensor:?} {should_quantize}");
            let tensor = if should_quantize {
                quantize_fn(&tensor)?
            } else {
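To show the second half of the fix in isolation, here is a minimal standalone sketch of the gate the patched code applies; should_quantize and the example shapes are made up for illustration and are not the candle API. Only rank-2 tensors whose inner dimension (dim 1) is a multiple of the chosen block size are quantized, and everything else keeps its original dtype, which is why the check moves from dim(0) with a hard-coded 256 to dim(1) with the per-format block size.

    // Hypothetical helper mirroring the patched check.
    fn should_quantize(shape: &[usize], block_size: usize) -> bool {
        // 2-D weights only, and the per-row (inner) dimension must split
        // into whole quantization blocks.
        shape.len() == 2 && shape[1] % block_size == 0
    }

    fn main() {
        let qk_k = 256; // assumed k-quant super-block size
        assert!(should_quantize(&[32000, 4096], qk_k)); // wide projection weight
        assert!(!should_quantize(&[4096], qk_k));       // 1-D tensor stays unquantized
        assert!(!should_quantize(&[4096, 100], qk_k));  // inner dim not a multiple of 256
    }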