diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2023-09-22 21:36:56 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-22 21:36:56 +0100 |
commit | 912a3d63b049958d8b3bbaf0535a3179c17fd333 (patch) | |
tree | 95cdbf0dce98bb23a70b1af45932914e7af2a6db /candle-core/examples | |
parent | 3ef328c53db164af5e30248c95d37e95c397dc1a (diff) | |
download | candle-912a3d63b049958d8b3bbaf0535a3179c17fd333.tar.gz candle-912a3d63b049958d8b3bbaf0535a3179c17fd333.tar.bz2 candle-912a3d63b049958d8b3bbaf0535a3179c17fd333.zip |
Use the proper block size for quantizing models. (#933)
* Use the proper block size for quantizing models.
* Use the proper dimension.
Diffstat (limited to 'candle-core/examples')
-rw-r--r-- | candle-core/examples/tensor-tools.rs | 19 |
1 files changed, 17 insertions, 2 deletions
diff --git a/candle-core/examples/tensor-tools.rs b/candle-core/examples/tensor-tools.rs index c3459004..c0d5a334 100644 --- a/candle-core/examples/tensor-tools.rs +++ b/candle-core/examples/tensor-tools.rs @@ -243,12 +243,27 @@ fn run_quantize_safetensors( Quantization::F16 => QTensor::quantize::<half::f16>, Quantization::F32 => QTensor::quantize::<f32>, }; + let block_size = match q { + Quantization::Q4_0 => k_quants::QK4_0, + Quantization::Q4_1 => k_quants::QK4_1, + Quantization::Q5_0 => k_quants::QK5_0, + Quantization::Q5_1 => k_quants::QK5_1, + Quantization::Q8_0 => k_quants::QK8_0, + Quantization::Q8_1 => k_quants::QK8_1, + Quantization::Q2k + | Quantization::Q3k + | Quantization::Q4k + | Quantization::Q5k + | Quantization::Q6k + | Quantization::Q8k => k_quants::QK_K, + Quantization::F16 | Quantization::F32 => 1, + }; let qtensors = tensors .into_par_iter() .map(|(name, tensor)| { - println!(" quantizing {name} {tensor:?}"); - let should_quantize = tensor.rank() == 2 && tensor.dim(0)? % 256 == 0; + let should_quantize = tensor.rank() == 2 && tensor.dim(1)? % block_size == 0; + println!(" quantizing {name} {tensor:?} {should_quantize}"); let tensor = if should_quantize { quantize_fn(&tensor)? } else { |