diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2023-08-17 07:03:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-17 07:03:32 +0100 |
commit | 306c8eee7ac96d23d1d6a7a13b4311edc6c4f98a (patch) | |
tree | 517f84b8b462d7703311edc59b127edeb87f6c56 /candle-core/examples | |
parent | 098909de40b1478dfd6fba92f9907b8cd88984a6 (diff) | |
download | candle-306c8eee7ac96d23d1d6a7a13b4311edc6c4f98a.tar.gz candle-306c8eee7ac96d23d1d6a7a13b4311edc6c4f98a.tar.bz2 candle-306c8eee7ac96d23d1d6a7a13b4311edc6c4f98a.zip |
AVX version of the vecdot for q4_0. (#474)
* AVX version of the vecdot for q4_0.
* Tweak the avx bits.
* Add a qmatmul benchmark.
* Fix the quantized test.
Diffstat (limited to 'candle-core/examples')
-rw-r--r-- | candle-core/examples/cpu_benchmarks.rs | 24 |
1 file changed, 24 insertions, 0 deletions
```diff
diff --git a/candle-core/examples/cpu_benchmarks.rs b/candle-core/examples/cpu_benchmarks.rs
index 6c40269f..ef27131e 100644
--- a/candle-core/examples/cpu_benchmarks.rs
+++ b/candle-core/examples/cpu_benchmarks.rs
@@ -5,6 +5,7 @@ extern crate intel_mkl_src;
 #[cfg(feature = "accelerate")]
 extern crate accelerate_src;
 
+use candle_core::quantized::GgmlType;
 use candle_core::{Device, Result, Tensor, D};
 use clap::{Parser, Subcommand};
 
@@ -81,6 +82,27 @@ impl Benchmark for Matmul {
     const ITERS: usize = 100;
 }
 
+// This benchmark is similar to:
+// https://github.com/ggerganov/llama.cpp/blob/master/examples/benchmark/benchmark-matmult.cpp
+struct QMatMul;
+impl Benchmark for QMatMul {
+    type PreProcessData = (candle_core::quantized::QMatMul, Tensor);
+    type RunResult = Tensor;
+    fn preprocess() -> Result<Self::PreProcessData> {
+        let zeros = vec![candle_core::quantized::k_quants::BlockQ4_0::zeros(); 4096 * 11008 / 32];
+        let mm = candle_core::quantized::QTensor::new(zeros, (4096, 11008));
+        let mm = candle_core::quantized::QMatMul::from_qtensor(mm);
+        let arg = Tensor::randn(0f32, 1., (128, 11008), &Device::Cpu)?;
+        Ok((mm, arg))
+    }
+
+    fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
+        d.0.forward(&d.1)
+    }
+
+    const ITERS: usize = 100;
+}
+
 struct Softmax;
 impl Benchmark for Softmax {
     type PreProcessData = Tensor;
@@ -116,6 +138,7 @@ enum Task {
     Conv1d,
     Conv2d,
     Matmul,
+    Qmatmul,
     Softmax,
 }
 
@@ -137,6 +160,7 @@ fn main() -> Result<()> {
         Task::Conv2d => run::<Conv2d>(args.iters)?,
         Task::Matmul => run::<Matmul>(args.iters)?,
         Task::Softmax => run::<Softmax>(args.iters)?,
+        Task::Qmatmul => run::<QMatMul>(args.iters)?,
     }
     Ok(())
 }
```