diff options
Diffstat (limited to 'candle-nn')
-rw-r--r-- | candle-nn/Cargo.toml                       |  5
-rw-r--r-- | candle-nn/benches/bench_main.rs            |  4
-rw-r--r-- | candle-nn/benches/benchmarks/conv.rs       | 54
-rw-r--r-- | candle-nn/benches/benchmarks/layer_norm.rs | 48
-rw-r--r-- | candle-nn/benches/benchmarks/mod.rs        | 64
5 files changed, 175 insertions, 0 deletions
diff --git a/candle-nn/Cargo.toml b/candle-nn/Cargo.toml
index 3408dae3..9f0d56bd 100644
--- a/candle-nn/Cargo.toml
+++ b/candle-nn/Cargo.toml
@@ -26,6 +26,7 @@ candle-metal-kernels = { workspace = true, optional = true }
 anyhow = { workspace = true }
 clap = { workspace = true }
 rand = { workspace = true }
+criterion = { workspace = true }
 
 [features]
 default = []
@@ -33,3 +34,7 @@ accelerate = ["dep:accelerate-src", "candle/accelerate"]
 cuda = ["candle/cuda"]
 mkl = ["dep:intel-mkl-src", "candle/mkl"]
 metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"]
+
+[[bench]]
+name = "bench_main"
+harness = false
\ No newline at end of file
diff --git a/candle-nn/benches/bench_main.rs b/candle-nn/benches/bench_main.rs
new file mode 100644
index 00000000..4db1d35c
--- /dev/null
+++ b/candle-nn/benches/bench_main.rs
@@ -0,0 +1,4 @@
+mod benchmarks;
+
+use criterion::criterion_main;
+criterion_main!(benchmarks::layer_norm::benches, benchmarks::conv::benches);
diff --git a/candle-nn/benches/benchmarks/conv.rs b/candle-nn/benches/benchmarks/conv.rs
new file mode 100644
index 00000000..eb80645b
--- /dev/null
+++ b/candle-nn/benches/benchmarks/conv.rs
@@ -0,0 +1,54 @@
+use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
+use candle::{DType, Device, Module, Tensor};
+use candle_nn::{Conv2d, Conv2dConfig};
+use criterion::{black_box, criterion_group, Criterion};
+use std::time::Instant;
+
+const B: usize = 1;
+const C: usize = 1;
+const M: usize = 128;
+const K: usize = 128;
+const K_SIZE: usize = 3;
+
+fn run(input: Tensor, weight: Tensor, bias: Tensor, config: Conv2dConfig) {
+    Conv2d::new(weight, Some(bias), config)
+        .forward(&input)
+        .unwrap();
+}
+
+fn run_conv2d_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
+    let weight = Tensor::ones((1, 1, K_SIZE, K_SIZE), dtype, device)
+        .unwrap()
+        .to_dtype(dtype)
+        .unwrap();
+    let bias = Tensor::zeros(K, dtype, device).unwrap();
+    let input = Tensor::ones((B, C, M, K), dtype, device).unwrap();
+
+    let mut group = c.benchmark_group(device.bench_name(name));
+    group.bench_function("iter", move |b| {
+        b.iter_custom(|iters| {
+            let start = Instant::now();
+            for _i in 0..iters {
+                run(
+                    black_box(input.clone()),
+                    black_box(weight.clone()),
+                    black_box(bias.clone()),
+                    Default::default(),
+                );
+            }
+            device.sync().unwrap();
+            start.elapsed()
+        })
+    });
+    group.finish();
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let device = BenchDeviceHandler::new().unwrap();
+    for d in device.devices {
+        run_conv2d_benchmark(c, &d, DType::F32, "conv2d_f32");
+        run_conv2d_benchmark(c, &d, DType::F16, "conv2d_f16");
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
diff --git a/candle-nn/benches/benchmarks/layer_norm.rs b/candle-nn/benches/benchmarks/layer_norm.rs
new file mode 100644
index 00000000..0be5c450
--- /dev/null
+++ b/candle-nn/benches/benchmarks/layer_norm.rs
@@ -0,0 +1,48 @@
+use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
+use candle::{DType, Device, Module, Tensor};
+use candle_nn::LayerNorm;
+use criterion::{black_box, criterion_group, Criterion};
+use std::time::Instant;
+
+fn run(input: &Tensor, weight: &Tensor, bias: &Tensor) {
+    let _ = LayerNorm::new(weight.clone(), bias.clone(), 1e-5).forward(&input);
+}
+
+const B: usize = 1;
+const M: usize = 1024;
+const K: usize = 1024;
+
+fn run_layer_norm_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
+    let elements = B * M * K;
+
+    let weight = Tensor::arange(0.0, elements as f32, device)
+        .unwrap()
+        .to_dtype(dtype)
+        .unwrap();
+    let bias = weight.ones_like().unwrap();
+    let input = weight.ones_like().unwrap();
+
+    let mut group = c.benchmark_group(device.bench_name(name));
+    group.bench_function("iter", move |b| {
+        b.iter_custom(|iters| {
+            let start = Instant::now();
+            for _i in 0..iters {
+                run(black_box(&input), black_box(&weight), black_box(&bias));
+            }
+            device.sync().unwrap();
+            start.elapsed()
+        })
+    });
+    group.finish();
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let device = BenchDeviceHandler::new().unwrap();
+    for d in device.devices {
+        run_layer_norm_benchmark(c, &d, DType::F32, "layer_norm_f32");
+        run_layer_norm_benchmark(c, &d, DType::BF16, "layer_norm_bf16");
+        run_layer_norm_benchmark(c, &d, DType::F16, "layer_norm_f16");
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
diff --git a/candle-nn/benches/benchmarks/mod.rs b/candle-nn/benches/benchmarks/mod.rs
new file mode 100644
index 00000000..30a6ab6a
--- /dev/null
+++ b/candle-nn/benches/benchmarks/mod.rs
@@ -0,0 +1,64 @@
+pub(crate) mod conv;
+pub(crate) mod layer_norm;
+
+use candle::{Device, Result};
+
+pub(crate) trait BenchDevice {
+    fn sync(&self) -> Result<()>;
+
+    fn bench_name<S: Into<String>>(&self, name: S) -> String;
+}
+
+impl BenchDevice for Device {
+    fn sync(&self) -> Result<()> {
+        match self {
+            Device::Cpu => Ok(()),
+            Device::Cuda(device) => {
+                #[cfg(feature = "cuda")]
+                return Ok(device.synchronize()?);
+                #[cfg(not(feature = "cuda"))]
+                panic!("Cuda device without cuda feature enabled: {:?}", device)
+            }
+            Device::Metal(device) => {
+                #[cfg(feature = "metal")]
+                return Ok(device.wait_until_completed()?);
+                #[cfg(not(feature = "metal"))]
+                panic!("Metal device without metal feature enabled: {:?}", device)
+            }
+        }
+    }
+
+    fn bench_name<S: Into<String>>(&self, name: S) -> String {
+        match self {
+            Device::Cpu => {
+                let cpu_type = if cfg!(feature = "accelerate") {
+                    "accelerate"
+                } else if cfg!(feature = "mkl") {
+                    "mkl"
+                } else {
+                    "cpu"
+                };
+                format!("{}_{}", cpu_type, name.into())
+            }
+            Device::Cuda(_) => format!("cuda_{}", name.into()),
+            Device::Metal(_) => format!("metal_{}", name.into()),
+        }
+    }
+}
+
+struct BenchDeviceHandler {
+    devices: Vec<Device>,
+}
+
+impl BenchDeviceHandler {
+    pub fn new() -> Result<Self> {
+        let mut devices = Vec::new();
+        if cfg!(feature = "metal") {
+            devices.push(Device::new_metal(0)?);
+        } else if cfg!(feature = "cuda") {
+            devices.push(Device::new_cuda(0)?);
+        }
+        devices.push(Device::Cpu);
+        Ok(Self { devices })
+    }
+}