summary | refs | log | tree | commit | diff
path: root/candle-nn
diff options
context:
space:
mode:
Diffstat (limited to 'candle-nn')
-rw-r--r-- candle-nn/Cargo.toml 5
-rw-r--r-- candle-nn/benches/bench_main.rs 4
-rw-r--r-- candle-nn/benches/benchmarks/conv.rs 54
-rw-r--r-- candle-nn/benches/benchmarks/layer_norm.rs 48
-rw-r--r-- candle-nn/benches/benchmarks/mod.rs 64
5 files changed, 175 insertions, 0 deletions
diff --git a/candle-nn/Cargo.toml b/candle-nn/Cargo.toml
index 3408dae3..9f0d56bd 100644
--- a/candle-nn/Cargo.toml
+++ b/candle-nn/Cargo.toml
@@ -26,6 +26,7 @@ candle-metal-kernels = { workspace = true, optional = true }
anyhow = { workspace = true }
clap = { workspace = true }
rand = { workspace = true }
+criterion = { workspace = true }
[features]
default = []
@@ -33,3 +34,7 @@ accelerate = ["dep:accelerate-src", "candle/accelerate"]
cuda = ["candle/cuda"]
mkl = ["dep:intel-mkl-src", "candle/mkl"]
metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"]
+
+[[bench]]
+name = "bench_main"
+harness = false
\ No newline at end of file
diff --git a/candle-nn/benches/bench_main.rs b/candle-nn/benches/bench_main.rs
new file mode 100644
index 00000000..4db1d35c
--- /dev/null
+++ b/candle-nn/benches/bench_main.rs
@@ -0,0 +1,4 @@
+// Entry point of the `bench_main` benchmark target; the benchmark groups themselves
+// live in the `benchmarks` module.
+mod benchmarks;
+
+use criterion::criterion_main;
+// Hands the `benches` group of each listed module to criterion's generated entry point.
+criterion_main!(benchmarks::layer_norm::benches, benchmarks::conv::benches);
diff --git a/candle-nn/benches/benchmarks/conv.rs b/candle-nn/benches/benchmarks/conv.rs
new file mode 100644
index 00000000..eb80645b
--- /dev/null
+++ b/candle-nn/benches/benchmarks/conv.rs
@@ -0,0 +1,54 @@
+use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
+use candle::{DType, Device, Module, Tensor};
+use candle_nn::{Conv2d, Conv2dConfig};
+use criterion::{black_box, criterion_group, Criterion};
+use std::time::Instant;
+
+// Dimensions of the benchmark tensors: the input is built with shape `(B, C, M, K)`
+// and the convolution weight with shape `(1, 1, K_SIZE, K_SIZE)`.
+// NOTE(review): `K` is also used as the length of the bias vector - confirm that is intended.
+const B: usize = 1;
+const C: usize = 1;
+const M: usize = 128;
+const K: usize = 128;
+const K_SIZE: usize = 3;
+
+/// Builds a `Conv2d` layer from `weight`, `bias` and `config`, runs a single forward
+/// pass over `input`, and panics if that forward pass returns an error.
+fn run(input: Tensor, weight: Tensor, bias: Tensor, config: Conv2dConfig) {
+    let layer = Conv2d::new(weight, Some(bias), config);
+    layer.forward(&input).unwrap();
+}
+
+/// Registers a criterion benchmark group that times repeated `Conv2d` forward passes.
+///
+/// * `c` - criterion instance the group is added to.
+/// * `device` - device the tensors are created on; it is synced before each timing
+///   sample is returned.
+/// * `dtype` - element type of the weight, bias and input tensors.
+/// * `name` - benchmark name, turned into the group name by `BenchDevice::bench_name`.
+///
+/// Panics if tensor creation, the forward pass or the device sync fails.
+fn run_conv2d_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
+    // `Tensor::ones` already produces a tensor of `dtype`, so no follow-up cast is needed.
+    let weight = Tensor::ones((1, 1, K_SIZE, K_SIZE), dtype, device).unwrap();
+    // NOTE(review): the bias has `K` elements while the weight's leading dimension is 1
+    // (presumably the output-channel count) - confirm this mismatch is intended.
+    let bias = Tensor::zeros(K, dtype, device).unwrap();
+    let input = Tensor::ones((B, C, M, K), dtype, device).unwrap();
+
+    let mut group = c.benchmark_group(device.bench_name(name));
+    group.bench_function("iter", move |b| {
+        b.iter_custom(|iters| {
+            let start = Instant::now();
+            for _ in 0..iters {
+                run(
+                    black_box(input.clone()),
+                    black_box(weight.clone()),
+                    black_box(bias.clone()),
+                    Default::default(),
+                );
+            }
+            // Sync before reading the clock; presumably so that work still queued on the
+            // device is counted in the sample.
+            device.sync().unwrap();
+            start.elapsed()
+        })
+    });
+    group.finish();
+}
+
+/// Runs the conv2d benchmark in `F32` and then `F16` on every device listed by
+/// `BenchDeviceHandler`. Panics if the device list cannot be built.
+fn criterion_benchmark(c: &mut Criterion) {
+    let handler = BenchDeviceHandler::new().unwrap();
+    for dev in &handler.devices {
+        for (dtype, name) in [(DType::F32, "conv2d_f32"), (DType::F16, "conv2d_f16")] {
+            run_conv2d_benchmark(c, dev, dtype, name);
+        }
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
diff --git a/candle-nn/benches/benchmarks/layer_norm.rs b/candle-nn/benches/benchmarks/layer_norm.rs
new file mode 100644
index 00000000..0be5c450
--- /dev/null
+++ b/candle-nn/benches/benchmarks/layer_norm.rs
@@ -0,0 +1,48 @@
+use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
+use candle::{DType, Device, Module, Tensor};
+use candle_nn::LayerNorm;
+use criterion::{black_box, criterion_group, Criterion};
+use std::time::Instant;
+
+/// Builds a `LayerNorm` (epsilon `1e-5`) from clones of `weight` and `bias` and runs a
+/// single forward pass over `input`.
+///
+/// Panics if the forward pass fails, so that a failing configuration aborts the
+/// benchmark instead of being timed as though it had succeeded.
+fn run(input: &Tensor, weight: &Tensor, bias: &Tensor) {
+    LayerNorm::new(weight.clone(), bias.clone(), 1e-5)
+        .forward(input)
+        .unwrap();
+}
+
+// `B * M * K` is the element count of the benchmark's weight tensor; the bias and
+// input tensors are created with the same shape via `ones_like`.
+const B: usize = 1;
+const M: usize = 1024;
+const K: usize = 1024;
+
+/// Registers a criterion benchmark group that times repeated `LayerNorm` forward passes.
+///
+/// * `c` - criterion instance the group is added to.
+/// * `device` - device the tensors are created on; it is synced before each timing
+///   sample is returned.
+/// * `dtype` - element type the weight is cast to; bias and input inherit it.
+/// * `name` - benchmark name, turned into the group name by `BenchDevice::bench_name`.
+///
+/// Panics if tensor creation or the device sync fails.
+fn run_layer_norm_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
+    let n_elements = (B * M * K) as f32;
+
+    // The weight is generated as an `f32` range and then cast to the requested dtype.
+    let weight = Tensor::arange(0.0, n_elements, device)
+        .and_then(|ramp| ramp.to_dtype(dtype))
+        .unwrap();
+    let bias = weight.ones_like().unwrap();
+    let input = weight.ones_like().unwrap();
+
+    let group_name = device.bench_name(name);
+    let mut group = c.benchmark_group(group_name);
+    group.bench_function("iter", move |bencher| {
+        bencher.iter_custom(|iterations| {
+            let timer = Instant::now();
+            (0..iterations).for_each(|_| {
+                run(black_box(&input), black_box(&weight), black_box(&bias));
+            });
+            // Sync with the device before the elapsed time is read.
+            device.sync().unwrap();
+            timer.elapsed()
+        })
+    });
+    group.finish();
+}
+
+/// Runs the layer-norm benchmark in `F32`, `BF16` and `F16` (in that order) on every
+/// device listed by `BenchDeviceHandler`. Panics if the device list cannot be built.
+fn criterion_benchmark(c: &mut Criterion) {
+    let handler = BenchDeviceHandler::new().unwrap();
+    let variants = [
+        (DType::F32, "layer_norm_f32"),
+        (DType::BF16, "layer_norm_bf16"),
+        (DType::F16, "layer_norm_f16"),
+    ];
+    for dev in &handler.devices {
+        for (dtype, name) in variants {
+            run_layer_norm_benchmark(c, dev, dtype, name);
+        }
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
diff --git a/candle-nn/benches/benchmarks/mod.rs b/candle-nn/benches/benchmarks/mod.rs
new file mode 100644
index 00000000..30a6ab6a
--- /dev/null
+++ b/candle-nn/benches/benchmarks/mod.rs
@@ -0,0 +1,64 @@
+pub(crate) mod conv;
+pub(crate) mod layer_norm;
+
+use candle::{Device, Result};
+
+/// Helpers the benchmarks need from a device.
+pub(crate) trait BenchDevice {
+ /// Synchronises with the device. The `Device` implementation returns immediately for
+ /// the CPU and calls the device's `synchronize` / `wait_until_completed` for CUDA / Metal.
+ fn sync(&self) -> Result<()>;
+
+ /// Returns `name` prefixed with a label for the device backend and an underscore
+ /// (for example `cpu_<name>` or `cuda_<name>`).
+ fn bench_name<S: Into<String>>(&self, name: S) -> String;
+}
+
+impl BenchDevice for Device {
+    /// Synchronises with the device: nothing to do for the CPU, `synchronize` for CUDA
+    /// and `wait_until_completed` for Metal.
+    ///
+    /// Panics when handed a CUDA or Metal device while the matching crate feature is
+    /// disabled.
+    fn sync(&self) -> Result<()> {
+        match self {
+            Device::Cpu => Ok(()),
+            Device::Cuda(cuda) => {
+                // Exactly one of the two cfg-gated statements below is compiled in.
+                #[cfg(feature = "cuda")]
+                return Ok(cuda.synchronize()?);
+                #[cfg(not(feature = "cuda"))]
+                panic!("Cuda device without cuda feature enabled: {:?}", cuda)
+            }
+            Device::Metal(metal) => {
+                // Exactly one of the two cfg-gated statements below is compiled in.
+                #[cfg(feature = "metal")]
+                return Ok(metal.wait_until_completed()?);
+                #[cfg(not(feature = "metal"))]
+                panic!("Metal device without metal feature enabled: {:?}", metal)
+            }
+        }
+    }
+
+    /// Returns `<backend>_<name>`, where the backend label is `cuda` or `metal` for those
+    /// devices and, for the CPU, `accelerate`, `mkl` or `cpu` depending on which crate
+    /// feature is enabled (checked in that order).
+    fn bench_name<S: Into<String>>(&self, name: S) -> String {
+        let backend = match self {
+            Device::Cpu if cfg!(feature = "accelerate") => "accelerate",
+            Device::Cpu if cfg!(feature = "mkl") => "mkl",
+            Device::Cpu => "cpu",
+            Device::Cuda(_) => "cuda",
+            Device::Metal(_) => "metal",
+        };
+        format!("{}_{}", backend, name.into())
+    }
+}
+
+/// Holds the list of devices the benchmarks are run on.
+struct BenchDeviceHandler {
+    /// Devices to benchmark: at most one accelerator, followed by the CPU.
+    devices: Vec<Device>,
+}
+
+impl BenchDeviceHandler {
+    /// Builds the device list: Metal device 0 when the `metal` feature is enabled,
+    /// otherwise CUDA device 0 when the `cuda` feature is enabled; `Device::Cpu` is
+    /// always appended last.
+    ///
+    /// Returns an error if creating the accelerator device fails.
+    pub fn new() -> Result<Self> {
+        let accelerator = if cfg!(feature = "metal") {
+            Some(Device::new_metal(0)?)
+        } else if cfg!(feature = "cuda") {
+            Some(Device::new_cuda(0)?)
+        } else {
+            None
+        };
+        let devices = accelerator
+            .into_iter()
+            .chain(std::iter::once(Device::Cpu))
+            .collect();
+        Ok(Self { devices })
+    }
+}