summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ivarflakstad <69173633+ivarflakstad@users.noreply.github.com> 2024-01-11 15:35:38 +0100
committer GitHub <noreply@github.com> 2024-01-11 15:35:38 +0100
commit 9f0c99f0c1020678a682480e5936757510b10cee (patch)
tree 57c83e6103a7360b4ed1abdfdb2fdc5e3518bc72
parent 0fc95c9f0c426db0f32f7e853035fd3e8415c311 (diff)
download candle-9f0c99f0c1020678a682480e5936757510b10cee.tar.gz
candle-9f0c99f0c1020678a682480e5936757510b10cee.tar.bz2
candle-9f0c99f0c1020678a682480e5936757510b10cee.zip
Separate benchmarks by enabled features (#1538)
* Use cfg to separate benchmark results based on features * Remove allow pragma * Avoid some unnecessary returns. * Improve benchmarks layout * Derive bench_name from actual device * Run CPU benchmarks even when GPU feature is enabled --------- Co-authored-by: Laurent <laurent.mazare@gmail.com>
-rw-r--r-- candle-core/Cargo.toml 2
-rw-r--r-- candle-core/benches/bench_main.rs 4
-rw-r--r-- candle-core/benches/benchmarks/matmul.rs (renamed from candle-core/benches/matmul.rs) 26
-rw-r--r-- candle-core/benches/benchmarks/mod.rs 63
4 files changed, 82 insertions, 13 deletions
diff --git a/candle-core/Cargo.toml b/candle-core/Cargo.toml
index 97857a6b..d9fc7526 100644
--- a/candle-core/Cargo.toml
+++ b/candle-core/Cargo.toml
@@ -46,6 +46,6 @@ accelerate = ["dep:libc", "dep:accelerate-src"]
metal = ["dep:metal", "dep:candle-metal-kernels"]
[[bench]]
-name = "matmul"
+name = "bench_main"
harness = false
diff --git a/candle-core/benches/bench_main.rs b/candle-core/benches/bench_main.rs
new file mode 100644
index 00000000..4425f2fb
--- /dev/null
+++ b/candle-core/benches/bench_main.rs
@@ -0,0 +1,4 @@
+mod benchmarks;
+
+use criterion::criterion_main;
+criterion_main!(benchmarks::matmul::benches);
diff --git a/candle-core/benches/matmul.rs b/candle-core/benches/benchmarks/matmul.rs
index 83679771..9d67e642 100644
--- a/candle-core/benches/matmul.rs
+++ b/candle-core/benches/benchmarks/matmul.rs
@@ -1,25 +1,25 @@
+use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
use candle_core::{DType, Device, Tensor};
-use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+use criterion::{black_box, criterion_group, Criterion, Throughput};
use std::time::Instant;
fn run(a: &Tensor, b: &Tensor) {
a.matmul(&b.t().unwrap()).unwrap();
}
-fn criterion_benchmark(c: &mut Criterion) {
+fn run_bench(c: &mut Criterion, device: &Device) {
let b = 1;
let m = 1;
let n = 2048;
let k = 2048;
- let device = Device::new_metal(0).unwrap();
let dtype = DType::F32;
- let lhs = Tensor::zeros((b, m, k), dtype, &device).unwrap();
- let rhs = Tensor::zeros((b, n, k), dtype, &device).unwrap();
+ let lhs = Tensor::zeros((b, m, k), dtype, device).unwrap();
+ let rhs = Tensor::zeros((b, n, k), dtype, device).unwrap();
let flops = b * m * n * k;
- let mut group = c.benchmark_group("matmul_metal");
+ let mut group = c.benchmark_group(device.bench_name("matmul"));
group.throughput(Throughput::Bytes(flops as u64));
group.bench_function("iter", move |b| {
b.iter_custom(|iters| {
@@ -27,16 +27,18 @@ fn criterion_benchmark(c: &mut Criterion) {
for _i in 0..iters {
run(black_box(&lhs), black_box(&rhs));
}
- if let Device::Metal(device) = &device {
- device.wait_until_completed().unwrap();
- } else {
- panic!("Expected metal device");
- }
+ device.sync().unwrap();
start.elapsed()
})
});
group.finish();
}
+fn criterion_benchmark(c: &mut Criterion) {
+ let handler = BenchDeviceHandler::new().unwrap();
+ for device in handler.devices {
+ run_bench(c, &device);
+ }
+}
+
criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/candle-core/benches/benchmarks/mod.rs b/candle-core/benches/benchmarks/mod.rs
new file mode 100644
index 00000000..295bbabd
--- /dev/null
+++ b/candle-core/benches/benchmarks/mod.rs
@@ -0,0 +1,63 @@
+pub(crate) mod matmul;
+
+use candle_core::{Device, Result};
+
+pub(crate) trait BenchDevice {
+ fn sync(&self) -> Result<()>;
+
+ fn bench_name<S: Into<String>>(&self, name: S) -> String;
+}
+
+impl BenchDevice for Device {
+ fn sync(&self) -> Result<()> {
+ match self {
+ Device::Cpu => Ok(()),
+ Device::Cuda(device) => {
+ #[cfg(feature = "cuda")]
+ return Ok(device.synchronize()?);
+ #[cfg(not(feature = "cuda"))]
+ panic!("Cuda device without cuda feature enabled: {:?}", device)
+ }
+ Device::Metal(device) => {
+ #[cfg(feature = "metal")]
+ return Ok(device.wait_until_completed()?);
+ #[cfg(not(feature = "metal"))]
+ panic!("Metal device without metal feature enabled: {:?}", device)
+ }
+ }
+ }
+
+ fn bench_name<S: Into<String>>(&self, name: S) -> String {
+ match self {
+ Device::Cpu => {
+ let cpu_type = if cfg!(feature = "accelerate") {
+ "accelerate"
+ } else if cfg!(feature = "mkl") {
+ "mkl"
+ } else {
+ "cpu"
+ };
+ format!("{}_{}", cpu_type, name.into())
+ }
+ Device::Cuda(_) => format!("cuda_{}", name.into()),
+ Device::Metal(_) => format!("metal_{}", name.into()),
+ }
+ }
+}
+
+struct BenchDeviceHandler {
+ devices: Vec<Device>,
+}
+
+impl BenchDeviceHandler {
+ pub fn new() -> Result<Self> {
+ let mut devices = Vec::new();
+ if cfg!(feature = "metal") {
+ devices.push(Device::new_metal(0)?);
+ } else if cfg!(feature = "cuda") {
+ devices.push(Device::new_cuda(0)?);
+ }
+ devices.push(Device::Cpu);
+ Ok(Self { devices })
+ }
+}