From 9b5e4843a63180a2803b1e836b4ca90f14281d03 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 20 Dec 2023 09:54:19 +0100
Subject: Optimizing decode matmul (Phi at 28tok/s on M3).

Adding some benchmark in order to help checking out matmul performance.
---
 Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Cargo.toml')

diff --git a/Cargo.toml b/Cargo.toml
index 7c2e3a7d..9fda5fba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ accelerate-src = { version = "0.3.2" }
 anyhow = { version = "1", features = ["backtrace"] }
 byteorder = "1.4.3"
 clap = { version = "4.2.4", features = ["derive"] }
+criterion = { version = "0.5.1", default-features=false }
 cudarc = { version = "0.9.14", features = ["f16"] }
 gemm = { version = "0.16.6", features = ["wasm-simd128-enable"] }
 hf-hub = "0.3.0"
--
cgit v1.2.3