12 files changed, 22 insertions, 80 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 3db95c86..7785a0b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,7 +20,7 @@ exclude = [
 resolver = "2"
 
 [workspace.package]
-version = "0.5.0"
+version = "0.5.1"
 edition = "2021"
 description = "Minimalist ML framework."
 repository = "https://github.com/huggingface/candle"
@@ -33,14 +33,14 @@ ab_glyph = "0.2.23"
 accelerate-src = { version = "0.3.2" }
 anyhow = { version = "1", features = ["backtrace"] }
 byteorder = "1.4.3"
-candle = { path = "./candle-core", package = "candle-core", version = "0.5.0" }
-candle-datasets = { path = "./candle-datasets", version = "0.5.0" }
-candle-flash-attn = { path = "./candle-flash-attn", version = "0.5.0" }
-candle-kernels = { path = "./candle-kernels", version = "0.5.0" }
-candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.5.0" }
-candle-nn = { path = "./candle-nn", version = "0.5.0" }
-candle-onnx = { path = "./candle-onnx", version = "0.5.0" }
-candle-transformers = { path = "./candle-transformers", version = "0.5.0" }
+candle = { path = "./candle-core", package = "candle-core", version = "0.5.1" }
+candle-datasets = { path = "./candle-datasets", version = "0.5.1" }
+candle-flash-attn = { path = "./candle-flash-attn", version = "0.5.1" }
+candle-kernels = { path = "./candle-kernels", version = "0.5.1" }
+candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.5.1" }
+candle-nn = { path = "./candle-nn", version = "0.5.1" }
+candle-onnx = { path = "./candle-onnx", version = "0.5.1" }
+candle-transformers = { path = "./candle-transformers", version = "0.5.1" }
 clap = { version = "4.2.4", features = ["derive"] }
 criterion = { version = "0.5.1", default-features=false }
 cudarc = { version = "0.10.0", features = ["f16"] }
diff --git a/candle-core/src/cpu/mod.rs b/candle-core/src/cpu/mod.rs
index 50afb30f..e7d8b690 100644
--- a/candle-core/src/cpu/mod.rs
+++ b/candle-core/src/cpu/mod.rs
@@ -1,6 +1,7 @@
 pub mod erf;
 pub mod kernels;
 
+#[allow(unused)]
 trait Cpu<const ARR: usize> {
     type Unit;
     type Array;
@@ -18,6 +19,7 @@ trait Cpu<const ARR: usize> {
     unsafe fn vec_store(mem_addr: *mut f32, a: Self::Unit);
 }
 
+#[allow(unused)]
 trait CpuF16<const ARR: usize> {
     type Unit;
     type Array;
diff --git a/candle-core/src/cuda_backend/mod.rs b/candle-core/src/cuda_backend/mod.rs
index 39b41d2e..0b9c7a2c 100644
--- a/candle-core/src/cuda_backend/mod.rs
+++ b/candle-core/src/cuda_backend/mod.rs
@@ -250,44 +250,6 @@ impl Map1 for Powf {
     }
 }
 
-struct Sum<'a>(&'a [usize]);
-impl<'a> Map1 for Sum<'a> {
-    fn f<T: DeviceRepr + WithDType + ValidAsZeroBits>(
-        &self,
-        src: &CudaSlice<T>,
-        dev: &CudaDevice,
-        layout: &Layout,
-    ) -> Result<CudaSlice<T>> {
-        let shape = layout.shape();
-        let src_dims = shape.dims();
-        let el = shape.elem_count();
-        let mut dst_el = el;
-        for &sum_dim in self.0.iter() {
-            dst_el /= src_dims[sum_dim];
-        }
-        let mut sum_dims = self.0.to_vec();
-        // Sort the sum_dims as they have to be processed from left to right when converting the
-        // indexes.
-        sum_dims.sort();
-        let sum_dims_l: Vec<usize> = sum_dims.iter().map(|&d| src_dims[d]).collect();
-        let sum_dims_s: Vec<usize> = sum_dims
-            .iter()
-            .map(|&d| src_dims[d + 1..].iter().product::<usize>())
-            .collect();
-        let cfg = LaunchConfig::for_num_elems(el as u32);
-        let ds = dev
-            .htod_copy([src_dims, layout.stride(), &sum_dims_l, &sum_dims_s].concat())
-            .w()?;
-        let src = &src.slice(layout.start_offset()..);
-        let func = dev.get_or_load_func(&kernel_name::<T>("sum"), kernels::REDUCE)?;
-        let out = dev.alloc_zeros::<T>(dst_el).w()?;
-        let params = (el, src_dims.len(), sum_dims.len(), &ds, src, &out);
-        // SAFETY: ffi.
-        unsafe { func.launch(cfg, params) }.w()?;
-        Ok(out)
-    }
-}
-
 struct FastReduce<'a>(&'a [usize], ReduceOp);
 impl<'a> Map1Any for FastReduce<'a> {
     fn f<T: DeviceRepr + WithDType + ValidAsZeroBits, W: Fn(CudaSlice<T>) -> S>(
diff --git a/candle-core/src/quantized/gguf_file.rs b/candle-core/src/quantized/gguf_file.rs
index b729d4a0..1e9a6a9a 100644
--- a/candle-core/src/quantized/gguf_file.rs
+++ b/candle-core/src/quantized/gguf_file.rs
@@ -135,7 +135,6 @@ pub enum ValueType {
     // The value is a UTF-8 non-null-terminated string, with length prepended.
     String,
     // The value is an array of other values, with the length and type prepended.
-    ///
     // Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
     Array,
 }
diff --git a/candle-examples/examples/yolo-v3/darknet.rs b/candle-examples/examples/yolo-v3/darknet.rs
index 94d88d58..331e712c 100644
--- a/candle-examples/examples/yolo-v3/darknet.rs
+++ b/candle-examples/examples/yolo-v3/darknet.rs
@@ -13,7 +13,7 @@ struct Block {
 
 impl Block {
     fn get(&self, key: &str) -> Result<&str> {
-        match self.parameters.get(&key.to_string()) {
+        match self.parameters.get(key) {
             None => candle::bail!("cannot find {} in {}", key, self.block_type),
             Some(value) => Ok(value),
         }
@@ -28,7 +28,7 @@ pub struct Darknet {
 
 impl Darknet {
     fn get(&self, key: &str) -> Result<&str> {
-        match self.parameters.get(&key.to_string()) {
+        match self.parameters.get(key) {
             None => candle::bail!("cannot find {} in net parameters", key),
             Some(value) => Ok(value),
         }
diff --git a/candle-flash-attn/Cargo.toml b/candle-flash-attn/Cargo.toml
index 827cf970..7574b76b 100644
--- a/candle-flash-attn/Cargo.toml
+++ b/candle-flash-attn/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "candle-flash-attn"
-version = "0.5.0"
+version = "0.5.1"
 edition = "2021"
 
 description = "Flash attention layer for the candle ML framework."
@@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0"
 readme = "README.md"
 
 [dependencies]
-candle = { path = "../candle-core", features = ["cuda"], package = "candle-core", version = "0.5.0" }
+candle = { path = "../candle-core", features = ["cuda"], package = "candle-core", version = "0.5.1" }
 half = { version = "2.3.1", features = ["num-traits"] }
 
 [build-dependencies]
diff --git a/candle-kernels/Cargo.toml b/candle-kernels/Cargo.toml
index 5cedb7d3..6b8cd586 100644
--- a/candle-kernels/Cargo.toml
+++ b/candle-kernels/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "candle-kernels"
-version = "0.5.0"
+version = "0.5.1"
 edition = "2021"
 
 description = "CUDA kernels for Candle"
diff --git a/candle-metal-kernels/Cargo.toml b/candle-metal-kernels/Cargo.toml
index 65e00bbc..fc5e214a 100644
--- a/candle-metal-kernels/Cargo.toml
+++ b/candle-metal-kernels/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "candle-metal-kernels"
-version = "0.5.0"
+version = "0.5.1"
 edition = "2021"
 
 description = "Metal kernels for Candle"
diff --git a/candle-nn/src/var_builder.rs b/candle-nn/src/var_builder.rs
index 5539370a..68bd6f05 100644
--- a/candle-nn/src/var_builder.rs
+++ b/candle-nn/src/var_builder.rs
@@ -264,7 +264,7 @@ impl SimpleBackend for VarMap {
     }
 }
 
-struct SafeTensorWithRouting<'a> {
+pub struct SafeTensorWithRouting<'a> {
     routing: HashMap<String, usize>,
     safetensors: Vec<SafeTensors<'a>>,
 }
diff --git a/candle-onnx/Cargo.toml b/candle-onnx/Cargo.toml
index 2f438cda..6354aacd 100644
--- a/candle-onnx/Cargo.toml
+++ b/candle-onnx/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "candle-onnx"
-version = "0.5.0"
+version = "0.5.1"
 edition = "2021"
 
 description = "ONNX support for Candle"
@@ -10,8 +10,8 @@ categories = ["science"]
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-candle = { path = "../candle-core", package = "candle-core", version = "0.5.0" }
-candle-nn = { path = "../candle-nn", version = "0.5.0" }
+candle = { path = "../candle-core", package = "candle-core", version = "0.5.1" }
+candle-nn = { path = "../candle-nn", version = "0.5.1" }
 prost = "0.12.1"
 
 [build-dependencies]
diff --git a/candle-transformers/src/models/phi.rs b/candle-transformers/src/models/phi.rs
index 2c7fccef..3f8d92b9 100644
--- a/candle-transformers/src/models/phi.rs
+++ b/candle-transformers/src/models/phi.rs
@@ -72,7 +72,7 @@ impl RotaryEmbedding {
         let (xs1, xs2) = (&xs12[0], &xs12[1]);
         let c = self.cos.narrow(0, seqlen_offset, seq_len)?;
         let s = self.sin.narrow(0, seqlen_offset, seq_len)?;
-        let rotate_half = Tensor::cat(&[&xs2.neg()?, &xs1], D::Minus1)?;
+        let rotate_half = Tensor::cat(&[&xs2.neg()?, xs1], D::Minus1)?;
         let xs_rot = (xs_rot.broadcast_mul(&c)? + rotate_half.broadcast_mul(&s)?)?;
         Tensor::cat(&[&xs_rot, &xs_pass], D::Minus1)
     }
diff --git a/candle-wasm-examples/whisper/src/lib.rs b/candle-wasm-examples/whisper/src/lib.rs
index f1832012..cce92c85 100644
--- a/candle-wasm-examples/whisper/src/lib.rs
+++ b/candle-wasm-examples/whisper/src/lib.rs
@@ -1,26 +1,5 @@
 pub const WITH_TIMER: bool = true;
 
-struct Timer {
-    label: &'static str,
-}
-
-// impl Timer {
-//     fn new(label: &'static str) -> Self {
-//         if WITH_TIMER {
-//             web_sys::console::time_with_label(label);
-//         }
-//         Self { label }
-//     }
-// }
-
-impl Drop for Timer {
-    fn drop(&mut self) {
-        if WITH_TIMER {
-            web_sys::console::time_end_with_label(self.label)
-        }
-    }
-}
-
 mod app;
 mod audio;
 pub mod languages;