Add quantized tensors. (#458)

* Add quantized tensors.
* Implement the `Debug` trait for QTensor.
* Add the QMatMul custom op.
author: Laurent Mazare <laurent.mazare@gmail.com> 2023-08-15 22:45:53 +0100
committer: GitHub <noreply@github.com> 2023-08-15 22:45:53 +0100
commit: ca449f9ee11b892e026972d114c77a0938e1dc0b (patch)
tree: fd179ab9ffd5d1a3da740506a091147df9ba39e5 /candle-examples/examples/ggml/main.rs
parent: b8263aa15cf2d8d0f425e25bae296ea4e96aeb88 (diff)
download: candle-ca449f9ee11b892e026972d114c77a0938e1dc0b.tar.gz
candle-ca449f9ee11b892e026972d114c77a0938e1dc0b.tar.bz2
candle-ca449f9ee11b892e026972d114c77a0938e1dc0b.zip
1 file changed, 1 insertion, 2 deletions
diff --git a/candle-examples/examples/ggml/main.rs b/candle-examples/examples/ggml/main.rs
index 78eb20dc..9e3e1ba6 100644
--- a/candle-examples/examples/ggml/main.rs
+++ b/candle-examples/examples/ggml/main.rs
@@ -3,7 +3,6 @@ use clap::Parser;
 use std::fs::File;
 
 use candle::quantized::ggml_file::Content;
-use candle::{DType, Device};
 
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
@@ -18,7 +17,7 @@ fn main() -> Result<()> {
 
     let mut file = File::open(args.model)?;
     let start = std::time::Instant::now();
-    let model = Content::read(&mut file, DType::F16, &Device::Cpu)?;
+    let model = Content::read(&mut file)?;
 
     println!(
         "Loaded {:?} tensors in {:?}",
author	Laurent Mazare <laurent.mazare@gmail.com>	2023-08-15 22:45:53 +0100
committer	GitHub <noreply@github.com>	2023-08-15 22:45:53 +0100
commit	ca449f9ee11b892e026972d114c77a0938e1dc0b (patch)
tree	fd179ab9ffd5d1a3da740506a091147df9ba39e5 /candle-examples/examples/ggml/main.rs
parent	b8263aa15cf2d8d0f425e25bae296ea4e96aeb88 (diff)
download	candle-ca449f9ee11b892e026972d114c77a0938e1dc0b.tar.gz candle-ca449f9ee11b892e026972d114c77a0938e1dc0b.tar.bz2 candle-ca449f9ee11b892e026972d114c77a0938e1dc0b.zip