summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurent Mazare <laurent.mazare@gmail.com>2024-02-28 14:58:42 +0100
committerGitHub <noreply@github.com>2024-02-28 14:58:42 +0100
commit57267cd53612ede04090853680125b17956804f3 (patch)
treef38e058a3fd1486f3fb913b6155606f3ff60e649
parent60ee5cfd4dbe5893fc16c6addfeeca80f5e2a779 (diff)
downloadcandle-57267cd53612ede04090853680125b17956804f3.tar.gz
candle-57267cd53612ede04090853680125b17956804f3.tar.bz2
candle-57267cd53612ede04090853680125b17956804f3.zip
Add a flag to force running the quantized model on CPUs. (#1778)
* Add a flag to force running the quantized model on CPUs. * Add encodec to the readme.
-rw-r--r--README.md6
-rw-r--r--candle-examples/examples/quantized/main.rs6
2 files changed, 10 insertions, 2 deletions
diff --git a/README.md b/README.md
index 0119684e..aea30c5b 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,8 @@ We also provide some command line based examples using state of the art models
- [Replit-code-v1.5](./candle-examples/examples/replit-code/): a 3.3b LLM specialized for code completion.
- [Yi-6B / Yi-34B](./candle-examples/examples/yi/): two bilingual
(English/Chinese) general LLMs with 6b and 34b parameters.
+- [EnCodec](./candle-examples/examples/encodec/): high-quality audio compression
+ model using residual vector quantization.
- [Quantized LLaMA](./candle-examples/examples/quantized/): quantized version of
the LLaMA model using the same quantization techniques as
[llama.cpp](https://github.com/ggerganov/llama.cpp).
@@ -210,13 +212,15 @@ If you have an addition to this list, please submit a pull request.
- Text to text.
- T5 and its variants: FlanT5, UL2, MADLAD400 (translation), CoEdit (Grammar correction).
- Marian MT (Machine Translation).
- - Whisper (multi-lingual support).
- Text to image.
- Stable Diffusion v1.5, v2.1, XL v1.0.
- Wurstchen v2.
- Image to text.
- BLIP.
- TrOCR.
+ - Audio.
+ - Whisper, multi-lingual speech-to-text.
+ - EnCodec, audio compression model.
- Computer Vision Models.
- DINOv2, ConvMixer, EfficientNet, ResNet, ViT, VGG, RepVGG, ConvNeXT,
ConvNeXTv2.
diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index a497e944..96344a49 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -216,6 +216,10 @@ struct Args {
#[arg(long)]
split_prompt: bool,
+ /// Run on CPU rather than GPU even if a GPU is available.
+ #[arg(long)]
+ cpu: bool,
+
/// Penalty to be applied for repeating tokens, 1. means no penalty.
#[arg(long, default_value_t = 1.1)]
repeat_penalty: f32,
@@ -365,7 +369,7 @@ fn main() -> anyhow::Result<()> {
let model_path = args.model()?;
let mut file = std::fs::File::open(&model_path)?;
let start = std::time::Instant::now();
- let device = candle_examples::device(false)?;
+ let device = candle_examples::device(args.cpu)?;
let mut model = match model_path.extension().and_then(|v| v.to_str()) {
Some("gguf") => {