author     Laurent Mazare <laurent.mazare@gmail.com>   2024-02-28 14:58:42 +0100
committer  GitHub <noreply@github.com>                 2024-02-28 14:58:42 +0100
commit     57267cd53612ede04090853680125b17956804f3 (patch)
tree       f38e058a3fd1486f3fb913b6155606f3ff60e649
parent     60ee5cfd4dbe5893fc16c6addfeeca80f5e2a779 (diff)
Add a flag to force running the quantized model on CPUs. (#1778)
* Add a flag to force running the quantized model on CPUs.
* Add encodec to the readme.
-rw-r--r--  README.md                                   | 6 +++++-
-rw-r--r--  candle-examples/examples/quantized/main.rs  | 6 +++++-
2 files changed, 10 insertions(+), 2 deletions(-)
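The flag is a plain clap boolean, so `#[arg(long)] cpu: bool` surfaces as `--cpu` on the command line; with this change the example can be pinned to CPU with something like `cargo run --example quantized --release -- --cpu`, combined with whatever prompt and model flags the run needs.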
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -83,6 +83,8 @@ We also provide a some command line based examples using state of the art models
 - [Replit-code-v1.5](./candle-examples/examples/replit-code/): a 3.3b LLM specialized for code completion.
 - [Yi-6B / Yi-34B](./candle-examples/examples/yi/): two bilingual
   (English/Chinese) general LLMs with 6b and 34b parameters.
+- [EnCodec](./candle-examples/examples/encodec/): high-quality audio compression
+  model using residual vector quantization.
 - [Quantized LLaMA](./candle-examples/examples/quantized/): quantized version of
   the LLaMA model using the same quantization techniques as
   [llama.cpp](https://github.com/ggerganov/llama.cpp).
@@ -210,13 +212,15 @@ If you have an addition to this list, please submit a pull request.
 - Text to text.
   - T5 and its variants: FlanT5, UL2, MADLAD400 (translation), CoEdit (Grammar correction).
   - Marian MT (Machine Translation).
-  - Whisper (multi-lingual support).
 - Text to image.
   - Stable Diffusion v1.5, v2.1, XL v1.0.
   - Wurstchen v2.
 - Image to text.
   - BLIP.
   - TrOCR.
+- Audio.
+  - Whisper, multi-lingual speech-to-text.
+  - EnCodec, audio compression model.
 - Computer Vision Models.
   - DINOv2, ConvMixer, EfficientNet, ResNet, ViT, VGG, RepVGG, ConvNeXT,
     ConvNeXTv2.
diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index a497e944..96344a49 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -216,6 +216,10 @@ struct Args {
     #[arg(long)]
     split_prompt: bool,
 
+    /// Run on CPU rather than GPU even if a GPU is available.
+    #[arg(long)]
+    cpu: bool,
+
     /// Penalty to be applied for repeating tokens, 1. means no penalty.
     #[arg(long, default_value_t = 1.1)]
     repeat_penalty: f32,
@@ -365,7 +369,7 @@ fn main() -> anyhow::Result<()> {
     let model_path = args.model()?;
     let mut file = std::fs::File::open(&model_path)?;
     let start = std::time::Instant::now();
-    let device = candle_examples::device(false)?;
+    let device = candle_examples::device(args.cpu)?;
     let mut model = match model_path.extension().and_then(|v| v.to_str()) {
         Some("gguf") => {
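For context, `candle_examples::device` is the shared helper the candle examples use to choose a backend, and routing `args.cpu` through it is what makes the flag take effect. Below is a minimal sketch of that selection logic, assuming the helper probes CUDA and Metal availability via `candle_core::utils`; the real helper in candle-examples may differ in details such as the hint it prints before falling back to CPU.

```rust
// Sketch of a device-selection helper in the spirit of candle_examples::device.
// Assumes candle_core's availability probes; not the verbatim upstream code.
use candle_core::utils::{cuda_is_available, metal_is_available};
use candle_core::{Device, Result};

fn device(force_cpu: bool) -> Result<Device> {
    if force_cpu {
        // The new --cpu flag lands here: skip accelerator probing entirely.
        Ok(Device::Cpu)
    } else if cuda_is_available() {
        // Prefer the first CUDA device when built with the cuda feature.
        Device::new_cuda(0)
    } else if metal_is_available() {
        // Otherwise try Metal on macOS builds with the metal feature.
        Device::new_metal(0)
    } else {
        // No accelerator compiled in or detected: fall back to CPU.
        Ok(Device::Cpu)
    }
}
```

Forcing CPU this way makes it easy to compare quantized CPU throughput against GPU runs of the same model without rebuilding with different features.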