summary | refs | log | tree | commit | diff
path: root/candle-examples/examples/phi/main.rs
diff options
context:
space:
mode:
author Laurent Mazare <laurent.mazare@gmail.com> 2023-12-13 21:16:34 -0600
committer GitHub <noreply@github.com> 2023-12-13 21:16:34 -0600
commit 5e33c85c8f7d2ae8c5fe8de557b69c036e4f080a (patch)
tree 786b7d376b517fd5f353fbfdfcb1a1236178ae26 /candle-examples/examples/phi/main.rs
parent 2b3a018be7596d6c72aaee8a469312ce865498d5 (diff)
download candle-5e33c85c8f7d2ae8c5fe8de557b69c036e4f080a.tar.gz
candle-5e33c85c8f7d2ae8c5fe8de557b69c036e4f080a.tar.bz2
candle-5e33c85c8f7d2ae8c5fe8de557b69c036e4f080a.zip
Quantized version for phi-v2. (#1430)
* Quantized version for phi-v2.
* More quantized support.
Diffstat (limited to 'candle-examples/examples/phi/main.rs')
-rw-r--r-- candle-examples/examples/phi/main.rs 7
1 files changed, 5 insertions, 2 deletions
diff --git a/candle-examples/examples/phi/main.rs b/candle-examples/examples/phi/main.rs
index 1dd507ff..321ea5de 100644
--- a/candle-examples/examples/phi/main.rs
+++ b/candle-examples/examples/phi/main.rs
@@ -268,7 +268,7 @@ fn main() -> Result<()> {
match args.model {
WhichModel::V1 => vec![repo.get("model-v1-q4k.gguf")?],
WhichModel::V1_5 => vec![repo.get("model-q4k.gguf")?],
- WhichModel::V2 => anyhow::bail!("phi-2 is not supported in quantized mode"),
+ WhichModel::V2 => vec![repo.get("model-v2-q4k.gguf")?],
WhichModel::PuffinPhiV2 => vec![repo.get("model-puffin-phi-v2-q4k.gguf")?],
WhichModel::PhiHermes => vec![repo.get("model-phi-hermes-1_3B-q4k.gguf")?],
}
@@ -298,7 +298,10 @@ fn main() -> Result<()> {
};
let (model, device) = if args.quantized {
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(&filenames[0])?;
- let model = QMixFormer::new(&config, vb)?;
+ let model = match args.model {
+ WhichModel::V2 => QMixFormer::new_v2(&config, vb)?,
+ _ => QMixFormer::new(&config, vb)?,
+ };
(Model::Quantized(model), Device::Cpu)
} else {
let device = candle_examples::device(args.cpu)?;