author      Laurent Mazare <laurent.mazare@gmail.com>   2023-12-23 16:18:49 +0100
committer   GitHub <noreply@github.com>                 2023-12-23 16:18:49 +0100
commit      88589d88153bef3316a13741bd12bf5e7963957a (patch)
tree        9e924b07854b2006ce689fa41f07a57db4eb1076 /candle-examples/examples/quantized/main.rs
parent      5b35fd0fcfc4f131c5be6358d733d555f644bc55 (diff)
Support mistral instruct v0.2. (#1475)
* Support mistral instruct v0.2.
* Use the safetensors model now that they are available.
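
The new variant is exposed on the command line through the 7b-mistral-instruct-v0.2 value added below. A minimal invocation, assuming the example's existing --which and --prompt flags (they are not part of this diff):

    cargo run --example quantized --release -- \
        --which 7b-mistral-instruct-v0.2 \
        --prompt "Write a haiku about quantization."

When no --model path is supplied, the matching Q4_K_S GGUF file is fetched from the TheBloke/Mistral-7B-Instruct-v0.2-GGUF repository, as wired up in the Args hunk further down.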
Diffstat (limited to 'candle-examples/examples/quantized/main.rs')
-rw-r--r--  candle-examples/examples/quantized/main.rs | 19
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index df758b4f..bfc6de53 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -53,6 +53,8 @@ enum Which {
Mistral7b,
#[value(name = "7b-mistral-instruct")]
Mistral7bInstruct,
+ #[value(name = "7b-mistral-instruct-v0.2")]
+ Mistral7bInstructV02,
#[value(name = "7b-zephyr-a")]
Zephyr7bAlpha,
#[value(name = "7b-zephyr-b")]
@@ -90,7 +92,8 @@ impl Which {
| Self::Mixtral
| Self::MixtralInstruct
| Self::Mistral7b
- | Self::Mistral7bInstruct => true,
+ | Self::Mistral7bInstruct
+ | Self::Mistral7bInstructV02 => true,
}
}
@@ -111,6 +114,7 @@ impl Which {
| Self::MixtralInstruct
| Self::Mistral7b
| Self::Mistral7bInstruct
+ | Self::Mistral7bInstructV02
| Self::OpenChat35
| Self::Starling7bAlpha => false,
Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
@@ -134,6 +138,7 @@ impl Which {
| Self::MixtralInstruct
| Self::Mistral7b
| Self::Mistral7bInstruct
+ | Self::Mistral7bInstructV02
| Self::Zephyr7bAlpha
| Self::Zephyr7bBeta => false,
Self::OpenChat35 | Self::Starling7bAlpha => true,
@@ -157,6 +162,7 @@ impl Which {
Which::MixtralInstruct => "mistralai/Mixtral-8x7B-Instruct-v0.1",
Which::Mistral7b
| Which::Mistral7bInstruct
+ | Which::Mistral7bInstructV02
| Which::Zephyr7bAlpha
| Which::Zephyr7bBeta => "mistralai/Mistral-7B-v0.1",
Which::OpenChat35 => "openchat/openchat_3.5",
@@ -168,7 +174,7 @@ impl Which {
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
- /// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
+ /// GGML/GGUF file to load, typically a .bin/.gguf file generated by the quantize command from llama.cpp
#[arg(long)]
model: Option<String>,
@@ -284,6 +290,10 @@ impl Args {
"TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
"mistral-7b-instruct-v0.1.Q4_K_S.gguf",
),
+ Which::Mistral7bInstructV02 => (
+ "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+ "mistral-7b-instruct-v0.2.Q4_K_S.gguf",
+ ),
Which::Zephyr7bAlpha => (
"TheBloke/zephyr-7B-alpha-GGUF",
"zephyr-7b-alpha.Q4_K_M.gguf",
@@ -354,7 +364,7 @@ fn main() -> anyhow::Result<()> {
let mut model = match model_path.extension().and_then(|v| v.to_str()) {
Some("gguf") => {
- let model = gguf_file::Content::read(&mut file)?;
+ let model = gguf_file::Content::read(&mut file).map_err(|e| e.with_path(model_path))?;
let mut total_size_in_bytes = 0;
for (_, tensor) in model.tensor_infos.iter() {
let elem_count = tensor.shape.elem_count();
@@ -370,7 +380,7 @@ fn main() -> anyhow::Result<()> {
ModelWeights::from_gguf(model, &mut file)?
}
Some("ggml" | "bin") | Some(_) | None => {
- let model = ggml_file::Content::read(&mut file)?;
+ let model = ggml_file::Content::read(&mut file).map_err(|e| e.with_path(model_path))?;
let mut total_size_in_bytes = 0;
for (_, tensor) in model.tensors.iter() {
let elem_count = tensor.shape().elem_count();
@@ -398,6 +408,7 @@ fn main() -> anyhow::Result<()> {
| Which::MixtralInstruct
| Which::Mistral7b
| Which::Mistral7bInstruct
+ | Which::Mistral7bInstructV02
| Which::Zephyr7bAlpha
| Which::Zephyr7bBeta
| Which::L70b
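
A note on the error-handling tweak in the two read hunks above: failures from gguf_file::Content::read and ggml_file::Content::read are now passed through with_path, so a truncated or mis-typed model file is reported together with its location on disk. A minimal sketch of the same wrap-the-path pattern, using a hypothetical standalone PathError type rather than candle's actual Error:

    use std::path::{Path, PathBuf};

    // Illustrative error type mirroring the with_path idea: carry the path
    // that produced the failure alongside the underlying error.
    #[derive(Debug)]
    struct PathError {
        path: PathBuf,
        source: std::io::Error,
    }

    impl std::fmt::Display for PathError {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}: {}", self.path.display(), self.source)
        }
    }

    impl std::error::Error for PathError {}

    // Read a file, tagging any I/O error with the offending path.
    fn read_with_path(path: &Path) -> Result<Vec<u8>, PathError> {
        std::fs::read(path).map_err(|e| PathError { path: path.to_path_buf(), source: e })
    }

    fn main() {
        if let Err(e) = read_with_path(Path::new("model-q4k.gguf")) {
            // Prints e.g. "model-q4k.gguf: No such file or directory (os error 2)"
            eprintln!("failed to load: {e}");
        }
    }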