summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- candle-examples/examples/moondream/main.rs 11
1 file changed, 10 insertions, 1 deletion
diff --git a/candle-examples/examples/moondream/main.rs b/candle-examples/examples/moondream/main.rs
index dfd83037..c7500ed9 100644
--- a/candle-examples/examples/moondream/main.rs
+++ b/candle-examples/examples/moondream/main.rs
@@ -194,6 +194,10 @@ struct Args {
#[arg(long)]
quantized: bool,
+ /// Use f16 precision for all the computations rather than f32.
+ #[arg(long)]
+ f16: bool,
+
#[arg(long)]
model_file: Option<String>,
@@ -283,7 +287,12 @@ async fn main() -> anyhow::Result<()> {
let start = std::time::Instant::now();
let device = candle_examples::device(args.cpu)?;
let config = moondream::Config::v2();
- let dtype = if device.is_cuda() && !args.quantized {
+ let dtype = if args.quantized {
+ if args.f16 {
+ anyhow::bail!("Quantized model does not support f16");
+ }
+ DType::F32
+ } else if device.is_cuda() || args.f16 {
DType::F16
} else {
DType::F32