diff options
Diffstat (limited to 'candle-examples/examples/quantized')
-rw-r--r-- | candle-examples/examples/quantized/main.rs | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index 96344a49..3cabc3a4 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -235,6 +235,10 @@ struct Args {
     /// Group-Query Attention, use 8 for the 70B version of LLaMAv2.
     #[arg(long)]
     gqa: Option<usize>,
+
+    /// Use the (experimental) fast cuda kernels.
+    #[arg(long)]
+    fast_cuda: bool,
 }
 
 impl Args {
@@ -341,6 +345,10 @@ fn main() -> anyhow::Result<()> {
     use tracing_subscriber::prelude::*;
 
     let args = Args::parse();
+
+    #[cfg(feature = "cuda")]
+    candle::quantized::cuda::set_force_dmmv(!args.fast_cuda);
+
     let temperature = if args.temperature == 0. {
         None
     } else {