diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2024-04-01 10:00:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-01 10:00:11 +0200 |
commit | c7557b65dcccbb45e53695db71e8d7c1bfd38dc2 (patch) | |
tree | 2aa4b7e6cba34d47ac275d269b0b9a6d50b2ff8b /candle-examples/examples/quantized | |
parent | cd29c7ccd420a840d883361c290ee92d06b9b96c (diff) | |
download | candle-c7557b65dcccbb45e53695db71e8d7c1bfd38dc2.tar.gz candle-c7557b65dcccbb45e53695db71e8d7c1bfd38dc2.tar.bz2 candle-c7557b65dcccbb45e53695db71e8d7c1bfd38dc2.zip |
Switch the default to using the faster kernels. (#1978)
* Switch the default to using the faster kernels.
* Add the force-dmmv flag.
Diffstat (limited to 'candle-examples/examples/quantized')
-rw-r--r-- | candle-examples/examples/quantized/main.rs | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index 3cabc3a4..b03768ed 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -236,9 +236,9 @@ struct Args {
     #[arg(long)]
     gqa: Option<usize>,
 
-    /// Use the (experimental) fast cuda kernels.
+    /// Use the slower dmmv cuda kernel.
     #[arg(long)]
-    fast_cuda: bool,
+    force_dmmv: bool,
 }
 
 impl Args {
@@ -347,7 +347,7 @@ fn main() -> anyhow::Result<()> {
     let args = Args::parse();
 
     #[cfg(feature = "cuda")]
-    candle::quantized::cuda::set_force_dmmv(!args.fast_cuda);
+    candle::quantized::cuda::set_force_dmmv(args.force_dmmv);
 
     let temperature = if args.temperature == 0. {
         None