summary | refs | log | tree | commit | diff
path: root/candle-examples/examples/quantized
diff options
context:
space:
mode:
author: Laurent Mazare <laurent.mazare@gmail.com> 2024-04-01 10:00:11 +0200
committer: GitHub <noreply@github.com> 2024-04-01 10:00:11 +0200
commit: c7557b65dcccbb45e53695db71e8d7c1bfd38dc2 (patch)
tree: 2aa4b7e6cba34d47ac275d269b0b9a6d50b2ff8b /candle-examples/examples/quantized
parent: cd29c7ccd420a840d883361c290ee92d06b9b96c (diff)
download: candle-c7557b65dcccbb45e53695db71e8d7c1bfd38dc2.tar.gz
candle-c7557b65dcccbb45e53695db71e8d7c1bfd38dc2.tar.bz2
candle-c7557b65dcccbb45e53695db71e8d7c1bfd38dc2.zip
Switch the default to using the faster kernels. (#1978)
* Switch the default to using the faster kernels.
* Add the force-dmmv flag.
Diffstat (limited to 'candle-examples/examples/quantized')
-rw-r--r-- candle-examples/examples/quantized/main.rs | 6
1 files changed, 3 insertions, 3 deletions
diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index 3cabc3a4..b03768ed 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -236,9 +236,9 @@ struct Args {
#[arg(long)]
gqa: Option<usize>,
- /// Use the (experimental) fast cuda kernels.
+ /// Use the slower dmmv cuda kernel.
#[arg(long)]
- fast_cuda: bool,
+ force_dmmv: bool,
}
impl Args {
@@ -347,7 +347,7 @@ fn main() -> anyhow::Result<()> {
let args = Args::parse();
#[cfg(feature = "cuda")]
- candle::quantized::cuda::set_force_dmmv(!args.fast_cuda);
+ candle::quantized::cuda::set_force_dmmv(args.force_dmmv);
let temperature = if args.temperature == 0. {
None