From 122e334d0cf9c6b56adc2f6f287617141841f636 Mon Sep 17 00:00:00 2001
From: laurent <laurent.mazare@gmail.com>
Date: Thu, 29 Jun 2023 09:21:11 +0100
Subject: Simplify the pattern matching logic in the cuda backend.

---
 candle-core/examples/llama/main.rs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'candle-core/examples/llama/main.rs')

diff --git a/candle-core/examples/llama/main.rs b/candle-core/examples/llama/main.rs
index eb681f4b..3fc893e3 100644
--- a/candle-core/examples/llama/main.rs
+++ b/candle-core/examples/llama/main.rs
@@ -487,6 +487,7 @@ fn main() -> Result<()> {
     let mut rng = thread_rng();
     let start_gen = std::time::Instant::now();
     for index in 0..args.sample_len {
+        let start_gen = std::time::Instant::now();
         let ctxt = &tokens[tokens.len().saturating_sub(CONTEXT_SIZE)..];
         let input = Tensor::new(ctxt, &device)?;
         let logits = llama.forward(&input, &freqs_cis)?;
@@ -496,6 +497,7 @@ fn main() -> Result<()> {
         let next_token = distr.sample(&mut rng) as u32;
         tokens.push(next_token);
         new_tokens.push(next_token);
+        println!("> {:?}", start_gen.elapsed());
         println!(
             "{} token: {} '{}'",
             index + 1,
-- 
cgit v1.2.3