From 122e334d0cf9c6b56adc2f6f287617141841f636 Mon Sep 17 00:00:00 2001 From: laurent Date: Thu, 29 Jun 2023 09:21:11 +0100 Subject: Simplify the pattern matching logic in the cuda backend. --- candle-core/examples/llama/main.rs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'candle-core/examples/llama/main.rs') diff --git a/candle-core/examples/llama/main.rs b/candle-core/examples/llama/main.rs index eb681f4b..3fc893e3 100644 --- a/candle-core/examples/llama/main.rs +++ b/candle-core/examples/llama/main.rs @@ -487,6 +487,7 @@ fn main() -> Result<()> { let mut rng = thread_rng(); let start_gen = std::time::Instant::now(); for index in 0..args.sample_len { + let start_gen = std::time::Instant::now(); let ctxt = &tokens[tokens.len().saturating_sub(CONTEXT_SIZE)..]; let input = Tensor::new(ctxt, &device)?; let logits = llama.forward(&input, &freqs_cis)?; @@ -496,6 +497,7 @@ fn main() -> Result<()> { let next_token = distr.sample(&mut rng) as u32; tokens.push(next_token); new_tokens.push(next_token); + println!("> {:?}", start_gen.elapsed()); println!( "{} token: {} '{}'", index + 1, -- cgit v1.2.3