| field | value | date |
|---|---|---|
| author | laurent <laurent.mazare@gmail.com> | 2023-06-29 19:07:52 +0100 |
| committer | laurent <laurent.mazare@gmail.com> | 2023-06-29 19:07:52 +0100 |
| commit | b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db | |
| tree | 3dc788d3a1f1c7dda9dbb3a931a54f5a48df4cf6 /candle-core/examples/llama/main.rs | |
| parent | 3232df9458e41c7414d51459b23e493b75a3949c | |
Only narrow when needed + deactivate the kv cache.
Diffstat (limited to 'candle-core/examples/llama/main.rs')
-rw-r--r-- | candle-core/examples/llama/main.rs | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/candle-core/examples/llama/main.rs b/candle-core/examples/llama/main.rs
index 9d70921c..5a8a15d3 100644
--- a/candle-core/examples/llama/main.rs
+++ b/candle-core/examples/llama/main.rs
@@ -24,7 +24,7 @@ mod var_store;
 mod weights;
 
 const CONTEXT_SIZE: usize = 512;
-const USE_KV_CACHE: bool = true;
+const USE_KV_CACHE: bool = false;
 const START_PROMPT: &str = r"
 EDWARD:
 I wonder how our princely father 'scaped,
@@ -268,7 +268,11 @@ impl CausalSelfAttention {
 
     fn apply_rotary_emb(&self, x: &Tensor, freqs_cis: &Tensor) -> Result<Tensor> {
         let mut dims = x.dims().to_vec();
-        let freqs_cis = freqs_cis.narrow(1, freqs_cis.dims()[1] - dims[1], dims[1])?;
+        let freqs_cis = if dims[1] < CONTEXT_SIZE {
+            freqs_cis.narrow(1, CONTEXT_SIZE - dims[1], dims[1])?
+        } else {
+            freqs_cis.clone()
+        };
         let v = dims.pop().unwrap();
         dims.push(v / 2);
         dims.push(2);
```
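For readers tracing the logic, here is a minimal, self-contained sketch of the conditional-narrow pattern this commit introduces. It is written against the present-day `candle_core` crate; the crate name, the `Tensor::zeros`/`Tensor::narrow` calls, and the `(1, CONTEXT_SIZE, 64)` stand-in shape for `freqs_cis` are assumptions for illustration, not taken from this 2023 snapshot.

```rust
// Sketch only: assumes the current candle_core API, not the exact
// crate layout at commit b50bd880.
use candle_core::{DType, Device, Result, Tensor};

const CONTEXT_SIZE: usize = 512;

/// Slice the precomputed rotary-embedding table only when the current
/// sequence is shorter than the full context; otherwise reuse it as-is.
fn select_freqs(freqs_cis: &Tensor, seq_len: usize) -> Result<Tensor> {
    if seq_len < CONTEXT_SIZE {
        // Keep the last `seq_len` positions along dimension 1,
        // mirroring the `narrow` call added in the diff above.
        freqs_cis.narrow(1, CONTEXT_SIZE - seq_len, seq_len)
    } else {
        // Sequence already spans the full context: no narrowing needed.
        // Cloning a Tensor is cheap since the storage is shared.
        Ok(freqs_cis.clone())
    }
}

fn main() -> Result<()> {
    let device = Device::Cpu;
    // Hypothetical shape standing in for the real freqs_cis layout.
    let freqs_cis = Tensor::zeros((1, CONTEXT_SIZE, 64), DType::F32, &device)?;
    let sliced = select_freqs(&freqs_cis, 10)?;
    println!("{:?}", sliced.dims()); // prints [1, 10, 64]
    Ok(())
}
```

When the sequence already fills the whole context window, the `else` branch skips the redundant `narrow` call and reuses the precomputed table directly, which appears to be what the commit title means by "only narrow when needed".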