Only narrow when needed + deactivate the kv cache.

author: laurent <laurent.mazare@gmail.com> 2023-06-29 19:07:52 +0100
committer: laurent <laurent.mazare@gmail.com> 2023-06-29 19:07:52 +0100
commit: b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db (patch)
tree: 3dc788d3a1f1c7dda9dbb3a931a54f5a48df4cf6 /candle-core/examples/llama/main.rs
parent: 3232df9458e41c7414d51459b23e493b75a3949c (diff)
download: candle-b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db.tar.gz
candle-b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db.tar.bz2
candle-b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db.zip
1 files changed, 6 insertions, 2 deletions
diff --git a/candle-core/examples/llama/main.rs b/candle-core/examples/llama/main.rs
index 9d70921c..5a8a15d3 100644
--- a/candle-core/examples/llama/main.rs
+++ b/candle-core/examples/llama/main.rs
@@ -24,7 +24,7 @@ mod var_store;
 mod weights;
 
 const CONTEXT_SIZE: usize = 512;
-const USE_KV_CACHE: bool = true;
+const USE_KV_CACHE: bool = false;
 const START_PROMPT: &str = r"
 EDWARD:
 I wonder how our princely father 'scaped,
@@ -268,7 +268,11 @@ impl CausalSelfAttention {
 
     fn apply_rotary_emb(&self, x: &Tensor, freqs_cis: &Tensor) -> Result<Tensor> {
         let mut dims = x.dims().to_vec();
-        let freqs_cis = freqs_cis.narrow(1, freqs_cis.dims()[1] - dims[1], dims[1])?;
+        let freqs_cis = if dims[1] < CONTEXT_SIZE {
+            freqs_cis.narrow(1, CONTEXT_SIZE - dims[1], dims[1])?
+        } else {
+            freqs_cis.clone()
+        };
         let v = dims.pop().unwrap();
         dims.push(v / 2);
         dims.push(2);
author	laurent <laurent.mazare@gmail.com>	2023-06-29 19:07:52 +0100
committer	laurent <laurent.mazare@gmail.com>	2023-06-29 19:07:52 +0100
commit	b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db (patch)
tree	3dc788d3a1f1c7dda9dbb3a931a54f5a48df4cf6 /candle-core/examples/llama/main.rs
parent	3232df9458e41c7414d51459b23e493b75a3949c (diff)
download	candle-b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db.tar.gz candle-b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db.tar.bz2 candle-b50bd880ce472d7c20d09d6e5c7f49fcdf95f8db.zip