summary | refs | log | tree | commit | diff
path: root/candle-examples/examples/llama2-c/main.rs
diff options
context:
space:
mode:
author: Laurent Mazare <laurent.mazare@gmail.com> 2023-07-31 20:43:57 +0100
committer: GitHub <noreply@github.com> 2023-07-31 20:43:57 +0100
commit: 6b98b66eb36a484f1a65fbc1c528a8e0b90a1419 (patch)
tree: 3604078a30e0fa634bd3cf2d56096a847bb8b46b /candle-examples/examples/llama2-c/main.rs
parent: 9ae1f6afeecca7b424b0943d591809481dc88dbc (diff)
download: candle-6b98b66eb36a484f1a65fbc1c528a8e0b90a1419.tar.gz
candle-6b98b66eb36a484f1a65fbc1c528a8e0b90a1419.tar.bz2
candle-6b98b66eb36a484f1a65fbc1c528a8e0b90a1419.zip
Remove the end of text tokens. (#289)
Diffstat (limited to 'candle-examples/examples/llama2-c/main.rs')
-rw-r--r-- candle-examples/examples/llama2-c/main.rs 3
1 file changed, 2 insertions, 1 deletion
diff --git a/candle-examples/examples/llama2-c/main.rs b/candle-examples/examples/llama2-c/main.rs
index 65641b3c..d710652f 100644
--- a/candle-examples/examples/llama2-c/main.rs
+++ b/candle-examples/examples/llama2-c/main.rs
@@ -266,7 +266,8 @@ fn run_eval(tokenizer: Tokenizer, config_path: &std::path::PathBuf, args: Args)
let file = std::io::BufReader::new(file);
let mut tokens = vec![];
for line in file.lines() {
- let line = tokenizer.encode(line?, false).map_err(E::msg)?;
+ let line = line?.replace("<|endoftext|>", "");
+ let line = tokenizer.encode(line, false).map_err(E::msg)?;
tokens.push(line.get_ids().to_vec())
}
let tokens = tokens.concat();