summary refs log tree commit diff
diff options
context:
space:
mode:
author Jani Monoses <jani.monoses@gmail.com> 2024-03-14 22:01:36 +0200
committer GitHub <noreply@github.com> 2024-03-14 21:01:36 +0100
commit e1f9c3776d4e3b77cc2958c835314e680ac6f54f (patch)
tree 4b315c3da9834972156c001029fdfeb422bfba64
parent 3318fe30fb3d8c3b92ba404a2a33de81c2731ad9 (diff)
download candle-e1f9c3776d4e3b77cc2958c835314e680ac6f54f.tar.gz
candle-e1f9c3776d4e3b77cc2958c835314e680ac6f54f.tar.bz2
candle-e1f9c3776d4e3b77cc2958c835314e680ac6f54f.zip
StableLM-2 models were updated to use GPT-2 tokenization. (#1847)
-rw-r--r-- candle-examples/examples/stable-lm/README.md 5
-rw-r--r-- candle-examples/examples/stable-lm/main.rs 9
2 files changed, 1 insertion, 13 deletions
diff --git a/candle-examples/examples/stable-lm/README.md b/candle-examples/examples/stable-lm/README.md
index 546124a2..6f5e7597 100644
--- a/candle-examples/examples/stable-lm/README.md
+++ b/candle-examples/examples/stable-lm/README.md
@@ -10,11 +10,6 @@ order to be able to use it.
Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants.
-StableLM-2 uses a Tiktoken based GPT-3.5/GPT-4 tokenizer not supported by
-Candle, so to run it you can download a somewhat compatible
-[tokenizer.json](https://huggingface.co/Xenova/gpt-4/resolve/main/tokenizer.json?download=true)
-and pass it via the --tokenizer-file argument.
-
## Running some example
```bash
diff --git a/candle-examples/examples/stable-lm/main.rs b/candle-examples/examples/stable-lm/main.rs
index abe7020c..f467903a 100644
--- a/candle-examples/examples/stable-lm/main.rs
+++ b/candle-examples/examples/stable-lm/main.rs
@@ -239,14 +239,7 @@ fn main() -> Result<()> {
));
let tokenizer_filename = match args.tokenizer_file {
Some(file) => std::path::PathBuf::from(file),
- None => match args.which {
- Which::V1Orig | Which::V1 | Which::V1Zephyr | Which::Code => {
- repo.get("tokenizer.json")?
- }
- Which::V2 | Which::V2Zephyr => api
- .model("lmz/candle-stablelm".to_string())
- .get("tokenizer-gpt4.json")?,
- },
+ None => repo.get("tokenizer.json")?,
};
let filenames = match args.weight_files {
Some(files) => files