diff options
author | Jani Monoses <jani.monoses@gmail.com> | 2024-03-14 22:01:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-14 21:01:36 +0100 |
commit | e1f9c3776d4e3b77cc2958c835314e680ac6f54f (patch) | |
tree | 4b315c3da9834972156c001029fdfeb422bfba64 | |
parent | 3318fe30fb3d8c3b92ba404a2a33de81c2731ad9 (diff) | |
download | candle-e1f9c3776d4e3b77cc2958c835314e680ac6f54f.tar.gz candle-e1f9c3776d4e3b77cc2958c835314e680ac6f54f.tar.bz2 candle-e1f9c3776d4e3b77cc2958c835314e680ac6f54f.zip |
StableLM-2 models were updated to use GPT-2 tokenization. (#1847)
-rw-r--r-- | candle-examples/examples/stable-lm/README.md | 5 | ||||
-rw-r--r-- | candle-examples/examples/stable-lm/main.rs | 9 |
2 files changed, 1 insertion, 13 deletions
diff --git a/candle-examples/examples/stable-lm/README.md b/candle-examples/examples/stable-lm/README.md index 546124a2..6f5e7597 100644 --- a/candle-examples/examples/stable-lm/README.md +++ b/candle-examples/examples/stable-lm/README.md @@ -10,11 +10,6 @@ order to be able to use it. Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants. -StableLM-2 uses a Tiktoken based GPT-3.5/GPT-4 tokenizer not supported by -Candle, so to run it you can download a somewhat compatible -[tokenizer.json](https://huggingface.co/Xenova/gpt-4/resolve/main/tokenizer.json?download=true) -and pass it via the --tokenizer-file argument. - ## Running some example ```bash diff --git a/candle-examples/examples/stable-lm/main.rs b/candle-examples/examples/stable-lm/main.rs index abe7020c..f467903a 100644 --- a/candle-examples/examples/stable-lm/main.rs +++ b/candle-examples/examples/stable-lm/main.rs @@ -239,14 +239,7 @@ fn main() -> Result<()> { )); let tokenizer_filename = match args.tokenizer_file { Some(file) => std::path::PathBuf::from(file), - None => match args.which { - Which::V1Orig | Which::V1 | Which::V1Zephyr | Which::Code => { - repo.get("tokenizer.json")? - } - Which::V2 | Which::V2Zephyr => api - .model("lmz/candle-stablelm".to_string()) - .get("tokenizer-gpt4.json")?, - }, + None => repo.get("tokenizer.json")?, }; let filenames = match args.weight_files { Some(files) => files |