diff options
Diffstat (limited to 'candle-transformers/src/models/mixformer.rs')
-rw-r--r-- | candle-transformers/src/models/mixformer.rs | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/candle-transformers/src/models/mixformer.rs b/candle-transformers/src/models/mixformer.rs
index 33aefbfe..e822ca14 100644
--- a/candle-transformers/src/models/mixformer.rs
+++ b/candle-transformers/src/models/mixformer.rs
@@ -73,6 +73,23 @@ impl Config {
             pad_vocab_size_multiple: 64,
         }
     }
+
+    // https://huggingface.co/teknium/Phi-Hermes-1.3B/blob/main/config.json
+    pub fn phi_hermes_1_3b() -> Self {
+        Self {
+            vocab_size: 50304,
+            n_positions: 2048,
+            n_embd: 2048,
+            n_layer: 24,
+            n_inner: None,
+            n_head: 32,
+            rotary_dim: usize::min(32, 2048 / 32),
+            activation_function: Activation::NewGelu,
+            layer_norm_epsilon: 1e-5,
+            tie_word_embeddings: false,
+            pad_vocab_size_multiple: 64,
+        }
+    }
 }
 
 #[derive(Debug, Clone)]