Diffstat (limited to 'candle-transformers/src/models/mixformer.rs')
-rw-r--r--  candle-transformers/src/models/mixformer.rs | 17 +++++++++++++++++
1 file changed, 17 insertions(+), 0 deletions(-)
diff --git a/candle-transformers/src/models/mixformer.rs b/candle-transformers/src/models/mixformer.rs
index 33aefbfe..e822ca14 100644
--- a/candle-transformers/src/models/mixformer.rs
+++ b/candle-transformers/src/models/mixformer.rs
@@ -73,6 +73,23 @@ impl Config {
             pad_vocab_size_multiple: 64,
         }
     }
+
+    // https://huggingface.co/teknium/Phi-Hermes-1.3B/blob/main/config.json
+    pub fn phi_hermes_1_3b() -> Self {
+        Self {
+            vocab_size: 50304,
+            n_positions: 2048,
+            n_embd: 2048,
+            n_layer: 24,
+            n_inner: None,
+            n_head: 32,
+            rotary_dim: usize::min(32, 2048 / 32),
+            activation_function: Activation::NewGelu,
+            layer_norm_epsilon: 1e-5,
+            tie_word_embeddings: false,
+            pad_vocab_size_multiple: 64,
+        }
+    }
 }
 
 #[derive(Debug, Clone)]
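
For context, a minimal sketch of how the new preset could be exercised, assuming the MixFormerSequentialForCausalLM wrapper defined further down in this file; the weight file name and dtype below are illustrative assumptions, not part of this change:

// Usage sketch (not part of this commit): run a dummy forward pass with the
// new Phi-Hermes-1.3B preset. The weight file name and dtype are assumptions.
use candle::{DType, Device, Tensor};
use candle_nn::VarBuilder;
use candle_transformers::models::mixformer::{Config, MixFormerSequentialForCausalLM};

fn main() -> candle::Result<()> {
    let device = Device::Cpu;
    let config = Config::phi_hermes_1_3b();
    // Safety: the weights file must not be mutated while it is memory-mapped.
    let vb = unsafe {
        VarBuilder::from_mmaped_safetensors(&["model.safetensors"], DType::F32, &device)?
    };
    let mut model = MixFormerSequentialForCausalLM::new(&config, vb)?;
    // Single dummy token id, shape (batch = 1, seq_len = 1).
    let input = Tensor::new(&[[1u32]], &device)?;
    let logits = model.forward(&input)?;
    println!("logits: {:?}", logits.shape());
    Ok(())
}

Note that, as in the existing presets in this impl block, rotary_dim is usize::min(32, n_embd / n_head): with n_embd = 2048 and n_head = 32 the per-head dimension is 64, so the rotary embedding covers the first 32 dimensions of each head.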