diff options
Diffstat (limited to 'candle-transformers/src/models/quantized_phi.rs')
-rw-r--r-- | candle-transformers/src/models/quantized_phi.rs | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/candle-transformers/src/models/quantized_phi.rs b/candle-transformers/src/models/quantized_phi.rs index 0ebf7f4d..b874ad94 100644 --- a/candle-transformers/src/models/quantized_phi.rs +++ b/candle-transformers/src/models/quantized_phi.rs @@ -1,3 +1,20 @@ +//! Phi2 model implementation with quantization support. +//! +//! Phi2 is a 2.7B parameter language model using scaled-up Transformer decoder architecture. +//! This implementation provides quantization for reduced memory and compute usage. +//! +//! Key characteristics: +//! - Partial attention with learned mixing to reduce quadratic costs +//! - Layer reuse for improved inference efficiency +//! - Linear transformations with scalar mixing +//! - Rotary positional embeddings (RoPE) +//! - Support for 8-bit quantization +//! +//! References: +//! - [Phi2 Paper](https://arxiv.org/abs/2309.05463) +//! - [Model Card](https://huggingface.co/microsoft/phi-2) +//! + use std::collections::HashMap; use candle::quantized::gguf_file; |