summaryrefslogtreecommitdiff
path: root/candle-transformers/src/models/quantized_phi.rs
diff options
context:
space:
mode:
Diffstat (limited to 'candle-transformers/src/models/quantized_phi.rs')
-rw-r--r--  candle-transformers/src/models/quantized_phi.rs  17
1 file changed, 17 insertions, 0 deletions
diff --git a/candle-transformers/src/models/quantized_phi.rs b/candle-transformers/src/models/quantized_phi.rs
index 0ebf7f4d..b874ad94 100644
--- a/candle-transformers/src/models/quantized_phi.rs
+++ b/candle-transformers/src/models/quantized_phi.rs
@@ -1,3 +1,20 @@
+//! Phi2 model implementation with quantization support.
+//!
+//! Phi2 is a 2.7B parameter language model using a scaled-up Transformer decoder architecture.
+//! This implementation provides quantization for reduced memory and compute usage.
+//!
+//! Key characteristics:
+//! - Partial attention with learned mixing to reduce quadratic costs
+//! - Layer reuse for improved inference efficiency
+//! - Linear transformations with scalar mixing
+//! - Rotary positional embeddings (RoPE)
+//! - Support for 8-bit quantization
+//!
+//! References:
+//! - [Phi2 Paper](https://arxiv.org/abs/2309.05463)
+//! - [Model Card](https://huggingface.co/microsoft/phi-2)
+//!
+
use std::collections::HashMap;
use candle::quantized::gguf_file;