diff options
Diffstat (limited to 'candle-transformers/src/models/quantized_rwkv_v5.rs')
-rw-r--r-- | candle-transformers/src/models/quantized_rwkv_v5.rs | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/candle-transformers/src/models/quantized_rwkv_v5.rs b/candle-transformers/src/models/quantized_rwkv_v5.rs
index c41d7b4e..cc5204bf 100644
--- a/candle-transformers/src/models/quantized_rwkv_v5.rs
+++ b/candle-transformers/src/models/quantized_rwkv_v5.rs
@@ -1,3 +1,20 @@
+//! RWKV v5 model implementation with quantization support.
+//!
+//! RWKV v5 is an attention-free language model optimized for efficiency.
+//! This implementation provides quantization for reduced memory and compute.
+//!
+//! Key characteristics:
+//! - Linear attention mechanism
+//! - GroupNorm layer normalization
+//! - Time-mixing layers
+//! - State-based sequential processing
+//! - Support for 8-bit quantization
+//!
+//! References:
+//! - [RWKV Model](https://github.com/BlinkDL/RWKV-LM)
+//! - [RWKV v5 Architecture](https://www.rwkv.com/v5)
+//!
+
 use crate::{
     quantized_nn::{layer_norm, linear_no_bias as linear, Embedding, Linear},
     quantized_var_builder::VarBuilder,