From f689ce5d39c6f1475dfc71503288ea2905c8f685 Mon Sep 17 00:00:00 2001
From: zachcp
Date: Fri, 15 Nov 2024 02:30:15 -0500
Subject: Documentation Pass for Models (#2617)

* links in chinese_clip
* links for clip model
* add mod docs for flux and llava
* module doc for MMDIT and MIMI
* add docs for a few more models
* mod docs for bert naser and beit
* add module docs for convmixer colpali codegeex and chatglm
* add another series of moddocs
* add fastvit-llama2_c
* module docs mamba -> mobileone
* module docs from moondream-phi3
* mod docs for quantized and qwen
* update to yi
* fix long names
* Update llama2_c.rs
* Update llama2_c_weights.rs
* Fix the link for mimi + tweaks

---------

Co-authored-by: Laurent Mazare
---
 candle-transformers/src/models/persimmon.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/candle-transformers/src/models/persimmon.rs b/candle-transformers/src/models/persimmon.rs
index afee7c83..0996decf 100644
--- a/candle-transformers/src/models/persimmon.rs
+++ b/candle-transformers/src/models/persimmon.rs
@@ -1,3 +1,19 @@
+//! Persimmon Model
+//!
+//! A transformer language model for efficient inference and general-purpose tasks. See Persimmon model details at:
+//! - [Hugging Face](https://huggingface.co/adept/persimmon-8b-base)
+//!
+//! The model uses a standard transformer architecture with:
+//! - Layer normalization for Q/K attention
+//! - RoPE embeddings with partial rotary factor
+//! - ReLU activation
+//! - Separate number of attention heads and KV heads
+//!
+//! References:
+//! - [Hugging Face Implementation](https://github.com/huggingface/transformers/blob/main/src/transformers/models/persimmon/modeling_persimmon.py)
+//! - [Persimmon Config](https://github.com/huggingface/transformers/blob/main/src/transformers/models/persimmon/configuration_persimmon.py)
+//!
 use candle::DType;
 use serde::Deserialize;
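The "RoPE embeddings with partial rotary factor" bullet in the new module docs means that rotary position embeddings are applied to only the first fraction of each attention head's feature dimensions, while the remaining features pass through unchanged. The sketch below illustrates that mechanic with plain candle tensor ops. It is a minimal, self-contained example rather than the candle-transformers persimmon implementation: the function names (`rotate_half`, `partial_rope`), the toy shapes, and the dummy cos/sin tables are illustrative assumptions, and `candle` here refers to the candle-core crate as it is aliased inside this repository.

```rust
use candle::{DType, Device, Result, Tensor, D};

// Rotate the last dimension by swapping its two halves and negating the
// second half (the GPT-NeoX style rotation used with rotary embeddings).
fn rotate_half(x: &Tensor) -> Result<Tensor> {
    let last = x.dim(D::Minus1)?;
    let x1 = x.narrow(D::Minus1, 0, last / 2)?;
    let neg_x2 = x.narrow(D::Minus1, last / 2, last / 2)?.neg()?;
    Tensor::cat(&[&neg_x2, &x1], D::Minus1)
}

// Apply rotary embeddings to only the first `rotary_dim` features of each
// head and pass the remaining `head_dim - rotary_dim` features through
// unchanged: the "partial rotary factor" idea.
fn partial_rope(x: &Tensor, cos: &Tensor, sin: &Tensor, rotary_dim: usize) -> Result<Tensor> {
    // x: (batch, heads, seq, head_dim); cos/sin: (seq, rotary_dim), broadcast over batch/heads.
    let head_dim = x.dim(D::Minus1)?;
    let x_rot = x.narrow(D::Minus1, 0, rotary_dim)?;
    let x_pass = x.narrow(D::Minus1, rotary_dim, head_dim - rotary_dim)?;
    let x_rot = (x_rot.broadcast_mul(cos)? + rotate_half(&x_rot)?.broadcast_mul(sin)?)?;
    Tensor::cat(&[&x_rot, &x_pass], D::Minus1)
}

fn main() -> Result<()> {
    let dev = Device::Cpu;
    let (b, h, t, d) = (1usize, 2usize, 4usize, 8usize);
    let rotary_dim = 4usize; // i.e. head_dim scaled by a partial rotary factor of 0.5
    let x = Tensor::randn(0f32, 1f32, (b, h, t, d), &dev)?;
    // Dummy cos/sin tables; a real model computes them from positions and inverse frequencies.
    let cos = Tensor::ones((t, rotary_dim), DType::F32, &dev)?;
    let sin = Tensor::zeros((t, rotary_dim), DType::F32, &dev)?;
    let y = partial_rope(&x, &cos, &sin, rotary_dim)?;
    println!("output dims: {:?}", y.dims()); // [1, 2, 4, 8]
    Ok(())
}
```

In the Hugging Face configuration linked above this fraction is exposed as `partial_rotary_factor` (0.5 in the reference config), so `rotary_dim` in the sketch would correspond to `head_dim * partial_rotary_factor`.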