diff options
Diffstat (limited to 'candle-transformers/src/models/mobileclip.rs')
-rw-r--r-- | candle-transformers/src/models/mobileclip.rs | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/candle-transformers/src/models/mobileclip.rs b/candle-transformers/src/models/mobileclip.rs
index 45a5dbad..f0baf9e1 100644
--- a/candle-transformers/src/models/mobileclip.rs
+++ b/candle-transformers/src/models/mobileclip.rs
@@ -1,3 +1,19 @@
+//! Mobile CLIP model, combining a lightweight vision encoder with a text encoder
+//!
+//! A mobile-optimized CLIP implementation that uses:
+//! - FastViT as the vision encoder
+//! - OpenCLIP text encoder
+//! - Projection layers to align the feature spaces
+//!
+//! See model details at:
+//! - [FastViT](https://arxiv.org/abs/2303.14189)
+//! - [OpenCLIP](https://github.com/mlfoundations/open_clip)
+//!
+//! References:
+//! - [MobileVLM](https://huggingface.co/mobileVLM)
+//! - [MetaCLIP](https://arxiv.org/abs/2309.16671)
+//!
+
 use super::fastvit;
 use super::openclip::text_model;
 use candle::{Result, Tensor, D};