From 0ebb38813b152432249dde6f64004f682b50975b Mon Sep 17 00:00:00 2001 From: Laurent Mazare Date: Sun, 29 Sep 2024 17:53:52 +0200 Subject: Paligemma siglip vision config (#2518) * Add the paligemma siglip vision config. * More paligemma configs. --- candle-transformers/src/models/siglip.rs | 54 ++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'candle-transformers') diff --git a/candle-transformers/src/models/siglip.rs b/candle-transformers/src/models/siglip.rs index a3280a86..63b6635d 100644 --- a/candle-transformers/src/models/siglip.rs +++ b/candle-transformers/src/models/siglip.rs @@ -83,6 +83,60 @@ impl TransformerConfig for VisionConfig { } } +impl VisionConfig { + pub fn paligemma_3b_224() -> Self { + Self { + // https://huggingface.co/google/paligemma-3b-pt-224/blob/main/config.json + patch_size: 14, + num_attention_heads: 16, + num_hidden_layers: 27, + hidden_size: 1152, + intermediate_size: 4304, + image_size: 224, // num_image_tokens: (224 / 14)^2 = 256 + // Default values. + num_channels: 3, + hidden_act: candle_nn::Activation::GeluPytorchTanh, + layer_norm_eps: 1e-6, + } + } + + pub fn paligemma_3b_448() -> Self { + Self { + // https://huggingface.co/google/paligemma-3b-pt-448/blob/main/config.json + patch_size: 14, + num_attention_heads: 16, + num_hidden_layers: 27, + hidden_size: 1152, + intermediate_size: 4304, + image_size: 448, // num_image_tokens: (448 / 14)^2 = 1024 + // Default values. + num_channels: 3, + hidden_act: candle_nn::Activation::GeluPytorchTanh, + layer_norm_eps: 1e-6, + } + } + + pub fn paligemma_3b_896() -> Self { + Self { + // https://huggingface.co/google/paligemma-3b-pt-448/blob/main/config.json + patch_size: 14, + num_attention_heads: 16, + num_hidden_layers: 27, + hidden_size: 1152, + intermediate_size: 4304, + image_size: 896, // num_image_tokens: (896 / 14)^2 = 4096 + // Default values. + num_channels: 3, + hidden_act: candle_nn::Activation::GeluPytorchTanh, + layer_norm_eps: 1e-6, + } + } + + pub fn num_patches(&self) -> usize { + (self.image_size / self.patch_size).pow(2) + } +} + // https://github.com/huggingface/transformers/blob/2e24ee4dfa39cc0bc264b89edbccc373c8337086/src/transformers/models/siglip/configuration_siglip.py#L228 #[derive(serde::Deserialize, Clone, Debug)] pub struct Config { -- cgit v1.2.3