summaryrefslogtreecommitdiff
path: root/candle-examples/examples/efficientvit
diff options
context:
space:
mode:
authorJani Monoses <jani.monoses@gmail.com>2024-03-01 09:53:52 +0200
committerGitHub <noreply@github.com>2024-03-01 08:53:52 +0100
commit979deaca07708394349b72707c7db446cbd42e23 (patch)
tree4b88b9465359aa107372463653c413036dae3945 /candle-examples/examples/efficientvit
parentb485e4b6ee3362eeab0f48ddf05d257cc0e84c63 (diff)
downloadcandle-979deaca07708394349b72707c7db446cbd42e23.tar.gz
candle-979deaca07708394349b72707c7db446cbd42e23.tar.bz2
candle-979deaca07708394349b72707c7db446cbd42e23.zip
EfficientVit (MSRA) model (#1783)
* Add EfficientVit (Microsoft Research Asia) model. * Mention models in README
Diffstat (limited to 'candle-examples/examples/efficientvit')
-rw-r--r--candle-examples/examples/efficientvit/README.md20
-rw-r--r--candle-examples/examples/efficientvit/main.rs99
2 files changed, 119 insertions, 0 deletions
diff --git a/candle-examples/examples/efficientvit/README.md b/candle-examples/examples/efficientvit/README.md
new file mode 100644
index 00000000..7a989a25
--- /dev/null
+++ b/candle-examples/examples/efficientvit/README.md
@@ -0,0 +1,20 @@
+# candle-efficientvit
+
+[EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention](https://arxiv.org/abs/2305.07027).
+
+This candle implementation uses a pre-trained EfficientViT (from Microsoft Research Asia) network for inference.
+The classification head has been trained on the ImageNet dataset and returns the probabilities for the top-5 classes.
+
+## Running an example
+
+```
+$ cargo run --example efficientvit --release -- --image candle-examples/examples/yolo-v8/assets/bike.jpg --which m1
+
+loaded image Tensor[dims 3, 224, 224; f32]
+model built
+mountain bike, all-terrain bike, off-roader: 69.80%
+unicycle, monocycle : 13.03%
+bicycle-built-for-two, tandem bicycle, tandem: 9.28%
+crash helmet : 2.25%
+alp : 0.46%
+```
diff --git a/candle-examples/examples/efficientvit/main.rs b/candle-examples/examples/efficientvit/main.rs
new file mode 100644
index 00000000..1eb80a2d
--- /dev/null
+++ b/candle-examples/examples/efficientvit/main.rs
@@ -0,0 +1,99 @@
+#[cfg(feature = "mkl")]
+extern crate intel_mkl_src;
+
+#[cfg(feature = "accelerate")]
+extern crate accelerate_src;
+
+use clap::{Parser, ValueEnum};
+
+use candle::{DType, IndexOp, D};
+use candle_nn::{Module, VarBuilder};
+use candle_transformers::models::efficientvit;
+
+// Selects which EfficientViT variant to run, from M0 through M5.
+// Each variant maps to its own pretrained model name and its own
+// `efficientvit::Config` constructor (see `impl Which`).
+// NOTE(review): plain `//` comments are used on purpose; clap's `ValueEnum`
+// derive may surface `///` doc comments in `--help` output. Confirm before
+// converting these to doc comments.
+#[derive(Clone, Copy, Debug, ValueEnum)]
+enum Which {
+ M0,
+ M1,
+ M2,
+ M3,
+ M4,
+ M5,
+}
+
+impl Which {
+    /// Returns the model identifier `timm/efficientvit_<variant>.r224_in1k`
+    /// for this variant. `main` hands it to the Hugging Face Hub API to locate
+    /// the pretrained weights.
+    fn model_filename(&self) -> String {
+        let variant = match *self {
+            Which::M0 => "m0",
+            Which::M1 => "m1",
+            Which::M2 => "m2",
+            Which::M3 => "m3",
+            Which::M4 => "m4",
+            Which::M5 => "m5",
+        };
+        format!("timm/efficientvit_{variant}.r224_in1k")
+    }
+
+    /// Returns the `efficientvit::Config` built by the constructor that matches
+    /// this variant.
+    fn config(&self) -> efficientvit::Config {
+        use efficientvit::Config;
+
+        match *self {
+            Which::M0 => Config::m0(),
+            Which::M1 => Config::m1(),
+            Which::M2 => Config::m2(),
+            Which::M3 => Config::m3(),
+            Which::M4 => Config::m4(),
+            Which::M5 => Config::m5(),
+        }
+    }
+}
+
+/// Classify an image with a pre-trained EfficientViT model.
+#[derive(Parser)]
+struct Args {
+    /// Path to a local safetensors weights file. When omitted, the weights for
+    /// the variant selected with `--which` are fetched from the Hugging Face Hub.
+    #[arg(long)]
+    model: Option<String>,
+
+    /// Path to the image to classify.
+    #[arg(long)]
+    image: String,
+
+    /// Run on CPU rather than on GPU.
+    #[arg(long)]
+    cpu: bool,
+
+    /// EfficientViT variant to use.
+    #[arg(value_enum, long, default_value_t=Which::M0)]
+    which: Which,
+}
+
+/// Classifies the image named on the command line with an EfficientViT model
+/// and prints the five most probable ImageNet classes with their probabilities.
+///
+/// Weights come from the `--model` file when given; otherwise they are fetched
+/// through the Hugging Face Hub API for the variant selected with `--which`.
+///
+/// # Errors
+///
+/// Propagates any failure from device selection, image loading, weight
+/// download or loading, model construction, or the forward pass.
+pub fn main() -> anyhow::Result<()> {
+    let args = Args::parse();
+
+    let device = candle_examples::device(args.cpu)?;
+
+    // `load_image224` is not told which device to use, so move the tensor onto
+    // the selected device explicitly. Without this the forward pass would mix
+    // the image's device with the device the weights were loaded on.
+    let image = candle_examples::imagenet::load_image224(args.image)?.to_device(&device)?;
+    println!("loaded image {image:?}");
+
+    let model_file = match args.model {
+        None => {
+            let model_name = args.which.model_filename();
+            let api = hf_hub::api::sync::Api::new()?;
+            let api = api.model(model_name);
+            api.get("model.safetensors")?
+        }
+        Some(model) => model.into(),
+    };
+
+    // SAFETY: the weights file is memory-mapped, which is only sound while the
+    // file is not modified or truncated. This example assumes nothing else
+    // writes to the file while it runs.
+    let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[model_file], DType::F32, &device)? };
+    let model = efficientvit::efficientvit(&args.which.config(), 1000, vb)?;
+    println!("model built");
+
+    // Add a leading batch dimension for the forward pass, then take the single
+    // row of class probabilities back out.
+    let logits = model.forward(&image.unsqueeze(0)?)?;
+    let prs = candle_nn::ops::softmax(&logits, D::Minus1)?
+        .i(0)?
+        .to_vec1::<f32>()?;
+
+    // Pair each probability with its class index and sort in descending order.
+    let mut prs = prs.iter().enumerate().collect::<Vec<_>>();
+    prs.sort_by(|(_, p1), (_, p2)| p2.total_cmp(p1));
+    for &(category_idx, pr) in prs.iter().take(5) {
+        println!(
+            "{:24}: {:.2}%",
+            candle_examples::imagenet::CLASSES[category_idx],
+            100. * pr
+        );
+    }
+    Ok(())
+}