author	Laurent Mazare <laurent.mazare@gmail.com>	2023-08-30 16:19:28 +0100
committer	GitHub <noreply@github.com>	2023-08-30 16:19:28 +0100
commit	3159982a899e23b57236ba3f4b0b954769e7f7f7 (patch)
tree	fe716b50f3510d26514ba272cf2948ed6629a68c /candle-nn/src
parent	ad8a62dbf50b0771e392662cf444917a72aed844 (diff)
Add a Dropout layer (#676)
* Add a dropout layer.
* Add an actual layer.
Diffstat (limited to 'candle-nn/src')
-rw-r--r--	candle-nn/src/lib.rs	1
-rw-r--r--	candle-nn/src/ops.rs	35
2 files changed, 36 insertions, 0 deletions
diff --git a/candle-nn/src/lib.rs b/candle-nn/src/lib.rs
index 8ab51070..e9552e83 100644
--- a/candle-nn/src/lib.rs
+++ b/candle-nn/src/lib.rs
@@ -23,6 +23,7 @@ pub use group_norm::{group_norm, GroupNorm};
pub use init::Init;
pub use layer_norm::{layer_norm, rms_norm, LayerNorm, LayerNormConfig, RmsNorm};
pub use linear::{linear, linear_no_bias, Linear};
+pub use ops::Dropout;
pub use optim::{AdamW, ParamsAdamW, SGD};
pub use rnn::{lstm, LSTM, RNN};
pub use var_builder::VarBuilder;
diff --git a/candle-nn/src/ops.rs b/candle-nn/src/ops.rs
index 397674f3..63f73dfe 100644
--- a/candle-nn/src/ops.rs
+++ b/candle-nn/src/ops.rs
@@ -42,3 +42,38 @@ pub fn sigmoid(xs: &Tensor) -> Result<Tensor> {
// TODO: Should we have a specialized op for this?
(xs.neg()?.exp()? + 1.0)?.recip()
}
+
+pub fn dropout(xs: &Tensor, drop_p: f32) -> Result<Tensor> {
+    // This implementation is inefficient as it stores the full mask for the backward pass.
+    // Instead we could just store the seed and have a specialized kernel that would both
+    // generate the random mask and apply it.
+    // Another easier optimization would be to generate the boolean mask using just a bit of
+    // entropy per element rather than generating a full float per element.
+    if !(0. ..1.).contains(&drop_p) {
+        candle::bail!("dropout probability has to be in [0, 1), got {drop_p}")
+    }
+    let rand = Tensor::rand(0f32, 1f32, xs.shape(), xs.device())?;
+    let scale = 1.0 / (1.0 - drop_p as f64);
+    let drop_p = Tensor::new(drop_p, xs.device())?.broadcast_as(xs.shape())?;
+    let mask = (rand.ge(&drop_p)? * scale)?.to_dtype(xs.dtype())?;
+    xs * mask
+}
+
+#[derive(Debug)]
+pub struct Dropout {
+    drop_p: f32,
+}
+
+impl Dropout {
+    pub fn new(drop_p: f32) -> Dropout {
+        Self { drop_p }
+    }
+
+    pub fn forward(&self, xs: &Tensor, train: bool) -> Result<Tensor> {
+        if train {
+            dropout(xs, self.drop_p)
+        } else {
+            Ok(xs.clone())
+        }
+    }
+}
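
As a rough usage sketch (not part of the commit): the new `Dropout` layer could be exercised as below. The drop probability, tensor shape, and device are arbitrary placeholders, and the import paths assume the core crate is used as `candle_core` (inside the workspace the same types are reached through `candle`); the exact crate names may differ depending on how the dependency is declared.

```rust
use candle_core::{Device, Result, Tensor};
use candle_nn::Dropout;

fn main() -> Result<()> {
    let device = Device::Cpu;
    // A dummy activation tensor; shape and values are arbitrary.
    let xs = Tensor::rand(0f32, 1f32, (2, 4), &device)?;
    let dropout = Dropout::new(0.5);
    // With train == true, each element is zeroed with probability 0.5 and the
    // survivors are scaled by 1 / (1 - 0.5) = 2, keeping the expected value unchanged.
    let ys_train = dropout.forward(&xs, true)?;
    // With train == false, the input is returned unchanged.
    let ys_eval = dropout.forward(&xs, false)?;
    println!("{ys_train}\n{ys_eval}");
    Ok(())
}
```

Because the scaling by `1 / (1 - drop_p)` happens at training time (inverted dropout), the eval path is a plain identity and no rescaling is needed at inference.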