diff options
Diffstat (limited to 'candle-transformers/src/models/whisper/mod.rs')
-rw-r--r-- | candle-transformers/src/models/whisper/mod.rs | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/candle-transformers/src/models/whisper/mod.rs b/candle-transformers/src/models/whisper/mod.rs new file mode 100644 index 00000000..7dc8107b --- /dev/null +++ b/candle-transformers/src/models/whisper/mod.rs @@ -0,0 +1,26 @@ +pub mod audio; +pub mod model; + +pub const DTYPE: candle::DType = candle::DType::F32; + +// Audio parameters. +pub const SAMPLE_RATE: usize = 16000; +pub const N_FFT: usize = 400; +pub const N_MELS: usize = 80; +pub const HOP_LENGTH: usize = 160; +pub const CHUNK_LENGTH: usize = 30; +pub const N_SAMPLES: usize = CHUNK_LENGTH * SAMPLE_RATE; // 480000 samples in a 30-second chunk +pub const N_FRAMES: usize = N_SAMPLES / HOP_LENGTH; // 3000 frames in a mel spectrogram input + +pub const NO_SPEECH_THRESHOLD: f64 = 0.6; +pub const LOGPROB_THRESHOLD: f64 = -1.0; +pub const TEMPERATURES: [f64; 6] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]; +pub const COMPRESSION_RATIO_THRESHOLD: f64 = 2.4; + +// Tokenizer dependent bits. +pub const SOT_TOKEN: &str = "<|startoftranscript|>"; +pub const TRANSCRIBE_TOKEN: &str = "<|transcribe|>"; +pub const TRANSLATE_TOKEN: &str = "<|translate|>"; +pub const NO_TIMESTAMPS_TOKEN: &str = "<|notimestamps|>"; +pub const EOT_TOKEN: &str = "<|endoftext|>"; +pub const NO_SPEECH_TOKEN: &str = "<|nocaptions|>"; |