summaryrefslogtreecommitdiff
path: root/candle-transformers/src/models/based.rs
diff options
context:
space:
mode:
Diffstat (limited to 'candle-transformers/src/models/based.rs')
-rw-r--r--candle-transformers/src/models/based.rs7
1 file changed, 3 insertions, 4 deletions
diff --git a/candle-transformers/src/models/based.rs b/candle-transformers/src/models/based.rs
index aa28f523..c54ff966 100644
--- a/candle-transformers/src/models/based.rs
+++ b/candle-transformers/src/models/based.rs
@@ -1,10 +1,9 @@
//! Based from the Stanford Hazy Research group.
//!
//! See "Simple linear attention language models balance the recall-throughput tradeoff", Arora et al. 2024
-//! <https://arxiv.org/abs/2402.18668>
-
-//! Original code:
-//! https://github.com/HazyResearch/based
+//! - [Arxiv](https://arxiv.org/abs/2402.18668)
+//! - [Github](https://github.com/HazyResearch/based)
+//!
use candle::{DType, Device, IndexOp, Module, Result, Tensor, D};
use candle_nn::{