summaryrefslogtreecommitdiff
path: root/candle-nn/src/kv_cache.rs
blob: 684053dc736ebcf8d454d62485ad8fd29b8c6df0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use candle::{DType, Device, Result, Shape, Tensor};

#[derive(Debug, Clone)]
pub struct Cache {
    all_data: Tensor,
    dim: usize,
    current_seq_len: usize,
    max_seq_len: usize,
}

impl Cache {
    pub fn new<S: Into<Shape>, D: candle::shape::Dim>(
        dim: D,
        shape: S,
        dtype: DType,
        dev: &Device,
    ) -> Result<Self> {
        let shape = shape.into();
        let dim = dim.to_index(&shape, "kv-cache")?;
        let max_seq_len = shape.dims()[dim];
        let all_data = Tensor::zeros(shape, dtype, dev)?;
        Ok(Self {
            all_data,
            dim,
            current_seq_len: 0,
            max_seq_len,
        })
    }

    pub fn dim(&self) -> usize {
        self.dim
    }

    pub fn current_seq_len(&self) -> usize {
        self.current_seq_len
    }

    pub fn max_seq_len(&self) -> usize {
        self.max_seq_len
    }

    pub fn all_data(&self) -> &Tensor {
        &self.all_data
    }

    pub fn current_data(&self) -> Result<Tensor> {
        self.all_data.narrow(self.dim, 0, self.current_seq_len)
    }

    pub fn append(&mut self, src: &Tensor) -> Result<()> {
        let seq_len = src.dim(self.dim)?;
        if self.current_seq_len + seq_len > self.max_seq_len {
            candle::bail!(
                "kv-cache: above max-seq-len {}+{seq_len}>{}",
                self.current_seq_len,
                self.max_seq_len
            )
        }
        self.all_data
            .slice_set(src, self.dim, self.current_seq_len)?;
        self.current_seq_len += seq_len;
        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct KvCache {
    k: Cache,
    v: Cache,
}

impl KvCache {
    pub fn new<S: Into<Shape>, D: candle::shape::Dim>(
        dim: D,
        shape: S,
        dtype: DType,
        dev: &Device,
    ) -> Result<Self> {
        let shape = shape.into();
        let dim = dim.to_index(&shape, "kv-cache")?;
        let k = Cache::new(dim, &shape, dtype, dev)?;
        let v = Cache::new(dim, &shape, dtype, dev)?;
        Ok(Self { k, v })
    }

    pub fn k(&self) -> Result<Tensor> {
        self.k.current_data()
    }

    pub fn v(&self) -> Result<Tensor> {
        self.v.current_data()
    }

    pub fn append(&mut self, k: &Tensor, v: &Tensor) -> Result<(Tensor, Tensor)> {
        self.k.append(k)?;
        self.v.append(v)?;
        let k = self.k.current_data()?;
        let v = self.v.current_data()?;
        Ok((k, v))
    }
}