From f9ecc8447753d759e776e762ba9309bb90b76bb3 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Tue, 22 Aug 2023 19:41:10 +0100
Subject: GQA support in the quantized model. (#555)

* GQA support in the quantized model.

* Fix the reshaping.

* Fix the main llama model.

* Infer the proper gqa from the model kind.
---
 candle-examples/examples/llama/model.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'candle-examples/examples/llama')

diff --git a/candle-examples/examples/llama/model.rs b/candle-examples/examples/llama/model.rs
index 86d13bdb..561c2939 100644
--- a/candle-examples/examples/llama/model.rs
+++ b/candle-examples/examples/llama/model.rs
@@ -291,7 +291,7 @@ impl CausalSelfAttention {
             let x = x
                 .unsqueeze(2)?
                 .expand((b_sz, n_kv_head, n_rep, seq_len, head_dim))?
-                .reshape((b_sz, n_kv_head, n_rep, seq_len, head_dim))?;
+                .reshape((b_sz, n_kv_head * n_rep, seq_len, head_dim))?;
             Ok(x)
         }
     }
-- 
cgit v1.2.3