From 0f5cbb08b36a2d962470ec590a2d2bd9770bd12d Mon Sep 17 00:00:00 2001 From: Eric Buehler <65165915+EricLBuehler@users.noreply.github.com> Date: Fri, 26 Jul 2024 15:32:26 -0400 Subject: Add support for Llama 3.1 (#2359) * Add Llama 3.1 rope * Clippy * Format * Clippy * Add support for multiple eos tokens: * Untagged either * Remove either dep and fix settings.json * Make the max positional embeddings configurable --- candle-core/benches/benchmarks/affine.rs | 2 +- candle-core/benches/benchmarks/qmatmul.rs | 4 ++-- candle-core/benches/benchmarks/unary.rs | 2 +- candle-core/benches/benchmarks/where_cond.rs | 6 +++--- candle-core/src/tensor.rs | 6 +++--- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'candle-core') diff --git a/candle-core/benches/benchmarks/affine.rs b/candle-core/benches/benchmarks/affine.rs index eded9f57..c1004c6c 100644 --- a/candle-core/benches/benchmarks/affine.rs +++ b/candle-core/benches/benchmarks/affine.rs @@ -12,7 +12,7 @@ fn run_affine_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: let m = 1024; let k = 1024; - let tensor = Tensor::zeros((b, m, k), dtype, &device).unwrap(); + let tensor = Tensor::zeros((b, m, k), dtype, device).unwrap(); let flops = b * m * k * dtype.size_in_bytes(); diff --git a/candle-core/benches/benchmarks/qmatmul.rs b/candle-core/benches/benchmarks/qmatmul.rs index ccb136ac..4d34588b 100644 --- a/candle-core/benches/benchmarks/qmatmul.rs +++ b/candle-core/benches/benchmarks/qmatmul.rs @@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput}; use std::time::Instant; fn run(matmul: &QMatMul, x: &Tensor) { - matmul.forward(&x).unwrap(); + matmul.forward(x).unwrap(); } fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) { @@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) { fn criterion_benchmark(c: &mut Criterion) { let handler = BenchDeviceHandler::new().unwrap(); for device in handler.devices { - for dtype in vec![ + for dtype in [ GgmlDType::F32, GgmlDType::F16, GgmlDType::Q4_0, diff --git a/candle-core/benches/benchmarks/unary.rs b/candle-core/benches/benchmarks/unary.rs index a8e0d025..9efd7509 100644 --- a/candle-core/benches/benchmarks/unary.rs +++ b/candle-core/benches/benchmarks/unary.rs @@ -12,7 +12,7 @@ fn run_unary_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: & let m = 1024; let k = 1024; - let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, &device) + let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, device) .unwrap() .to_dtype(dtype) .unwrap() diff --git a/candle-core/benches/benchmarks/where_cond.rs b/candle-core/benches/benchmarks/where_cond.rs index c517dcf5..0e91f656 100644 --- a/candle-core/benches/benchmarks/where_cond.rs +++ b/candle-core/benches/benchmarks/where_cond.rs @@ -25,9 +25,9 @@ const SIZE: usize = B * M * K; const DATA: [u8; SIZE] = create_cond_arr::(); fn run_where_cond_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) { - let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), &device).unwrap(); - let on_true = Tensor::ones((B, M, K), dtype, &device).unwrap(); - let on_false = Tensor::zeros((B, M, K), dtype, &device).unwrap(); + let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), device).unwrap(); + let on_true = Tensor::ones((B, M, K), dtype, device).unwrap(); + let on_false = Tensor::zeros((B, M, K), dtype, device).unwrap(); let elements = B * M * K; // E.g. 2 f32 tensors + 1 u8 tensor diff --git a/candle-core/src/tensor.rs b/candle-core/src/tensor.rs index dd1b44b0..82532f20 100644 --- a/candle-core/src/tensor.rs +++ b/candle-core/src/tensor.rs @@ -590,9 +590,9 @@ impl Tensor { /// /// * `args` - A slice of 1D tensors. /// * `xy_indexing` - Whether to use xy indexing or ij indexing. If xy is selected, the - /// first dimension corresponds to the cardinality of the second input and the second - /// dimension corresponds to the cardinality of the first input. If ij is selected, the - /// dimensions are in the same order as the cardinality of the inputs. + /// first dimension corresponds to the cardinality of the second input and the second + /// dimension corresponds to the cardinality of the first input. If ij is selected, the + /// dimensions are in the same order as the cardinality of the inputs. /// /// # Examples /// -- cgit v1.2.3