From b1879f17f6b9d13e101a4d3ff5b6b4ff2e1a7a24 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Fri, 19 Jan 2024 08:57:49 +0000 Subject: chore: switch to buffer --- candle-metal-kernels/src/lib.rs | 24 ++++++++++++--------- .../src/libMetalFlashAttention.metallib | Bin 116216 -> 102760 bytes 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'candle-metal-kernels') diff --git a/candle-metal-kernels/src/lib.rs b/candle-metal-kernels/src/lib.rs index 2773ca6a..8cb3c16a 100644 --- a/candle-metal-kernels/src/lib.rs +++ b/candle-metal-kernels/src/lib.rs @@ -1,6 +1,7 @@ use metal::{ Buffer, CommandBufferRef, CompileOptions, ComputeCommandEncoderRef, ComputePipelineState, - Device, Function, FunctionConstantValues, Library, MTLDataType, MTLSize, NSUInteger, + Device, Function, FunctionConstantValues, Library, MTLDataType, MTLResourceOptions, MTLSize, + NSUInteger, }; use std::collections::HashMap; use std::ffi::c_void; @@ -1359,17 +1360,20 @@ pub fn call_gemm( // TODO byte_stride_d let byte_stride_d = 0; - let buffer: Vec<u64> = vec![ - byte_stride_a as _, - byte_stride_b as _, - byte_stride_c as _, - byte_stride_d as _, - ]; - encoder.set_bytes( - 10, - (buffer.len() * core::mem::size_of::<u64>()) as NSUInteger, + let mut buffer: Vec<u64> = Vec::with_capacity(b * 4); + for i in 0..b { + buffer.push((i * byte_stride_a) as u64); + buffer.push((i * byte_stride_b) as u64); + buffer.push((i * byte_stride_c) as u64); + buffer.push((i * byte_stride_d) as u64); + } + + let matrix_offsets = device.new_buffer_with_data( buffer.as_ptr() as *const NSUInteger as *const c_void, + (buffer.len() * core::mem::size_of::<u64>()) as NSUInteger, + MTLResourceOptions::StorageModePrivate, ); + encoder.set_buffer(10, Some(&matrix_offsets), 0); } let grid_size = MTLSize { diff --git a/candle-metal-kernels/src/libMetalFlashAttention.metallib b/candle-metal-kernels/src/libMetalFlashAttention.metallib index c28d2b03..f5116ca6 100644 Binary files a/candle-metal-kernels/src/libMetalFlashAttention.metallib and 
b/candle-metal-kernels/src/libMetalFlashAttention.metallib differ -- cgit v1.2.3