diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2023-09-28 09:05:29 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-28 09:05:29 +0100 |
commit | 5e1c595e00721a11bb46e9187ea7d86ea4ace0e3 (patch) | |
tree | 53613fca3defe5f9f1905b57107ec63ad7f309ab /candle-core | |
parent | 8a49e01b9d9e13732d83f79afa2f850e2ba7fdae (diff) | |
download | candle-5e1c595e00721a11bb46e9187ea7d86ea4ace0e3.tar.gz candle-5e1c595e00721a11bb46e9187ea7d86ea4ace0e3.tar.bz2 candle-5e1c595e00721a11bb46e9187ea7d86ea4ace0e3.zip |
Optimize the index-select cuda kernel. (#976)
Diffstat (limited to 'candle-core')
-rw-r--r-- | candle-core/src/cuda_backend.rs | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/candle-core/src/cuda_backend.rs b/candle-core/src/cuda_backend.rs
index 00fd1d04..d1187b1c 100644
--- a/candle-core/src/cuda_backend.rs
+++ b/candle-core/src/cuda_backend.rs
@@ -884,8 +884,6 @@ impl<'a> Map1 for IndexSelect<'a> {
         };
         let ids_shape = ids_l.shape();
         let ids_dims = ids_shape.dims();
-        let ids_el = ids_shape.elem_count();
-        let cfg = LaunchConfig::for_num_elems(ids_el as u32);
         let ds = dev.htod_copy([ids_dims, ids_l.stride()].concat()).w()?;
         let src = match src_l.contiguous_offsets() {
             Some((o1, o2)) => src.slice(o1..o2),
@@ -894,11 +892,13 @@ impl<'a> Map1 for IndexSelect<'a> {
         let left_size: usize = src_l.dims()[..self.2].iter().product();
         let right_size: usize = src_l.dims()[self.2 + 1..].iter().product();
         let dim_size = src_l.dims()[self.2];
+        let dst_el = ids_shape.elem_count() * left_size * right_size;
+        let cfg = LaunchConfig::for_num_elems(dst_el as u32);
         let func = dev.get_or_load_func(&kernel_name::<T>(name), kernels::INDEXING)?;
         // SAFETY: Set later by running the kernel.
-        let out = unsafe { dev.alloc::<T>(ids_el * left_size * right_size) }.w()?;
+        let out = unsafe { dev.alloc::<T>(dst_el) }.w()?;
         let params = (
-            ids_el,
+            dst_el,
             ids_dims.len(),
             &ds,
             ids,
```