diff options
author | Nicolas Patry <patry.nicolas@protonmail.com> | 2023-08-25 11:54:30 +0000 |
---|---|---|
committer | Nicolas Patry <patry.nicolas@protonmail.com> | 2023-08-25 11:54:30 +0000 |
commit | be371e827c141e9452b0dd8790209e0b3642648c (patch) | |
tree | 25b32a7051fee16311f6fa148af05cf0965c4923 /candle-kernels/src | |
parent | 1c1e34735e2d20f20ad17c03f01697ecaae5a8d1 (diff) | |
download | candle-be371e827c141e9452b0dd8790209e0b3642648c.tar.gz candle-be371e827c141e9452b0dd8790209e0b3642648c.tar.bz2 candle-be371e827c141e9452b0dd8790209e0b3642648c.zip |
Intermediary float cast is necessary for cuda 11.8
Diffstat (limited to 'candle-kernels/src')
-rw-r--r-- | candle-kernels/src/cast.cu | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/candle-kernels/src/cast.cu b/candle-kernels/src/cast.cu
index 0a2282fc..03ca1ec7 100644
--- a/candle-kernels/src/cast.cu
+++ b/candle-kernels/src/cast.cu
@@ -13,13 +13,13 @@ extern "C" __global__ void FN_NAME( \
     const size_t *strides = info + num_dims; \
     if (is_contiguous(num_dims, dims, strides)) { \
         for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < numel; i += blockDim.x * gridDim.x) { \
-            out[i] = static_cast<DST_TYPENAME>(inp[i]); \
+            out[i] = (DST_TYPENAME) (float) inp[i]; \
         } \
     } \
     else { \
         for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < numel; i += blockDim.x * gridDim.x) { \
             unsigned strided_i = get_strided_index(i, num_dims, dims, strides); \
-            out[i] = static_cast<DST_TYPENAME>(inp[strided_i]); \
+            out[i] = (DST_TYPENAME) (float) inp[strided_i]; \
         } \
     } \
 } \
```