diff options
-rw-r--r-- | Makefile (renamed from candle-core/Makefile) | 4 | ++--
-rw-r--r-- | candle-core/src/cuda_backend.rs | 4 | ++--
-rw-r--r-- | candle-kernels/src/ternary.cu | 2 | +-
3 files changed, 5 insertions, 5 deletions
diff --git a/candle-core/Makefile b/Makefile index 97923e96..cb472d80 100644 --- a/candle-core/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ clean-ptx: find target -name "*.ptx" -type f -delete - echo "" > kernels/src/lib.rs - touch kernels/build.rs + echo "" > candle-kernels/src/lib.rs + touch candle-kernels/build.rs clean: cargo clean diff --git a/candle-core/src/cuda_backend.rs b/candle-core/src/cuda_backend.rs index caaa64b8..57ea9b3e 100644 --- a/candle-core/src/cuda_backend.rs +++ b/candle-core/src/cuda_backend.rs @@ -301,8 +301,8 @@ fn gemm_config<T>( Ok(StridedBatchedConfig { batch_size: b as i32, gemm, - stride_a: (m * k) as i64, - stride_b: (n * k) as i64, + stride_a: (n * k) as i64, + stride_b: (m * k) as i64, stride_c: (m * n) as i64, }) } diff --git a/candle-kernels/src/ternary.cu b/candle-kernels/src/ternary.cu index 8f51526b..2a20fbec 100644 --- a/candle-kernels/src/ternary.cu +++ b/candle-kernels/src/ternary.cu @@ -14,7 +14,7 @@ extern "C" __global__ void FN_NAME( \ const size_t *dims = info; \ const size_t *strides = info + num_dims; \ const size_t *strides_t = info + 2*num_dims; \ - const size_t *strides_f = info + 2*num_dims; \ + const size_t *strides_f = info + 3*num_dims; \ if (is_contiguous(num_dims, dims, strides) \ && is_contiguous(num_dims, dims, strides_f) \ && is_contiguous(num_dims, dims, strides_t)) { \ |