We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9574264 commit efd619a — Copy full SHA for efd619a
ggml/src/ggml-cuda/cumsum.cu
@@ -81,7 +81,7 @@ static __global__ void cumsum_kernel(
81
82
const int tid = threadIdx.x;
83
constexpr int warp_size = ggml_cuda_get_physical_warp_size();
84
- const int lane = tid & (warp_size - 1);
+ const int lane = tid % warp_size;
85
const int warp = tid / warp_size;
86
const int warps_per_block = blockDim.x / warp_size;
87
0 commit comments