Skip to content

Commit ba255da

Browse files
committed
Fix bug in accumulate
1 parent 6e38e60 commit ba255da

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/accumulate/accumulate_nd.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@ end
227 227
# We have a block of threads to accumulate along the dims axis; do it in chunks of
228 228
# block_size and keep track of previous chunks' running prefix
229 229
ichunk = typeof(iblock)(0)
230-
num_chunks = (length_dims + block_size - 0x1) ÷ block_size
230+
num_chunks = (length_dims + (0x2 * block_size) - 0x1) ÷ (0x2 * block_size)
231 231
total = neutral
232 232

233 233
if ithread == 0x0
@@ -326,7 +326,7 @@ end
326 326

327 327
# ...and accumulate the last value too
328 328
if bi == 0x2 * block_size - 0x1
329-
if iblock < num_chunks - 0x1
329+
if ichunk < num_chunks - 0x1
330 330
temp[bi + bank_offset_b + 0x1] = op(t2, v[
331 331
input_base_idx +
332 332
((ichunk + 0x1) * block_size * 0x2 - 0x1) * vstrides[dims] +

0 commit comments

Comments
 (0)