@@ -215,16 +215,17 @@ inclusive_scan_base_step(sycl::queue &exec_q,
215215 const size_t gid = it.get_global_id (0 );
216216 const size_t lid = it.get_local_id (0 );
217217
218- const size_t iter_gid = gid / (acc_groups * wg_size);
219- const size_t chunk_gid = gid - (iter_gid * acc_groups * wg_size);
218+ const size_t reduce_chunks = acc_groups * wg_size;
219+ const size_t iter_gid = gid / reduce_chunks;
220+ const size_t chunk_gid = gid - (iter_gid * reduce_chunks);
220221
221- std::array<outputT, n_wi> local_iscan;
222-
223- size_t i = chunk_gid * n_wi;
222+ const size_t i = chunk_gid * n_wi;
224223 const auto &iter_offsets = iter_indexer (iter_gid);
225224 const auto &inp_iter_offset = iter_offsets.get_first_offset ();
226225 const auto &out_iter_offset = iter_offsets.get_second_offset ();
227226
227+ std::array<outputT, n_wi> local_iscan;
228+
228229#pragma unroll
229230 for (nwiT m_wi = 0 ; m_wi < n_wi; ++m_wi) {
230231 const size_t i_m_wi = i + m_wi;
@@ -324,7 +325,7 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
324325 std::vector<sycl::event> &host_tasks,
325326 const std::vector<sycl::event> &depends = {})
326327{
327- ScanOpT scan_op = ScanOpT () ;
328+ ScanOpT scan_op{} ;
328329 constexpr outputT identity = su_ns::Identity<ScanOpT, outputT>::value;
329330
330331 constexpr size_t _iter_nelems = 1 ;
@@ -352,9 +353,9 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
352353 size_t n_groups_ = n_groups;
353354 size_t temp_size = 0 ;
354355 while (n_groups_ > 1 ) {
355- const auto this_size = (n_groups_ - 1 );
356+ const size_t this_size = (n_groups_ - 1 );
356357 temp_size += this_size;
357- n_groups_ = ceiling_quotient< size_t > (this_size, chunk_size);
358+ n_groups_ = ceiling_quotient (this_size, chunk_size);
358359 }
359360
360361 // allocate
0 commit comments