@@ -2163,7 +2163,7 @@ namespace detail {
21632163 TEST_CASE (" handler::parallel_for accepts nd_range" , " [handler]" ) {
21642164 distr_queue q;
21652165
2166- // Note: be careful about local range sizes here, not all devices support work groups with > 256 elements .
2166+ // Note: We assume a local range size of 64 here; this should be supported by most devices.
21672167
21682168 CHECK_NOTHROW (q.submit ([&](handler& cgh) {
21692169 cgh.parallel_for <class UKN (nd_range_1)>(celerity::nd_range<1 >{{256 }, {64 }}, [](nd_item<1 > item) {
@@ -2175,10 +2175,10 @@ namespace detail {
21752175 }));
21762176
21772177 CHECK_NOTHROW (q.submit ([&](handler& cgh) {
2178- cgh.parallel_for <class UKN (nd_range_2)>(celerity::nd_range<2 >{{64 , 64 }, {16 , 16 }}, [](nd_item<2 > item) {
2178+ cgh.parallel_for <class UKN (nd_range_2)>(celerity::nd_range<2 >{{64 , 64 }, {8 , 8 }}, [](nd_item<2 > item) {
21792179 group_barrier (item.get_group ());
21802180#if !WORKAROUND_COMPUTECPP // no group primitives
2181- group_broadcast (item.get_group (), 42 , 99 );
2181+ group_broadcast (item.get_group (), 42 , 25 );
21822182#endif
21832183 });
21842184 }));
@@ -2206,6 +2206,8 @@ namespace detail {
22062206 distr_queue q;
22072207 buffer<int , 1 > out{64 };
22082208
2209+ // Note: We assume a local range size of 32 here; this should be supported by most devices.
2210+
22092211 q.submit ([=](handler& cgh) {
22102212 local_accessor<int > la{32 , cgh};
22112213 accessor ga{out, cgh, celerity::access::one_to_one{}, write_only};
@@ -2229,6 +2231,8 @@ namespace detail {
22292231#if CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS
22302232
22312233 TEST_CASE (" reductions can be passed into nd_range kernels" , " [handler]" ) {
2234+ // Note: We assume a local range size of 16 here; this should be supported by most devices.
2235+
22322236 buffer<int , 1 > b{cl::sycl::range<1 >{1 }};
22332237 distr_queue{}.submit ([=](handler& cgh) {
22342238 cgh.parallel_for <class UKN (kernel)>(celerity::nd_range{cl::sycl::range<2 >{8 , 8 }, cl::sycl::range<2 >{4 , 4 }}, reduction (b, cgh, cl::sycl::plus<>{}),
@@ -2243,6 +2247,8 @@ namespace detail {
22432247 TEST_CASE (" handler::parallel_for kernel names are optional" , " [handler]" ) {
22442248 distr_queue q;
22452249
2250+ // Note: We assume a local range size of 32 here; this should be supported by most devices.
2251+
22462252 // without name
22472253 q.submit ([](handler& cgh) { cgh.parallel_for (cl::sycl::range<1 >{64 }, [](item<1 > item) {}); });
22482254 q.submit ([=](handler& cgh) { cgh.parallel_for (celerity::nd_range<1 >{64 , 32 }, [](nd_item<1 > item) {}); });
0 commit comments