Skip to content

Commit f0cf3f4

Browse files
Markus Wippler authored and fknorr committed
Reduce local range size in tests to improve device compatibility
1 parent 39dacdf commit f0cf3f4

File tree

3 files changed

+12
-3
lines changed

3 files changed

+12
-3
lines changed

examples/matmul/matmul.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ void multiply(celerity::distr_queue queue, celerity::buffer<T, 2> mat_a, celerit
2020
celerity::accessor c{mat_c, cgh, celerity::access::one_to_one{}, celerity::write_only, celerity::no_init};
2121

2222
// Use local-memory tiling to avoid waiting on global memory too often
23+
// Note: We assume a local range size of 64 (GROUP_SIZE * GROUP_SIZE = 8 * 8) here; this should be supported by most devices.
2324
const size_t GROUP_SIZE = 8;
2425
celerity::local_accessor<T, 2> scratch_a{{GROUP_SIZE, GROUP_SIZE}, cgh};
2526
celerity::local_accessor<T, 2> scratch_b{{GROUP_SIZE, GROUP_SIZE}, cgh};

test/runtime_tests.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,7 +2163,7 @@ namespace detail {
21632163
TEST_CASE("handler::parallel_for accepts nd_range", "[handler]") {
21642164
distr_queue q;
21652165

2166-
// Note: be careful about local range sizes here, not all devices support work groups with > 256 elements.
2166+
// Note: We assume a local range size of 64 (64 in 1D, 8 * 8 in 2D) here; this should be supported by most devices.
21672167

21682168
CHECK_NOTHROW(q.submit([&](handler& cgh) {
21692169
cgh.parallel_for<class UKN(nd_range_1)>(celerity::nd_range<1>{{256}, {64}}, [](nd_item<1> item) {
@@ -2175,10 +2175,10 @@ namespace detail {
21752175
}));
21762176

21772177
CHECK_NOTHROW(q.submit([&](handler& cgh) {
2178-
cgh.parallel_for<class UKN(nd_range_2)>(celerity::nd_range<2>{{64, 64}, {16, 16}}, [](nd_item<2> item) {
2178+
cgh.parallel_for<class UKN(nd_range_2)>(celerity::nd_range<2>{{64, 64}, {8, 8}}, [](nd_item<2> item) {
21792179
group_barrier(item.get_group());
21802180
#if !WORKAROUND_COMPUTECPP // no group primitives
2181-
group_broadcast(item.get_group(), 42, 99);
2181+
group_broadcast(item.get_group(), 42, 25);
21822182
#endif
21832183
});
21842184
}));
@@ -2206,6 +2206,8 @@ namespace detail {
22062206
distr_queue q;
22072207
buffer<int, 1> out{64};
22082208

2209+
// Note: We assume a local range size of 32 here; this should be supported by most devices.
2210+
22092211
q.submit([=](handler& cgh) {
22102212
local_accessor<int> la{32, cgh};
22112213
accessor ga{out, cgh, celerity::access::one_to_one{}, write_only};
@@ -2229,6 +2231,8 @@ namespace detail {
22292231
#if CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS
22302232

22312233
TEST_CASE("reductions can be passed into nd_range kernels", "[handler]") {
2234+
// Note: We assume a local range size of 16 (4 * 4) here; this should be supported by most devices.
2235+
22322236
buffer<int, 1> b{cl::sycl::range<1>{1}};
22332237
distr_queue{}.submit([=](handler& cgh) {
22342238
cgh.parallel_for<class UKN(kernel)>(celerity::nd_range{cl::sycl::range<2>{8, 8}, cl::sycl::range<2>{4, 4}}, reduction(b, cgh, cl::sycl::plus<>{}),
@@ -2243,6 +2247,8 @@ namespace detail {
22432247
TEST_CASE("handler::parallel_for kernel names are optional", "[handler]") {
22442248
distr_queue q;
22452249

2250+
// Note: We assume a local range size of 32 here; this should be supported by most devices.
2251+
22462252
// without name
22472253
q.submit([](handler& cgh) { cgh.parallel_for(cl::sycl::range<1>{64}, [](item<1> item) {}); });
22482254
q.submit([=](handler& cgh) { cgh.parallel_for(celerity::nd_range<1>{64, 32}, [](nd_item<1> item) {}); });

test/system/distr_tests.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ namespace detail {
163163
distr_queue q;
164164
auto n = runtime::get_instance().get_num_nodes();
165165

166+
// Note: We assume a local range size of 165 (3 * 5 * 11, before the range_cast to Dims) here; this may not be supported by all devices.
167+
166168
auto global_range = range_cast<Dims>(cl::sycl::range<3>{n * 4 * 3, 3 * 5, 2 * 11});
167169
auto local_range = range_cast<Dims>(cl::sycl::range<3>{3, 5, 11});
168170
auto group_range = global_range / local_range;

0 commit comments

Comments
 (0)