Skip to content

Commit 0d87461

Browse files
committed
Prefer selecting any available device over aborting
...when too few (or no) GPU devices can be found.
1 parent 53497e4 commit 0d87461

File tree

1 file changed

+34
-18
lines changed

1 file changed

+34
-18
lines changed

src/device_queue.cc

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,26 +43,42 @@ namespace detail {
4343
} else {
4444
const auto host_cfg = cfg.get_host_config();
4545

46-
// Try to find a platform that can provide a unique device for each node.
47-
bool found = false;
48-
const auto platforms = cl::sycl::platform::get_platforms();
49-
for(size_t i = 0; i < platforms.size(); ++i) {
50-
auto&& platform = platforms[i];
51-
const auto devices = platform.get_devices(cl::sycl::info::device_type::gpu);
52-
if(devices.size() >= host_cfg.node_count) {
53-
how_selected = fmt::format("automatically selected platform {}, device {}", i, host_cfg.local_rank);
54-
device = devices[host_cfg.local_rank];
55-
found = true;
56-
break;
46+
const auto try_find_device_per_node = [&host_cfg, &device, &how_selected](cl::sycl::info::device_type type) {
47+
// Try to find a platform that can provide a unique device for each node.
48+
const auto platforms = cl::sycl::platform::get_platforms();
49+
for(size_t i = 0; i < platforms.size(); ++i) {
50+
auto&& platform = platforms[i];
51+
const auto devices = platform.get_devices(type);
52+
if(devices.size() >= host_cfg.node_count) {
53+
how_selected = fmt::format("automatically selected platform {}, device {}", i, host_cfg.local_rank);
54+
device = devices[host_cfg.local_rank];
55+
return true;
56+
}
5757
}
58-
}
58+
return false;
59+
};
5960

60-
if(!found) {
61-
queue_logger.warn("No suitable platform found that can provide {} devices, and CELERITY_DEVICES not set", host_cfg.node_count);
62-
// Just use the first device available
63-
const auto devices = cl::sycl::device::get_devices(cl::sycl::info::device_type::gpu);
64-
if(devices.empty()) { throw std::runtime_error("Automatic device selection failed: No GPU device available"); }
65-
device = devices[0];
61+
const auto try_find_one_device = [&device](cl::sycl::info::device_type type) {
62+
const auto devices = cl::sycl::device::get_devices(type);
63+
if(!devices.empty()) {
64+
device = devices[0];
65+
return true;
66+
}
67+
return false;
68+
};
69+
70+
// Try to find a unique GPU per node.
71+
if(!try_find_device_per_node(cl::sycl::info::device_type::gpu)) {
72+
// Try to find a unique device (of any type) per node.
73+
if(try_find_device_per_node(cl::sycl::info::device_type::all)) {
74+
queue_logger.warn("No suitable platform found that can provide {} GPU devices, and CELERITY_DEVICES not set", host_cfg.node_count);
75+
} else {
76+
queue_logger.warn("No suitable platform found that can provide {} devices, and CELERITY_DEVICES not set", host_cfg.node_count);
77+
// Just use the first available device. Prefer GPUs, but settle for anything.
78+
if(!try_find_one_device(cl::sycl::info::device_type::gpu) && !try_find_one_device(cl::sycl::info::device_type::all)) {
79+
throw std::runtime_error("Automatic device selection failed: No device available");
80+
}
81+
}
6682
}
6783
}
6884
}

0 commit comments

Comments
 (0)