Skip to content

Commit 7e99eed

Browse files
committed
Introduce new CELERITY_PRINT_GRAPHS env var
...to control whether task and command graphs are printed to stdout at the end of execution.
1 parent e25f904 commit 7e99eed

File tree

9 files changed

+26
-21
lines changed

9 files changed

+26
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Versioning](http://semver.org/spec/v2.0.0.html).
1010

1111
### Added
1212

13+
- Add new environment variable `CELERITY_PRINT_GRAPHS` to control whether task and command graphs are logged (#197, #236)
1314
- Introduce new experimental `for_each_item` utility to iterate over a celerity range (#199)
1415
- Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199)
1516
- Add new `experimental::constrain_split` API to limit how a kernel can be split (#?)

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ Celerity's runtime behavior:
122122
automatically assign a unique device to each worker on a host.
123123
- `CELERITY_PROFILE_KERNEL` controls whether SYCL queue profiling information
124124
should be queried (currently not supported when using hipSYCL).
125-
- `CELERITY_RECORDING` enables recording of the generated tasks and commands,
126-
which allows printing dot graphs for debugging and analysis.
125+
- `CELERITY_PRINT_GRAPHS` controls whether task and command graphs are logged
126+
at the end of execution (requires log level `info` or a more verbose level, i.e. `debug` or `trace`).
127127
- `CELERITY_DRY_RUN_NODES` takes a number and simulates a run with that many nodes
128128
without actually executing the commands.

include/config.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ namespace detail {
4040
const std::optional<device_config>& get_device_config() const { return m_device_cfg; }
4141
std::optional<bool> get_enable_device_profiling() const { return m_enable_device_profiling; }
4242
bool is_dry_run() const { return m_dry_run_nodes > 0; }
43-
bool is_recording() const { return m_recording; }
43+
bool should_print_graphs() const { return m_should_print_graphs; }
44+
bool should_record() const {
45+
// Currently only graph printing requires recording, but this might change in the future.
46+
return m_should_print_graphs;
47+
}
4448
int get_dry_run_nodes() const { return m_dry_run_nodes; }
4549
std::optional<int> get_horizon_step() const { return m_horizon_step; }
4650
std::optional<int> get_horizon_max_parallelism() const { return m_horizon_max_parallelism; }
@@ -50,7 +54,7 @@ namespace detail {
5054
std::optional<device_config> m_device_cfg;
5155
std::optional<bool> m_enable_device_profiling;
5256
size_t m_dry_run_nodes = 0;
53-
bool m_recording = false;
57+
bool m_should_print_graphs = false;
5458
std::optional<int> m_horizon_step;
5559
std::optional<int> m_horizon_max_parallelism;
5660
};

src/config.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ namespace {
6262

6363
size_t parse_validate_graph_print_max_verts(const std::string_view str) {
6464
throw env::validation_error{"Support for CELERITY_GRAPH_PRINT_MAX_VERTS has been removed with Celerity 0.5.0.\n"
65-
"Opt into graph recording by setting CELERITY_RECORDING."};
65+
"Opt into graph printing by setting CELERITY_PRINT_GRAPHS=1."};
6666
return 0;
6767
}
6868

@@ -155,7 +155,7 @@ namespace detail {
155155
pref.register_variable<std::vector<size_t>>("DEVICES", [this](const std::string_view str) { return parse_validate_devices(str, m_host_cfg); });
156156
const auto env_profile_kernel = pref.register_variable<bool>("PROFILE_KERNEL", parse_validate_profile_kernel);
157157
const auto env_dry_run_nodes = pref.register_variable<size_t>("DRY_RUN_NODES", parse_validate_dry_run_nodes);
158-
const auto env_recording = pref.register_variable<bool>("RECORDING");
158+
const auto env_print_graphs = pref.register_variable<bool>("PRINT_GRAPHS");
159159
constexpr int horizon_max = 1024 * 64;
160160
const auto env_horizon_step = pref.register_range<int>("HORIZON_STEP", 1, horizon_max);
161161
const auto env_horizon_max_para = pref.register_range<int>("HORIZON_MAX_PARALLELISM", 1, horizon_max);
@@ -201,7 +201,7 @@ namespace detail {
201201
const auto has_dry_run_nodes = parsed_and_validated_envs.get(env_dry_run_nodes);
202202
if(has_dry_run_nodes) { m_dry_run_nodes = *has_dry_run_nodes; }
203203

204-
m_recording = parsed_and_validated_envs.get_or(env_recording, false);
204+
m_should_print_graphs = parsed_and_validated_envs.get_or(env_print_graphs, false);
205205
m_horizon_step = parsed_and_validated_envs.get(env_horizon_step);
206206
m_horizon_max_parallelism = parsed_and_validated_envs.get(env_horizon_max_para);
207207

src/runtime.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ namespace detail {
149149
m_reduction_mngr = std::make_unique<reduction_manager>();
150150
m_host_object_mngr = std::make_unique<host_object_manager>();
151151

152-
if(m_cfg->is_recording()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());
152+
if(m_cfg->should_record()) m_task_recorder = std::make_unique<task_recorder>(m_buffer_mngr.get());
153153

154154
task_manager::policy_set task_mngr_policy;
155155
// Merely _declaring_ an uninitialized read is legitimate as long as the kernel does not actually perform the read at runtime - this might happen in the
@@ -164,7 +164,7 @@ namespace detail {
164164
m_exec = std::make_unique<executor>(m_num_nodes, m_local_nid, *m_h_queue, *m_d_queue, *m_task_mngr, *m_buffer_mngr, *m_reduction_mngr);
165165

166166
m_cdag = std::make_unique<command_graph>();
167-
if(m_cfg->is_recording()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());
167+
if(m_cfg->should_record()) m_command_recorder = std::make_unique<command_recorder>(m_task_mngr.get(), m_buffer_mngr.get());
168168

169169
distributed_graph_generator::policy_set dggen_policy;
170170
// Any uninitialized read that is observed on CDAG generation was already logged on task generation, unless we have a bug.
@@ -224,17 +224,17 @@ namespace detail {
224224
m_d_queue->wait();
225225
m_h_queue->wait();
226226

227-
if(spdlog::should_log(log_level::trace) && m_cfg->is_recording()) {
227+
if(spdlog::should_log(log_level::info) && m_cfg->should_print_graphs()) {
228228
if(m_local_nid == 0) { // It's the same across all nodes
229229
assert(m_task_recorder.get() != nullptr);
230230
const auto graph_str = detail::print_task_graph(*m_task_recorder);
231-
CELERITY_TRACE("Task graph:\n\n{}\n", graph_str);
231+
CELERITY_INFO("Task graph:\n\n{}\n", graph_str);
232232
}
233233
// must be called on all nodes
234234
auto cmd_graph = gather_command_graph();
235235
if(m_local_nid == 0) {
236236
std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Avoid racing on stdout with other nodes (funneled through mpirun)
237-
CELERITY_TRACE("Command graph:\n\n{}\n", cmd_graph);
237+
CELERITY_INFO("Command graph:\n\n{}\n", cmd_graph);
238238
}
239239
}
240240

test/print_graph_tests.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]")
112112
}
113113

114114
TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the generated graph", "[print_graph]") {
115-
env::scoped_test_environment tenv(recording_enabled_env_setting);
115+
env::scoped_test_environment tenv(print_graphs_env_setting);
116116

117117
distr_queue q;
118118
celerity::range<1> range(16);
@@ -141,8 +141,8 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "buffer debug names show up in the
141141
}
142142
}
143143

144-
TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_RECORDING is set", "[print_graph]") {
145-
env::scoped_test_environment tenv(recording_enabled_env_setting);
144+
TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY_PRINT_GRAPHS is set", "[print_graph]") {
145+
env::scoped_test_environment tenv(print_graphs_env_setting);
146146

147147
distr_queue q;
148148
celerity::range<1> range(16);

test/runtime_tests.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,7 +1305,7 @@ namespace detail {
13051305
{"CELERITY_DEVICES", "1 1"},
13061306
{"CELERITY_PROFILE_KERNEL", "1"},
13071307
{"CELERITY_DRY_RUN_NODES", "4"},
1308-
{"CELERITY_RECORDING", "true"},
1308+
{"CELERITY_PRINT_GRAPHS", "true"},
13091309
};
13101310
const auto test_env = env::scoped_test_environment(env_map);
13111311
auto cfg = config(nullptr, nullptr);
@@ -1319,7 +1319,7 @@ namespace detail {
13191319
REQUIRE(has_prof.has_value());
13201320
CHECK((*has_prof) == true);
13211321
CHECK(cfg.get_dry_run_nodes() == 4);
1322-
CHECK(cfg.is_recording() == true);
1322+
CHECK(cfg.should_print_graphs() == true);
13231323
}
13241324

13251325
TEST_CASE_METHOD(test_utils::mpi_fixture, "config reports incorrect environment varibles", "[env-vars][config]") {

test/system/distr_tests.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ namespace detail {
138138
TEST_CASE_METHOD(
139139
test_utils::runtime_fixture, "runtime-shutdown graph printing works in the presence of a finished reduction", "[reductions][print_graph][smoke-test]") {
140140
#if CELERITY_FEATURE_SCALAR_REDUCTIONS
141-
env::scoped_test_environment test_env(recording_enabled_env_setting);
141+
env::scoped_test_environment test_env(print_graphs_env_setting);
142142
// init runtime early so the distr_queue ctor doesn't override the log level set by log_capture
143143
runtime::init(nullptr, nullptr);
144144

@@ -263,7 +263,7 @@ namespace detail {
263263
}
264264

265265
TEST_CASE_METHOD(test_utils::runtime_fixture, "generating same task graph on different nodes", "[task-graph]") {
266-
env::scoped_test_environment tenv(recording_enabled_env_setting);
266+
env::scoped_test_environment tenv(print_graphs_env_setting);
267267
distr_queue q;
268268
REQUIRE(runtime::get_instance().get_num_nodes() > 1);
269269

@@ -374,7 +374,7 @@ namespace detail {
374374
}
375375

376376
TEST_CASE_METHOD(test_utils::runtime_fixture, "command graph can be collected across distributed nodes", "[print_graph]") {
377-
env::scoped_test_environment tenv(recording_enabled_env_setting);
377+
env::scoped_test_environment tenv(print_graphs_env_setting);
378378

379379
int global_size = 0;
380380
MPI_Comm_size(MPI_COMM_WORLD, &global_size);

test/test_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
namespace celerity {
5252
namespace detail {
5353

54-
const std::unordered_map<std::string, std::string> recording_enabled_env_setting{{"CELERITY_RECORDING", "1"}};
54+
const std::unordered_map<std::string, std::string> print_graphs_env_setting{{"CELERITY_PRINT_GRAPHS", "1"}};
5555

5656
struct runtime_testspy {
5757
static scheduler& get_schdlr(runtime& rt) { return *rt.m_schdlr; }

0 commit comments

Comments
 (0)