Skip to content

Commit 6d176ee

Browse files
committed
BF: CS-683 a MirrorDataStore is not always properly cleaned up at sge_qmaster shutdown when the reader thread pool is enabled
1 parent 020a974 commit 6d176ee

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

source/daemons/qmaster/ocs_MirrorDataStore.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,11 @@ namespace ocs {
294294
// Do not exit even if an event client shutdown event is received. The thread must terminate only at the cancellation
295295
// point. This enforces that other threads (accessing the data store) terminate before us, and that we do the
296296
// cleanup (freeing the data store memory and more) at the cancellation point.
297+
298+
// the thread shall only terminate in pthread_testcancel() to ensure that proper cleanup is done
299+
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, nullptr);
300+
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, nullptr);
301+
297302
bool do_qmaster_shutdown = false;
298303
while (true) {
299304
lList *event_list = nullptr;
@@ -389,10 +394,12 @@ namespace ocs {
389394
// We just wait here for the final termination signal to do the cleanup.
390395
do {
391396
int execute = 0;
392-
pthread_cleanup_push(thread_cleanup_monitor, &monitor);
397+
pthread_cleanup_push(thread_cleanup_monitor, static_cast<void *>(&monitor));
393398
pthread_cleanup_push(thread_cleanup_data_store, nullptr);
394-
pthread_cleanup_push(thread_cleanup_event_client, evc);
399+
pthread_cleanup_push(thread_cleanup_event_client, static_cast<void *>(evc));
400+
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, nullptr);
395401
pthread_testcancel();
402+
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, nullptr);
396403
pthread_cleanup_pop(execute); // event client registration
397404
pthread_cleanup_pop(execute); // data store that was filled by this mirror
398405
pthread_cleanup_pop(execute); // monitor

source/daemons/qmaster/ocs_thread_mirror.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ namespace ocs {
7676
// wait till each thread returns from its main
7777
for (auto mirror_thread: Main_Control.mirror_thread_pool) {
7878
pthread_join(mirror_thread->thread, nullptr);
79-
delete mirror_thread;
8079
DPRINTF("termination of thread for data store %d finished\n", mirror_thread->data_store_id);
80+
delete mirror_thread;
8181
}
8282

8383
// empty the container

0 commit comments

Comments
 (0)