Skip to content

Commit a49431a

Browse files
committed
EH: CS-713 at qmaster shutdown, print more information about the shutdown of the individual qmaster threads / thread pools to the messages file
1 parent 6d176ee commit a49431a

File tree

8 files changed

+45
-38
lines changed

8 files changed

+45
-38
lines changed

source/daemons/qmaster/msg_qmaster.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -608,10 +608,11 @@
608608
#define MSG_REPORTING_INTERMEDIATE_SS _MESSAGE(33865, _("write intermediate accounting record for job " SFQ " at " SFN ""))
609609

610610
#define MSG_THREAD_XTERMINATED_S _MESSAGE(33870, _(SFN " thread terminated"))
611-
#define MSG_THREAD_XNOTRUNNING_S _MESSAGE(33871, _(SFN " thread is not running"))
612-
#define MSG_THREAD_XHASSTARTED_S _MESSAGE(33872, _(SFN " has been started"))
613-
#define MSG_THREAD_XSTARTDISABLED_S _MESSAGE(33873, _("start of " SFN " thread is disabled in bootstrap file"))
614-
#define MSG_THREAD_XISRUNNING_S _MESSAGE(33874, _(SFN " thread is already running"))
611+
#define MSG_THREADPOOL_XTERMINATED_S _MESSAGE(33871, _(SFN " thread pool terminated"))
612+
#define MSG_THREAD_XNOTRUNNING_S _MESSAGE(33872, _(SFN " thread is not running"))
613+
#define MSG_THREAD_XHASSTARTED_S _MESSAGE(33873, _(SFN " has been started"))
614+
#define MSG_THREAD_XSTARTDISABLED_S _MESSAGE(33874, _("start of " SFN " thread is disabled in bootstrap file"))
615+
#define MSG_THREAD_XISRUNNING_S _MESSAGE(33875, _(SFN " thread is already running"))
615616

616617
#define MSG_JOB_CHANGEJOBSHARE _MESSAGE(33900, _("change job share"))
617618
#define MSG_JOB_PRIOSET_SSUI _MESSAGE(33901, _(SFN "@" SFN " sets scheduling priority of job " sge_U32CFormat " to %d"))

source/daemons/qmaster/ocs_thread_mirror.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919
/*___INFO__MARK_END_NEW__*/
2020

2121
#include "uti/sge_rmon_macros.h"
22+
#include "uti/sge_log.h"
2223

2324
#include "ocs_thread_mirror.h"
2425
#include "ocs_MirrorReaderDataStore.h"
2526
#include "ocs_MirrorListenerDataStore.h"
2627
#include "sge_thread_main.h"
28+
#include "msg_qmaster.h"
2729

2830
namespace ocs {
2931
static void *
@@ -83,6 +85,8 @@ namespace ocs {
8385
// empty the container
8486
Main_Control.mirror_thread_pool.clear();
8587

88+
INFO(MSG_THREADPOOL_XTERMINATED_S, threadnames[EVENT_MIRROR_THREAD]);
89+
8690
DRETURN_VOID;
8791
}
8892
}

source/daemons/qmaster/sge_thread_event_master.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <pthread.h>
3737

3838
#include "uti/sge_bootstrap.h"
39+
#include "uti/sge_log.h"
3940
#include "uti/sge_profiling.h"
4041
#include "uti/sge_rmon_macros.h"
4142
#include "uti/sge_thread_ctrl.h"
@@ -54,6 +55,7 @@
5455
#include "sge_qmaster_timed_event.h"
5556
#include "sge_thread_main.h"
5657
#include "sge_thread_event_master.h"
58+
#include "msg_qmaster.h"
5759

5860
static void
5961
sge_event_master_cleanup_monitor(void *arg) {
@@ -97,6 +99,7 @@ sge_event_master_terminate() {
9799
thread = cl_thread_list_get_first_thread(Main_Control.event_master_thread_pool);
98100
}
99101
DPRINTF("all " SFN " threads terminated\n", threadnames[EVENT_MASTER_THREAD]);
102+
INFO(MSG_THREAD_XTERMINATED_S, threadnames[EVENT_MASTER_THREAD]);
100103

101104
DRETURN_VOID;
102105
}

source/daemons/qmaster/sge_thread_listener.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "uti/sge_profiling.h"
4141
#include "uti/sge_rmon_macros.h"
4242
#include "uti/sge_thread_ctrl.h"
43+
#include "uti/sge_time.h"
4344

4445
#include "comm/cl_commlib.h"
4546

@@ -117,6 +118,7 @@ sge_listener_terminate() {
117118
thread = cl_thread_list_get_first_thread(Main_Control.listener_thread_pool);
118119
}
119120
DPRINTF("all " SFN " threads exited\n", threadnames[LISTENER_THREAD]);
121+
INFO(MSG_THREADPOOL_XTERMINATED_S, threadnames[LISTENER_THREAD]);
120122
DRETURN_VOID;
121123
}
122124

@@ -149,8 +151,6 @@ sge_listener_main(void *arg) {
149151

150152
DPRINTF("entering main loop\n");
151153
while (true) {
152-
int execute = 0;
153-
154154
if (!sge_thread_has_shutdown_started()) {
155155
thread_start_stop_profiling();
156156

@@ -163,12 +163,13 @@ sge_listener_main(void *arg) {
163163

164164
/* pthread cancellation point */
165165
do {
166+
int execute = 0;
166167
pthread_cleanup_push((void (*)(void *)) sge_listener_cleanup_monitor, (void *) &monitor);
167168
cl_thread_func_testcancel(thread_config);
168169
pthread_cleanup_pop(execute);
169170
if (sge_thread_has_shutdown_started()) {
170171
DPRINTF("waiting for termination\n");
171-
sleep(1);
172+
sge_usleep(50000);
172173
}
173174
} while (sge_thread_has_shutdown_started());
174175
}

source/daemons/qmaster/sge_thread_reader.cc

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,32 +96,29 @@ sge_reader_terminate() {
9696
* trigger pthread_cancel for each thread so that further
9797
* shutdown process will be faster
9898
*/
99-
{
100-
cl_thread_list_elem_t *thr;
99+
cl_thread_list_elem_t *thr;
101100

102-
cl_thread_list_elem_t *thr_nxt = cl_thread_list_get_first_elem(Main_Control.reader_thread_pool);
103-
while ((thr = thr_nxt) != nullptr) {
104-
thr_nxt = cl_thread_list_get_next_elem(thr);
101+
cl_thread_list_elem_t *thr_nxt = cl_thread_list_get_first_elem(Main_Control.reader_thread_pool);
102+
while ((thr = thr_nxt) != nullptr) {
103+
thr_nxt = cl_thread_list_get_next_elem(thr);
105104

106-
cl_thread_shutdown(thr->thread_config);
107-
}
105+
cl_thread_shutdown(thr->thread_config);
108106
}
109107

110108
sge_tq_wakeup_waiting(ReaderRequestQueue);
111109

112110
/*
113111
* Shutdown/delete the threads and wait for termination
114112
*/
115-
{
116-
cl_thread_settings_t *thread = cl_thread_list_get_first_thread(Main_Control.reader_thread_pool);
117-
while (thread != nullptr) {
118-
DPRINTF("gets canceled\n");
119-
cl_thread_list_delete_thread(Main_Control.reader_thread_pool, thread);
113+
cl_thread_settings_t *thread = cl_thread_list_get_first_thread(Main_Control.reader_thread_pool);
114+
while (thread != nullptr) {
115+
DPRINTF("gets canceled\n");
116+
cl_thread_list_delete_thread(Main_Control.reader_thread_pool, thread);
120117

121-
thread = cl_thread_list_get_first_thread(Main_Control.reader_thread_pool);
122-
}
123-
DPRINTF("all " SFN " threads terminated\n", threadnames[READER_THREAD]);
118+
thread = cl_thread_list_get_first_thread(Main_Control.reader_thread_pool);
124119
}
120+
DPRINTF("all " SFN " threads terminated\n", threadnames[READER_THREAD]);
121+
INFO(MSG_THREADPOOL_XTERMINATED_S, threadnames[READER_THREAD]);
125122

126123
DRETURN_VOID;
127124
}

source/daemons/qmaster/sge_thread_signaler.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ sge_signaler_terminate() {
9292
DPRINTF("getting canceled\n");
9393
cl_thread_list_delete_thread(Main_Control.signal_thread_pool, thread);
9494
}
95+
INFO(MSG_THREAD_XTERMINATED_S, threadnames[SIGNAL_THREAD]);
9596
DRETURN_VOID;
9697
}
9798

source/daemons/qmaster/sge_thread_timer.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171
#include "sge_job_qmaster.h"
7272
#include "sge_log.h"
7373

74+
#include "msg_qmaster.h"
75+
7476
static void
7577
sge_timer_cleanup_monitor(monitoring_t *monitor) {
7678
DENTER(TOP_LAYER);
@@ -232,10 +234,10 @@ sge_timer_terminate() {
232234
thread = cl_thread_list_get_first_thread(Main_Control.timer_thread_pool);
233235
}
234236
DPRINTF("all " SFN " threads terminated\n", threadnames[TIMER_THREAD]);
235-
236237
te_shutdown();
237238

238239
DPRINTF(SFN " related cleanup has been done\n", threadnames[TIMER_THREAD]);
240+
INFO(MSG_THREAD_XTERMINATED_S, threadnames[TIMER_THREAD]);
239241

240242
DRETURN_VOID;
241243
}

source/daemons/qmaster/sge_thread_worker.cc

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -119,32 +119,28 @@ sge_worker_terminate() {
119119
* trigger pthread_cancel for each thread so that further
120120
* shutdown process will be faster
121121
*/
122-
{
123-
cl_thread_list_elem_t *thr;
122+
cl_thread_list_elem_t *thr;
124123

125-
cl_thread_list_elem_t *thr_nxt = cl_thread_list_get_first_elem(Main_Control.worker_thread_pool);
126-
while ((thr = thr_nxt) != nullptr) {
127-
thr_nxt = cl_thread_list_get_next_elem(thr);
124+
cl_thread_list_elem_t *thr_nxt = cl_thread_list_get_first_elem(Main_Control.worker_thread_pool);
125+
while ((thr = thr_nxt) != nullptr) {
126+
thr_nxt = cl_thread_list_get_next_elem(thr);
128127

129-
cl_thread_shutdown(thr->thread_config);
130-
}
128+
cl_thread_shutdown(thr->thread_config);
131129
}
132130

133131
sge_tq_wakeup_waiting(GlobalRequestQueue);
134132

135133
/*
136134
* Shutdown/delete the threads and wait for termination
137135
*/
138-
{
139-
cl_thread_settings_t *thread = cl_thread_list_get_first_thread(Main_Control.worker_thread_pool);
140-
while (thread != nullptr) {
141-
DPRINTF("gets canceled\n");
142-
cl_thread_list_delete_thread(Main_Control.worker_thread_pool, thread);
136+
cl_thread_settings_t *thread = cl_thread_list_get_first_thread(Main_Control.worker_thread_pool);
137+
while (thread != nullptr) {
138+
DPRINTF("gets canceled\n");
139+
cl_thread_list_delete_thread(Main_Control.worker_thread_pool, thread);
143140

144-
thread = cl_thread_list_get_first_thread(Main_Control.worker_thread_pool);
145-
}
146-
DPRINTF("all " SFN " threads terminated\n", threadnames[WORKER_THREAD]);
141+
thread = cl_thread_list_get_first_thread(Main_Control.worker_thread_pool);
147142
}
143+
DPRINTF("all " SFN " threads terminated\n", threadnames[WORKER_THREAD]);
148144

149145
do_final_spooling = sge_qmaster_do_final_spooling();
150146

@@ -173,6 +169,8 @@ sge_worker_terminate() {
173169
sge_shutdown_persistence(nullptr);
174170
DPRINTF("persistence module has been shutdown\n");
175171

172+
INFO(MSG_THREADPOOL_XTERMINATED_S, threadnames[WORKER_THREAD]);
173+
176174
DRETURN_VOID;
177175
}
178176

0 commit comments

Comments
 (0)