|
52 | 52 | from mig.shared.gridscript import clean_grid_stdin, \ |
53 | 53 | remove_jobrequest_pending_files, check_mrsl_files, requeue_job, \ |
54 | 54 | server_cleanup, load_queue, save_queue, load_schedule_cache, \ |
55 | | - save_schedule_cache, arc_job_status, clean_arc_job |
| 55 | + save_schedule_cache |
56 | 56 | from mig.shared.notification import notify_user_thread |
57 | 57 | from mig.shared.resadm import atomic_resource_exe_restart, put_exe_pgid |
58 | 58 | from mig.shared.vgrid import job_fits_res_vgrid, validated_vgrid_list |
@@ -168,38 +168,6 @@ def time_out_jobs(stop_event): |
168 | 168 | send_message_to_grid_script(grid_script_msg, |
169 | 169 | logger, configuration) |
170 | 170 |
|
171 | | - elif job['UNIQUE_RESOURCE_NAME'] == 'ARC': |
172 | | - if not configuration.arc_clusters: |
173 | | - logger.error('ARC backend disabled - ignore %s' % |
174 | | - job) |
175 | | - continue |
176 | | - jobstatus = arc_job_status(job, configuration, logger) |
177 | | - |
178 | | - # take action if the job is failed or killed. |
179 | | - # No action for a finished job, since other |
180 | | - # machinery will be at work to update it |
181 | | - |
182 | | - if jobstatus in ['FINISHED', 'FAILED', 'KILLED']: |
183 | | - logger.debug( |
184 | | - 'discovered %s job %s, clean it on the server' |
185 | | - % (jobstatus, job['JOB_ID'])) |
186 | | - if jobstatus in ['FAILED', 'KILLED']: |
187 | | - msg = '(failed inside ARC)' |
188 | | - else: |
189 | | - msg = None |
190 | | - exec_job = executing_queue.dequeue_job_by_id( |
191 | | - job['JOB_ID']) |
192 | | - if exec_job: |
193 | | - # job was still there, clean up here |
194 | | - # (otherwise, someone else picked it up in |
195 | | - # the meantime) |
196 | | - clean_arc_job(exec_job, jobstatus, msg, |
197 | | - configuration, logger, False) |
198 | | - else: |
199 | | - logger.debug( |
200 | | - 'Status %s for ARC job %s, no action required' |
201 | | - % (jobstatus, job['JOB_ID'])) |
202 | | - |
203 | 171 | except Exception as err: |
204 | 172 | logger.error('time_out_jobs: unexpected exception: %s' % err) |
205 | 173 | logger.info('time_out_jobs: time out thread terminating') |
@@ -447,66 +415,6 @@ def graceful_shutdown(): |
447 | 415 | dict_userjob['OWNER'] = user_id |
448 | 416 | dict_userjob['MIGRATE_COUNT'] = "0" |
449 | 417 |
|
450 | | - # ARC jobs: directly submit, and put in executing_queue |
451 | | - if dict_userjob['JOBTYPE'] == 'arc': |
452 | | - if not configuration.arc_clusters: |
453 | | - logger.error('ARC backend disabled - ignore %s' % |
454 | | - dict_userjob) |
455 | | - continue |
456 | | - logger.debug('ARC Job') |
457 | | - (arc_job, msg) = jobscriptgenerator.create_arc_job( |
458 | | - dict_userjob, configuration, logger) |
459 | | - if not arc_job: |
460 | | - # something has gone wrong |
461 | | - logger.error('Job NOT submitted (%s)' % msg) |
462 | | - # discard this job (as FAILED, including message) |
463 | | - # see gridscript::requeue_job for how to do this... |
464 | | - |
465 | | - dict_userjob['STATUS'] = 'FAILED' |
466 | | - dict_userjob['FAILED_TIMESTAMP'] = time.gmtime() |
467 | | - # and create an execution history (basically empty) |
468 | | - hist = ( |
469 | | - {'QUEUED_TIMESTAMP': dict_userjob['QUEUED_TIMESTAMP'], |
470 | | - 'EXECUTING_TIMESTAMP': dict_userjob['FAILED_TIMESTAMP'], |
471 | | - 'FAILED_TIMESTAMP': dict_userjob['FAILED_TIMESTAMP'], |
472 | | - 'FAILED_MESSAGE': ('ARC Submission failed: %s' % msg), |
473 | | - 'UNIQUE_RESOURCE_NAME': 'ARC', }) |
474 | | - dict_userjob['EXECUTION_HISTORY'] = [hist] |
475 | | - |
476 | | - # should also notify the user (if requested) |
477 | | - # not implented for this branch. |
478 | | - |
479 | | - else: |
480 | | - # all fine, job is now in some ARC queue |
481 | | - logger.debug('Job submitted (%s,%s)' % |
482 | | - (arc_job['SESSIONID'], arc_job['ARCID'])) |
483 | | - # set some job fields for job status retrieval, and |
484 | | - # put in exec.queue for job status queries and timeout |
485 | | - dict_userjob['SESSIONID'] = arc_job['SESSIONID'] |
486 | | - # abuse these two fields, |
487 | | - # expected by timeout thread to be there anyway |
488 | | - dict_userjob['UNIQUE_RESOURCE_NAME'] = 'ARC' |
489 | | - dict_userjob['EXE'] = arc_job['ARCID'] |
490 | | - |
491 | | - # this one is used by the timeout thread as well |
492 | | - # We put in a wild guess, 10 minutes. Perhaps not enough |
493 | | - dict_userjob['EXECUTION_DELAY'] = 600 |
494 | | - |
495 | | - # set to executing even though it is kind-of wrong... |
496 | | - dict_userjob['STATUS'] = 'EXECUTING' |
497 | | - dict_userjob['EXECUTING_TIMESTAMP'] = time.gmtime() |
498 | | - executing_queue.enqueue_job(dict_userjob, |
499 | | - executing_queue.queue_length()) |
500 | | - |
501 | | - # Either way, save the job mrsl. |
502 | | - # Status is EXECUTING or FAILED |
503 | | - pickle(dict_userjob, file_userjob, logger) |
504 | | - |
505 | | - # go on with scheduling loop (do not use scheduler magic below) |
506 | | - continue |
507 | | - |
508 | | - # following: non-ARC code |
509 | | - |
510 | 418 | # put job in queue |
511 | 419 |
|
512 | 420 | job_queue.enqueue_job(dict_userjob, job_queue.queue_length()) |
@@ -1296,22 +1204,6 @@ def graceful_shutdown(): |
1296 | 1204 | msg += \ |
1297 | 1205 | ', but job is being executed by %s:%s, ignoring result.'\ |
1298 | 1206 | % (job_dict['UNIQUE_RESOURCE_NAME'], job_dict['EXE']) |
1299 | | - elif job_dict['UNIQUE_RESOURCE_NAME'] == 'ARC': |
1300 | | - if not configuration.arc_clusters: |
1301 | | - logger.error('ARC backend disabled - ignore %s' % |
1302 | | - job_dict) |
1303 | | - continue |
1304 | | - msg += (', which is an ARC job (ID %s).' % job_dict['EXE']) |
1305 | | - |
1306 | | - # remove from the executing queue |
1307 | | - executing_queue.dequeue_job_by_id(job_id) |
1308 | | - |
1309 | | - # job status has been checked by put script already |
1310 | | - # we need to clean up the job remainder (links, queue, and ARC |
1311 | | - # side) |
1312 | | - clean_arc_job(job_dict, 'FINISHED', None, |
1313 | | - configuration, logger, False) |
1314 | | - msg += 'ARC job completed' |
1315 | 1207 |
|
1316 | 1208 | else: |
1317 | 1209 |
|
@@ -1455,26 +1347,6 @@ def graceful_shutdown(): |
1455 | 1347 | 'Cancel job: Could not get job_dict for executing job') |
1456 | 1348 | continue |
1457 | 1349 |
|
1458 | | - # special treatment of ARC jobs: delete two links and cancel job |
1459 | | - # in ARC |
1460 | | - if unique_resource_name == 'ARC': |
1461 | | - if not configuration.arc_clusters: |
1462 | | - logger.error('ARC backend disabled - ignore %s' % |
1463 | | - job_dict) |
1464 | | - continue |
1465 | | - |
1466 | | - # remove from the executing queue |
1467 | | - executing_queue.dequeue_job_by_id(job_id) |
1468 | | - |
1469 | | - # job status has been set by the cancel request already, but |
1470 | | - # we need to kill the ARC job, or clean it (if already |
1471 | | - # finished), and clean up the job remainder links |
1472 | | - clean_arc_job(job_dict, 'CANCELED', None, |
1473 | | - configuration, logger, True) |
1474 | | - |
1475 | | - logger.debug('ARC job completed') |
1476 | | - continue |
1477 | | - |
1478 | 1350 | if not server_cleanup( |
1479 | 1351 | job_dict['SESSIONID'], |
1480 | 1352 | job_dict['IOSESSIONID'], |
@@ -1539,26 +1411,6 @@ def graceful_shutdown(): |
1539 | 1411 |
|
1540 | 1412 | job_dict = executing_queue.get_job_by_id(jobid) |
1541 | 1413 |
|
1542 | | - # special treatment of ARC jobs: delete two links and |
1543 | | - # clean job in ARC system, do not retry. |
1544 | | - if job_dict and unique_resource_name == 'ARC': |
1545 | | - if not configuration.arc_clusters: |
1546 | | - logger.error('ARC backend disabled - ignore %s' % |
1547 | | - job_dict) |
1548 | | - continue |
1549 | | - |
1550 | | - # remove from the executing queue |
1551 | | - executing_queue.dequeue_job_by_id(jobid) |
1552 | | - |
1553 | | - # job status has been set by the cancel request already, but |
1554 | | - # we need to kill the ARC job, or clean it (if already finished), |
1555 | | - # and clean up the job remainder links |
1556 | | - clean_arc_job(job_dict, 'FAILED', 'Job timed out', |
1557 | | - configuration, logger, True) |
1558 | | - |
1559 | | - logger.debug('ARC job timed out, removed') |
1560 | | - continue |
1561 | | - |
1562 | 1414 | # Execution information is removed from job_dict in |
1563 | 1415 | # requeue_job - save here |
1564 | 1416 |
|
|
0 commit comments