2 files changed: +4 -2 lines changed
sagemaker_ssh_helper/cdk/low_gpu Expand file tree Collapse file tree 2 files changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -45,15 +45,16 @@ def handler(event, context):
45 45   logging.warning(f"Found issues with GPU utilization of the training job "
46 46   f"{ssh_training_wrapper.training_job_name()}: {status_details}")
47 47
48    - # Send notification email and/or SMS through Amazon SNS topic
   48 + logging.info(f"Send notification email and/or SMS through Amazon SNS topic {sns_notification_topic_arn}")
49 49   sns_resource = boto3.resource('sns')
50 50   sns_notification_topic = sns_resource.Topic(sns_notification_topic_arn)
51    - sns_notification_topic.publish(
   51 + response = sns_notification_topic.publish(
52 52   Subject='Training job with low GPU utilization',
53 53   Message=status_details + "\n\n" +
54 54   "Training job metadata URL:\n" +
55 55   ssh_training_wrapper.get_metadata_url()
56 56   )
   57 + logging.info(f"SNS response: {response}")
57 58
58 59   # Optionally, stop the job (not recommended, better to keep notifications only)
59 60   # ssh_training_wrapper.stop_training_job()
Original file line number Diff line number Diff line change @@ -138,5 +138,6 @@ def test_cloudwatch_metrics_sns(request):
138 138
139 139   log = SSHLog()
140 140   metrics_count = log.count_sns_notifications(topic_name, timedelta(minutes=15))
    141 + logging.info(metrics_count)
141 142
142 143   assert metrics_count > 0
You can’t perform that action at this time.
0 commit comments