Skip to content

Commit 8bccc24

Browse files
authored
Merge pull request #3356 from ekouts/bugfix/log_httpjson_errors_2
[bugfix] Check response status in the `httpjson` log handler and retry in case of `TOO_MANY_REQUESTS`
2 parents a3f5c25 + 78f6006 commit 8bccc24

File tree

4 files changed

+77
-13
lines changed

4 files changed

+77
-13
lines changed

docs/config_reference.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,6 +1604,32 @@ This handler transmits the whole log record, meaning that all the information wi
16041604
.. versionadded:: 4.1
16051605

16061606

1607+
.. py:attribute:: logging.handlers_perflog..httpjson..backoff_intervals
1608+
1609+
:required: No
1610+
:default: ``[0.1, 0.2, 0.4, 0.8, 1.6, 3.2]``
1611+
1612+
List of wait intervals in seconds when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``).
1613+
1614+
In this case, ReFrame will retry contacting the server after waiting an amount of time that is determined by cyclically iterating this list of intervals.
1615+
1616+
ReFrame will keep trying contacting the server, until a different HTTP resonse is received (either success or error) or the corresponding :attr:`~config.logging.handlers_perflog..httpjson..timeout` is exceeded.
1617+
1618+
.. versionadded:: 4.7.3
1619+
1620+
1621+
.. py:attribute:: logging.handlers_perflog..httpjson..retry_timeout
1622+
1623+
:required: No
1624+
:default: ``0``
1625+
1626+
Timeout in seconds for retrying when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``).
1627+
1628+
If set to zero, ReFrame will retry until another HTTP response (success or error) is received.
1629+
1630+
1631+
.. versionadded:: 4.7.3
1632+
16071633

16081634
.. _exec-mode-config:
16091635

reframe/core/logging.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# SPDX-License-Identifier: BSD-3-Clause
55

66
import abc
7+
import itertools
78
import logging
89
import logging.handlers
910
import numbers
@@ -554,6 +555,8 @@ def _create_httpjson_handler(site_config, config_prefix):
554555
json_formatter = site_config.get(f'{config_prefix}/json_formatter')
555556
extra_headers = site_config.get(f'{config_prefix}/extra_headers')
556557
debug = site_config.get(f'{config_prefix}/debug')
558+
backoff_intervals = site_config.get(f'{config_prefix}/backoff_intervals')
559+
retry_timeout = site_config.get(f'{config_prefix}/retry_timeout')
557560

558561
parsed_url = urllib.parse.urlparse(url)
559562
if parsed_url.scheme not in {'http', 'https'}:
@@ -595,7 +598,8 @@ def _create_httpjson_handler(site_config, config_prefix):
595598
'no data will be sent to the server')
596599

597600
return HTTPJSONHandler(url, extras, ignore_keys, json_formatter,
598-
extra_headers, debug)
601+
extra_headers, debug, backoff_intervals,
602+
retry_timeout)
599603

600604

601605
def _record_to_json(record, extras, ignore_keys):
@@ -645,7 +649,7 @@ class HTTPJSONHandler(logging.Handler):
645649

646650
def __init__(self, url, extras=None, ignore_keys=None,
647651
json_formatter=None, extra_headers=None,
648-
debug=False):
652+
debug=False, backoff_intervals=(1, 2, 3), retry_timeout=0):
649653
super().__init__()
650654
self._url = url
651655
self._extras = extras
@@ -669,6 +673,8 @@ def __init__(self, url, extras=None, ignore_keys=None,
669673
self._headers.update(extra_headers)
670674

671675
self._debug = debug
676+
self._timeout = retry_timeout
677+
self._backoff_intervals = backoff_intervals
672678

673679
def emit(self, record):
674680
# Convert tags to a list to make them JSON friendly
@@ -680,19 +686,33 @@ def emit(self, record):
680686
return
681687

682688
if self._debug:
683-
import time
684689
ts = int(time.time() * 1_000)
685690
dump_file = f'httpjson_record_{ts}.json'
686691
with open(dump_file, 'w') as fp:
687692
fp.write(json_record)
688693

689694
return
690695

696+
timeout_time = time.time() + self._timeout
691697
try:
692-
requests.post(
693-
self._url, data=json_record,
694-
headers=self._headers
695-
)
698+
backoff_intervals = itertools.cycle(self._backoff_intervals)
699+
while True:
700+
response = requests.post(
701+
self._url, data=json_record,
702+
headers=self._headers
703+
)
704+
if response.ok:
705+
break
706+
707+
if (response.status_code == 429 and
708+
(not self._timeout or time.time() < timeout_time)):
709+
time.sleep(next(backoff_intervals))
710+
continue
711+
712+
raise LoggingError(
713+
f'HTTPJSONhandler logging failed: HTTP response code '
714+
f'{response.status_code}'
715+
)
696716
except requests.exceptions.RequestException as e:
697717
raise LoggingError('logging failed') from e
698718

reframe/frontend/executors/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@
2020
import reframe.utility.jsonext as jsonext
2121
import reframe.utility.typecheck as typ
2222
from reframe.core.exceptions import (AbortTaskError,
23-
JobNotStartedError,
2423
FailureLimitError,
2524
ForceExitError,
25+
JobNotStartedError,
26+
LoggingError,
2627
RunSessionTimeout,
2728
SkipTestError,
2829
StatisticsError,
@@ -480,8 +481,13 @@ def finalize(self):
480481

481482
self._current_stage = 'finalize'
482483
self._notify_listeners('on_task_success')
483-
self._perflogger.log_performance(logging.INFO, self,
484-
multiline=self._perflog_compat)
484+
try:
485+
self._perflogger.log_performance(logging.INFO, self,
486+
multiline=self._perflog_compat)
487+
except LoggingError as e:
488+
logging.getlogger().warning(
489+
f'could not log performance data for {self.testcase}: {e}'
490+
)
485491

486492
@logging.time_function
487493
def cleanup(self, *args, **kwargs):
@@ -491,8 +497,13 @@ def fail(self, exc_info=None, callback='on_task_failure'):
491497
self._failed_stage = self._current_stage
492498
self._exc_info = exc_info or sys.exc_info()
493499
self._notify_listeners(callback)
494-
self._perflogger.log_performance(logging.INFO, self,
495-
multiline=self._perflog_compat)
500+
try:
501+
self._perflogger.log_performance(logging.INFO, self,
502+
multiline=self._perflog_compat)
503+
except LoggingError as e:
504+
logging.getlogger().warning(
505+
f'could not log performance data for {self.testcase}: {e}'
506+
)
496507

497508
def skip(self, exc_info=None):
498509
self._skipped = True

reframe/schemas/config.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,12 @@
173173
},
174174
"json_formatter": {},
175175
"extra_headers": {"type": "object"},
176-
"debug": {"type": "boolean"}
176+
"debug": {"type": "boolean"},
177+
"backoff_intervals": {
178+
"type": "array",
179+
"items": {"type": "number"}
180+
},
181+
"retry_timeout": {"type": "number"}
177182
},
178183
"required": ["url"]
179184
}
@@ -632,6 +637,8 @@
632637
"logging/handlers_perflog/httpjson_json_formatter": null,
633638
"logging/handlers_perflog/httpjson_extra_headers": {},
634639
"logging/handlers_perflog/httpjson_debug": false,
640+
"logging/handlers_perflog/httpjson_backoff_intervals": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2],
641+
"logging/handlers_perflog/httpjson_retry_timeout": 0,
635642
"modes/options": [],
636643
"modes/target_systems": ["*"],
637644
"storage/enable": false,

0 commit comments

Comments
 (0)