From e0ff9f7caaf7c16539f8b4135bc0f0c0b6dba79f Mon Sep 17 00:00:00 2001 From: btaran_cisco Date: Mon, 3 Nov 2025 13:51:59 +0100 Subject: [PATCH 1/4] CMLDEV-376: Initial implementation of stale PyATS connections; --- virl2_client/models/cl_pyats.py | 89 +++++++++++++++++++++++++++++++-- virl2_client/models/lab.py | 1 + virl2_client/models/node.py | 2 +- 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/virl2_client/models/cl_pyats.py b/virl2_client/models/cl_pyats.py index 3360bb45..0d44a609 100644 --- a/virl2_client/models/cl_pyats.py +++ b/virl2_client/models/cl_pyats.py @@ -25,12 +25,14 @@ from typing import TYPE_CHECKING, Any try: + import unicon.core.errors from pyats.topology.loader.base import TestbedFileLoader as _PyatsTFLoader from pyats.topology.loader.markup import TestbedMarkupProcessor as _PyatsTMProcessor from pyats.utils.yaml.markup import Processor as _PyatsProcessor except ImportError: _PyatsTFLoader = None _PyatsTMProcessor = None + unicon = None else: # Ensure markup processor never uses the command line arguments as that's broken _PyatsProcessor.argv.clear() @@ -156,6 +158,47 @@ def _prepare_params( params["init_config_commands"] = init_config_commands return params + def _should_retry_on_exception(self, exc: Exception) -> bool: + """ + Determine if an exception indicates a connection failure that warrants a retry. + + :param exc: The exception that was raised during command execution + :returns: True if we should retry with a fresh connection, False otherwise + """ + # Handle the case where unicon is not available + if unicon is None: + return False + + # Check for specific unicon connection-related exceptions + exception_types_to_retry = ( + # Connection errors indicate the connection was lost/broken + getattr(unicon.core.errors, "ConnectionError", type(None)), + # SubCommandFailure with timeout often indicates connection issues + getattr(unicon.core.errors, "SubCommandFailure", type(None)), + # TimeoutError can indicate the device is not responding + TimeoutError, + ) + + # Filter out None types (in case unicon attributes don't exist) + valid_exception_types = tuple( + t for t in exception_types_to_retry if t is not type(None) + ) + + if not valid_exception_types: + return False + + # Check if it's a direct match + if isinstance(exc, valid_exception_types): + # For SubCommandFailure, check if it's timeout-related + if hasattr(exc, "__class__") and "SubCommandFailure" in str(exc.__class__): + # Check if the error message contains timeout-related keywords + error_str = str(exc).lower() + timeout_keywords = ["timeout", "timed out", "connection", "disconnect"] + return any(keyword in error_str for keyword in timeout_keywords) + return True + + return False + def _execute_command( self, node_label: str, @@ -192,16 +235,34 @@ def _execute_command( params = self._prepare_params( init_exec_commands, init_config_commands, **pyats_params ) + if pyats_device not in self._connections or not pyats_device.is_connected(): if pyats_device in self._connections: pyats_device.destroy() pyats_device.connect(log_stdout=False, learn_hostname=True, **params) self._connections.add(pyats_device) - if configure_mode: - return pyats_device.configure(command, log_stdout=False, **params) - else: - return pyats_device.execute(command, log_stdout=False, **params) + + try: + if configure_mode: + return pyats_device.configure(command, log_stdout=False, **params) + else: + return pyats_device.execute(command, log_stdout=False, **params) + except Exception as exc: + if self._should_retry_on_exception(exc): + if pyats_device in self._connections: + self._connections.remove(pyats_device) + pyats_device.destroy() + + pyats_device.connect(log_stdout=False, learn_hostname=True, **params) + self._connections.add(pyats_device) + + if configure_mode: + return pyats_device.configure(command, log_stdout=False, **params) + else: + return pyats_device.execute(command, log_stdout=False, **params) + else: + raise def run_command( self, @@ -267,7 +328,25 @@ def run_config_command( ) def cleanup(self) -> None: - """Clean up the pyATS connections.""" + """Clean up all pyATS connections.""" for pyats_device in self._connections: pyats_device.destroy() self._connections.clear() + + def cleanup_node_connection(self, node_label: str) -> None: + """ + Clean up the pyATS connection for a specific node. + + :param node_label: The label/title of the node whose connection to cleanup + """ + if self._testbed is None: + return + + try: + pyats_device: Device = self._testbed.devices[node_label] + if pyats_device in self._connections: + pyats_device.destroy() + self._connections.remove(pyats_device) + except (KeyError, AttributeError): + # IGNORE - Node not found in testbed or connections + pass diff --git a/virl2_client/models/lab.py b/virl2_client/models/lab.py index 92acea5a..0916896c 100644 --- a/virl2_client/models/lab.py +++ b/virl2_client/models/lab.py @@ -1200,6 +1200,7 @@ def stop(self, wait: bool | None = None) -> None: """ url = self._url_for("stop") self._session.put(url) + self.cleanup_pyats_connections() if self.need_to_wait(wait): self.wait_until_lab_converged() _LOGGER.debug(f"Stopped lab: {self._id}") diff --git a/virl2_client/models/node.py b/virl2_client/models/node.py index 96c8b8d6..3f746edd 100644 --- a/virl2_client/models/node.py +++ b/virl2_client/models/node.py @@ -23,7 +23,6 @@ import logging import time -import warnings from copy import deepcopy from typing import TYPE_CHECKING, Any @@ -673,6 +672,7 @@ def stop(self, wait=False) -> None: """ url = self._url_for("stop") self._session.put(url) + self._lab.pyats.cleanup_node_connection(self.label) if self._lab.need_to_wait(wait): self.wait_until_converged() From 9e4222627ed687425d55a2c593b3045422aba30b Mon Sep 17 00:00:00 2001 From: btaran_cisco Date: Wed, 5 Nov 2025 09:15:39 +0100 Subject: [PATCH 2/4] CMLDEV-376: Small improvements logic covering the fix; --- virl2_client/models/cl_pyats.py | 135 +++++++++++++++----------------- 1 file changed, 61 insertions(+), 74 deletions(-) diff --git a/virl2_client/models/cl_pyats.py b/virl2_client/models/cl_pyats.py index 0d44a609..ec13e50d 100644 --- a/virl2_client/models/cl_pyats.py +++ b/virl2_client/models/cl_pyats.py @@ -21,25 +21,32 @@ from __future__ import annotations import io +import logging from pathlib import Path from typing import TYPE_CHECKING, Any try: - import unicon.core.errors from pyats.topology.loader.base import TestbedFileLoader as _PyatsTFLoader from pyats.topology.loader.markup import TestbedMarkupProcessor as _PyatsTMProcessor from pyats.utils.yaml.markup import Processor as _PyatsProcessor except ImportError: _PyatsTFLoader = None _PyatsTMProcessor = None - unicon = None else: # Ensure markup processor never uses the command line arguments as that's broken _PyatsProcessor.argv.clear() +try: + from unicon.core.errors import ConnectionError as _UConnectionError + from unicon.core.errors import SubCommandFailure as _USubCommandFailure +except Exception: + _UConnectionError = _USubCommandFailure = None + from ..exceptions import PyatsDeviceNotFound, PyatsNotInstalled +_LOGGER = logging.getLogger(__name__) + if TYPE_CHECKING: from genie.libs.conf.device import Device from genie.libs.conf.testbed import Testbed @@ -158,46 +165,16 @@ def _prepare_params( params["init_config_commands"] = init_config_commands return params - def _should_retry_on_exception(self, exc: Exception) -> bool: - """ - Determine if an exception indicates a connection failure that warrants a retry. - - :param exc: The exception that was raised during command execution - :returns: True if we should retry with a fresh connection, False otherwise - """ - # Handle the case where unicon is not available - if unicon is None: - return False - - # Check for specific unicon connection-related exceptions - exception_types_to_retry = ( - # Connection errors indicate the connection was lost/broken - getattr(unicon.core.errors, "ConnectionError", type(None)), - # SubCommandFailure with timeout often indicates connection issues - getattr(unicon.core.errors, "SubCommandFailure", type(None)), - # TimeoutError can indicate the device is not responding - TimeoutError, - ) - - # Filter out None types (in case unicon attributes don't exist) - valid_exception_types = tuple( - t for t in exception_types_to_retry if t is not type(None) - ) - - if not valid_exception_types: - return False - - # Check if it's a direct match - if isinstance(exc, valid_exception_types): - # For SubCommandFailure, check if it's timeout-related - if hasattr(exc, "__class__") and "SubCommandFailure" in str(exc.__class__): - # Check if the error message contains timeout-related keywords - error_str = str(exc).lower() - timeout_keywords = ["timeout", "timed out", "connection", "disconnect"] - return any(keyword in error_str for keyword in timeout_keywords) - return True - - return False + def _reconnect(self, pyats_device: "Device", params: dict) -> None: + """Helper method to reconnect a PyATS device with proper cleanup.""" + if pyats_device in self._connections: + self._connections.remove(pyats_device) + try: + pyats_device.destroy() + except Exception: + pass + pyats_device.connect(log_stdout=False, learn_hostname=True, **params) + self._connections.add(pyats_device) def _execute_command( self, @@ -227,8 +204,11 @@ def _execute_command( """ self._check_pyats_installed() + if self._testbed is None: + raise RuntimeError("pyATS testbed is not initialized") + try: - pyats_device: Device = self._testbed.devices[node_label] + pyats_device: "Device" = self._testbed.devices[node_label] except KeyError: raise PyatsDeviceNotFound(node_label) @@ -237,33 +217,38 @@ def _execute_command( ) if pyats_device not in self._connections or not pyats_device.is_connected(): - if pyats_device in self._connections: - pyats_device.destroy() - - pyats_device.connect(log_stdout=False, learn_hostname=True, **params) - self._connections.add(pyats_device) + self._reconnect(pyats_device, params) - try: + def _run(): if configure_mode: return pyats_device.configure(command, log_stdout=False, **params) - else: - return pyats_device.execute(command, log_stdout=False, **params) + return pyats_device.execute(command, log_stdout=False, **params) + + try: + return _run() except Exception as exc: - if self._should_retry_on_exception(exc): - if pyats_device in self._connections: - self._connections.remove(pyats_device) - pyats_device.destroy() - - pyats_device.connect(log_stdout=False, learn_hostname=True, **params) - self._connections.add(pyats_device) - - if configure_mode: - return pyats_device.configure(command, log_stdout=False, **params) - else: - return pyats_device.execute(command, log_stdout=False, **params) - else: + should_retry = False + retry_reason = None + + if _UConnectionError and isinstance(exc, _UConnectionError): + should_retry = True + retry_reason = f"ConnectionError: {exc}" + elif _USubCommandFailure and isinstance(exc, _USubCommandFailure): + cause = getattr(exc, "__cause__", None) + if isinstance(cause, TimeoutError): + should_retry = True + retry_reason = f"SubCommandFailure with TimeoutError cause: {cause}" + + if not should_retry: raise + _LOGGER.info( + f"PyATS command failed on node {node_label}, retrying after reconnection. Reason: {retry_reason}" + ) + + self._reconnect(pyats_device, params) + return _run() + def run_command( self, node_label: str, @@ -329,9 +314,11 @@ def run_config_command( def cleanup(self) -> None: """Clean up all pyATS connections.""" - for pyats_device in self._connections: - pyats_device.destroy() - self._connections.clear() + for pyats_device in tuple(self._connections): + try: + pyats_device.destroy() + finally: + self._connections.discard(pyats_device) def cleanup_node_connection(self, node_label: str) -> None: """ @@ -341,12 +328,12 @@ def cleanup_node_connection(self, node_label: str) -> None: """ if self._testbed is None: return - try: - pyats_device: Device = self._testbed.devices[node_label] - if pyats_device in self._connections: + pyats_device: "Device" = self._testbed.devices[node_label] + except Exception: + return + if pyats_device in self._connections: + try: pyats_device.destroy() - self._connections.remove(pyats_device) - except (KeyError, AttributeError): - # IGNORE - Node not found in testbed or connections - pass + finally: + self._connections.discard(pyats_device) From 9dd6d2621466d698cca4d1c45f92e01fb05dc92d Mon Sep 17 00:00:00 2001 From: btaran_cisco Date: Thu, 20 Nov 2025 06:27:41 +0100 Subject: [PATCH 3/4] CMLDEV-376: Implemented suggestions from PR; --- virl2_client/models/cl_pyats.py | 81 +++++++++++++++++++-------------- virl2_client/models/node.py | 2 +- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/virl2_client/models/cl_pyats.py b/virl2_client/models/cl_pyats.py index ec13e50d..794844c5 100644 --- a/virl2_client/models/cl_pyats.py +++ b/virl2_client/models/cl_pyats.py @@ -22,6 +22,7 @@ import io import logging +import os from pathlib import Path from typing import TYPE_CHECKING, Any @@ -29,19 +30,16 @@ from pyats.topology.loader.base import TestbedFileLoader as _PyatsTFLoader from pyats.topology.loader.markup import TestbedMarkupProcessor as _PyatsTMProcessor from pyats.utils.yaml.markup import Processor as _PyatsProcessor + from unicon.core.errors import ConnectionError as _UConnectionError + from unicon.core.errors import SubCommandFailure as _USubCommandFailure except ImportError: _PyatsTFLoader = None _PyatsTMProcessor = None + _UConnectionError = _USubCommandFailure = None else: # Ensure markup processor never uses the command line arguments as that's broken _PyatsProcessor.argv.clear() -try: - from unicon.core.errors import ConnectionError as _UConnectionError - from unicon.core.errors import SubCommandFailure as _USubCommandFailure -except Exception: - _UConnectionError = _USubCommandFailure = None - from ..exceptions import PyatsDeviceNotFound, PyatsNotInstalled @@ -168,12 +166,14 @@ def _prepare_params( def _reconnect(self, pyats_device: "Device", params: dict) -> None: """Helper method to reconnect a PyATS device with proper cleanup.""" if pyats_device in self._connections: - self._connections.remove(pyats_device) + self._connections.discard(pyats_device) try: pyats_device.destroy() except Exception: pass - pyats_device.connect(log_stdout=False, learn_hostname=True, **params) + pyats_device.connect( + logfile=os.devnull, log_stdout=False, learn_hostname=True, **params + ) self._connections.add(pyats_device) def _execute_command( @@ -183,6 +183,7 @@ def _execute_command( configure_mode: bool = False, init_exec_commands: list[str] | None = None, init_config_commands: list[str] | None = None, + _retry_attempted: bool = False, **pyats_params: Any, ) -> str: """ @@ -227,19 +228,22 @@ def _run(): try: return _run() except Exception as exc: - should_retry = False + should_raise = True retry_reason = None if _UConnectionError and isinstance(exc, _UConnectionError): - should_retry = True + should_raise = False retry_reason = f"ConnectionError: {exc}" elif _USubCommandFailure and isinstance(exc, _USubCommandFailure): cause = getattr(exc, "__cause__", None) if isinstance(cause, TimeoutError): - should_retry = True + should_raise = False retry_reason = f"SubCommandFailure with TimeoutError cause: {cause}" - if not should_retry: + if should_raise: + raise + + if _retry_attempted: raise _LOGGER.info( @@ -247,7 +251,15 @@ def _run(): ) self._reconnect(pyats_device, params) - return _run() + return self._execute_command( + node_label, + command, + configure_mode, + init_exec_commands, + init_config_commands, + _retry_attempted=True, + **pyats_params, + ) def run_command( self, @@ -312,28 +324,29 @@ def run_config_command( **pyats_params, ) - def cleanup(self) -> None: - """Clean up all pyATS connections.""" - for pyats_device in tuple(self._connections): - try: - pyats_device.destroy() - finally: - self._connections.discard(pyats_device) - - def cleanup_node_connection(self, node_label: str) -> None: + def cleanup(self, node_label: str | None = None) -> None: """ - Clean up the pyATS connection for a specific node. + Clean up pyATS connections. - :param node_label: The label/title of the node whose connection to cleanup + :param node_label: The label/title of a specific node to cleanup. + If None, all connections will be cleaned up. """ - if self._testbed is None: - return - try: - pyats_device: "Device" = self._testbed.devices[node_label] - except Exception: - return - if pyats_device in self._connections: + if node_label is None: + for pyats_device in tuple(self._connections): + try: + pyats_device.destroy() + finally: + self._connections.discard(pyats_device) + else: + if self._testbed is None: + return try: - pyats_device.destroy() - finally: - self._connections.discard(pyats_device) + pyats_device: "Device" = self._testbed.devices[node_label] + except KeyError: + return + if pyats_device in self._connections: + try: + pyats_device.destroy() + + finally: + self._connections.discard(pyats_device) diff --git a/virl2_client/models/node.py b/virl2_client/models/node.py index 3f746edd..4e7fcd08 100644 --- a/virl2_client/models/node.py +++ b/virl2_client/models/node.py @@ -672,7 +672,7 @@ def stop(self, wait=False) -> None: """ url = self._url_for("stop") self._session.put(url) - self._lab.pyats.cleanup_node_connection(self.label) + self._lab.pyats.cleanup(self.label) if self._lab.need_to_wait(wait): self.wait_until_converged() From 7f99599722e26cbccc085f3438904f0e088150fc Mon Sep 17 00:00:00 2001 From: Tomas Mikuska Date: Fri, 21 Nov 2025 23:42:31 +0100 Subject: [PATCH 4/4] cleanup --- virl2_client/models/cl_pyats.py | 59 ++++++++++++++------------------ virl2_client/models/group.py | 3 +- virl2_client/models/interface.py | 1 - virl2_client/models/link.py | 1 - 4 files changed, 26 insertions(+), 38 deletions(-) diff --git a/virl2_client/models/cl_pyats.py b/virl2_client/models/cl_pyats.py index 794844c5..82b27b99 100644 --- a/virl2_client/models/cl_pyats.py +++ b/virl2_client/models/cl_pyats.py @@ -35,7 +35,8 @@ except ImportError: _PyatsTFLoader = None _PyatsTMProcessor = None - _UConnectionError = _USubCommandFailure = None + _UConnectionError = None + _USubCommandFailure = None else: # Ensure markup processor never uses the command line arguments as that's broken _PyatsProcessor.argv.clear() @@ -165,12 +166,7 @@ def _prepare_params( def _reconnect(self, pyats_device: "Device", params: dict) -> None: """Helper method to reconnect a PyATS device with proper cleanup.""" - if pyats_device in self._connections: - self._connections.discard(pyats_device) - try: - pyats_device.destroy() - except Exception: - pass + self._destroy_device(pyats_device, raise_exc=False) pyats_device.connect( logfile=os.devnull, log_stdout=False, learn_hostname=True, **params ) @@ -220,13 +216,10 @@ def _execute_command( if pyats_device not in self._connections or not pyats_device.is_connected(): self._reconnect(pyats_device, params) - def _run(): + try: if configure_mode: return pyats_device.configure(command, log_stdout=False, **params) return pyats_device.execute(command, log_stdout=False, **params) - - try: - return _run() except Exception as exc: should_raise = True retry_reason = None @@ -240,16 +233,12 @@ def _run(): should_raise = False retry_reason = f"SubCommandFailure with TimeoutError cause: {cause}" - if should_raise: - raise - - if _retry_attempted: + if _retry_attempted or should_raise: raise _LOGGER.info( f"PyATS command failed on node {node_label}, retrying after reconnection. Reason: {retry_reason}" ) - self._reconnect(pyats_device, params) return self._execute_command( node_label, @@ -329,24 +318,26 @@ def cleanup(self, node_label: str | None = None) -> None: Clean up pyATS connections. :param node_label: The label/title of a specific node to cleanup. - If None, all connections will be cleaned up. + If None, all connections will be cleaned up. """ if node_label is None: for pyats_device in tuple(self._connections): - try: - pyats_device.destroy() - finally: - self._connections.discard(pyats_device) - else: - if self._testbed is None: - return - try: - pyats_device: "Device" = self._testbed.devices[node_label] - except KeyError: - return - if pyats_device in self._connections: - try: - pyats_device.destroy() - - finally: - self._connections.discard(pyats_device) + self._destroy_device(pyats_device) + return + if self._testbed is None: + return + try: + pyats_device: "Device" = self._testbed.devices[node_label] + except KeyError: + return + if pyats_device in self._connections: + self._destroy_device(pyats_device) + + def _destroy_device(self, pyats_device: "Device", raise_exc=True) -> None: + try: + pyats_device.destroy() + except Exception: + if raise_exc: + raise + finally: + self._connections.discard(pyats_device) diff --git a/virl2_client/models/group.py b/virl2_client/models/group.py index be7c9544..e82bd835 100644 --- a/virl2_client/models/group.py +++ b/virl2_client/models/group.py @@ -165,8 +165,7 @@ def group_labs(self, group_id: str) -> list[str]: :returns: A list of labs associated with this group. """ warnings.warn( - "'GroupManagement.group_labs()' is deprecated." - "Use '.associations' instead.", + "'GroupManagement.group_labs()' is deprecated.Use '.associations' instead.", ) return [lab["id"] for lab in self.get_group(group_id)["labs"]] diff --git a/virl2_client/models/interface.py b/virl2_client/models/interface.py index 9a57f4bc..e4eca773 100644 --- a/virl2_client/models/interface.py +++ b/virl2_client/models/interface.py @@ -21,7 +21,6 @@ from __future__ import annotations import logging -import warnings from typing import TYPE_CHECKING, Any from ..utils import check_stale, get_url_from_template, locked diff --git a/virl2_client/models/link.py b/virl2_client/models/link.py index fcaa757e..3ddf2a13 100644 --- a/virl2_client/models/link.py +++ b/virl2_client/models/link.py @@ -22,7 +22,6 @@ import logging import time -import warnings from typing import TYPE_CHECKING from ..utils import check_stale, get_url_from_template, locked