Merge pull request #137 from VectorInstitute/add_config_flag

XkunW · web-flow · commit 238d70eea48b · 2025-08-27T13:30:27.000-04:00
Add --config flag to `launch` for supplying a custom model config YAML file in place of the default
diff --git a/vec_inf/cli/_cli.py b/vec_inf/cli/_cli.py
@@ -1,3 +1,5 @@
+# ruff: noqa: D301, D411
+# Using \f and \b for click --help formatting, which violates these rules.
 """Command line interface for Vector Inference.
 
 This module provides the command-line interface for interacting with Vector
@@ -124,16 +126,23 @@ def cli() -> None:
     type=str,
     help="Environment variables to be set. Seperate variables with commas. Can also include path to a file containing environment variables seperated by newlines. e.g. --env 'TRITON_CACHE_DIR=/scratch/.cache/triton,my_custom_vars_file.env'",
 )
+@click.option(
+    "--config",
+    type=str,
+    help="Path to a model config yaml file to use in place of the default",
+)
 def launch(
     model_name: str,
     **cli_kwargs: Optional[Union[str, int, float, bool]],
 ) -> None:
     """Launch a model on the cluster.
 
+    \b
     Parameters
     ----------
     model_name : str
         Name of the model to launch
+    \f
     **cli_kwargs : dict
         Additional launch options including:
         - model_family : str, optional
@@ -166,6 +175,10 @@ def launch(
             Path to model weights directory
         - vllm_args : str, optional
             vLLM engine arguments
+        - env : str, optional
+            Environment variables
+        - config : str, optional
+            Path to custom model config yaml file
         - json_mode : bool, optional
             Output in JSON format
 
@@ -220,10 +233,12 @@ def batch_launch(
 ) -> None:
     """Launch multiple models in a batch.
 
+    \b
     Parameters
     ----------
     model_names : tuple[str, ...]
         Names of the models to launch
+    \f
     batch_config : str
         Model configuration for batch launch
     json_mode : bool, default=False
@@ -267,10 +282,12 @@ def batch_launch(
 def status(slurm_job_id: str, json_mode: bool = False) -> None:
     """Get the status of a running model on the cluster.
 
+    \b
     Parameters
     ----------
     slurm_job_id : str
         ID of the SLURM job to check
+    \f
     json_mode : bool, default=False
         Whether to output in JSON format
 
@@ -302,10 +319,12 @@ def status(slurm_job_id: str, json_mode: bool = False) -> None:
 def shutdown(slurm_job_id: str) -> None:
     """Shutdown a running model on the cluster.
 
+    \b
     Parameters
     ----------
     slurm_job_id : str
         ID of the SLURM job to shut down
+    \f
 
     Raises
     ------
@@ -330,10 +349,12 @@ def shutdown(slurm_job_id: str) -> None:
 def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:
     """List all available models, or get default setup of a specific model.
 
+    \b
     Parameters
     ----------
     model_name : str, optional
         Name of specific model to get information for
+    \f
     json_mode : bool, default=False
         Whether to output in JSON format
 
@@ -363,10 +384,12 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> No
 def metrics(slurm_job_id: str) -> None:
     """Stream real-time performance metrics from the model endpoint.
 
+    \b
     Parameters
     ----------
     slurm_job_id : str
         ID of the SLURM job to monitor
+    \f
 
     Raises
     ------
@@ -433,6 +456,8 @@ def cleanup_logs_cli(
 ) -> None:
     """Clean up log files based on optional filters.
 
+    \f
+
     Parameters
     ----------
     log_dir : str or Path, optional
@@ -447,7 +472,7 @@ def cleanup_logs_cli(
         If provided, only delete logs with job ID less than this value.
     dry_run : bool
         If True, return matching files without deleting them.
-    """
+    """  # NOQA: D301, the \f prevents click from printing options twice.
     try:
         client = VecInfClient()
         matched = client.cleanup_logs(
diff --git a/vec_inf/client/_helper.py b/vec_inf/client/_helper.py
@@ -61,7 +61,7 @@ def __init__(self, model_name: str, kwargs: Optional[dict[str, Any]]):
         self.kwargs = kwargs or {}
         self.slurm_job_id = ""
         self.slurm_script_path = Path("")
-        self.model_config = self._get_model_configuration()
+        self.model_config = self._get_model_configuration(self.kwargs.get("config"))
         self.params = self._get_launch_params()
 
     def _warn(self, message: str) -> None:
@@ -74,9 +74,14 @@ def _warn(self, message: str) -> None:
         """
         warnings.warn(message, UserWarning, stacklevel=2)
 
-    def _get_model_configuration(self) -> ModelConfig:
+    def _get_model_configuration(self, config_path: str | None = None) -> ModelConfig:
         """Load and validate model configuration.
 
+        Parameters
+        ----------
+        config_path : str | None, optional
+            Path to a yaml file with custom model config to use in place of the default
+
         Returns
         -------
         ModelConfig
@@ -89,7 +94,7 @@ def _get_model_configuration(self) -> ModelConfig:
         ModelConfigurationError
             If model configuration is not found and weights don't exist
         """
-        model_configs = utils.load_config()
+        model_configs = utils.load_config(config_path=config_path)
         config = next(
             (m for m in model_configs if m.model_name == self.model_name), None
         )
diff --git a/vec_inf/client/models.py b/vec_inf/client/models.py
@@ -218,6 +218,8 @@ class LaunchOptions:
         Additional arguments for vLLM
     env : str, optional
         Environment variables to be set
+    config : str, optional
+        Path to custom model config yaml
     """
 
     model_family: Optional[str] = None
@@ -238,6 +240,7 @@ class LaunchOptions:
     model_weights_parent_dir: Optional[str] = None
     vllm_args: Optional[str] = None
     env: Optional[str] = None
+    config: Optional[str] = None
 
 
 @dataclass