Skip to content

Commit 8adff23

Browse files
committed
added clean up logs functionality to cli.
1 parent 019ca54 commit 8adff23

File tree

5 files changed

+169
-1
lines changed

5 files changed

+169
-1
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ Note that there are other parameters that can also be added to the config but no
102102
* `metrics`: Streams performance metrics to the console.
103103
* `shutdown`: Shutdown a model by providing its Slurm job ID.
104104
* `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported.
105+
* `cleanup`: Remove log directories. You can filter by `--model-family`, `--model-name`, and/or `--job-id`; with no filters, the entire log directory is removed. Use `--dry-run` to preview what would be deleted.
105106

106107
For more details on the usage of these commands, refer to the [User Guide](https://vectorinstitute.github.io/vector-inference/user_guide/)
107108

vec_inf/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55
* `metrics`: Streams performance metrics to the console.
66
* `shutdown`: Shutdown a model by providing its Slurm job ID.
77
* `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported.
8+
* `cleanup`: Remove log directories. You can filter by `--model-family`, `--model-name`, and/or `--job-id`; with no filters, the entire log directory is removed. Use `--dry-run` to preview what would be deleted.
89

910
Use `--help` to see all available options

vec_inf/cli/_cli.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,5 +336,60 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
336336
raise click.ClickException(f"Metrics check failed: {str(e)}") from e
337337

338338

339+
@cli.command("cleanup")
@click.option("--log-dir", type=str, help="Path to SLURM log directory")
@click.option("--model-family", type=str, help="Filter by model family")
@click.option("--model-name", type=str, help="Filter by model name")
@click.option(
    "--job-id", type=int, help="Only remove logs with this exact SLURM job ID"
)
@click.option("--dry-run", is_flag=True, help="List matching logs without deleting")
def cleanup_logs_cli(
    log_dir: Optional[str],
    model_family: Optional[str],
    model_name: Optional[str],
    job_id: Optional[int],
    dry_run: bool,
) -> None:
    """Clean up log files based on optional filters.

    Parameters
    ----------
    log_dir : str, optional
        Root directory containing the logs; forwarded unchanged to
        ``VecInfClient.cleanup_logs``.
    model_family : str, optional
        Restrict the cleanup to this model family.
    model_name : str, optional
        Restrict the cleanup to this model name.
    job_id : int, optional
        Restrict the cleanup to directories with this exact SLURM job ID.
    dry_run : bool
        When set, only report the matching directories; nothing is deleted.

    Raises
    ------
    click.ClickException
        If any step of the cleanup (including reporting) raises.
    """
    try:
        matched = VecInfClient().cleanup_logs(
            log_dir=log_dir,
            model_family=model_family,
            model_name=model_name,
            job_id=job_id,
            dry_run=dry_run,
        )

        # Report the outcome; the four cases are (matched?, dry_run?).
        if matched and dry_run:
            click.echo(f"Dry run: {len(matched)} directories would be deleted:")
            for f in matched:
                click.echo(f" - {f}")
        elif matched:
            click.echo(f"Deleted {len(matched)} log directory(ies).")
        elif dry_run:
            click.echo("Dry run: no matching log directories found.")
        else:
            click.echo("No matching log directories were deleted.")
    except Exception as e:
        raise click.ClickException(f"Cleanup failed: {str(e)}") from e
392+
393+
339394
if __name__ == "__main__":
340395
cli()

vec_inf/client/_utils.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,3 +285,65 @@ def parse_launch_output(output: str) -> tuple[str, dict[str, str]]:
285285
config_dict[key.lower().replace(" ", "_")] = value
286286

287287
return slurm_job_id, config_dict
288+
289+
290+
def find_matching_dirs(
    log_dir: Path,
    model_family: Optional[str] = None,
    model_name: Optional[str] = None,
    job_id: Optional[int] = None,
) -> list[Path]:
    """
    Find log directories based on filtering criteria.

    The expected layout is ``<log_dir>/<model_family>/<model_name>.<job_id>``.
    Sub-directories whose name does not end in ``.<integer>`` are ignored.

    Parameters
    ----------
    log_dir : Path
        The base directory containing model family directories.
    model_family : str, optional
        Filter to only search inside this family.
    model_name : str, optional
        Filter to only match model names.
    job_id : int, optional
        Filter to only match this exact SLURM job ID. ``0`` is treated as a
        real ID; only ``None`` means "no job filter".

    Returns
    -------
    list[Path]
        List of directories that match the criteria and can be deleted.
        With no filters at all this is ``[log_dir]``; with only
        ``model_family`` it is that family's directory (or ``[]`` if the
        family does not exist).

    Raises
    ------
    FileNotFoundError
        If ``log_dir`` does not exist or is not a directory.
    """
    # is_dir() is already False for a non-existent path.
    if not log_dir.is_dir():
        raise FileNotFoundError(f"Log directory does not exist: {log_dir}")

    # Use an explicit None test so that job_id=0 is not mistaken for
    # "no filter", which would select the whole tree for deletion.
    has_job_filter = job_id is not None

    if not model_family and not model_name and not has_job_filter:
        return [log_dir]

    matched: list[Path] = []

    # Iterate over model families
    for family_dir in log_dir.iterdir():
        if not family_dir.is_dir():
            continue
        if model_family and family_dir.name != model_family:
            continue

        # Family-only filter: the whole family directory is the match.
        if model_family and not model_name and not has_job_filter:
            return [family_dir]

        for job_dir in family_dir.iterdir():
            if not job_dir.is_dir():
                continue

            # Split on the last dot so model names containing dots still parse.
            try:
                name_part, id_part = job_dir.name.rsplit(".", 1)
                parsed_id = int(id_part)
            except ValueError:
                continue

            if model_name and name_part != model_name:
                continue
            if has_job_filter and parsed_id != job_id:
                continue

            matched.append(job_dir)

    return matched

vec_inf/client/api.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
vec_inf.client.models : Data models for API responses
1111
"""
1212

13+
import shutil
1314
import time
1415
import warnings
16+
from pathlib import Path
1517
from typing import Any, Optional, Union
1618

1719
from vec_inf.client._exceptions import (
@@ -24,7 +26,7 @@
2426
ModelStatusMonitor,
2527
PerformanceMetricsCollector,
2628
)
27-
from vec_inf.client._utils import run_bash_command
29+
from vec_inf.client._utils import find_matching_dirs, run_bash_command
2830
from vec_inf.client.config import ModelConfig
2931
from vec_inf.client.models import (
3032
LaunchOptions,
@@ -60,6 +62,9 @@ class VecInfClient:
6062
wait_until_ready(slurm_job_id, timeout_seconds, poll_interval_seconds, log_dir)
6163
Wait for a model to become ready
6264
65+
cleanup_logs(log_dir, model_family, model_name, job_id, dry_run)
66+
Remove logs from the log directory.
67+
6368
Examples
6469
--------
6570
>>> from vec_inf.api import VecInfClient
@@ -300,3 +305,47 @@ def wait_until_ready(
300305

301306
# Wait before checking again
302307
time.sleep(poll_interval_seconds)
308+
309+
def cleanup_logs(
    self,
    log_dir: Optional[Union[str, Path]] = None,
    model_family: Optional[str] = None,
    model_name: Optional[str] = None,
    job_id: Optional[int] = None,
    dry_run: bool = False,
) -> list[Path]:
    """Remove logs from the log directory.

    Parameters
    ----------
    log_dir : str or Path, optional
        Root directory containing log files. A leading ``~`` is expanded
        to the user's home directory. Defaults to ~/.vec-inf-logs.
    model_family : str, optional
        Only delete logs for this model family.
    model_name : str, optional
        Only delete logs for this model name.
    job_id : int, optional
        If provided, only match directories with this exact SLURM job ID.
    dry_run : bool
        If True, return matching directories without deleting them.

    Returns
    -------
    list[Path]
        List of deleted (or matched if dry_run) log directory paths.
    """
    # Expand "~" so a quoted or programmatic "~/logs" behaves like the
    # home-relative default instead of pointing at a literal "~" directory.
    if log_dir:
        log_root = Path(log_dir).expanduser()
    else:
        log_root = Path.home() / ".vec-inf-logs"

    matched = find_matching_dirs(
        log_dir=log_root,
        model_family=model_family,
        model_name=model_name,
        job_id=job_id,
    )

    if not dry_run:
        for path in matched:
            shutil.rmtree(path)

    return matched

0 commit comments

Comments
 (0)