Commit c89f198

Add model type field to models, added 2 text embedding models, updated list command based on model type, updated READMEs, removed debugging code
1 parent f5d2260 commit c89f198

6 files changed, +111 -87 lines changed

README.md

Lines changed: 11 additions & 2 deletions

@@ -38,6 +38,15 @@ There are 5 possible states:
 
 Note that the base URL is only available when the model is in `READY` state, and if you've changed the Slurm log directory path, you also need to specify it when using the `status` command.
 
+Once your server is ready, you can check performance metrics by providing the Slurm job ID to the `metrics` command:
+```bash
+vec-inf metrics 13014393
+```
+
+You will see the performance metrics streamed to your console; note that the metrics are updated at a 10-second interval.
+
+<img width="400" alt="metrics_img" src="https://github.com/user-attachments/assets/6732215b-96f3-407c-ba45-6334b2061706">
+
 Finally, when you're finished using a model, you can shut it down by providing the Slurm job ID:
 ```bash
 vec-inf shutdown 13014393
@@ -49,13 +58,13 @@ You can view the full list of available models by running the `list` command:
 ```bash
 vec-inf list
 ```
-<img width="1200" alt="list_img" src="https://github.com/user-attachments/assets/a4f0d896-989d-43bf-82a2-6a6e5d0d288f">
+<img width="1200" alt="list_img" src="https://github.com/user-attachments/assets/50b12ca4-2adc-4b2b-8a40-543b6cda0b1a">
 
 You can also view the default setup for a specific supported model by providing the model name, for example `Meta-Llama-3.1-70B-Instruct`:
 ```bash
 vec-inf list Meta-Llama-3.1-70B-Instruct
 ```
-<img width="400" alt="list_model_img" src="https://github.com/user-attachments/assets/5dec7a33-ba6b-490d-af47-4cf7341d0b42">
+<img width="400" alt="list_model_img" src="https://github.com/user-attachments/assets/30e42ab7-dde2-4d20-85f0-187adffefc3d">
 
 The `launch`, `list`, and `status` commands support `--json-mode`, where the command output is structured as a JSON string.
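
Since `--json-mode` prints a plain JSON string, the output is easy to consume from a script. As a minimal sketch (not part of this commit, and assuming the `vec-inf` CLI is installed and on PATH), the model list can be read programmatically; per the `list` changes in `vec_inf/cli/_cli.py` below, the bare `list` command in JSON mode emits a JSON array of model names:

```python
# Sketch: parse the output of `vec-inf list --json-mode`.
# Assumes the vec-inf CLI is installed and available on PATH.
import json
import subprocess

result = subprocess.run(
    ["vec-inf", "list", "--json-mode"],
    capture_output=True,
    text=True,
    check=True,
)

# With no model name given, the command prints a JSON array of model names.
model_names = json.loads(result.stdout)
print(model_names)
```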

vec_inf/README.md

Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@
 
 * `launch`: Specify a model family and other optional parameters to launch an OpenAI-compatible inference server, `--json-mode` supported. Check [`here`](./models/README.md) for a complete list of available options.
 * `list`: List all available model names, `--json-mode` supported.
+* `metrics`: Stream performance metrics to the console.
 * `status`: Check the model status by providing its Slurm job ID, `--json-mode` supported.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.

vec_inf/cli/_cli.py

Lines changed: 29 additions & 15 deletions

@@ -3,6 +3,7 @@
 from typing import Optional
 
 import click
+import pandas as pd
 from rich.columns import Columns
 from rich.console import Console
 from rich.live import Live
@@ -111,13 +112,12 @@ def launch(
     else:
         model_args = models_df.columns.tolist()
         model_args.remove("model_name")
+        model_args.remove("model_type")
         for arg in model_args:
             if locals()[arg] is not None:
                 renamed_arg = arg.replace("_", "-")
                 launch_cmd += f" --{renamed_arg} {locals()[arg]}"
 
-    print(launch_cmd)
-
     output = utils.run_bash_command(launch_cmd)
 
     slurm_job_id = output.split(" ")[-1].strip().strip("\n")
@@ -242,17 +242,15 @@ def list(model_name: Optional[str] = None, json_mode: bool = False) -> None:
     """
     List all available models, or get default setup of a specific model
     """
-    models_df = utils.load_models_df()
 
-    if model_name:
+    def list_model(model_name: str, models_df: pd.DataFrame, json_mode: bool):
         if model_name not in models_df["model_name"].values:
             raise ValueError(f"Model name {model_name} not found in available models")
 
         excluded_keys = {"venv", "log_dir"}
         model_row = models_df.loc[models_df["model_name"] == model_name]
 
         if json_mode:
-            # click.echo(model_row.to_json(orient='records'))
             filtered_model_row = model_row.drop(columns=excluded_keys, errors="ignore")
             click.echo(filtered_model_row.to_json(orient="records"))
             return
@@ -262,16 +260,32 @@ def list(model_name: Optional[str] = None, json_mode: bool = False) -> None:
             if key not in excluded_keys:
                 table.add_row(key, str(value))
         CONSOLE.print(table)
-        return
 
-    if json_mode:
-        click.echo(models_df["model_name"].to_json(orient="records"))
-        return
-    panels = []
-    for _, row in models_df.iterrows():
-        styled_text = f"[magenta]{row['model_family']}[/magenta]-{row['model_variant']}"
-        panels.append(Panel(styled_text, expand=True))
-    CONSOLE.print(Columns(panels, equal=True))
+    def list_all(models_df: pd.DataFrame, json_mode: bool):
+        if json_mode:
+            click.echo(models_df["model_name"].to_json(orient="records"))
+            return
+        panels = []
+        model_type_colors = {
+            "LLM": "cyan",
+            "VLM": "blue",
+            "Text Embedding": "purple",
+        }
+        custom_order = ["LLM", "VLM", "Text Embedding"]
+        models_df["model_type"] = pd.Categorical(models_df["model_type"], categories=custom_order, ordered=True)
+        models_df = models_df.sort_values(by="model_type")
+        for _, row in models_df.iterrows():
+            panel_color = model_type_colors.get(row["model_type"], "white")
+            styled_text = f"[magenta]{row['model_family']}[/magenta]-{row['model_variant']}"
+            panels.append(Panel(styled_text, expand=True, border_style=panel_color))
+        CONSOLE.print(Columns(panels, equal=True))
+
+    models_df = utils.load_models_df()
+
+    if model_name:
+        list_model(model_name, models_df, json_mode)
+    else:
+        list_all(models_df, json_mode)
 
 
 @cli.command("metrics")
@@ -283,7 +297,7 @@ def list(model_name: Optional[str] = None, json_mode: bool = False) -> None:
 )
 def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
     """
-    Get metrics of a running model on the cluster
+    Stream performance metrics to the console
     """
     status_cmd = f"scontrol show job {slurm_job_id} --oneliner"
     output = utils.run_bash_command(status_cmd)
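
The reworked `list` command sorts models by type before rendering them as rich panels. In case the `pd.Categorical` trick is unfamiliar, here is a small standalone sketch of the same technique on toy data (the model names below are partly hypothetical, not from this commit): an ordered categorical column makes `sort_values` follow the custom LLM → VLM → Text Embedding order, and a color map drives each panel's border.

```python
# Standalone sketch of the ordering and color-coding used by list_all above.
import pandas as pd
from rich.columns import Columns
from rich.console import Console
from rich.panel import Panel

# Toy stand-in for the real models table; the rows here are hypothetical.
models_df = pd.DataFrame(
    {
        "model_family": ["bge", "llava", "Meta-Llama"],
        "model_variant": ["base-en-v1.5", "1.5-13b", "3.1-70B-Instruct"],
        "model_type": ["Text Embedding", "VLM", "LLM"],
    }
)

custom_order = ["LLM", "VLM", "Text Embedding"]
model_type_colors = {"LLM": "cyan", "VLM": "blue", "Text Embedding": "purple"}

# An ordered categorical makes sort_values group rows by the custom order.
models_df["model_type"] = pd.Categorical(
    models_df["model_type"], categories=custom_order, ordered=True
)
models_df = models_df.sort_values(by="model_type")

panels = []
for _, row in models_df.iterrows():
    border = model_type_colors.get(row["model_type"], "white")
    styled_text = f"[magenta]{row['model_family']}[/magenta]-{row['model_variant']}"
    panels.append(Panel(styled_text, expand=True, border_style=border))

Console().print(Columns(panels, equal=True))
```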

vec_inf/cli/_utils.py

Lines changed: 1 addition & 0 deletions

@@ -134,6 +134,7 @@ def load_default_args(models_df: pd.DataFrame, model_name: str) -> dict:
     row_data = models_df.loc[models_df["model_name"] == model_name]
     default_args = row_data.iloc[0].to_dict()
     default_args.pop("model_name")
+    default_args.pop("model_type")
     return default_args
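
For context, `model_type` is metadata rather than a launch argument, so it is popped alongside `model_name` and never ends up among the defaults passed to the launch command. A toy illustration (hypothetical values, not from this commit; the `num_gpus` and `max_model_len` column names come from the launch script below):

```python
# Sketch: mirror load_default_args on a toy row to show the effect of pop().
import pandas as pd

# Hypothetical models table; only the column layout matters here.
models_df = pd.DataFrame(
    [
        {
            "model_name": "Meta-Llama-3.1-70B-Instruct",
            "model_type": "LLM",
            "num_gpus": 4,
            "max_model_len": 8192,
        }
    ]
)

row_data = models_df.loc[models_df["model_name"] == "Meta-Llama-3.1-70B-Instruct"]
default_args = row_data.iloc[0].to_dict()
default_args.pop("model_name")
default_args.pop("model_type")
print(default_args)  # only launchable arguments remain: num_gpus, max_model_len
```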


vec_inf/launch_server.sh

Lines changed: 2 additions & 5 deletions

@@ -22,11 +22,11 @@ while [[ "$#" -gt 0 ]]; do
     shift
 done
 
-required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size, pipeline_parallelism)
+required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size pipeline_parallelism)
 
 for var in "${required_vars[@]}"; do
     if [ -z "${!var}" ]; then
-        echo "Error: Missing required --${var//_/-} argument."
+        echo "Error: Missing required --$var argument."
         exit 1
     fi
 done
@@ -41,9 +41,6 @@ export NUM_GPUS=$num_gpus
 export VLLM_MAX_MODEL_LEN=$max_model_len
 export VLLM_MAX_LOGPROBS=$vocab_size
 export PIPELINE_PARALLELISM=$pipeline_parallelism
-
-echo Pipeline Parallelism: $PIPELINE_PARALLELISM
-
 # For custom models, the following are set to default if not specified
 export VLLM_DATA_TYPE="auto"
 export VENV_BASE="singularity"