Skip to content

Commit 203d093

Browse files
committed
Add options for additional bindings, node exclusion and node list for model launch
1 parent 5c8dbda commit 203d093

File tree

3 files changed

+17
-1
lines changed

3 files changed

+17
-1
lines changed

vec_inf/client/_client_vars.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,8 @@
6565
"qos": "qos",
6666
"time": "time",
6767
"nodes": "num_nodes",
68+
"exclude": "exclude",
69+
"nodelist": "node_list",
6870
"gpus-per-node": "gpus_per_node",
6971
"cpus-per-task": "cpus_per_task",
7072
"mem": "mem_per_node",
@@ -164,7 +166,7 @@ class SlurmScriptTemplate(TypedDict):
164166
f"export LD_LIBRARY_PATH={LD_LIBRARY_PATH}",
165167
f"export VLLM_NCCL_SO_PATH={VLLM_NCCL_SO_PATH}",
166168
],
167-
"singularity_command": f"singularity exec --nv --bind {{model_weights_path}}:{{model_weights_path}} --containall {SINGULARITY_IMAGE}",
169+
"singularity_command": f"singularity exec --nv --bind {{model_weights_path}}:{{model_weights_path}}{{additional_binds}} --containall {SINGULARITY_IMAGE}",
168170
"activate_venv": "source {venv}/bin/activate",
169171
"server_setup": {
170172
"single_node": [

vec_inf/client/_slurm_script_generator.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,9 @@ def __init__(self, params: dict[str, Any]):
3939
self.params = params
4040
self.is_multinode = int(self.params["num_nodes"]) > 1
4141
self.use_singularity = self.params["venv"] == "singularity"
42+
self.additional_binds = self.params.get("binds", "")
43+
if self.additional_binds:
44+
self.additional_binds = f" --bind {self.additional_binds}"
4245
self.model_weights_path = str(
4346
Path(params["model_weights_parent_dir"], params["model_name"])
4447
)
@@ -104,6 +107,7 @@ def _generate_server_setup(self) -> str:
104107
"SINGULARITY_PLACEHOLDER",
105108
SLURM_SCRIPT_TEMPLATE["singularity_command"].format(
106109
model_weights_path=self.model_weights_path,
110+
additional_binds=self.additional_binds,
107111
),
108112
)
109113
else:
@@ -135,6 +139,7 @@ def _generate_launch_cmd(self) -> str:
135139
launcher_script.append(
136140
SLURM_SCRIPT_TEMPLATE["singularity_command"].format(
137141
model_weights_path=self.model_weights_path,
142+
additional_binds=self.additional_binds,
138143
)
139144
+ " \\"
140145
)

vec_inf/client/config.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,15 @@ class ModelConfig(BaseModel):
108108
partition: Union[PARTITION, str] = Field(
109109
default=cast(str, DEFAULT_ARGS["partition"]), description="GPU partition type"
110110
)
111+
exclude: Optional[str] = Field(
112+
default=None, description="Exclude certain nodes from the resources granted to the job"
113+
)
114+
node_list: Optional[str] = Field(
115+
default=None, description="Request a specific list of nodes for deployment"
116+
)
117+
binds: Optional[str] = Field(
118+
default=None, description="Additional binds for the singularity container"
119+
)
111120
venv: str = Field(
112121
default="singularity", description="Virtual environment/container system"
113122
)

0 commit comments

Comments
 (0)