Skip to content

Commit 1938311

Browse files
committed
Move all bindings into BINDPATH env var
1 parent 367fcdb commit 1938311

File tree

2 files changed

+17
-25
lines changed

2 files changed

+17
-25
lines changed

vec_inf/client/_slurm_script_generator.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ def __init__(self, params: dict[str, Any]):
3434
self.params = params
3535
self.is_multinode = int(self.params["num_nodes"]) > 1
3636
self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
37-
self.additional_binds = self.params.get("bind", "")
38-
if self.additional_binds:
39-
self.additional_binds = f" --bind {self.additional_binds}"
37+
self.additional_binds = f",{self.params['bind']}" if self.params.get("bind") else ""
4038
self.model_weights_path = str(
4139
Path(self.params["model_weights_parent_dir"], self.params["model_name"])
4240
)
@@ -107,7 +105,7 @@ def _generate_server_setup(self) -> str:
107105
server_script = ["\n"]
108106
if self.use_container:
109107
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
110-
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_env_vars"]))
108+
server_script.append(SLURM_SCRIPT_TEMPLATE["bind_path"].format(model_weights_path=self.model_weights_path, additional_binds=self.additional_binds))
111109
else:
112110
server_script.append(
113111
SLURM_SCRIPT_TEMPLATE["activate_venv"].format(venv=self.params["venv"])
@@ -125,7 +123,6 @@ def _generate_server_setup(self) -> str:
125123
"CONTAINER_PLACEHOLDER",
126124
SLURM_SCRIPT_TEMPLATE["container_command"].format(
127125
model_weights_path=self.model_weights_path,
128-
additional_binds=self.additional_binds,
129126
env_str=self.env_str,
130127
),
131128
)
@@ -163,7 +160,6 @@ def _generate_launch_cmd(self) -> str:
163160
launcher_script.append(
164161
SLURM_SCRIPT_TEMPLATE["container_command"].format(
165162
model_weights_path=self.model_weights_path,
166-
additional_binds=self.additional_binds,
167163
env_str=self.env_str,
168164
)
169165
)
@@ -215,11 +211,7 @@ def __init__(self, params: dict[str, Any]):
215211
self.script_paths: list[Path] = []
216212
self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
217213
for model_name in self.params["models"]:
218-
self.params["models"][model_name]["additional_binds"] = ""
219-
if self.params["models"][model_name].get("bind"):
220-
self.params["models"][model_name]["additional_binds"] = (
221-
f" --bind {self.params['models'][model_name]['bind']}"
222-
)
214+
self.params["models"][model_name]["additional_binds"] = f",{self.params['bind']}" if self.params.get("bind") else ""
223215
self.params["models"][model_name]["model_weights_path"] = str(
224216
Path(
225217
self.params["models"][model_name]["model_weights_parent_dir"],
@@ -259,7 +251,10 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
259251
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["shebang"])
260252
if self.use_container:
261253
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
262-
script_content.append("\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["env_vars"]))
254+
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
255+
model_weights_path=model_params["model_weights_path"],
256+
additional_binds=model_params["additional_binds"],
257+
))
263258
script_content.append(
264259
"\n".join(
265260
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["server_address_setup"]
@@ -277,7 +272,6 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
277272
script_content.append(
278273
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format(
279274
model_weights_path=model_params["model_weights_path"],
280-
additional_binds=model_params["additional_binds"],
281275
)
282276
)
283277
script_content.append(

vec_inf/client/_slurm_templates.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class SlurmScriptTemplate(TypedDict):
5757
Commands for container setup
5858
imports : str
5959
Import statements and source commands
60+
bind_path : str
61+
Export line appending /dev, /tmp, the model weights path and any additional binds to the container's BINDPATH environment variable
6062
container_command : str
6163
Template for container execution command
6264
activate_venv : str
@@ -74,7 +76,7 @@ class SlurmScriptTemplate(TypedDict):
7476
shebang: ShebangConfig
7577
container_setup: list[str]
7678
imports: str
77-
container_env_vars: list[str]
79+
bind_path: str
7880
container_command: str
7981
activate_venv: str
8082
server_setup: ServerSetupConfig
@@ -96,10 +98,8 @@ class SlurmScriptTemplate(TypedDict):
9698
f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
9799
],
98100
"imports": "source {src_dir}/find_port.sh",
99-
"container_env_vars": [
100-
f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp"
101-
],
102-
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
101+
"bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
102+
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\",
103103
"activate_venv": "source {venv}/bin/activate",
104104
"server_setup": {
105105
"single_node": [
@@ -215,8 +215,8 @@ class BatchModelLaunchScriptTemplate(TypedDict):
215215
Shebang line for the script
216216
container_setup : list[str]
217217
Commands for container setup
218-
env_vars : list[str]
219-
Environment variables to set
218+
bind_path : str
219+
Export line appending /dev, /tmp, the model weights path and any additional binds to the container's BINDPATH environment variable
220220
server_address_setup : list[str]
221221
Commands to setup the server address
222222
launch_cmd : list[str]
@@ -227,7 +227,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
227227

228228
shebang: str
229229
container_setup: str
230-
env_vars: list[str]
230+
bind_path: list[str]
231231
server_address_setup: list[str]
232232
write_to_json: list[str]
233233
launch_cmd: list[str]
@@ -237,9 +237,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
237237
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
238238
"shebang": "#!/bin/bash\n",
239239
"container_setup": f"{CONTAINER_LOAD_CMD}\n",
240-
"env_vars": [
241-
f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
242-
],
240+
"bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
243241
"server_address_setup": [
244242
"source {src_dir}/find_port.sh",
245243
"head_node_ip=${{SLURMD_NODENAME}}",
@@ -255,7 +253,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
255253
' "$json_path" > temp_{model_name}.json \\',
256254
' && mv temp_{model_name}.json "$json_path"\n',
257255
],
258-
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
256+
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {IMAGE_PATH} \\",
259257
"launch_cmd": [
260258
"vllm serve {model_weights_path} \\",
261259
" --served-model-name {model_name} \\",

0 commit comments

Comments
 (0)