Skip to content

Commit 3742e12

Browse files
committed
Move generated slurm script to job log directory post launch
1 parent 9695582 commit 3742e12

File tree

3 files changed

+45
-49
lines changed

3 files changed

+45
-49
lines changed

tests/vec_inf/cli/test_cli.py

Lines changed: 25 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -226,13 +226,12 @@ def base_patches(test_paths, mock_truediv, debug_helper):
226226
"pathlib.Path.parent", return_value=debug_helper.config_file.parent.parent
227227
),
228228
patch("pathlib.Path.__truediv__", side_effect=mock_truediv),
229-
patch("pathlib.Path.iterdir", return_value=[]), # Mock empty directory listing
229+
patch("pathlib.Path.iterdir", return_value=[]),
230230
patch("json.dump"),
231231
patch("pathlib.Path.touch"),
232232
patch("vec_inf.client._utils.Path", return_value=test_paths["weights_dir"]),
233-
patch(
234-
"pathlib.Path.home", return_value=Path("/home/user")
235-
), # Mock home directory
233+
patch("pathlib.Path.home", return_value=Path("/home/user")),
234+
patch("pathlib.Path.rename"),
236235
]
237236

238237

@@ -246,25 +245,19 @@ def apply_base_patches(base_patches):
246245
yield
247246

248247

249-
def test_launch_command_success(runner, mock_launch_output, path_exists, debug_helper):
248+
def test_launch_command_success(
249+
runner, mock_launch_output, path_exists, debug_helper, mock_truediv, test_paths, base_patches
250+
):
250251
"""Test successful model launch with minimal required arguments."""
251-
test_log_dir = Path("/tmp/test_vec_inf_logs")
252+
with ExitStack() as stack:
253+
# Apply all base patches
254+
for patch_obj in base_patches:
255+
stack.enter_context(patch_obj)
256+
257+
# Apply specific patches for this test
258+
mock_run = stack.enter_context(patch("vec_inf.client._utils.run_bash_command"))
259+
stack.enter_context(patch("pathlib.Path.exists", new=path_exists))
252260

253-
with (
254-
patch("vec_inf.client._utils.run_bash_command") as mock_run,
255-
patch("pathlib.Path.mkdir"),
256-
patch("builtins.open", debug_helper.tracked_mock_open),
257-
patch("pathlib.Path.open", debug_helper.tracked_mock_open),
258-
patch("pathlib.Path.exists", new=path_exists),
259-
patch("pathlib.Path.expanduser", return_value=test_log_dir),
260-
patch("pathlib.Path.resolve", return_value=debug_helper.config_file.parent),
261-
patch(
262-
"pathlib.Path.parent", return_value=debug_helper.config_file.parent.parent
263-
),
264-
patch("json.dump"),
265-
patch("pathlib.Path.touch"),
266-
patch("pathlib.Path.__truediv__", return_value=test_log_dir),
267-
):
268261
expected_job_id = "14933053"
269262
mock_run.return_value = mock_launch_output(expected_job_id)
270263

@@ -277,25 +270,18 @@ def test_launch_command_success(runner, mock_launch_output, path_exists, debug_h
277270

278271

279272
def test_launch_command_with_json_output(
280-
runner, mock_launch_output, path_exists, debug_helper
273+
runner, mock_launch_output, path_exists, debug_helper, mock_truediv, test_paths, base_patches
281274
):
282275
"""Test JSON output format for launch command."""
283-
test_log_dir = Path("/tmp/test_vec_inf_logs")
284-
with (
285-
patch("vec_inf.client._utils.run_bash_command") as mock_run,
286-
patch("pathlib.Path.mkdir"),
287-
patch("builtins.open", debug_helper.tracked_mock_open),
288-
patch("pathlib.Path.open", debug_helper.tracked_mock_open),
289-
patch("pathlib.Path.exists", new=path_exists),
290-
patch("pathlib.Path.expanduser", return_value=test_log_dir),
291-
patch("pathlib.Path.resolve", return_value=debug_helper.config_file.parent),
292-
patch(
293-
"pathlib.Path.parent", return_value=debug_helper.config_file.parent.parent
294-
),
295-
patch("json.dump"),
296-
patch("pathlib.Path.touch"),
297-
patch("pathlib.Path.__truediv__", return_value=test_log_dir),
298-
):
276+
with ExitStack() as stack:
277+
# Apply all base patches
278+
for patch_obj in base_patches:
279+
stack.enter_context(patch_obj)
280+
281+
# Apply specific patches for this test
282+
mock_run = stack.enter_context(patch("vec_inf.client._utils.run_bash_command"))
283+
stack.enter_context(patch("pathlib.Path.exists", new=path_exists))
284+
299285
expected_job_id = "14933051"
300286
mock_run.return_value = mock_launch_output(expected_job_id)
301287

@@ -319,7 +305,7 @@ def test_launch_command_with_json_output(
319305
assert output.get("slurm_job_id") == expected_job_id
320306
assert output.get("model_name") == "Meta-Llama-3.1-8B"
321307
assert output.get("model_type") == "LLM"
322-
assert str(test_log_dir) in output.get("log_dir", "")
308+
assert str(test_paths["log_dir"]) in output.get("log_dir", "")
323309

324310

325311
def test_launch_command_no_model_weights_parent_dir(runner, debug_helper, base_patches):

vec_inf/client/_helper.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ def __init__(self, model_name: str, kwargs: Optional[dict[str, Any]]):
5050
self.model_name = model_name
5151
self.kwargs = kwargs or {}
5252
self.slurm_job_id = ""
53+
self.slurm_script_path = Path("")
5354
self.model_config = self._get_model_configuration()
5455
self.params = self._get_launch_params()
5556

@@ -163,8 +164,10 @@ def _build_launch_command(self) -> str:
163164
]
164165
)
165166
# Add slurm script
166-
slurm_script = SlurmScriptGenerator(self.params, SRC_DIR).write_to_log_dir()
167-
command_list.append(str(slurm_script))
167+
self.slurm_script_path = SlurmScriptGenerator(
168+
self.params, SRC_DIR
169+
).write_to_log_dir()
170+
command_list.append(str(self.slurm_script_path))
168171
return " ".join(command_list)
169172

170173
def launch(self) -> LaunchResponse:
@@ -181,15 +184,22 @@ def launch(self) -> LaunchResponse:
181184
self.slurm_job_id = command_output.split(" ")[-1].strip().strip("\n")
182185
self.params["slurm_job_id"] = self.slurm_job_id
183186

184-
# Create log directory and job json file
187+
# Create log directory and job json file, move slurm script to job log directory
188+
job_log_dir = Path(
189+
self.params["log_dir"], f"{self.model_name}.{self.slurm_job_id}"
190+
)
191+
job_log_dir.mkdir(parents=True, exist_ok=True)
192+
185193
job_json = Path(
186-
self.params["log_dir"],
187-
f"{self.model_name}.{self.slurm_job_id}",
194+
job_log_dir,
188195
f"{self.model_name}.{self.slurm_job_id}.json",
189196
)
190-
job_json.parent.mkdir(parents=True, exist_ok=True)
191197
job_json.touch(exist_ok=True)
192198

199+
self.slurm_script_path.rename(
200+
job_log_dir / f"{self.model_name}.{self.slurm_job_id}.slurm"
201+
)
202+
193203
with job_json.open("w") as file:
194204
json.dump(self.params, file, indent=4)
195205

vec_inf/client/_slurm_script_generator.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -170,11 +170,11 @@ def _generate_launcher(self) -> str:
170170
return "\n".join(launcher_script)
171171

172172
def write_to_log_dir(self) -> Path:
173-
log_subdir: Path = Path(self.params["log_dir"]) / self.params["model_name"]
174-
log_subdir.mkdir(parents=True, exist_ok=True)
175-
176173
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
177-
script_path: Path = log_subdir / f"launch_{timestamp}.slurm"
174+
script_path: Path = (
175+
Path(self.params["log_dir"])
176+
/ f"launch_{self.params['model_name']}_{timestamp}.slurm"
177+
)
178178

179179
content = self._generate_script_content()
180180
script_path.write_text(content)

0 commit comments

Comments
 (0)