Skip to content

Commit 61113ab

Browse files
committed
raise error for wait
1 parent 3250fca commit 61113ab

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

ucm/integration/vllm/ucm_connector.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,11 +482,13 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
482482
for request_id, task in request_to_task.items():
483483
# TODO error handling
484484
if self.global_rank == 0 or not self.load_only_first_rank:
485-
if self.store.wait(task) != 0:
485+
try:
486+
self.store.wait(task)
487+
except RuntimeError as e:
488+
logger.error("request {request_id} load kv cache failed.:", e)
486489
self._invalid_block_ids.update(
487490
metadata.request_meta[request_id].load_block_ids[1]
488491
)
489-
logger.error(f"request {request_id} load kv cache failed.")
490492
if self.load_only_first_rank:
491493
self._broadcast(req_broadcast_addr[request_id])
492494
load_end_time = time.perf_counter() * 1000
@@ -557,7 +559,10 @@ def wait_for_save(self) -> None:
557559
)
558560

559561
for request_id, task in request_to_task.items():
560-
self.store.wait(task)
562+
try:
563+
self.store.wait(task)
564+
except RuntimeError as e:
565+
logger.error("request {request_id} dump kv cache failed.:", e)
561566
save_end_time = time.perf_counter() * 1000
562567
save_speed = (
563568
num_saved_block

ucm/store/pcstore/pcstore_connector.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ def dump_data(
107107
return task_id
108108

109109
def wait(self, task: Task) -> None:
110-
self.store.Wait(task.task_id)
110+
ret = self.store.Wait(task.task_id)
111+
if ret != 0:
112+
raise RuntimeError(f"Wait failed for task {task.task_id}, return={ret}")
111113

112114
def check(self, task: Task) -> bool:
113115
return self.store.Check(task.task_id)

0 commit comments

Comments
 (0)