Skip to content

Commit b1f5667

Browse files
authored
[bugfix]add synchronize on ascend platform (#485)
* add synchronize on ascend platform * use self.synchronize * move synchronize
1 parent 3476145 commit b1f5667

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

ucm/integration/vllm/ucm_connector.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from vllm.distributed.parallel_state import get_tp_group, get_world_group
1717
from vllm.platforms import current_platform
1818
from vllm.v1.core.sched.output import SchedulerOutput
19-
from vllm.v1.request import Request
2019

2120
from ucm.logger import init_logger
2221
from ucm.shared.metrics import ucmmonitor
@@ -29,6 +28,7 @@
2928
from vllm.attention.backends.abstract import AttentionMetadata
3029
from vllm.forward_context import ForwardContext
3130
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
31+
from vllm.v1.request import Request
3232

3333
logger = init_logger(__name__)
3434

@@ -178,11 +178,12 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
178178
self.metrics_config,
179179
)
180180
self.monitor = ucmmonitor.StatsMonitor.get_instance()
181-
self.synchronize = (
182-
torch.cuda.synchronize
183-
if current_platform.is_cuda_alike()
184-
else torch.npu.synchronize
185-
)
181+
182+
self.synchronize = (
183+
torch.cuda.synchronize
184+
if current_platform.is_cuda_alike()
185+
else torch.npu.synchronize
186+
)
186187

187188
# invalid block ids due to load errors
188189
self._invalid_block_ids: set[int] = set()
@@ -558,7 +559,9 @@ def wait_for_save(self) -> None:
558559
# TODO support PP
559560
if (self.is_mla or self.is_dsa) and self.global_rank != 0:
560561
return
561-
if self.metrics_config:
562+
if self.metrics_config or current_platform.device_type == "npu":
563+
# When using vllm_ascend, we must synchronize here; otherwise accuracy problems can occur.
564+
# This has already been fixed on the latest main branch of vllm_ascend, so this synchronize call will no longer be needed in future versions.
562565
self.synchronize()
563566

564567
metadata = self._get_connector_metadata()

0 commit comments

Comments
 (0)