vllm-project · usberkeley · Nov 27, 2025
diff --git a/examples/others/lmcache/README.md b/examples/others/lmcache/README.md
@@ -54,7 +54,3 @@ The main script generates several log files:
 ## 3. KV Cache Sharing
 
 The `kv_cache_sharing_lmcache_v1.py` example demonstrates how to share KV caches between vLLM v1 instances.
-
-## 4. Disaggregated Prefill in vLLM v0
-
-The `disaggregated_prefill_lmcache_v0.py` provides an example of how to run disaggregated prefill in vLLM v0.
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v0.py b/examples/others/lmcache/disagg_prefill_lmcache_v0.py
diff --git a/examples/others/lmcache/kv_cache_sharing_lmcache_v1.py b/examples/others/lmcache/kv_cache_sharing_lmcache_v1.py
@@ -12,10 +12,10 @@
 Learn more about LMCache in https://github.com/LMCache/LMCache.
 """
 
+import multiprocessing
 import os
 import subprocess
 import time
-from multiprocessing import Event, Process
 
 from lmcache.integration.vllm.utils import ENGINE_NAME
 from lmcache.v1.cache_engine import LMCacheEngineBuilder
@@ -111,9 +111,16 @@ def run_lmcache_server(port):
 
 
 def main():
-    store_done = Event()
-    store_process = Process(target=run_store, args=(store_done, prompts))
-    retrieve_process = Process(target=run_retrieve, args=(store_done, prompts))
+    # Set the multiprocessing start method to 'spawn' for CUDA compatibility;
+    # 'spawn' is required whenever CUDA is used together with multiprocessing.
+    if multiprocessing.get_start_method(allow_none=True) != "spawn":
+        multiprocessing.set_start_method("spawn", force=True)
+
+    # Use spawn context to create processes
+    ctx = multiprocessing.get_context("spawn")
+    store_done = ctx.Event()
+    store_process = ctx.Process(target=run_store, args=(store_done, prompts))
+    retrieve_process = ctx.Process(target=run_retrieve, args=(store_done, prompts))
     lmcache_server_process = run_lmcache_server(port)
 
     # Start KV cache store process