3 files changed (+0, -12 lines) under docs/source/user-guide/pd-disaggregation.

File 1 of 3:

@@ -13,14 +13,12 @@ For illustration purposes, let us take GPU as an example and assume the model used is Qwen2.5-7B-Instruct
### Run prefill server
Prefiller Launch Command:
``` bash
- export PYTHONHASHSEED=123456
export CUDA_VISIBLE_DEVICES=0
vllm serve /home/models/Qwen2.5-7B-Instruct \
--max-model-len 20000 \
--tensor-parallel-size 1 \
--gpu_memory_utilization 0.87 \
--trust-remote-code \
- --enforce-eager \
--no-enable-prefix-caching \
--port 7800 \
--block-size 128 \
@@ -42,14 +40,12 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
### Run decode server
Decoder Launch Command:
``` bash
- export PYTHONHASHSEED=123456
export CUDA_VISIBLE_DEVICES=0
vllm serve /home/models/Qwen2.5-7B-Instruct \
--max-model-len 20000 \
--tensor-parallel-size 1 \
--gpu_memory_utilization 0.87 \
--trust-remote-code \
- --enforce-eager \
--no-enable-prefix-caching \
--port 7801 \
--block-size 128 \
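
With PYTHONHASHSEED and --enforce-eager dropped, the prefill and decode commands above are otherwise unchanged. A quick way to confirm both instances are serving is to poll vLLM's health endpoint; this is a minimal sketch assuming default localhost binding and the ports 7800/7801 used above:

```bash
# Hypothetical sanity check, not part of the documented setup:
# poll the /health endpoint of the prefill (7800) and decode (7801) servers.
for port in 7800 7801; do
  # curl returns HTTP 200 once the engine has finished loading the model
  curl -sf "http://localhost:${port}/health" \
    && echo "server on port ${port} is up" \
    || echo "server on port ${port} is not ready yet"
done
```
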
File 2 of 3:

@@ -19,14 +19,12 @@ For illustration purposes, let us assume that the model used is Qwen2.5-7B-Instruct
### Run prefill server
Prefiller Launch Command:
``` bash
- export PYTHONHASHSEED=123456
export ASCEND_RT_VISIBLE_DEVICES=0
vllm serve /home/models/Qwen2.5-7B-Instruct \
--max-model-len 20000 \
--tensor-parallel-size 1 \
--gpu_memory_utilization 0.87 \
--trust-remote-code \
- --enforce-eager \
--no-enable-prefix-caching \
--port 7800 \
--block-size 128 \
@@ -49,14 +47,12 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
### Run decode server
Decoder Launch Command:
``` bash
- export PYTHONHASHSEED=123456
export CUDA_VISIBLE_DEVICES=0
vllm serve /home/models/Qwen2.5-7B-Instruct \
--max-model-len 20000 \
--tensor-parallel-size 1 \
--gpu_memory_utilization 0.87 \
--trust-remote-code \
- --enforce-eager \
--no-enable-prefix-caching \
--port 7801 \
--block-size 128 \
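
One detail worth noting in this file: the prefill command masks devices with ASCEND_RT_VISIBLE_DEVICES, while the decode command uses CUDA_VISIBLE_DEVICES; which variable applies depends on whether the instance runs on an Ascend NPU or an NVIDIA GPU. The following is an illustrative sketch, not vLLM's own dispatch logic, and it assumes npu-smi is the device tool present on Ascend hosts:

```bash
# Illustrative backend check (assumption: npu-smi is present on Ascend hosts).
DEVICE_ID=0
if command -v npu-smi >/dev/null 2>&1; then
  # Ascend NPU backend: mask visible NPUs
  export ASCEND_RT_VISIBLE_DEVICES=${DEVICE_ID}
else
  # NVIDIA GPU backend: mask visible GPUs
  export CUDA_VISIBLE_DEVICES=${DEVICE_ID}
fi
```
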
File 3 of 3:

@@ -13,14 +13,12 @@ For illustration purposes, let us take GPU as an example and assume the model used is Qwen2.5-7B-Instruct
### Run prefill servers
Prefiller1 Launch Command:
``` bash
- export PYTHONHASHSEED=123456
export CUDA_VISIBLE_DEVICES=0
vllm serve /home/models/Qwen2.5-7B-Instruct \
--max-model-len 20000 \
--tensor-parallel-size 1 \
--gpu_memory_utilization 0.87 \
--trust-remote-code \
- --enforce-eager \
--no-enable-prefix-caching \
--port 7800 \
--block-size 128 \
@@ -41,14 +39,12 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \

Prefiller2 Launch Command:
``` bash
- export PYTHONHASHSEED=123456
export CUDA_VISIBLE_DEVICES=1
vllm serve /home/models/Qwen2.5-7B-Instruct \
--max-model-len 20000 \
--tensor-parallel-size 1 \
--gpu_memory_utilization 0.87 \
--trust-remote-code \
- --enforce-eager \
--no-enable-prefix-caching \
--port 7801 \
--block-size 128 \
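
The two prefiller commands differ only in the visible device (0 vs 1) and port (7800 vs 7801), so they can be launched from a single loop. This is a minimal sketch assuming only the flags visible above; the flags that follow --block-size 128 in the original document are truncated in this hunk, so append them where the comment indicates:

```bash
# Hypothetical launcher for the two prefillers shown above:
# GPU 0 -> port 7800, GPU 1 -> port 7801.
for i in 0 1; do
  CUDA_VISIBLE_DEVICES=${i} vllm serve /home/models/Qwen2.5-7B-Instruct \
    --max-model-len 20000 \
    --tensor-parallel-size 1 \
    --gpu_memory_utilization 0.87 \
    --trust-remote-code \
    --no-enable-prefix-caching \
    --port $((7800 + i)) \
    --block-size 128 &   # append the remaining documented flags here
done
wait
```
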