Skip to content

Commit 97178a8

Browse files
committed
Add support for enforce eager, remove URL file creation
1 parent e7b6871 commit 97178a8

File tree

3 files changed

+31
-15
lines changed

3 files changed

+31
-15
lines changed

vec_inf/launch_server.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ while [[ "$#" -gt 0 ]]; do
1919
--log-dir) log_dir="$2"; shift ;;
2020
--model-weights-parent-dir) model_weights_parent_dir="$2"; shift ;;
2121
--pipeline-parallelism) pipeline_parallelism="$2"; shift ;;
22+
--enforce-eager) enforce_eager="$2"; shift ;;
2223
*) echo "Unknown parameter passed: $1"; exit 1 ;;
2324
esac
2425
shift
@@ -59,6 +60,12 @@ else
5960
export PIPELINE_PARALLELISM="False"
6061
fi
6162

63+
if [ -n "$enforce_eager" ]; then
64+
export ENFORCE_EAGER=$enforce_eager
65+
else
66+
export ENFORCE_EAGER="False"
67+
fi
68+
6269
# ================================= Set default environment variables ======================================
6370
# Slurm job configuration
6471
export JOB_NAME="$MODEL_FAMILY-$MODEL_VARIANT"
@@ -68,10 +75,9 @@ fi
6875
mkdir -p $LOG_DIR
6976

7077
# Model and entrypoint configuration. API Server URL (host, port) are set automatically based on the
71-
# SLURM job and are written to the file specified at VLLM_BASE_URL_FILENAME
78+
# SLURM job
7279
export SRC_DIR="$(dirname "$0")"
7380
export MODEL_DIR="${SRC_DIR}/models/${MODEL_FAMILY}"
74-
export VLLM_BASE_URL_FILENAME="${MODEL_DIR}/.${JOB_NAME}_url"
7581

7682
# Variables specific to your working environment, below are examples for the Vector cluster
7783
export VLLM_MODEL_WEIGHTS="${MODEL_WEIGHTS_PARENT_DIR}/${JOB_NAME}"
@@ -89,11 +95,6 @@ if [[ $fp16_partitions =~ $JOB_PARTITION ]]; then
8995
echo "Data type set to due to non-Ampere GPUs used: $VLLM_DATA_TYPE"
9096
fi
9197

92-
# Create a file to store the API server URL if it doesn't exist
93-
if [ -f $VLLM_BASE_URL_FILENAME ]; then
94-
touch $VLLM_BASE_URL_FILENAME
95-
fi
96-
9798
echo Job Name: $JOB_NAME
9899
echo Partition: $JOB_PARTITION
99100
echo Num Nodes: $NUM_NODES
@@ -105,6 +106,7 @@ echo Max Model Length: $VLLM_MAX_MODEL_LEN
105106
echo Max Num Seqs: $VLLM_MAX_NUM_SEQS
106107
echo Vocabulary Size: $VLLM_MAX_LOGPROBS
107108
echo Pipeline Parallelism: $PIPELINE_PARALLELISM
109+
echo Enforce Eager: $ENFORCE_EAGER
108110
echo Log Directory: $LOG_DIR
109111
echo Model Weights Parent Directory: $MODEL_WEIGHTS_PARENT_DIR
110112

vec_inf/multinode_vllm.slurm

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ nvidia-smi
1212
source ${SRC_DIR}/find_port.sh
1313

1414
if [ "$VENV_BASE" = "singularity" ]; then
15-
export SINGULARITY_IMAGE=/projects/aieng/public/vector-inference_0.3.4.sif
15+
export SINGULARITY_IMAGE=/projects/aieng/public/vector-inference_0.6.4.post1.sif
1616
export VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
1717
module load singularity-ce/3.8.2
1818
singularity exec $SINGULARITY_IMAGE ray stop
@@ -73,7 +73,6 @@ done
7373
vllm_port_number=$(find_available_port $head_node_ip 8080 65535)
7474

7575
echo "Server address: http://${head_node_ip}:${vllm_port_number}/v1"
76-
echo "http://${head_node_ip}:${vllm_port_number}/v1" > ${VLLM_BASE_URL_FILENAME}
7776

7877
if [ "$PIPELINE_PARALLELISM" = "True" ]; then
7978
export PIPELINE_PARALLEL_SIZE=$NUM_NODES
@@ -83,6 +82,12 @@ else
8382
export TENSOR_PARALLEL_SIZE=$((NUM_NODES*NUM_GPUS))
8483
fi
8584

85+
if [ "$ENFORCE_EAGER" = "True" ]; then
86+
export ENFORCE_EAGER="--enforce-eager"
87+
else
88+
export ENFORCE_EAGER=""
89+
fi
90+
8691
# Activate vllm venv
8792
if [ "$VENV_BASE" = "singularity" ]; then
8893
singularity exec --nv --bind ${MODEL_WEIGHTS_PARENT_DIR}:${MODEL_WEIGHTS_PARENT_DIR} $SINGULARITY_IMAGE \
@@ -97,7 +102,8 @@ if [ "$VENV_BASE" = "singularity" ]; then
97102
--trust-remote-code \
98103
--max-logprobs ${VLLM_MAX_LOGPROBS} \
99104
--max-model-len ${VLLM_MAX_MODEL_LEN} \
100-
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
105+
--max-num-seqs ${VLLM_MAX_NUM_SEQS} \
106+
${ENFORCE_EAGER}
101107
else
102108
source ${VENV_BASE}/bin/activate
103109
python3 -m vllm.entrypoints.openai.api_server \
@@ -111,5 +117,6 @@ else
111117
--trust-remote-code \
112118
--max-logprobs ${VLLM_MAX_LOGPROBS} \
113119
--max-model-len ${VLLM_MAX_MODEL_LEN} \
114-
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
120+
--max-num-seqs ${VLLM_MAX_NUM_SEQS} \
121+
${ENFORCE_EAGER}
115122
fi

vec_inf/vllm.slurm

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,16 @@ hostname=${SLURMD_NODENAME}
1414
vllm_port_number=$(find_available_port $hostname 8080 65535)
1515

1616
echo "Server address: http://${hostname}:${vllm_port_number}/v1"
17-
echo "http://${hostname}:${vllm_port_number}/v1" > ${VLLM_BASE_URL_FILENAME}
17+
18+
if [ "$ENFORCE_EAGER" = "True" ]; then
19+
export ENFORCE_EAGER="--enforce-eager"
20+
else
21+
export ENFORCE_EAGER=""
22+
fi
1823

1924
# Activate vllm venv
2025
if [ "$VENV_BASE" = "singularity" ]; then
21-
export SINGULARITY_IMAGE=/projects/aieng/public/vector-inference_0.3.4.sif
26+
export SINGULARITY_IMAGE=/projects/aieng/public/vector-inference_0.6.4.post1.sif
2227
export VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
2328
module load singularity-ce/3.8.2
2429
singularity exec $SINGULARITY_IMAGE ray stop
@@ -33,7 +38,8 @@ if [ "$VENV_BASE" = "singularity" ]; then
3338
--max-logprobs ${VLLM_MAX_LOGPROBS} \
3439
--trust-remote-code \
3540
--max-model-len ${VLLM_MAX_MODEL_LEN} \
36-
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
41+
--max-num-seqs ${VLLM_MAX_NUM_SEQS} \
42+
${ENFORCE_EAGER}
3743
else
3844
source ${VENV_BASE}/bin/activate
3945
python3 -m vllm.entrypoints.openai.api_server \
@@ -46,5 +52,6 @@ else
4652
--max-logprobs ${VLLM_MAX_LOGPROBS} \
4753
--trust-remote-code \
4854
--max-model-len ${VLLM_MAX_MODEL_LEN} \
49-
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
55+
--max-num-seqs ${VLLM_MAX_NUM_SEQS} \
56+
${ENFORCE_EAGER}
5057
fi

0 commit comments

Comments
 (0)