Skip to content

Commit 487aef8

Browse files
committed
Enable default values for common params; add max_num_seqs param
1 parent c5d7ae7 commit 487aef8

File tree

4 files changed

+56
-30
lines changed

4 files changed

+56
-30
lines changed

vec_inf/cli/_cli.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import inspect
12
import os
23
import time
34
from typing import Optional
@@ -27,9 +28,19 @@ def cli():
2728
@click.option(
2829
"--max-model-len",
2930
type=int,
30-
help="Model context length. If unspecified, will be automatically derived from the model config.",
31+
help="Model context length. Default value set based on suggested resource allocation.",
32+
)
33+
@click.option(
34+
"--max-num-seqs",
35+
type=int,
36+
help="Maximum number of sequences to process in a single request",
37+
)
38+
@click.option(
39+
"--partition",
40+
type=str,
41+
default="a40",
42+
help="Type of compute partition, default to a40"
3143
)
32-
@click.option("--partition", type=str, help="Type of compute partition, default to a40")
3344
@click.option(
3445
"--num-nodes",
3546
type=int,
@@ -43,29 +54,37 @@ def cli():
4354
@click.option(
4455
"--qos",
4556
type=str,
46-
help="Quality of service, default depends on suggested resource allocation required for the model",
57+
default="m2",
58+
help="Quality of service, default set to m2",
4759
)
4860
@click.option(
4961
"--time",
5062
type=str,
51-
help="Time limit for job, this should comply with QoS, default to max walltime of the chosen QoS",
63+
default="08:00:00",
64+
help="Time limit for job, this should comply with QoS, default to max walltime of m2",
5265
)
5366
@click.option(
5467
"--vocab-size",
5568
type=int,
5669
help="Vocabulary size, this option is intended for custom models",
5770
)
58-
@click.option("--data-type", type=str, help="Model data type, default to auto")
59-
@click.option("--venv", type=str, help="Path to virtual environment")
71+
@click.option("--data-type", type=str, default="auto", help="Model data type, default to auto")
72+
@click.option(
73+
"--venv",
74+
type=str,
75+
default="singularity",
76+
help="Path to virtual environment, default to preconfigured singularity container"
77+
)
6078
@click.option(
6179
"--log-dir",
6280
type=str,
63-
help="Path to slurm log directory, default to .vec-inf-logs in home directory",
81+
default="default",
82+
help="Path to slurm log directory, default to .vec-inf-logs in user home directory",
6483
)
6584
@click.option(
6685
"--pipeline-parallelism",
6786
type=str,
68-
help="Enable pipeline parallelism, accepts 'true' or 'false', defaults to 'true' for supported models"
87+
help="Enable pipeline parallelism, accepts 'True' or 'False', default to 'True' for supported models",
6988
)
7089
@click.option(
7190
"--json-mode",
@@ -77,6 +96,7 @@ def launch(
7796
model_family: Optional[str] = None,
7897
model_variant: Optional[str] = None,
7998
max_model_len: Optional[int] = None,
99+
max_num_seqs: Optional[int] = None,
80100
partition: Optional[str] = None,
81101
num_nodes: Optional[int] = None,
82102
num_gpus: Optional[int] = None,
@@ -92,8 +112,9 @@ def launch(
92112
"""
93113
Launch a model on the cluster
94114
"""
95-
96-
pipeline_parallelism = pipeline_parallelism is None or pipeline_parallelism.lower() == "true"
115+
116+
if isinstance(pipeline_parallelism, str):
117+
pipeline_parallelism = pipeline_parallelism.lower() == "true"
97118

98119
launch_script_path = os.path.join(
99120
os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "launch_server.sh"

vec_inf/launch_server.sh

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,18 @@ while [[ "$#" -gt 0 ]]; do
1212
--num-nodes) num_nodes="$2"; shift ;;
1313
--num-gpus) num_gpus="$2"; shift ;;
1414
--max-model-len) max_model_len="$2"; shift ;;
15+
--max-num-seqs) max_num_seqs="$2"; shift ;;
1516
--vocab-size) vocab_size="$2"; shift ;;
1617
--data-type) data_type="$2"; shift ;;
17-
--venv) virtual_env="$2"; shift ;;
18+
--venv) venv="$2"; shift ;;
1819
--log-dir) log_dir="$2"; shift ;;
1920
--pipeline-parallelism) pipeline_parallelism="$2"; shift ;;
2021
*) echo "Unknown parameter passed: $1"; exit 1 ;;
2122
esac
2223
shift
2324
done
2425

25-
required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size pipeline_parallelism)
26+
required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size data_type venv log_dir)
2627

2728
for var in "$required_vars[@]"; do
2829
if [ -z "$!var" ]; then
@@ -40,22 +41,20 @@ export NUM_NODES=$num_nodes
4041
export NUM_GPUS=$num_gpus
4142
export VLLM_MAX_MODEL_LEN=$max_model_len
4243
export VLLM_MAX_LOGPROBS=$vocab_size
43-
export PIPELINE_PARALLELISM=$pipeline_parallelism
44-
# For custom models, the following are set to default if not specified
45-
export VLLM_DATA_TYPE="auto"
46-
export VENV_BASE="singularity"
47-
export LOG_DIR="default"
48-
49-
if [ -n "$data_type" ]; then
50-
export VLLM_DATA_TYPE=$data_type
44+
export VLLM_DATA_TYPE=$data_type
45+
export VENV_BASE=$venv
46+
export LOG_DIR=$log_dir
47+
48+
if [ -n "$max_num_seqs" ]; then
49+
export VLLM_MAX_NUM_SEQS=$max_num_seqs
50+
else
51+
export VLLM_MAX_NUM_SEQS=256
5152
fi
5253

53-
if [ -n "$virtual_env" ]; then
54-
export VENV_BASE=$virtual_env
55-
fi
56-
57-
if [ -n "$log_dir" ]; then
58-
export LOG_DIR=$log_dir
54+
if [ -n "$pipeline_parallelism" ]; then
55+
export PIPELINE_PARALLELISM=$pipeline_parallelism
56+
else
57+
export PIPELINE_PARALLELISM="False"
5958
fi
6059

6160
# ================================= Set default environment variables ======================================
@@ -100,6 +99,8 @@ echo GPUs per Node: $NUM_GPUS
10099
echo QOS: $QOS
101100
echo Walltime: $WALLTIME
102101
echo Data Type: $VLLM_DATA_TYPE
102+
echo Max Model Length: $VLLM_MAX_MODEL_LEN
103+
echo Max Num Seqs: $VLLM_MAX_NUM_SEQS
103104

104105
is_special=""
105106
if [ "$NUM_NODES" -gt 1 ]; then

vec_inf/multinode_vllm.slurm

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ if [ "$VENV_BASE" = "singularity" ]; then
9595
--dtype ${VLLM_DATA_TYPE} \
9696
--trust-remote-code \
9797
--max-logprobs ${VLLM_MAX_LOGPROBS} \
98-
--max-model-len ${VLLM_MAX_MODEL_LEN}
98+
--max-model-len ${VLLM_MAX_MODEL_LEN} \
99+
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
99100
else
100101
source ${VENV_BASE}/bin/activate
101102
python3 -m vllm.entrypoints.openai.api_server \
@@ -108,5 +109,6 @@ else
108109
--dtype ${VLLM_DATA_TYPE} \
109110
--trust-remote-code \
110111
--max-logprobs ${VLLM_MAX_LOGPROBS} \
111-
--max-model-len ${VLLM_MAX_MODEL_LEN}
112+
--max-model-len ${VLLM_MAX_MODEL_LEN} \
113+
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
112114
fi

vec_inf/vllm.slurm

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ if [ "$VENV_BASE" = "singularity" ]; then
3131
--dtype ${VLLM_DATA_TYPE} \
3232
--max-logprobs ${VLLM_MAX_LOGPROBS} \
3333
--trust-remote-code \
34-
--max-model-len ${VLLM_MAX_MODEL_LEN}
34+
--max-model-len ${VLLM_MAX_MODEL_LEN} \
35+
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
3536
else
3637
source ${VENV_BASE}/bin/activate
3738
python3 -m vllm.entrypoints.openai.api_server \
@@ -43,5 +44,6 @@ else
4344
--dtype ${VLLM_DATA_TYPE} \
4445
--max-logprobs ${VLLM_MAX_LOGPROBS} \
4546
--trust-remote-code \
46-
--max-model-len ${VLLM_MAX_MODEL_LEN}
47+
--max-model-len ${VLLM_MAX_MODEL_LEN} \
48+
--max-num-seqs ${VLLM_MAX_NUM_SEQS}
4749
fi

0 commit comments

Comments
 (0)