1+ import inspect
12import os
23import time
34from typing import Optional
@@ -27,9 +28,19 @@ def cli():
2728@click .option (
2829 "--max-model-len" ,
2930 type = int ,
30- help = "Model context length. If unspecified, will be automatically derived from the model config." ,
31+ help = "Model context length. Default value set based on suggested resource allocation." ,
32+ )
33+ @click .option (
34+ "--max-num-seqs" ,
35+ type = int ,
36+ help = "Maximum number of sequences to process in a single request" ,
37+ )
38+ @click .option (
39+ "--partition" ,
40+ type = str ,
41+ default = "a40" ,
42+ help = "Type of compute partition, default to a40"
3143)
32- @click .option ("--partition" , type = str , help = "Type of compute partition, default to a40" )
3344@click .option (
3445 "--num-nodes" ,
3546 type = int ,
@@ -43,29 +54,37 @@ def cli():
4354@click .option (
4455 "--qos" ,
4556 type = str ,
46- help = "Quality of service, default depends on suggested resource allocation required for the model" ,
57+ default = "m2" ,
58+ help = "Quality of service, default set to m2" ,
4759)
4860@click .option (
4961 "--time" ,
5062 type = str ,
51- help = "Time limit for job, this should comply with QoS, default to max walltime of the chosen QoS" ,
63+ default = "08:00:00" ,
64+ help = "Time limit for job, this should comply with QoS, default to max walltime of m2" ,
5265)
5366@click .option (
5467 "--vocab-size" ,
5568 type = int ,
5669 help = "Vocabulary size, this option is intended for custom models" ,
5770)
58- @click .option ("--data-type" , type = str , help = "Model data type, default to auto" )
59- @click .option ("--venv" , type = str , help = "Path to virtual environment" )
71+ @click .option ("--data-type" , type = str , default = "auto" , help = "Model data type, default to auto" )
72+ @click .option (
73+ "--venv" ,
74+ type = str ,
75+ default = "singularity" ,
76+ help = "Path to virtual environment, default to preconfigured singularity container"
77+ )
6078@click .option (
6179 "--log-dir" ,
6280 type = str ,
63- help = "Path to slurm log directory, default to .vec-inf-logs in home directory" ,
81+ default = "default" ,
82+ help = "Path to slurm log directory, default to .vec-inf-logs in user home directory" ,
6483)
6584@click .option (
6685 "--pipeline-parallelism" ,
6786 type = str ,
68- help = "Enable pipeline parallelism, accepts 'true ' or 'false ', defaults to 'true ' for supported models"
87+ help = "Enable pipeline parallelism, accepts 'True ' or 'False ', default to 'True ' for supported models" ,
6988)
7089@click .option (
7190 "--json-mode" ,
@@ -77,6 +96,7 @@ def launch(
7796 model_family : Optional [str ] = None ,
7897 model_variant : Optional [str ] = None ,
7998 max_model_len : Optional [int ] = None ,
99+ max_num_seqs : Optional [int ] = None ,
80100 partition : Optional [str ] = None ,
81101 num_nodes : Optional [int ] = None ,
82102 num_gpus : Optional [int ] = None ,
@@ -92,8 +112,9 @@ def launch(
92112 """
93113 Launch a model on the cluster
94114 """
95-
96- pipeline_parallelism = pipeline_parallelism is None or pipeline_parallelism .lower () == "true"
115+
116+ if isinstance (pipeline_parallelism , str ):
117+ pipeline_parallelism = pipeline_parallelism .lower () == "true"
97118
98119 launch_script_path = os .path .join (
99120 os .path .dirname (os .path .dirname (os .path .realpath (__file__ ))), "launch_server.sh"
0 commit comments