3434 "max_model_len" ,
3535}
3636
# Launch options handled as on/off flags: `_get_launch_params` sets each one
# to True when the matching CLI kwarg is truthy, and leaves them out of the
# generic "merge other overrides" pass.
BOOLEAN_FIELDS = {
    "enable_chunked_prefill",
    "enable_prefix_caching",
    "enforce_eager",
    "pipeline_parallelism",
}
43+
# Colon-separated library directories: the cuDNN 8.5 (CUDA 11.7 build) libs
# followed by the CUDA 11.7 x86_64 target libs.
# NOTE(review): presumably exported to the launched job's environment —
# confirm against the code that consumes this constant.
LD_LIBRARY_PATH = (
    "/scratch/ssd001/pkgs/cudnn-11.7-v8.5.0.96/lib/:"
    "/scratch/ssd001/pkgs/cuda-11.7/targets/x86_64-linux/lib/"
)
# Directory two levels above this file, as a plain string.
SRC_DIR = str(Path(__file__).parent.parent)
3946
@@ -90,16 +97,15 @@ def _get_launch_params(self) -> dict[str, Any]:
9097 params = self .model_config .model_dump ()
9198
9299 # Process boolean fields
93- for bool_field in [ "pipeline_parallelism" , "enforce_eager" ] :
94- if ( value := self .cli_kwargs . get ( bool_field )) is not None :
95- params [bool_field ] = utils . convert_boolean_value ( value )
100+ for bool_field in BOOLEAN_FIELDS :
101+ if self .cli_kwargs [ bool_field ] :
102+ params [bool_field ] = True
96103
97104 # Merge other overrides
98105 for key , value in self .cli_kwargs .items ():
99106 if value is not None and key not in [
100107 "json_mode" ,
101- "pipeline_parallelism" ,
102- "enforce_eager" ,
108+ * BOOLEAN_FIELDS ,
103109 ]:
104110 params [key ] = value
105111
@@ -129,7 +135,7 @@ def set_env_vars(self) -> None:
129135 os .environ ["GPU_MEMORY_UTILIZATION" ] = self .params ["gpu_memory_utilization" ]
130136 os .environ ["TASK" ] = VLLM_TASK_MAP [self .params ["model_type" ]]
131137 os .environ ["PIPELINE_PARALLELISM" ] = self .params ["pipeline_parallelism" ]
132- os .environ ["ENFORCE_EAGER " ] = self .params ["enforce_eager " ]
138+ os .environ ["COMPILATION_CONFIG " ] = self .params ["compilation_config " ]
133139 os .environ ["SRC_DIR" ] = SRC_DIR
134140 os .environ ["MODEL_WEIGHTS" ] = str (
135141 Path (self .params ["model_weights_parent_dir" ], self .model_name )
@@ -138,6 +144,15 @@ def set_env_vars(self) -> None:
138144 os .environ ["VENV_BASE" ] = self .params ["venv" ]
139145 os .environ ["LOG_DIR" ] = self .params ["log_dir" ]
140146
147+ if self .params .get ("enable_prefix_caching" ):
148+ os .environ ["ENABLE_PREFIX_CACHING" ] = self .params ["enable_prefix_caching" ]
149+ if self .params .get ("enable_chunked_prefill" ):
150+ os .environ ["ENABLE_CHUNKED_PREFILL" ] = self .params ["enable_chunked_prefill" ]
151+ if self .params .get ("max_num_batched_tokens" ):
152+ os .environ ["MAX_NUM_BATCHED_TOKENS" ] = self .params ["max_num_batched_tokens" ]
153+ if self .params .get ("enforce_eager" ):
154+ os .environ ["ENFORCE_EAGER" ] = self .params ["enforce_eager" ]
155+
141156 def build_launch_command (self ) -> str :
142157 """Construct the full launch command with parameters."""
143158 # Base command
@@ -185,8 +200,20 @@ def format_table_output(self, job_id: str) -> Table:
185200 table .add_row ("Max Model Length" , self .params ["max_model_len" ])
186201 table .add_row ("Max Num Seqs" , self .params ["max_num_seqs" ])
187202 table .add_row ("GPU Memory Utilization" , self .params ["gpu_memory_utilization" ])
203+ table .add_row ("Compilation Config" , self .params ["compilation_config" ])
188204 table .add_row ("Pipeline Parallelism" , self .params ["pipeline_parallelism" ])
189- table .add_row ("Enforce Eager" , self .params ["enforce_eager" ])
205+ if self .params .get ("enable_prefix_caching" ):
206+ table .add_row ("Enable Prefix Caching" , self .params ["enable_prefix_caching" ])
207+ if self .params .get ("enable_chunked_prefill" ):
208+ table .add_row (
209+ "Enable Chunked Prefill" , self .params ["enable_chunked_prefill" ]
210+ )
211+ if self .params .get ("max_num_batched_tokens" ):
212+ table .add_row (
213+ "Max Num Batched Tokens" , self .params ["max_num_batched_tokens" ]
214+ )
215+ if self .params .get ("enforce_eager" ):
216+ table .add_row ("Enforce Eager" , self .params ["enforce_eager" ])
190217 table .add_row ("Model Weights Directory" , os .environ .get ("MODEL_WEIGHTS" ))
191218 table .add_row ("Log Directory" , self .params ["log_dir" ])
192219
0 commit comments