You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: vec_inf/cli/_cli.py
+20 lines changed: 20 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -39,6 +39,21 @@ def cli() -> None:
39
39
type=float,
40
40
help="GPU memory utilization, default to 0.9",
41
41
)
42
+
@click.option(
43
+
"--enable-prefix-caching",
44
+
type=click.Choice(["True", "False"]),
45
+
help="Enables automatic prefix caching, accepts 'True' or 'False', default to 'False'",
46
+
)
47
+
@click.option(
48
+
"--enable-chunked-prefill",
49
+
type=click.Choice(["True", "False"]),
50
+
help="Enable chunked prefill, accepts 'True' or 'False', default to 'True' if max-num-seqs > 32k, else 'False'",
51
+
)
52
+
@click.option(
53
+
"--max-num-batched-tokens",
54
+
type=int,
55
+
help="Maximum number of batched tokens per iteration, defaults to min(2048, max-num-seqs), pairs with --enable-chunked-prefill to control the batch size at the prefill stage",
56
+
)
42
57
@click.option(
43
58
"--partition",
44
59
type=str,
@@ -90,6 +105,11 @@ def cli() -> None:
90
105
type=str,
91
106
help="Enable pipeline parallelism, accepts 'True' or 'False', default to 'True' for supported models",
92
107
)
108
+
@click.option(
109
+
"--compilation-config",
110
+
type=click.Choice(["0", "3"]),
111
+
help="torch.compile optimization level, accepts '0' or '3', default to '0', which means no optimization is applied",
0 commit comments