Skip to content

Commit a8e3f77

Browse files
committed
Add chunk_size cmdline arg to model_init
1 parent 25e46d8 commit a8e3f77

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

exllamav2/model_init.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ def add_args(parser):
2222
parser.add_argument("-lq4", "--load_q4", action = "store_true", help = "Load weights in Q4 mode")
2323
parser.add_argument("-fst", "--fast_safetensors", action = "store_true", help = "Use alternative safetensors loader (with direct I/O when available)")
2424
parser.add_argument("-ic", "--ignore_compatibility", action = "store_true", help = "Do not override model config options in case of compatibility issues")
25+
parser.add_argument("-chunk", "--chunk_size", type = int, help = "Chunk size ('input length')")
26+
2527

2628

2729
def print_options(args):
@@ -41,6 +43,7 @@ def print_options(args):
4143
if args.experts_per_token is not None: print_opts += [f"experts_per_token: {args.experts_per_token}"]
4244
if args.load_q4: print_opts += ["load_q4"]
4345
if args.ignore_compatibility: print_opts += ["ignore_compatibility"]
46+
if args.chunk_size is not None: print_opts += [f"chunk_size: {args.chunk_size}"]
4447
print(f" -- Options: {print_opts}")
4548

4649

@@ -107,6 +110,10 @@ def init(args,
107110
if args.low_mem: config.set_low_mem()
108111
if args.load_q4: config.load_in_q4 = True
109112

113+
if args.chunk_size is not None:
114+
config.max_input_len = args.chunk_size
115+
config.max_attention_size = args.chunk_size ** 2
116+
110117
# Compatibility warnings
111118

112119
config.arch_compat_overrides(warn_only = args.ignore_compatibility)

0 commit comments

Comments
 (0)