@@ -22,6 +22,8 @@ def add_args(parser):
2222 parser .add_argument ("-lq4" , "--load_q4" , action = "store_true" , help = "Load weights in Q4 mode" )
2323 parser .add_argument ("-fst" , "--fast_safetensors" , action = "store_true" , help = "Use alternative safetensors loader (with direct I/O when available)" )
2424 parser .add_argument ("-ic" , "--ignore_compatibility" , action = "store_true" , help = "Do not override model config options in case of compatibility issues" )
25+ parser .add_argument ("-chunk" , "--chunk_size" , type = int , help = "Chunk size ('input length')" )
26+
2527
2628
2729def print_options (args ):
@@ -41,6 +43,7 @@ def print_options(args):
4143 if args .experts_per_token is not None : print_opts += [f"experts_per_token: { args .experts_per_token } " ]
4244 if args .load_q4 : print_opts += ["load_q4" ]
4345 if args .ignore_compatibility : print_opts += ["ignore_compatibility" ]
46+ if args .chunk_size is not None : print_opts += [f"chunk_size: { args .chunk_size } " ]
4447 print (f" -- Options: { print_opts } " )
4548
4649
@@ -107,6 +110,10 @@ def init(args,
107110 if args .low_mem : config .set_low_mem ()
108111 if args .load_q4 : config .load_in_q4 = True
109112
113+ if args .chunk_size is not None :
114+ config .max_input_len = args .chunk_size
115+ config .max_attention_size = args .chunk_size ** 2
116+
110117 # Compatibility warnings
111118
112119 config .arch_compat_overrides (warn_only = args .ignore_compatibility )
0 commit comments