You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: exllamav2/conversion/convert_exl2.py
+8 Lines changed: 8 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -31,6 +31,7 @@
31
31
parser.add_argument("-ml", "--measurement_length", type=int, default=2048, help="Max no. tokens per sample when measuring")
32
32
parser.add_argument("-so", "--status_output", action="store_true", help="Include machine-parseable status updates in console output")
33
33
parser.add_argument("-hsol", "--hidden_state_offload_layers", type=int, default=0, help="Number of hidden/target states to keep in VRAM. Speed-up but increases VRAM usage")
34
+
parser.add_argument("-fst", "--fast_safetensors", action="store_true", help="Use fast-safetensors to load layers of the unquantized model. This can help alleviate some out-of-memory issues, especially on Windows.")
34
35
35
36
args=parser.parse_args()
36
37
@@ -112,6 +113,7 @@ def save_job():
112
113
"rope_scale": args.rope_scale,
113
114
"rope_alpha": args.rope_alpha,
114
115
"output_measurement": output_measurement,
116
+
"fast_safetensors": args.fast_safetensors,
115
117
"progress": "begin"}
116
118
117
119
if args.measurement is not None:
@@ -160,6 +162,8 @@ def save_job():
160
162
else:
161
163
print(f" -- Measurement will be saved to {job['output_measurement']}")
162
164
print(f" !! Conversion script will end after measurement pass")
0 commit comments