Skip to content

Commit c68069d

Browse files
committed
clean up a bit
1 parent 832dc26 commit c68069d

File tree

2 files changed

+9
-7
lines changed

2 files changed

+9
-7
lines changed

convert_hf_to_gguf.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7723,11 +7723,14 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
77237723
scales = scales.unsqueeze(-1)
77247724
assert len(blocks.shape) == 4
77257725
assert len(scales.shape) == 4
7726-
new_data = torch.cat([scales, blocks], dim=-1)
7727-
new_data = new_data.numpy()
7728-
new_shape = [scales.shape[0], scales.shape[1], scales.shape[2] * 32]
7726+
scales = scales.numpy()
7727+
blocks = blocks.numpy()
7728+
new_data = np.concatenate([scales, blocks], axis=-1)
7729+
new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
77297730
logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
7730-
self.gguf_writer.add_tensor(new_name, new_data, new_shape, gguf.GGMLQuantizationType.MXFP4)
7731+
# flatten the last two dims into a single dim
7732+
new_data = new_data.reshape(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
7733+
self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
77317734

77327735
def convert_moe_packed_tensors(
77337736
self,
@@ -7794,7 +7797,6 @@ def convert_moe_packed_tensors(
77947797
out = out.reshape(*prefix_shape, G, B * 2).view(*prefix_shape, G * B * 2)
77957798
out = out.numpy()
77967799
logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4 to F16")
7797-
print(out.dtype, out.device, out.shape)
77987800
self.gguf_writer.add_tensor(new_name, out)
77997801

78007802
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
@@ -7806,7 +7808,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
78067808
blocks0 = data_torch
78077809
elif "mlp.experts.down_proj_scales" in name:
78087810
new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
7809-
#self.repack_mxfp4(new_name, blocks0, data_torch)
7811+
# self.repack_mxfp4(new_name, blocks0, data_torch)
78107812
self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
78117813
elif "mlp.experts.gate_up_proj_blocks" in name:
78127814
blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]

gguf-py/gguf/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2802,7 +2802,7 @@ class VisionProjectorType:
28022802
GGMLQuantizationType.BF16: (1, 2),
28032803
GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
28042804
GGMLQuantizationType.TQ2_0: (256, 2 + 64),
2805-
GGMLQuantizationType.MXFP4: (1, 1), # quick hack to write MXFP4 as U8
2805+
GGMLQuantizationType.MXFP4: (32, 1 + 16),
28062806
}
28072807

28082808

0 commit comments

Comments
 (0)