Skip to content

Commit ebc7da5

Browse files
committed
fix bf16 conversion
1 parent 4dd479b commit ebc7da5

File tree

1 file changed

+7
-8
lines changed

1 file changed

+7
-8
lines changed

convert_hf_to_gguf.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7738,9 +7738,9 @@ def convert_moe_packed_tensors(
77387738
blocks,
77397739
scales,
77407740
*,
7741-
dtype: torch.dtype = torch.float16,
7741+
dtype: torch.dtype = torch.float32,
77427742
rows_per_chunk: int = 32768 * 1024,
7743-
):
7743+
) -> tuple[str, Tensor]:
77447744
import math
77457745

77467746
scales = scales.to(torch.int32) - 127
@@ -7795,9 +7795,8 @@ def convert_moe_packed_tensors(
77957795
del idx_lo, idx_hi, blk, exp
77967796

77977797
out = out.reshape(*prefix_shape, G, B * 2).view(*prefix_shape, G * B * 2)
7798-
out = out.numpy()
7799-
logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4 to F16")
7800-
self.gguf_writer.add_tensor(new_name, out)
7798+
logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4")
7799+
return new_name, out
78017800

78027801
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
78037802
blocks0: Tensor = torch.zeros(1)
@@ -7809,7 +7808,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
78097808
elif "mlp.experts.down_proj_scales" in name:
78107809
new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
78117810
# self.repack_mxfp4(new_name, blocks0, data_torch)
7812-
self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
7811+
yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
78137812
elif "mlp.experts.gate_up_proj_blocks" in name:
78147813
blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
78157814
elif "mlp.experts.gate_up_proj_scales" in name:
@@ -7818,8 +7817,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
78187817
new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
78197818
# self.repack_mxfp4(new_name_gate, blocks0, scales0)
78207819
# self.repack_mxfp4(new_name_up, blocks1, scales1)
7821-
self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
7822-
self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
7820+
yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
7821+
yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
78237822
return []
78247823

78257824
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:

0 commit comments

Comments (0)