
Commit 3c4725b

direct mapping mxfp4, FINALLY

1 parent: 6197917

File tree: 1 file changed

convert_hf_to_gguf.py — 36 additions & 8 deletions
@@ -7807,14 +7807,42 @@ def set_vocab(self):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS

+    def transform_nibble_layout(self, tensor):
+        assert tensor.dtype == torch.uint8
+        assert tensor.shape[-1] == 16
+        tensor = tensor.clone().to(device="cpu")
+        # swap the two nibbles within each byte
+        t_lo = tensor & 0x0F
+        t_hi = tensor & 0xF0
+        t_swapped = (t_lo << 4) | (t_hi >> 4)
+        tensor = t_swapped
+        # transform aaaa...bbbb... to abababab...
+        blk_a, blk_b = tensor.chunk(2, dim=-1)
+        # get a_ : one a-nibble per output byte, placed in the high half
+        blk_a0 = (blk_a & 0xF0).view(-1, 1)
+        blk_a1 = (blk_a << 4).view(-1, 1)
+        blk_a = torch.stack((blk_a0, blk_a1), dim=2).view(tensor.shape)
+        # get _b : one b-nibble per output byte, placed in the low half
+        blk_b0 = (blk_b >> 4).view(-1, 1)
+        blk_b1 = (blk_b & 0x0F).view(-1, 1)
+        blk_b = torch.stack((blk_b0, blk_b1), dim=2).view(tensor.shape)
+        # swap once more, so each byte ends up as (b << 4) | a
+        out = blk_a | blk_b
+        out_h = out & 0xF0
+        out_l = out & 0x0F
+        out = (out_h >> 4) | (out_l << 4)
+        return out
+
     def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
         assert blocks.dtype == torch.uint8
         assert scales.dtype == torch.uint8
         scales = scales.unsqueeze(-1)
         assert len(blocks.shape) == 4
         assert len(scales.shape) == 4
-        scales = scales.numpy()
-        blocks = blocks.numpy()
+        # convert to numpy, materializing lazy tensors first
+        scales = scales.to_eager(scales).numpy()
+        blocks = blocks.to_eager(blocks)
+        blocks = self.transform_nibble_layout(blocks).numpy()
         new_data = np.concatenate([scales, blocks], axis=-1)
         new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
         logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
@@ -7897,18 +7925,18 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             blocks0 = data_torch
         elif "mlp.experts.down_proj_scales" in name:
             new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
-            # self.repack_mxfp4(new_name, blocks0, data_torch)
-            yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
+            self.repack_mxfp4(new_name, blocks0, data_torch)
+            # yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
         elif "mlp.experts.gate_up_proj_blocks" in name:
             blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
         elif "mlp.experts.gate_up_proj_scales" in name:
             scales0, scales1 = data_torch[:, ::2, :], data_torch[:, 1::2, :]
             new_name_gate = self.map_tensor_name(name.replace("gate_up_proj_scales", "gate_proj.weight"))
             new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
-            # self.repack_mxfp4(new_name_gate, blocks0, scales0)
-            # self.repack_mxfp4(new_name_up, blocks1, scales1)
-            yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
-            yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
+            self.repack_mxfp4(new_name_gate, blocks0, scales0)
+            self.repack_mxfp4(new_name_up, blocks1, scales1)
+            # yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
+            # yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
         return []

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
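The ::2 / 1::2 slicing above works because gpt-oss stores the fused gate_up_proj with gate and up rows interleaved along dim 1 (gate at even indices, up at odd, as the new_name_gate/new_name_up assignments imply). A toy sketch of that split, with illustrative sizes:

    # Illustrative only: de-interleaving fused gate/up rows with strided slices.
    import torch

    n_expert, n_ff = 2, 4  # toy sizes
    fused = torch.arange(n_expert * 2 * n_ff).view(n_expert, 2 * n_ff)
    gate = fused[:, ::2]   # even rows along dim 1 -> gate_proj
    up = fused[:, 1::2]    # odd rows along dim 1  -> up_proj
    assert gate.shape == (n_expert, n_ff) and up.shape == (n_expert, n_ff)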
