@@ -7723,11 +7723,14 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
77237723 scales = scales .unsqueeze (- 1 )
77247724 assert len (blocks .shape ) == 4
77257725 assert len (scales .shape ) == 4
7726- new_data = torch .cat ([scales , blocks ], dim = - 1 )
7727- new_data = new_data .numpy ()
7728- new_shape = [scales .shape [0 ], scales .shape [1 ], scales .shape [2 ] * 32 ]
7726+ scales = scales .numpy ()
7727+ blocks = blocks .numpy ()
7728+ new_data = np .concatenate ([scales , blocks ], axis = - 1 )
7729+ new_shape = [new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * 32 ]
77297730 logger .info (f"Repacked { new_name } with shape { new_shape } and quantization MXFP4" )
7730- self .gguf_writer .add_tensor (new_name , new_data , new_shape , gguf .GGMLQuantizationType .MXFP4 )
7731+ # merge the last two dims into one: (d0, d1, d2, d3) -> (d0, d1, d2 * d3)
7732+ new_data = new_data .reshape (new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * new_data .shape [3 ])
7733+ self .gguf_writer .add_tensor (new_name , new_data , raw_dtype = gguf .GGMLQuantizationType .MXFP4 )
77317734
77327735 def convert_moe_packed_tensors (
77337736 self ,
@@ -7794,7 +7797,6 @@ def convert_moe_packed_tensors(
77947797 out = out .reshape (* prefix_shape , G , B * 2 ).view (* prefix_shape , G * B * 2 )
77957798 out = out .numpy ()
77967799 logger .info (f"Unpacked { new_name } with shape { out .shape } from MXFP4 to F16" )
7797- print (out .dtype , out .device , out .shape )
77987800 self .gguf_writer .add_tensor (new_name , out )
77997801
78007802 def generate_extra_tensors (self ) -> Iterable [tuple [str , Tensor ]]:
@@ -7806,7 +7808,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
78067808 blocks0 = data_torch
78077809 elif "mlp.experts.down_proj_scales" in name :
78087810 new_name = self .map_tensor_name (name .replace ("_scales" , ".weight" ))
7809- #self.repack_mxfp4(new_name, blocks0, data_torch)
7811+ # self.repack_mxfp4(new_name, blocks0, data_torch)
78107812 self .convert_moe_packed_tensors (new_name , blocks0 , data_torch )
78117813 elif "mlp.experts.gate_up_proj_blocks" in name :
78127814 blocks0 , blocks1 = data_torch [:, ::2 , :, :], data_torch [:, 1 ::2 , :, :]
0 commit comments