@@ -7738,9 +7738,9 @@ def convert_moe_packed_tensors(
         blocks,
         scales,
         *,
-        dtype: torch.dtype = torch.float16,
+        dtype: torch.dtype = torch.float32,
         rows_per_chunk: int = 32768 * 1024,
-    ):
+    ) -> tuple[str, Tensor]:
         import math

         scales = scales.to(torch.int32) - 127
@@ -7795,9 +7795,8 @@ def convert_moe_packed_tensors(
         del idx_lo, idx_hi, blk, exp

         out = out.reshape(*prefix_shape, G, B * 2).view(*prefix_shape, G * B * 2)
-        out = out.numpy()
-        logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4 to F16")
-        self.gguf_writer.add_tensor(new_name, out)
+        logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4")
+        return new_name, out

     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         blocks0: Tensor = torch.zeros(1)
@@ -7809,7 +7808,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             elif "mlp.experts.down_proj_scales" in name:
                 new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
                 # self.repack_mxfp4(new_name, blocks0, data_torch)
-                self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
+                yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
             elif "mlp.experts.gate_up_proj_blocks" in name:
                 blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
             elif "mlp.experts.gate_up_proj_scales" in name:
@@ -7818,8 +7817,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
                 # self.repack_mxfp4(new_name_gate, blocks0, scales0)
                 # self.repack_mxfp4(new_name_up, blocks1, scales1)
-                self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
-                self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
+                yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
+                yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
         return []

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
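For context, here is a minimal sketch of what the MXFP4 unpacking boils down to: each byte packs two E2M1 (FP4) values, and each block of values shares one E8M0 exponent (bias 127), matching the `scales.to(torch.int32) - 127` step in the diff. The names `unpack_mxfp4` and `FP4_VALUES`, the nibble order, and the exact value table are assumptions for illustration, not the PR's implementation, which also chunks the work via `rows_per_chunk` and the lookup/interleave steps seen above.

import torch

# E2M1 (FP4) code points indexed by the 4-bit value; assumed table, not copied from the PR.
FP4_VALUES = [
    +0.0, +0.5, +1.0, +1.5, +2.0, +3.0, +4.0, +6.0,
    -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0,
]

def unpack_mxfp4(blocks: torch.Tensor, scales: torch.Tensor,
                 dtype: torch.dtype = torch.float32) -> torch.Tensor:
    # blocks: uint8, shape (..., G, B) -- two FP4 values packed per byte
    # scales: uint8, shape (..., G)    -- one shared E8M0 exponent per block
    lut = torch.tensor(FP4_VALUES, dtype=dtype)
    exp = (scales.to(torch.int32) - 127).to(dtype)   # remove E8M0 bias
    lo = lut[(blocks & 0x0F).long()]                 # low nibble of each byte
    hi = lut[(blocks >> 4).long()]                   # high nibble of each byte
    out = torch.stack((lo, hi), dim=-1).flatten(-2)  # interleave -> (..., G, B * 2)
    return out * torch.exp2(exp).unsqueeze(-1)       # apply per-block scale

The accompanying change in the diff is that `convert_moe_packed_tensors` now returns a `(new_name, out)` pair and `generate_extra_tensors` yields it, so the unpacked tensor is emitted through the generator instead of being written directly via `self.gguf_writer.add_tensor`.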