@@ -7807,14 +7807,42 @@ def set_vocab(self):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    def transform_nibble_layout(self, tensor: Tensor) -> Tensor:
+        assert tensor.dtype == torch.uint8
+        assert tensor.shape[-1] == 16
+        tensor = tensor.clone().to(device="cpu")
+        # swap the two nibbles within each byte
+        t_lo = tensor & 0x0F
+        t_hi = tensor & 0xF0
+        t_swapped = (t_lo << 4) | (t_hi >> 4)
+        tensor = t_swapped
+        # interleave the two block halves: aaaa...bbbb... -> abababab...
+        blk_a, blk_b = tensor.chunk(2, dim=-1)
+        # first half goes into the high-nibble position (a_)
+        blk_a0 = (blk_a & 0xF0).view(-1, 1)
+        blk_a1 = (blk_a << 4).view(-1, 1)
+        blk_a = torch.stack((blk_a0, blk_a1), dim=2).view(tensor.shape)
+        # second half goes into the low-nibble position (_b)
+        blk_b0 = (blk_b >> 4).view(-1, 1)
+        blk_b1 = (blk_b & 0x0F).view(-1, 1)
+        blk_b = torch.stack((blk_b0, blk_b1), dim=2).view(tensor.shape)
+        # combine, then swap nibbles once more
+        out = blk_a | blk_b
+        out_h = out & 0xF0
+        out_l = out & 0x0F
+        out = (out_h >> 4) | (out_l << 4)
+        return out
+
     def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
         assert blocks.dtype == torch.uint8
         assert scales.dtype == torch.uint8
         scales = scales.unsqueeze(-1)
         assert len(blocks.shape) == 4
         assert len(scales.shape) == 4
-        scales = scales.numpy()
-        blocks = blocks.numpy()
+        # materialize lazy tensors, then convert to numpy
+        scales = LazyTorchTensor.to_eager(scales).numpy()
+        blocks = LazyTorchTensor.to_eager(blocks)
+        blocks = self.transform_nibble_layout(blocks).numpy()
         new_data = np.concatenate([scales, blocks], axis=-1)
         new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
         logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
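
For reference, the relayout above maps the HF MXFP4 byte order onto ggml's block_mxfp4 order. A minimal scalar sketch of the same mapping, assuming HF packs FP4 element 2i in the low nibble of byte i (and 2i+1 in the high nibble) while ggml expects element j in the low nibble of qs[j] and element j+16 in the high nibble:

```python
import torch

def transform_reference(block: torch.Tensor) -> torch.Tensor:
    # scalar reference for transform_nibble_layout (packing assumptions above)
    assert block.dtype == torch.uint8 and block.shape[-1] == 16
    flat = block.reshape(-1, 16)
    out = torch.empty_like(flat)
    for row in range(flat.shape[0]):
        # unpack the 32 FP4 elements: byte i -> elements 2i (lo) and 2i+1 (hi)
        elems: list[int] = []
        for b in flat[row].tolist():
            elems += [b & 0x0F, b >> 4]
        # repack: qs[j] <- element j (low nibble) | element j+16 (high nibble)
        for j in range(16):
            out[row, j] = (elems[j + 16] << 4) | elems[j]
    return out.reshape(block.shape)
```

On any uint8 tensor with a trailing dimension of 16, this should agree byte-for-byte with the vectorized transform_nibble_layout.
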
@@ -7897,18 +7925,18 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 blocks0 = data_torch
             elif "mlp.experts.down_proj_scales" in name:
                 new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
-                # self.repack_mxfp4(new_name, blocks0, data_torch)
-                yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
+                self.repack_mxfp4(new_name, blocks0, data_torch)
+                # yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
             elif "mlp.experts.gate_up_proj_blocks" in name:
                 blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
             elif "mlp.experts.gate_up_proj_scales" in name:
                 scales0, scales1 = data_torch[:, ::2, :], data_torch[:, 1::2, :]
                 new_name_gate = self.map_tensor_name(name.replace("gate_up_proj_scales", "gate_proj.weight"))
                 new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
-                # self.repack_mxfp4(new_name_gate, blocks0, scales0)
-                # self.repack_mxfp4(new_name_up, blocks1, scales1)
-                yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
-                yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
+                self.repack_mxfp4(new_name_gate, blocks0, scales0)
+                self.repack_mxfp4(new_name_up, blocks1, scales1)
+                # yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
+                # yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
         return []
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
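
Each repacked block is thus 17 bytes: one E8M0 scale byte followed by 16 data bytes holding 32 FP4 elements, which is why new_shape multiplies the last dimension by 32. A hedged decoding sketch per the OCP MX spec (not ggml's internal kernels), useful for spot-checking a repacked block against what convert_moe_packed_tensors would produce:

```python
import numpy as np

# signed E2M1 lookup table: codes 0..7 are positive, 8..15 negative
FP4_VALUES = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
                       -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0])

def dequant_block(block17: np.ndarray) -> np.ndarray:
    # decode one 17-byte MXFP4 block into 32 floats
    assert block17.dtype == np.uint8 and block17.shape == (17,)
    scale = 2.0 ** (int(block17[0]) - 127)  # E8M0: pure power-of-two scale
    qs = block17[1:]
    lo = FP4_VALUES[qs & 0x0F]  # elements 0..15 from the low nibbles
    hi = FP4_VALUES[qs >> 4]    # elements 16..31 from the high nibbles
    return scale * np.concatenate([lo, hi])
```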