@@ -1579,15 +1579,7 @@ def __init__(self, *args, **kwargs):
15791579
15801580 # TODO @ngxson : this is a hack to support both vision and audio encoders
15811581 have_multiple_encoders = self .has_audio_encoder and self .has_vision_encoder
1582- self .block_count = 128 if have_multiple_encoders else self .find_hparam (self .n_block_keys , True )
1583- # FIXME: DeepseekOCRVisionModel specific hack
1584- if self .block_count is None :
1585- if isinstance (self , DeepseekOCRVisionModel ):
1586- clip_block_count = self .hparams ['layers' ]
1587- if clip_block_count is not None :
1588- self .block_count = clip_block_count
1589- if self .block_count is None :
1590- raise KeyError (f"could not find block count using any of: { self .n_block_keys } " )
1582+ self .block_count = 128 if have_multiple_encoders else self .find_hparam (self .n_block_keys )
15911583 self .tensor_map = gguf .get_tensor_name_map (gguf .MODEL_ARCH .MMPROJ , self .block_count )
15921584
15931585 # load preprocessor config
@@ -6003,16 +5995,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
60035995
60045996@ModelBase .register ("DeepseekOCRForCausalLM" )
60055997class DeepseekOCRVisionModel (MmprojModel ):
6006- def __init__ (self , * args , ** kwargs ):
6007- super ().__init__ (* args , ** kwargs )
6008-
6009- proc_fname = self .dir_model / "processor_config.json"
6010-
6011- if proc_fname .is_file ():
6012- with open (proc_fname , "r" ) as f :
6013- self .preprocessor_config = json .load (f )
6014-
6015-
60165998 def set_gguf_parameters (self ):
60175999 super ().set_gguf_parameters ()
60186000 hparams = self .hparams
@@ -6071,27 +6053,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
60716053 if ".attn.rel_pos_h" in name or ".attn.rel_pos_w" in name :
60726054 return [(self .map_tensor_name (name , try_suffixes = ("" ,)), data_torch )]
60736055
6074- if name .startswith ("model.vision_model.transformer.layers." ):
6075- # process visual tensors
6076- # split QKV tensors if needed
6077- if ".qkv_proj." in name :
6078- if data_torch .ndim == 2 : # weight
6079- c3 , _ = data_torch .shape
6080- else : # bias
6081- c3 = data_torch .shape [0 ]
6082- assert c3 % 3 == 0
6083- c = c3 // 3
6084- wq = data_torch [:c ]
6085- wk = data_torch [c : c * 2 ]
6086- wv = data_torch [c * 2 :]
6087- return [
6088- (self .map_tensor_name (name .replace ("qkv" , "q" )), wq ),
6089- (self .map_tensor_name (name .replace ("qkv" , "k" )), wk ),
6090- (self .map_tensor_name (name .replace ("qkv" , "v" )), wv ),
6091- ]
6092- else :
6093- return [(self .map_tensor_name (name ), data_torch )]
6094-
60956056 return [(self .map_tensor_name (name ), data_torch )]
60966057
60976058
@@ -7263,12 +7224,20 @@ def prepare_tensors(self):
72637224@ModelBase .register (
72647225 "DeepseekV2ForCausalLM" ,
72657226 "DeepseekV3ForCausalLM" ,
7266- "DeepseekOCRForCausalLM" ,
72677227 "KimiVLForConditionalGeneration" ,
72687228)
72697229class DeepseekV2Model (TextModel ):
72707230 model_arch = gguf .MODEL_ARCH .DEEPSEEK2
72717231
7232+ def __init__ (self , * args , ** kwargs ):
7233+ super ().__init__ (* args , ** kwargs )
7234+ vision_config = self .hparams .get ('vision_config' , {}).get ('width' , {})
7235+
7236+ if 'clip-l-14-224' in vision_config and 'sam_vit_b' in vision_config :
7237+ self .model_arch = gguf .MODEL_ARCH .DEEPSEEK2OCR
7238+ self .gguf_writer .arch = gguf .MODEL_ARCH_NAMES [self .model_arch ]
7239+ self .gguf_writer .add_architecture ()
7240+
72727241 def set_vocab (self ):
72737242 try :
72747243 self ._set_vocab_gpt2 ()
@@ -7324,7 +7293,7 @@ def set_vocab(self):
73247293 raise NotImplementedError (f"Deepseek pre-tokenizer { tokpre !r} is not supported yet!" )
73257294
73267295 def set_gguf_parameters (self ):
7327- is_ocr = (self .hparams [ "num_hidden_layers" ] == 12 )
7296+ is_ocr = (self .model_arch == gguf . MODEL_ARCH . DEEPSEEK2OCR )
73287297
73297298 if is_ocr :
73307299 self .hparams ['rope_theta' ] = self .hparams .get ('rope_theta' , 10000.0 )
@@ -7335,11 +7304,9 @@ def set_gguf_parameters(self):
73357304
73367305 super ().set_gguf_parameters ()
73377306 hparams = self .hparams
7338- kv_lora_rank = hparams ["q_lora_rank " ] if hparams [ "q_lora_rank" ] is not None else 512
7307+ kv_lora_rank = hparams ["kv_lora_rank " ] if hparams . get ( "kv_lora_rank" ) is not None else 512
73397308 routed_scaling_factor = hparams .get ("routed_scaling_factor" , 1.0 )
73407309 norm_topk_prob = hparams .get ("norm_topk_prob" , False )
7341- scoring_func = hparams .get ("scoring_func" , "softmax" )
7342-
73437310 self .gguf_writer .add_leading_dense_block_count (hparams ["first_k_dense_replace" ])
73447311 self .gguf_writer .add_vocab_size (hparams ["vocab_size" ])
73457312 if "q_lora_rank" in hparams and hparams ["q_lora_rank" ] is not None :
@@ -7361,12 +7328,6 @@ def set_gguf_parameters(self):
73617328 self .gguf_writer .add_expert_weights_scale (routed_scaling_factor )
73627329 self .gguf_writer .add_expert_weights_norm (norm_topk_prob )
73637330
7364- if scoring_func == "sigmoid" :
7365- self .gguf_writer .add_expert_gating_func (gguf .ExpertGatingFuncType .SIGMOID )
7366- elif scoring_func == "softmax" :
7367- self .gguf_writer .add_expert_gating_func (gguf .ExpertGatingFuncType .SOFTMAX )
7368- else :
7369- raise ValueError (f"Unsupported scoring_func value: { scoring_func } " )
73707331 self .gguf_writer .add_rope_dimension_count (hparams ["qk_rope_head_dim" ])
73717332
73727333 rope_scaling = self .hparams .get ("rope_scaling" ) or {}
@@ -7462,7 +7423,6 @@ def prepare_tensors(self):
74627423 if len (experts ) > 0 :
74637424 raise ValueError (f"Unprocessed experts: { experts } " )
74647425
7465-
74667426@ModelBase .register ("MiniMaxM2ForCausalLM" )
74677427class MiniMaxM2Model (TextModel ):
74687428 model_arch = gguf .MODEL_ARCH .MINIMAXM2
0 commit comments