@@ -1524,6 +1524,79 @@ def _set_vocab_interns1(self):
         special_vocab._set_special_token("bos", 151643)
         special_vocab.add_to_gguf(self.gguf_writer)
 
+    def _set_vocab_mistral(self):
+        if not _mistral_common_installed:
+            raise ImportError(_mistral_import_error_msg)
+
+        vocab = MistralVocab(self.dir_model)
+        logger.info(
+            f"Converting tokenizer {vocab.tokenizer_type} of size {vocab.vocab_size}."
+        )
+
+        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
+
+        tokens = []
+        scores = []
+        toktypes = []
+
+        for text, score, toktype in vocab.all_tokens():
+            tokens.append(text)
+            scores.append(score)
+            toktypes.append(toktype)
+
+        assert len(tokens) == vocab.vocab_size, (
+            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
+        )
+
+        if vocab.tokenizer_type == MistralTokenizerType.tekken:
+            self.gguf_writer.add_tokenizer_pre("tekken")
+            self.gguf_writer.add_token_merges(
+                vocab.extract_vocab_merges_from_model()
+            )
+
+        logger.info(
+            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
+        )
+
+        self.gguf_writer.add_bos_token_id(vocab.bos_id)
+        self.gguf_writer.add_eos_token_id(vocab.eos_id)
+        self.gguf_writer.add_unk_token_id(vocab.unk_id)
+        self.gguf_writer.add_pad_token_id(vocab.pad_id)
+
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_vocab_size(vocab.vocab_size)
+
+        self.gguf_writer.add_add_bos_token(True)
+        self.gguf_writer.add_add_eos_token(False)
+
+        local_template_file_path = self.dir_model / "chat_template.jinja"
+
+        if self.is_mistral_format and local_template_file_path.is_file():
+            # Ministral-3 and other new Mistral models come with chat templates.
+            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
+            logger.info("Using an existing Mistral local chat template.")
+
+            with open(local_template_file_path, "r", encoding="utf-8") as f:
+                template = f.read()
+        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
+            template_dir = Path(__file__).parent / "models/templates/"
+
+            # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
+            if self.is_mistral_format:
+                logger.info(
+                    "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
+                    "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
+                )
+            template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
+        else:
+            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            template = None
+
+        if template is not None:
+            self.gguf_writer.add_chat_template(template)
+
 
 class MmprojModel(ModelBase):
     model_type = ModelType.MMPROJ
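
The chat-template selection added above follows a fixed precedence: a local chat_template.jinja shipped with a Mistral-format checkpoint wins, then the community template looked up under models/templates/, and otherwise no template is embedded and tokenization is left to `mistral-common`. Below is a minimal sketch of that precedence as a standalone helper; the helper name and callable arguments are illustrative and not part of this change, only the decision order mirrors the code above.

def pick_chat_template(dir_model, is_mistral_format, disable_community_template, load_community_template):
    # 1) local template shipped with the checkpoint (Mistral format only)
    local = dir_model / "chat_template.jinja"
    if is_mistral_format and local.is_file():
        return local.read_text(encoding="utf-8")
    # 2) community-maintained template from models/templates/
    if not is_mistral_format or not disable_community_template:
        return load_community_template()
    # 3) no embedded template: rely on mistral-common for (de)tokenization
    return None
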
@@ -2294,79 +2367,6 @@ def __init__(self, *args, **kwargs):
         if self.hf_arch == "VLlama3ForCausalLM":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
 
-    def _set_vocab_mistral(self):
-        if not _mistral_common_installed:
-            raise ImportError(_mistral_import_error_msg)
-
-        vocab = MistralVocab(self.dir_model)
-        logger.info(
-            f"Converting tokenizer {vocab.tokenizer_type} of size {vocab.vocab_size}."
-        )
-
-        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
-
-        tokens = []
-        scores = []
-        toktypes = []
-
-        for text, score, toktype in vocab.all_tokens():
-            tokens.append(text)
-            scores.append(score)
-            toktypes.append(toktype)
-
-        assert len(tokens) == vocab.vocab_size, (
-            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
-        )
-
-        if vocab.tokenizer_type == MistralTokenizerType.tekken:
-            self.gguf_writer.add_tokenizer_pre("tekken")
-            self.gguf_writer.add_token_merges(
-                vocab.extract_vocab_merges_from_model()
-            )
-
-        logger.info(
-            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
-        )
-
-        self.gguf_writer.add_bos_token_id(vocab.bos_id)
-        self.gguf_writer.add_eos_token_id(vocab.eos_id)
-        self.gguf_writer.add_unk_token_id(vocab.unk_id)
-        self.gguf_writer.add_pad_token_id(vocab.pad_id)
-
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-        self.gguf_writer.add_vocab_size(vocab.vocab_size)
-
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(False)
-
-        local_template_file_path = self.dir_model / "chat_template.jinja"
-
-        if self.is_mistral_format and local_template_file_path.is_file():
-            # Ministral-3 and other new Mistral models come with chat templates.
-            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
-            logger.info("Using an existing Mistral local chat template.")
-
-            with open(local_template_file_path, "r", encoding="utf-8") as f:
-                template = f.read()
-        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
-            template_dir = Path(__file__).parent / "models/templates/"
-
-            # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
-            if self.is_mistral_format:
-                logger.info(
-                    "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
-                    "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
-                )
-            template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
-        else:
-            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
-            template = None
-
-        if template is not None:
-            self.gguf_writer.add_chat_template(template)
-
     def set_vocab(self):
         if self.is_mistral_format:
             return self._set_vocab_mistral()
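
The log messages above repeatedly point to `mistral-common` as the reference for tokenizing and detokenizing Mistral checkpoints. A minimal usage sketch follows; it is not part of this diff, and the tokenizer file name and message content are assumptions for illustration.

from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

# load the tokenizer file shipped with the checkpoint (path is an assumption)
tokenizer = MistralTokenizer.from_file("tekken.json")
request = ChatCompletionRequest(messages=[UserMessage(content="Hello!")])
tokenized = tokenizer.encode_chat_completion(request)
print(len(tokenized.tokens), tokenized.text)
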
@@ -9924,17 +9924,109 @@ def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mis
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        if "yarn" in self.hparams:
-            yarn_params = self.hparams["yarn"]
-            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-            self.gguf_writer.add_rope_scaling_factor(yarn_params["factor"])
-            self.gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"])
-            self.gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"])
-            self.gguf_writer.add_rope_scaling_yarn_log_mul(1.0)  # mscale_all_dim
-            self.gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
+        MistralModel.set_mistral_config(self.gguf_writer, self.hparams)
 
-        if "llama_4_scaling" in self.hparams:
-            self.gguf_writer.add_attn_temperature_scale(self.hparams["llama_4_scaling"]["beta"])
+    @staticmethod
+    def set_mistral_config(gguf_writer: gguf.GGUFWriter, hparams: dict):
+        if "yarn" in hparams:
+            yarn_params = hparams["yarn"]
+            gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            gguf_writer.add_rope_scaling_factor(yarn_params["factor"])
+            gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"])
+            gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"])
+            gguf_writer.add_rope_scaling_yarn_log_mul(1.0)  # mscale_all_dim
+            gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
+
+        if "llama_4_scaling" in hparams:
+            gguf_writer.add_attn_temperature_scale(hparams["llama_4_scaling"]["beta"])
+
+
+class MistralMoeModel(DeepseekV2Model):
+    model_arch = gguf.MODEL_ARCH.DEEPSEEK2
+    model_name = "Mistral"
+    hf_arch = ""
+    is_mistral_format = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        logger.info("Using MistralMoeModel")
+        # remap hparams from the Mistral MoE format to the DeepseekV2 format
+        # we do it this way to be able to reuse the DeepseekV2Model set_gguf_parameters logic
+        # ref: https://github.com/vllm-project/vllm/blob/b294e28db2c5dee61bc25157664edcada8b90b31/vllm/transformers_utils/configs/mistral.py
+        config = self.hparams
+        # Mistral key -> HF key
+        config_mapping = {
+            "dim": "hidden_size",
+            "norm_eps": "rms_norm_eps",
+            "n_kv_heads": "num_key_value_heads",
+            "n_layers": "num_hidden_layers",
+            "n_heads": "num_attention_heads",
+            "hidden_dim": "intermediate_size",
+        }
+        # HF key -> (Mistral key, default value)
+        top_level_mapping_with_default = {
+            "model_type": ("model_type", "transformer"),
+            "hidden_act": ("activation", "silu"),
+            "tie_word_embeddings": ("tied_embeddings", False),
+            "max_seq_len": ("max_seq_len", config.get("max_position_embeddings", 128_000)),
+            "max_position_embeddings": ("max_position_embeddings", 128_000),
+        }
+        # map top-level keys
+        for key, new_key in config_mapping.items():
+            if key in config:
+                config[new_key] = config[key]
+        for new_key, (key, default_value) in top_level_mapping_with_default.items():
+            config[new_key] = config.get(key, default_value)
+        # map MoE-specific keys
+        moe_config_map = {
+            "route_every_n": "moe_layer_freq",
+            "first_k_dense_replace": "first_k_dense_replace",
+            "num_experts_per_tok": "num_experts_per_tok",
+            "num_experts": "n_routed_experts",
+            "expert_hidden_dim": "moe_intermediate_size",
+            "routed_scale": "routed_scaling_factor",
+            "num_shared_experts": "n_shared_experts",
+            "num_expert_groups": "n_group",
+            "num_expert_groups_per_tok": "topk_group",
+        }
+        moe = config["moe"]
+        for key, new_key in moe_config_map.items():
+            if key in moe:
+                config[new_key] = moe[key]
+        # provide values that are missing from the Mistral config
+        config["topk_method"] = None
+        config["norm_topk_prob"] = True
+        config["scoring_func"] = "softmax"
+
+    def set_vocab(self):
+        self._set_vocab_mistral()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        MistralModel.set_mistral_config(self.gguf_writer, self.hparams)
+        yarn_params = self.hparams["yarn"]
+        self.gguf_writer.add_attn_temperature_length(yarn_params["original_max_position_embeddings"])
+        self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1)  # mscale_all_dim * 0.1
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.startswith("vision_") or name.startswith("patch_merger.") or "mm_projector" in name:
+            return []
+
+        # rename certain tensors so that we can reuse the DeepseekV2Model modify_tensors logic
+        if name.endswith(".qscale_act"):
+            name = name.replace(".qscale_act", ".input_scale")
+        if name.endswith(".qscale_weight"):
+            name = name.replace(".qscale_weight", ".weight_scale")
+        if ".wkv_b." in name:
+            name = name.replace(".wkv_b.", ".kv_b_proj.")
+        if ".experts." in name:
+            name = name.replace(".experts.", ".mlp.experts.")
+            name = name.replace(".w1.", ".gate_proj.")
+            name = name.replace(".w2.", ".down_proj.")
+            name = name.replace(".w3.", ".up_proj.")
+            name = "model." + name
+
+        return super().modify_tensors(data_torch, name, bid)
 
 
 class PixtralModel(LlavaVisionModel):
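
The hparams remapping in MistralMoeModel.__init__ is easiest to follow on a toy config. The snippet below applies the same Mistral-key-to-HF-key idea to a made-up params dict; all values are illustrative, and only the key names are taken from the mapping tables above.

# made-up Mistral MoE style hparams; values are illustrative only
params = {
    "dim": 4096, "n_layers": 32, "n_heads": 32, "n_kv_heads": 8,
    "hidden_dim": 14336, "norm_eps": 1e-5,
    "moe": {"num_experts": 64, "num_experts_per_tok": 4, "expert_hidden_dim": 2048},
}

# top-level remap, mirroring config_mapping above
for src, dst in {"dim": "hidden_size", "n_layers": "num_hidden_layers",
                 "n_heads": "num_attention_heads", "n_kv_heads": "num_key_value_heads",
                 "hidden_dim": "intermediate_size", "norm_eps": "rms_norm_eps"}.items():
    params[dst] = params[src]

# MoE remap, mirroring moe_config_map above
for src, dst in {"num_experts": "n_routed_experts",
                 "num_experts_per_tok": "num_experts_per_tok",
                 "expert_hidden_dim": "moe_intermediate_size"}.items():
    params[dst] = params["moe"][src]

# DeepseekV2Model.set_gguf_parameters can now read the HF-style keys it expects
assert params["hidden_size"] == 4096 and params["n_routed_experts"] == 64
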
@@ -10490,6 +10582,8 @@ def main() -> None:
     elif args.mmproj:
         assert hparams.get("vision_encoder") is not None, "This model does not support multimodal"
         model_class = PixtralModel
+    elif "moe" in hparams:
+        model_class = MistralMoeModel
     else:
         model_class = MistralModel
 
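
With this change, main() picks the model class for Mistral-format checkpoints from the checkpoint contents: --mmproj selects PixtralModel (and requires a vision_encoder section), a top-level "moe" section in the hparams selects MistralMoeModel, and everything else falls back to MistralModel. A condensed sketch of that dispatch, illustrative rather than the literal main() code:

def pick_mistral_model_class(want_mmproj: bool, hparams: dict):
    if want_mmproj:
        assert hparams.get("vision_encoder") is not None, "This model does not support multimodal"
        return PixtralModel
    if "moe" in hparams:
        return MistralMoeModel
    return MistralModel
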