@@ -1524,6 +1524,67 @@ def _set_vocab_interns1(self):
         special_vocab._set_special_token("bos", 151643)
         special_vocab.add_to_gguf(self.gguf_writer)

+    def _set_vocab_mistral(self):
+        if not _mistral_common_installed:
+            raise ImportError(_mistral_import_error_msg)
+
+        vocab = MistralVocab(self.dir_model)
+        logger.info(
+            f"Converting tokenizer {vocab.tokenizer_type} of size {vocab.vocab_size}."
+        )
+
+        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
+
+        tokens = []
+        scores = []
+        toktypes = []
+
+        for text, score, toktype in vocab.all_tokens():
+            tokens.append(text)
+            scores.append(score)
+            toktypes.append(toktype)
+
+        assert len(tokens) == vocab.vocab_size, (
+            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
+        )
+
+        if vocab.tokenizer_type == MistralTokenizerType.tekken:
+            self.gguf_writer.add_tokenizer_pre("tekken")
+            self.gguf_writer.add_token_merges(
+                vocab.extract_vocab_merges_from_model()
+            )
+
+        logger.info(
+            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
+        )
+
+        self.gguf_writer.add_bos_token_id(vocab.bos_id)
+        self.gguf_writer.add_eos_token_id(vocab.eos_id)
+        self.gguf_writer.add_unk_token_id(vocab.unk_id)
+        self.gguf_writer.add_pad_token_id(vocab.pad_id)
+
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_vocab_size(vocab.vocab_size)
+
+        self.gguf_writer.add_add_bos_token(True)
+        self.gguf_writer.add_add_eos_token(False)
+
+        template_dir = Path(__file__).parent / "models/templates/"
+
+        if not self.is_mistral_format or not self.disable_mistral_community_chat_template:
+            # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
+            if self.is_mistral_format:
+                logger.info(
+                    "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
+                    "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
+                )
+            template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
+            self.gguf_writer.add_chat_template(template)
+        else:
+            logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+

 class MmprojModel(ModelBase):
     model_type = ModelType.MMPROJ
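The `_mistral_common_installed` flag and `_mistral_import_error_msg` message used by `_set_vocab_mistral` are presumably set once at module import time. A minimal sketch of that optional-dependency guard, assuming the usual try/except ImportError pattern; the imported symbol and the message wording are illustrative assumptions, only the two flag names appear in the diff:

# Sketch only: optional mistral-common dependency guard, assumed to live near
# the top of convert_hf_to_gguf.py. The import target and message text are
# illustrative; only _mistral_common_installed and _mistral_import_error_msg
# are taken from the diff.
try:
    from mistral_common.tokens.tokenizers.mistral import MistralTokenizer  # noqa: F401
    _mistral_common_installed = True
    _mistral_import_error_msg = ""
except ImportError:
    _mistral_common_installed = False
    _mistral_import_error_msg = (
        "mistral-common is required to convert Mistral-format models; "
        "install it, e.g. with `pip install mistral-common`."
    )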
@@ -2294,67 +2355,6 @@ def __init__(self, *args, **kwargs):
         if self.hf_arch == "VLlama3ForCausalLM":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)

-    def _set_vocab_mistral(self):
-        if not _mistral_common_installed:
-            raise ImportError(_mistral_import_error_msg)
-
-        vocab = MistralVocab(self.dir_model)
-        logger.info(
-            f"Converting tokenizer {vocab.tokenizer_type} of size {vocab.vocab_size}."
-        )
-
-        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
-
-        tokens = []
-        scores = []
-        toktypes = []
-
-        for text, score, toktype in vocab.all_tokens():
-            tokens.append(text)
-            scores.append(score)
-            toktypes.append(toktype)
-
-        assert len(tokens) == vocab.vocab_size, (
-            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
-        )
-
-        if vocab.tokenizer_type == MistralTokenizerType.tekken:
-            self.gguf_writer.add_tokenizer_pre("tekken")
-            self.gguf_writer.add_token_merges(
-                vocab.extract_vocab_merges_from_model()
-            )
-
-        logger.info(
-            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
-        )
-
-        self.gguf_writer.add_bos_token_id(vocab.bos_id)
-        self.gguf_writer.add_eos_token_id(vocab.eos_id)
-        self.gguf_writer.add_unk_token_id(vocab.unk_id)
-        self.gguf_writer.add_pad_token_id(vocab.pad_id)
-
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-        self.gguf_writer.add_vocab_size(vocab.vocab_size)
-
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(False)
-
-        template_dir = Path(__file__).parent / "models/templates/"
-
-        if not self.is_mistral_format or not self.disable_mistral_community_chat_template:
-            # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
-            if self.is_mistral_format:
-                logger.info(
-                    "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
-                    "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
-                )
-            template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
-            self.gguf_writer.add_chat_template(template)
-        else:
-            logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
-
     def set_vocab(self):
         if self.is_mistral_format:
             return self._set_vocab_mistral()
@@ -9934,11 +9934,12 @@ class MistralMoeModel(DeepseekV2Model):
     model_name = "Mistral"
     hf_arch = ""
     is_mistral_format = True
-    undo_permute = False

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         logger.info("Using MistralMoeModel")
+        # remap hparams from Mistral MoE format to DeepseekV2 format
+        # we do it this way so we can reuse the DeepseekV2Model set_gguf_parameters logic
         # ref: https://github.com/vllm-project/vllm/blob/b294e28db2c5dee61bc25157664edcada8b90b31/vllm/transformers_utils/configs/mistral.py
         config = self.hparams
         # Mistral key -> HF key
@@ -9958,11 +9959,13 @@ def __init__(self, *args, **kwargs):
99589959 "max_seq_len" : ("max_seq_len" , config .get ("max_position_embeddings" , 128_000 )),
99599960 "max_position_embeddings" : ("max_position_embeddings" , 128_000 ),
99609961 }
9962+ # mapping top-level keys
99619963 for key , new_key in config_mapping .items ():
99629964 if key in config :
99639965 config [new_key ] = config [key ]
99649966 for new_key , (key , default_value ) in top_level_mapping_with_default .items ():
99659967 config [new_key ] = config .get (key , default_value )
9968+ # mapping MoE-specific keys
99669969 moe_config_map = {
99679970 "route_every_n" : "moe_layer_freq" ,
99689971 "first_k_dense_replace" : "first_k_dense_replace" ,
@@ -9978,12 +9981,13 @@ def __init__(self, *args, **kwargs):
         for key, new_key in moe_config_map.items():
             if key in moe:
                 config[new_key] = moe[key]
+        # provide missing values
         config["topk_method"] = None
         config["norm_topk_prob"] = True
         config["scoring_func"] = "softmax"

     def set_vocab(self):
-        LlamaModel._set_vocab_mistral(self)  # type: ignore
+        self._set_vocab_mistral()

     def set_gguf_parameters(self):
         super().set_gguf_parameters()
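To illustrate the remapping pattern used in `__init__` above: Mistral-style config keys are rewritten in place into the HF/DeepseekV2-style keys that the inherited `set_gguf_parameters` expects. A standalone sketch with invented values; apart from `route_every_n` -> `moe_layer_freq`, the specific key pairs below are assumptions chosen for illustration, not taken from the diff:

# Sketch only: the in-place config-key remapping pattern from
# MistralMoeModel.__init__, applied to a toy config dict.
config = {
    "dim": 6144,        # Mistral-style key (mapping to hidden_size is assumed)
    "n_layers": 60,     # Mistral-style key (mapping to num_hidden_layers is assumed)
    "moe": {"route_every_n": 1, "num_experts": 16},
}

config_mapping = {
    "dim": "hidden_size",
    "n_layers": "num_hidden_layers",
}
moe_config_map = {
    "route_every_n": "moe_layer_freq",      # this pair appears in the diff
    "num_experts": "n_routed_experts",      # assumed for illustration
}

# mapping top-level keys (same loop shape as in the diff)
for key, new_key in config_mapping.items():
    if key in config:
        config[new_key] = config[key]

# mapping MoE-specific keys from the nested "moe" dict
moe = config.get("moe", {})
for key, new_key in moe_config_map.items():
    if key in moe:
        config[new_key] = moe[key]

print(config["hidden_size"], config["num_hidden_layers"], config["n_routed_experts"])
# -> 6144 60 16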
@@ -9992,54 +9996,22 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_attn_temperature_length(yarn_params["original_max_position_embeddings"])
         self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1)  # mscale_all_dim * 0.1

-    # TODO @ngxson : this should be in tensor_mapping, but I don't have time for now
-    # copied from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/mistral_large_3.py
-    remapping = {
-        r"layers\.(\d+)\.attention_norm\.weight": r"model.layers.\1.input_layernorm.weight",  # noqa: E501
-        r"layers\.(\d+)\.attention\.wq_a\.(\w+)": r"model.layers.\1.self_attn.q_a_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.attention\.q_a_norm\.weight": r"model.layers.\1.self_attn.q_a_layernorm.weight",  # noqa: E501
-        r"layers\.(\d+)\.attention\.wq_b\.(\w+)": r"model.layers.\1.self_attn.q_b_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.attention\.wkv_a_with_mqa\.(\w+)": r"model.layers.\1.self_attn.kv_a_proj_with_mqa.\2",  # noqa: E501
-        r"layers\.(\d+)\.attention\.kv_a_norm\.weight": r"model.layers.\1.self_attn.kv_a_layernorm.weight",  # noqa: E501
-        r"layers\.(\d+)\.attention\.wkv_b\.(\w+)": r"model.layers.\1.self_attn.kv_b_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.attention\.wo\.(\w+)": r"model.layers.\1.self_attn.o_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.ffn_norm\.weight": r"model.layers.\1.post_attention_layernorm.weight",  # noqa: E501
-        r"layers\.(\d+)\.feed_forward\.w1\.(\w+)": r"model.layers.\1.mlp.gate_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.feed_forward\.w2\.(\w+)": r"model.layers.\1.mlp.down_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.feed_forward\.w3\.(\w+)": r"model.layers.\1.mlp.up_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.gate\.weight": r"model.layers.\1.mlp.gate.weight",  # noqa: E501
-        r"layers\.(\d+)\.shared_experts\.w1\.(\w+)": r"model.layers.\1.mlp.shared_experts.gate_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.shared_experts\.w2\.(\w+)": r"model.layers.\1.mlp.shared_experts.down_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.shared_experts\.w3\.(\w+)": r"model.layers.\1.mlp.shared_experts.up_proj.\2",  # noqa: E501
-        r"layers\.(\d+)\.experts\.(\d+)\.w1\.(\w+)": r"model.layers.\1.mlp.experts.\2.gate_proj.\3",  # noqa: E501
-        r"layers\.(\d+)\.experts\.(\d+)\.w2\.(\w+)": r"model.layers.\1.mlp.experts.\2.down_proj.\3",  # noqa: E501
-        r"layers\.(\d+)\.experts\.(\d+)\.w3\.(\w+)": r"model.layers.\1.mlp.experts.\2.up_proj.\3",  # noqa: E501
-        r"norm\.weight": "model.norm.weight",  # noqa: E501
-        r"tok_embeddings\.weight": "model.embed_tokens.weight",  # noqa: E501
-        r"output\.weight": "lm_head.weight",  # noqa: E501
-    }
-
-    def _remap_mistral_to_ds(self, name: str) -> str:
-        for k, v in self.remapping.items():
-            match = re.fullmatch(k, name)
-            if match:
-                name = re.sub(k, v, name)
-                break
-        else:
-            raise ValueError(f"Cannot remap {name}")
-
-        # Remapping scale names. We could do this in the regex above but it
-        # would triple the number of lines for most layers.
-        if name.endswith(".qscale_act"):
-            name = re.sub(r"\.qscale_act$", ".input_scale", name)
-        elif name.endswith(".qscale_weight"):
-            name = re.sub(r"\.qscale_weight$", ".weight_scale", name)
-        return name
-
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
         if name.startswith("vision_") or name.startswith("patch_merger.") or "mm_projector" in name:
             return []
-        name = self._remap_mistral_to_ds(name)
+
+        # rename certain tensors so that we can reuse DeepseekV2Model modify_tensors logic
+        if name.endswith(".qscale_act"):
+            name = name.replace(".qscale_act", ".input_scale")
+        if name.endswith(".qscale_weight"):
+            name = name.replace(".qscale_weight", ".weight_scale")
+        if ".experts." in name:
+            name = name.replace(".experts.", ".mlp.experts.")
+            name = name.replace(".w1.", ".gate_proj.")
+            name = name.replace(".w2.", ".down_proj.")
+            name = name.replace(".w3.", ".up_proj.")
+            name = "model." + name
+
         return super().modify_tensors(data_torch, name, bid)


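The net effect of the new renaming block in `modify_tensors`: routed-expert tensors arrive with Mistral-style names (`layers.N.experts.E.wX.weight`, per the regex table removed above) and are rewritten into the `model.layers.N.mlp.experts.E.*_proj.weight` form that the reused DeepseekV2 expert-merging logic collects. A minimal standalone sketch of the same string rewriting, run on a hypothetical tensor name; the helper function is for illustration only and does not exist in the script:

# Sketch only: the renaming steps from the new modify_tensors code, pulled into
# a standalone helper and applied to a hypothetical expert tensor name.
def remap_moe_tensor_name(name: str) -> str:
    if name.endswith(".qscale_act"):
        name = name.replace(".qscale_act", ".input_scale")
    if name.endswith(".qscale_weight"):
        name = name.replace(".qscale_weight", ".weight_scale")
    if ".experts." in name:
        name = name.replace(".experts.", ".mlp.experts.")
        name = name.replace(".w1.", ".gate_proj.")
        name = name.replace(".w2.", ".down_proj.")
        name = name.replace(".w3.", ".up_proj.")
        name = "model." + name
    return name

print(remap_moe_tensor_name("layers.0.experts.3.w1.weight"))
# -> model.layers.0.mlp.experts.3.gate_proj.weight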