From ee4d781b507e9f43304367e6b6594e1f5f6517b7 Mon Sep 17 00:00:00 2001 From: SmartestWashingMachine Date: Thu, 4 Dec 2025 12:25:07 +1100 Subject: [PATCH 1/3] conversion: use existing local chat_template.jinja file if mistral-format model has one. --- convert_hf_to_gguf.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 8ddb6d04cd..7eba1b6f13 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2341,19 +2341,32 @@ def _set_vocab_mistral(self): self.gguf_writer.add_add_bos_token(True) self.gguf_writer.add_add_eos_token(False) - template_dir = Path(__file__).parent / "models/templates/" + local_template_file_path = self.dir_model / "chat_template.jinja" + + if self.is_mistral_format and local_template_file_path.exists(): + # Ministral-3 models come with chat templates. + # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main + logger.info("Using an existing Mistral local chat template.") + template_path = local_template_file_path + elif not self.is_mistral_format or not self.disable_mistral_community_chat_template: + template_dir = Path(__file__).parent / "models/templates/" - if not self.is_mistral_format or not self.disable_mistral_community_chat_template: # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`. if self.is_mistral_format: logger.info( "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. " "Mistral recommends to use `mistral-common` to perform tokenization and detokenization." ) - template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format) - self.gguf_writer.add_chat_template(template) + template_path = MistralModel.get_community_chat_template_path(vocab, template_dir, self.is_mistral_format) else: - logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.") + logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.") + template_path = None + + if template_path is not None: + with open(template_path, "r", encoding="utf-8") as f: + template = f.read() + + self.gguf_writer.add_chat_template(template) def set_vocab(self): if self.is_mistral_format: @@ -9872,7 +9885,7 @@ def __init__(self, *args, **kwargs): self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @staticmethod - def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool): + def get_community_chat_template_path(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool): assert TokenizerVersion is not None and Tekkenizer is not None and SentencePieceTokenizer is not None, _mistral_import_error_msg assert isinstance(vocab.tokenizer, (Tekkenizer, SentencePieceTokenizer)), ( f"Expected Tekkenizer or SentencePieceTokenizer, got {type(vocab.tokenizer)}" @@ -9905,10 +9918,7 @@ def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mis if not template_path.exists(): raise FileNotFoundError(f"Template file not found: {template_path}") - with open(template_path, "r", encoding="utf-8") as f: - template = f.read() - - return template + return template_path def set_gguf_parameters(self): super().set_gguf_parameters() From 64ee00babe2bbcf619dd240eb022eac328d33f0c Mon Sep 17 00:00:00 2001 From: SmartestWashingMachine Date: Thu, 4 Dec 2025 14:12:50 +1100 Subject: [PATCH 2/3] fix --mistral-format mistakenly assuming some <=v7 chat template names are file paths and reading them. --- convert_hf_to_gguf.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7eba1b6f13..1257421ac0 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2344,10 +2344,12 @@ def _set_vocab_mistral(self): local_template_file_path = self.dir_model / "chat_template.jinja" if self.is_mistral_format and local_template_file_path.exists(): - # Ministral-3 models come with chat templates. + # Ministral-3 and other new Mistral models come with chat templates. # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main logger.info("Using an existing Mistral local chat template.") - template_path = local_template_file_path + + with open(local_template_file_path, "r", encoding="utf-8") as f: + template = f.read() elif not self.is_mistral_format or not self.disable_mistral_community_chat_template: template_dir = Path(__file__).parent / "models/templates/" @@ -2357,15 +2359,12 @@ def _set_vocab_mistral(self): "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. " "Mistral recommends to use `mistral-common` to perform tokenization and detokenization." ) - template_path = MistralModel.get_community_chat_template_path(vocab, template_dir, self.is_mistral_format) + template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format) else: logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.") - template_path = None - - if template_path is not None: - with open(template_path, "r", encoding="utf-8") as f: - template = f.read() + template = None + if template is not None: self.gguf_writer.add_chat_template(template) def set_vocab(self): @@ -9885,7 +9884,7 @@ def __init__(self, *args, **kwargs): self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @staticmethod - def get_community_chat_template_path(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool): + def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool): assert TokenizerVersion is not None and Tekkenizer is not None and SentencePieceTokenizer is not None, _mistral_import_error_msg assert isinstance(vocab.tokenizer, (Tekkenizer, SentencePieceTokenizer)), ( f"Expected Tekkenizer or SentencePieceTokenizer, got {type(vocab.tokenizer)}" @@ -9918,7 +9917,10 @@ def get_community_chat_template_path(vocab: MistralVocab, templates_dir: Path, i if not template_path.exists(): raise FileNotFoundError(f"Template file not found: {template_path}") - return template_path + with open(template_path, "r", encoding="utf-8") as f: + template = f.read() + + return template def set_gguf_parameters(self): super().set_gguf_parameters() From cd40efed841fbc555d891b2d30ec9ac192f7e85b Mon Sep 17 00:00:00 2001 From: SmartestWashingMachine Date: Thu, 4 Dec 2025 19:42:56 +1100 Subject: [PATCH 3/3] Update convert_hf_to_gguf.py - change from exists() to is_file() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sigbjørn Skjæret --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 1257421ac0..4590b23921 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2343,7 +2343,7 @@ def _set_vocab_mistral(self): local_template_file_path = self.dir_model / "chat_template.jinja" - if self.is_mistral_format and local_template_file_path.exists(): + if self.is_mistral_format and local_template_file_path.is_file(): # Ministral-3 and other new Mistral models come with chat templates. # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main logger.info("Using an existing Mistral local chat template.")