Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions vllm/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,13 +440,6 @@ def __post_init__(
self.model = maybe_model_redirect(self.model)
# The tokenizer is consistent with the model by default.
if self.tokenizer is None:
# Check if this is a GGUF model (either local file or remote GGUF)
if is_gguf(self.model):
raise ValueError(
"Using a tokenizer is mandatory when loading a GGUF model. "
"Please specify the tokenizer path or name using the "
"--tokenizer argument."
)
self.tokenizer = self.model
if self.tokenizer_revision is None:
self.tokenizer_revision = self.revision
Expand Down Expand Up @@ -700,6 +693,14 @@ def _task_to_convert(task: TaskOption) -> ConvertType:

self.multimodal_config = MultiModalConfig(**mm_config_kwargs)

# Multimodal GGUF models must use original repo for mm processing
if is_gguf(self.tokenizer) and self.is_multimodal_model:
raise ValueError(
"Loading a multimodal GGUF model needs to use original "
"tokenizer. Please specify the unquantized hf model's "
"repo name or path using the --tokenizer argument."
)

if self.disable_sliding_window:
# Set after get_and_verify_max_len to ensure that max_model_len
# can be correctly capped to sliding window size
Expand Down
42 changes: 42 additions & 0 deletions vllm/transformers_utils/gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig

from vllm.logger import init_logger
from vllm.transformers_utils.config import list_filtered_repo_files

logger = init_logger(__name__)

Expand Down Expand Up @@ -164,3 +165,44 @@ def maybe_patch_hf_config_from_gguf(
hf_config = new_hf_config

return hf_config


def get_gguf_file_path_from_hf(
repo_id: str | Path,
quant_type: str,
revision: str | None = None,
) -> str:
"""Get the GGUF file path from HuggingFace Hub based on repo_id and quant_type.

Args:
repo_id: The HuggingFace repository ID (e.g., "Qwen/Qwen3-0.6B")
quant_type: The quantization type (e.g., "Q4_K_M", "F16")
revision: Optional revision/branch name

Returns:
The path to the GGUF file on HuggingFace Hub (e.g., "filename.gguf"),
"""
repo_id = str(repo_id)
gguf_patterns = [
f"*-{quant_type}.gguf",
f"*-{quant_type}-*.gguf",
f"*/*-{quant_type}.gguf",
f"*/*-{quant_type}-*.gguf",
]
matching_files = list_filtered_repo_files(
repo_id,
allow_patterns=gguf_patterns,
revision=revision,
)

if len(matching_files) == 0:
raise ValueError(
"Could not find GGUF file for repo %s with quantization %s.",
repo_id,
quant_type,
)

# Sort to ensure consistent ordering (prefer non-sharded files)
matching_files.sort(key=lambda x: (x.count("-"), x))
gguf_filename = matching_files[0]
return gguf_filename
10 changes: 9 additions & 1 deletion vllm/transformers_utils/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_sentence_transformer_tokenizer_config,
list_filtered_repo_files,
)
from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
from vllm.transformers_utils.tokenizers import MistralTokenizer
from vllm.transformers_utils.utils import (
check_gguf_file,
Expand Down Expand Up @@ -190,7 +191,14 @@ def get_tokenizer(
kwargs["gguf_file"] = Path(tokenizer_name).name
tokenizer_name = Path(tokenizer_name).parent
elif is_remote_gguf(tokenizer_name):
tokenizer_name, _ = split_remote_gguf(tokenizer_name)
tokenizer_name, quant_type = split_remote_gguf(tokenizer_name)
# Get the HuggingFace Hub path for the GGUF file
gguf_file = get_gguf_file_path_from_hf(
tokenizer_name,
quant_type,
revision=revision,
)
kwargs["gguf_file"] = gguf_file

# if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
# first to use official Mistral tokenizer if possible.
Expand Down