Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions vllm/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
)
from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
from vllm.transformers_utils.utils import (
is_gguf,
is_remote_gguf,
maybe_model_redirect,
split_remote_gguf,
Expand Down Expand Up @@ -440,13 +439,6 @@ def __post_init__(
self.model = maybe_model_redirect(self.model)
# The tokenizer is consistent with the model by default.
if self.tokenizer is None:
# Check if this is a GGUF model (either local file or remote GGUF)
if is_gguf(self.model):
raise ValueError(
"Using a tokenizer is mandatory when loading a GGUF model. "
"Please specify the tokenizer path or name using the "
"--tokenizer argument."
)
self.tokenizer = self.model
if self.tokenizer_revision is None:
self.tokenizer_revision = self.revision
Expand Down
65 changes: 65 additions & 0 deletions vllm/transformers_utils/gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""GGUF utility functions."""

import fnmatch
from pathlib import Path

import gguf
from gguf.constants import Keys, VisionProjectorType
from huggingface_hub import HfFileSystem
from huggingface_hub.errors import (
HfHubHTTPError,
RepositoryNotFoundError,
RevisionNotFoundError,
)
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig

from vllm.logger import init_logger
Expand Down Expand Up @@ -164,3 +171,61 @@ def maybe_patch_hf_config_from_gguf(
hf_config = new_hf_config

return hf_config


def get_gguf_file_path_from_hf(
    repo_id: str | Path,
    quant_type: str,
    revision: str | None = None,
) -> str | None:
    """Get the GGUF file path from HuggingFace Hub based on repo_id and quant_type.

    Args:
        repo_id: The HuggingFace repository ID (e.g., "Qwen/Qwen3-0.6B")
        quant_type: The quantization type (e.g., "Q4_K_M", "F16")
        revision: Optional revision/branch name

    Returns:
        The path to the GGUF file on HuggingFace Hub (e.g., "filename.gguf"),
        relative to the repository root, or None if not found
    """
    repo_id = str(repo_id)
    try:
        fs = HfFileSystem()
        # List all files in the repository (paths are prefixed with repo_id)
        file_list = fs.ls(repo_id, detail=False, revision=revision)

        # Patterns to match GGUF files carrying the quant_type, both at the
        # repo root and one directory deep, sharded or not.
        patterns = [
            f"*-{quant_type}.gguf",
            f"*-{quant_type}-*.gguf",
            f"*/*-{quant_type}.gguf",
            f"*/*-{quant_type}-*.gguf",
        ]

        # Use fnmatchcase rather than fnmatch.filter: fnmatch applies
        # os.path.normcase, making matching case-insensitive on Windows/macOS
        # but case-sensitive on Linux. Hub paths are case-sensitive, and the
        # result must not depend on the client platform. A single membership
        # test also avoids duplicate entries when a file matches multiple
        # patterns.
        matching_files = [
            path
            for path in file_list
            if any(fnmatch.fnmatchcase(path, pattern) for pattern in patterns)
        ]

        if not matching_files:
            logger.warning(
                "No GGUF file found in %s with quant_type %s", repo_id, quant_type
            )
            return None

        # Sort to ensure consistent ordering; sharded files carry extra
        # "-NNNNN-of-NNNNN" segments, so fewer hyphens prefers the
        # non-sharded (or first-shard) file.
        matching_files.sort(key=lambda x: (x.count("-"), x))
        gguf_filename = matching_files[0]

        # Strip the leading "repo_id/" prefix that HfFileSystem.ls adds.
        return gguf_filename.replace(repo_id + "/", "", 1)
    except (RepositoryNotFoundError, RevisionNotFoundError, HfHubHTTPError) as e:
        # Best-effort lookup: network/repo errors degrade to "not found"
        # so the caller can raise its own, more contextual error.
        logger.warning(
            "Failed to get GGUF file path from HuggingFace Hub for %s "
            "with quant_type %s: %s",
            repo_id,
            quant_type,
            e,
        )
        return None
15 changes: 14 additions & 1 deletion vllm/transformers_utils/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_sentence_transformer_tokenizer_config,
list_filtered_repo_files,
)
from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
from vllm.transformers_utils.tokenizers import MistralTokenizer
from vllm.transformers_utils.utils import (
check_gguf_file,
Expand Down Expand Up @@ -190,7 +191,19 @@ def get_tokenizer(
kwargs["gguf_file"] = Path(tokenizer_name).name
tokenizer_name = Path(tokenizer_name).parent
elif is_remote_gguf(tokenizer_name):
tokenizer_name, _ = split_remote_gguf(tokenizer_name)
tokenizer_name, quant_type = split_remote_gguf(tokenizer_name)
# Get the HuggingFace Hub path for the GGUF file
gguf_file = get_gguf_file_path_from_hf(
tokenizer_name,
quant_type,
revision=revision,
)
if gguf_file is None:
raise ValueError(
f"Could not find GGUF file for repo {tokenizer_name} "
f"with quantization {quant_type}."
)
kwargs["gguf_file"] = gguf_file

# if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
# first to use official Mistral tokenizer if possible.
Expand Down