Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions vllm/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
)
from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
from vllm.transformers_utils.utils import (
is_gguf,
is_remote_gguf,
maybe_model_redirect,
split_remote_gguf,
Expand Down Expand Up @@ -440,13 +439,6 @@ def __post_init__(
self.model = maybe_model_redirect(self.model)
# The tokenizer is consistent with the model by default.
if self.tokenizer is None:
# Check if this is a GGUF model (either local file or remote GGUF)
if is_gguf(self.model):
raise ValueError(
"Using a tokenizer is mandatory when loading a GGUF model. "
"Please specify the tokenizer path or name using the "
"--tokenizer argument."
)
self.tokenizer = self.model
if self.tokenizer_revision is None:
self.tokenizer_revision = self.revision
Expand Down
65 changes: 65 additions & 0 deletions vllm/transformers_utils/gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""GGUF utility functions."""

import fnmatch
from pathlib import Path

import gguf
from gguf.constants import Keys, VisionProjectorType
from huggingface_hub import HfFileSystem
from huggingface_hub.errors import (
HfHubHTTPError,
RepositoryNotFoundError,
RevisionNotFoundError,
)
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig

from vllm.logger import init_logger
Expand Down Expand Up @@ -164,3 +171,61 @@ def maybe_patch_hf_config_from_gguf(
hf_config = new_hf_config

return hf_config


def get_gguf_file_path_from_hf(
    repo_id: str | Path,
    quant_type: str,
    revision: str | None = None,
) -> str | None:
    """Get the GGUF file path from HuggingFace Hub based on repo_id and quant_type.

    Args:
        repo_id: The HuggingFace repository ID (e.g., "Qwen/Qwen3-0.6B")
        quant_type: The quantization type (e.g., "Q4_K_M", "F16")
        revision: Optional revision/branch name

    Returns:
        The path to the GGUF file on HuggingFace Hub (e.g., "filename.gguf"),
        relative to the repository root, or None if not found
    """
    repo_id = str(repo_id)
    try:
        fs = HfFileSystem()
        # List all files in the repository (paths are prefixed with repo_id)
        file_list = fs.ls(repo_id, detail=False, revision=revision)

        # Patterns to match GGUF files carrying the quant_type, both at the
        # repo root and one directory deep, sharded or not.
        patterns = [
            f"*-{quant_type}.gguf",
            f"*-{quant_type}-*.gguf",
            f"*/*-{quant_type}.gguf",
            f"*/*-{quant_type}-*.gguf",
        ]

        # Use fnmatchcase rather than fnmatch.filter: fnmatch applies
        # os.path.normcase, making matching case-insensitive on Windows/macOS
        # but case-sensitive on Linux. Hub paths are case-sensitive, and the
        # result must not depend on the client platform. A single membership
        # test also avoids duplicate entries when a file matches multiple
        # patterns.
        matching_files = [
            path
            for path in file_list
            if any(fnmatch.fnmatchcase(path, pattern) for pattern in patterns)
        ]

        if not matching_files:
            logger.warning(
                "No GGUF file found in %s with quant_type %s", repo_id, quant_type
            )
            return None

        # Sort to ensure consistent ordering; sharded files carry extra
        # "-NNNNN-of-NNNNN" segments, so fewer hyphens prefers the
        # non-sharded (or first-shard) file.
        matching_files.sort(key=lambda x: (x.count("-"), x))
        gguf_filename = matching_files[0]

        # Strip the leading "repo_id/" prefix that HfFileSystem.ls adds.
        return gguf_filename.replace(repo_id + "/", "", 1)
    except (RepositoryNotFoundError, RevisionNotFoundError, HfHubHTTPError) as e:
        # Best-effort lookup: network/repo errors degrade to "not found"
        # so the caller can raise its own, more contextual error.
        logger.warning(
            "Failed to get GGUF file path from HuggingFace Hub for %s "
            "with quant_type %s: %s",
            repo_id,
            quant_type,
            e,
        )
        return None
15 changes: 14 additions & 1 deletion vllm/transformers_utils/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_sentence_transformer_tokenizer_config,
list_filtered_repo_files,
)
from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
from vllm.transformers_utils.tokenizers import MistralTokenizer
from vllm.transformers_utils.utils import (
check_gguf_file,
Expand Down Expand Up @@ -190,7 +191,19 @@ def get_tokenizer(
kwargs["gguf_file"] = Path(tokenizer_name).name
tokenizer_name = Path(tokenizer_name).parent
elif is_remote_gguf(tokenizer_name):
tokenizer_name, _ = split_remote_gguf(tokenizer_name)
tokenizer_name, quant_type = split_remote_gguf(tokenizer_name)
# Get the HuggingFace Hub path for the GGUF file
gguf_file = get_gguf_file_path_from_hf(
tokenizer_name,
quant_type,
revision=revision,
)
if gguf_file is None:
raise ValueError(
f"Could not find GGUF file for repo {tokenizer_name} "
f"with quantization {quant_type}."
)
kwargs["gguf_file"] = gguf_file

# if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
# first to use official Mistral tokenizer if possible.
Expand Down