
Commit 304f1da

modify accomplishment measure
1 parent 471b938 commit 304f1da

14 files changed: +1390 additions, -196 deletions

xinference/model/embedding/core.py

Lines changed: 40 additions & 0 deletions
@@ -171,6 +171,46 @@ def match_json(
     ) -> bool:
         pass
 
+    @classmethod
+    def match_json_with_reason(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ) -> "MatchResult":
+        """
+        Check if the engine can handle the given embedding model with detailed error information.
+
+        This method provides detailed failure reasons and suggestions when an engine
+        cannot handle a specific model configuration. The default implementation
+        falls back to the boolean match_json method for backward compatibility.
+
+        Args:
+            model_family: The embedding model family information
+            model_spec: The model specification
+            quantization: The quantization method
+
+        Returns:
+            MatchResult: Detailed match result with reasons and suggestions
+        """
+        from .match_result import ErrorType, MatchResult
+
+        # Default implementation for backward compatibility
+        if cls.match_json(model_family, model_spec, quantization):
+            return MatchResult.success()
+        else:
+            # Get basic reason based on common failure patterns
+            if not cls.check_lib():
+                return MatchResult.failure(
+                    reason=f"Required library for {cls.__name__} is not available",
+                    error_type=ErrorType.DEPENDENCY_MISSING,
+                )
+            else:
+                return MatchResult.failure(
+                    reason=f"Embedding model configuration is not compatible with {cls.__name__}",
+                    error_type=ErrorType.MODEL_COMPATIBILITY,
+                )
+
     @classmethod
     def match(
         cls,
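
Note: the engines in this commit build their results from a MatchResult helper in a new match_result module, which is among the 14 changed files but is not shown in the hunks here. The following is only a sketch of the interface implied by the calls above (MatchResult.success(), MatchResult.failure(reason=..., error_type=..., technical_details=...), result.is_match, and the ErrorType members referenced throughout the diffs); the actual definitions in match_result.py may differ.

# Sketch only: reconstructs the MatchResult/ErrorType interface implied by the diffs
# in this commit. The real match_result.py is not shown here and may differ.
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class ErrorType(Enum):
    # Members referenced by the engine diffs in this commit.
    DEPENDENCY_MISSING = "dependency_missing"
    MODEL_FORMAT = "model_format"
    MODEL_COMPATIBILITY = "model_compatibility"
    CONFIGURATION_ERROR = "configuration_error"
    OS_REQUIREMENT = "os_requirement"
    ABILITY_MISMATCH = "ability_mismatch"
    QUANTIZATION = "quantization"
    VERSION_REQUIREMENT = "version_requirement"


@dataclass
class MatchResult:
    is_match: bool
    reason: Optional[str] = None
    error_type: Optional[ErrorType] = None
    technical_details: Optional[str] = None

    @classmethod
    def success(cls) -> "MatchResult":
        # Returned when the model/spec/quantization combination is supported.
        return cls(is_match=True)

    @classmethod
    def failure(
        cls,
        reason: str,
        error_type: Optional[ErrorType] = None,
        technical_details: Optional[str] = None,
    ) -> "MatchResult":
        # Returned with a human-readable reason and a machine-readable category.
        return cls(
            is_match=False,
            reason=reason,
            error_type=error_type,
            technical_details=technical_details,
        )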

xinference/model/embedding/llama_cpp/core.py

Lines changed: 60 additions & 2 deletions
@@ -235,6 +235,64 @@ def match_json(
         model_spec: EmbeddingSpecV1,
         quantization: str,
     ) -> bool:
+        from ..match_result import MatchResult
+
+        result = cls.match_json_with_reason(model_family, model_spec, quantization)
+        return result.is_match
+
+    @classmethod
+    def match_json_with_reason(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ) -> "MatchResult":
+        from ..match_result import ErrorType, MatchResult
+
+        # Check library availability
+        if not cls.check_lib():
+            return MatchResult.failure(
+                reason="llama.cpp library (xllamacpp) is not installed for embedding",
+                error_type=ErrorType.DEPENDENCY_MISSING,
+                technical_details="xllamacpp package not found in Python environment",
+            )
+
+        # Check model format compatibility
         if model_spec.model_format not in ["ggufv2"]:
-            return False
-        return True
+            return MatchResult.failure(
+                reason=f"llama.cpp embedding only supports GGUF v2 format, got: {model_spec.model_format}",
+                error_type=ErrorType.MODEL_FORMAT,
+                technical_details=f"Unsupported format: {model_spec.model_format}, required: ggufv2",
+            )
+
+        # Check embedding-specific requirements
+        if not hasattr(model_spec, "model_file_name_template"):
+            return MatchResult.failure(
+                reason="GGUF embedding model requires proper file configuration",
+                error_type=ErrorType.CONFIGURATION_ERROR,
+                technical_details="Missing model_file_name_template for GGUF embedding",
+            )
+
+        # Check model dimensions for llama.cpp compatibility
+        model_dimensions = model_family.dimensions
+        if model_dimensions > 4096:  # llama.cpp may have limitations
+            return MatchResult.failure(
+                reason=f"Large embedding model may have compatibility issues with llama.cpp ({model_dimensions} dimensions)",
+                error_type=ErrorType.MODEL_COMPATIBILITY,
+                technical_details=f"Large embedding dimensions: {model_dimensions}",
+            )
+
+        # Check platform-specific considerations
+        import platform
+
+        current_platform = platform.system()
+
+        # llama.cpp works across platforms but may have performance differences
+        if current_platform == "Windows":
+            return MatchResult.failure(
+                reason="llama.cpp embedding may have limited performance on Windows",
+                error_type=ErrorType.OS_REQUIREMENT,
+                technical_details=f"Windows platform: {current_platform}",
+            )
+
+        return MatchResult.success()

xinference/model/embedding/sentence_transformers/core.py

Lines changed: 75 additions & 2 deletions
@@ -434,5 +434,78 @@ def match_json(
         model_spec: EmbeddingSpecV1,
         quantization: str,
     ) -> bool:
-        # As default embedding engine, sentence-transformer support all models
-        return model_spec.model_format in ["pytorch"]
+        from ..match_result import MatchResult
+
+        result = cls.match_json_with_reason(model_family, model_spec, quantization)
+        return result.is_match
+
+    @classmethod
+    def match_json_with_reason(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ) -> "MatchResult":
+        from ..match_result import ErrorType, MatchResult
+
+        # Check library availability
+        if not cls.check_lib():
+            return MatchResult.failure(
+                reason="Sentence Transformers library is not installed",
+                error_type=ErrorType.DEPENDENCY_MISSING,
+                technical_details="sentence_transformers package not found in Python environment",
+            )
+
+        # Check model format compatibility
+        if model_spec.model_format not in ["pytorch"]:
+            return MatchResult.failure(
+                reason=f"Sentence Transformers only supports pytorch format, got: {model_spec.model_format}",
+                error_type=ErrorType.MODEL_FORMAT,
+                technical_details=f"Unsupported format: {model_spec.model_format}, required: pytorch",
+            )
+
+        # Check model dimensions compatibility
+        model_dimensions = model_family.dimensions
+        if model_dimensions > 1536:  # Very large embedding models
+            return MatchResult.failure(
+                reason=f"Large embedding model detected ({model_dimensions} dimensions)",
+                error_type=ErrorType.MODEL_COMPATIBILITY,
+                technical_details=f"Large embedding dimensions: {model_dimensions}",
+            )
+
+        # Check token limits
+        max_tokens = model_family.max_tokens
+        if max_tokens > 8192:  # Very high token limits
+            return MatchResult.failure(
+                reason=f"High token limit model detected (max_tokens: {max_tokens})",
+                error_type=ErrorType.CONFIGURATION_ERROR,
+                technical_details=f"High max_tokens: {max_tokens}",
+            )
+
+        # Check for special model requirements
+        model_name = model_family.model_name.lower()
+
+        # Check Qwen2 GTE models
+        if "gte" in model_name and "qwen2" in model_name:
+            # These models have specific requirements
+            if not hasattr(cls, "_check_qwen_gte_requirements"):
+                return MatchResult.failure(
+                    reason="Qwen2 GTE models require special handling",
+                    error_type=ErrorType.MODEL_COMPATIBILITY,
+                    technical_details="Qwen2 GTE model special requirements",
+                )
+
+        # Check Qwen3 models
+        if "qwen3" in model_name:
+            # Qwen3 has flash attention requirements
+            try:
+                # This would be checked during actual loading
+                pass
+            except Exception:
+                return MatchResult.failure(
+                    reason="Qwen3 embedding model may have compatibility issues",
+                    error_type=ErrorType.VERSION_REQUIREMENT,
+                    technical_details="Qwen3 model compatibility check",
+                )
+
+        return MatchResult.success()

xinference/model/llm/core.py

Lines changed: 38 additions & 0 deletions
@@ -31,6 +31,7 @@
 
 if TYPE_CHECKING:
     from .llm_family import LLMFamilyV2, LLMSpecV1
+    from .match_result import ErrorType, MatchResult
 
 logger = logging.getLogger(__name__)
 
@@ -157,6 +158,43 @@ def match_json(
     ) -> bool:
         raise NotImplementedError
 
+    @classmethod
+    def match_json_with_reason(
+        cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+    ) -> "MatchResult":
+        """
+        Check if the engine can handle the given model with detailed error information.
+
+        This method provides detailed failure reasons and suggestions when an engine
+        cannot handle a specific model configuration. The default implementation
+        falls back to the boolean match_json method for backward compatibility.
+
+        Args:
+            llm_family: The model family information
+            llm_spec: The model specification
+            quantization: The quantization method
+
+        Returns:
+            MatchResult: Detailed match result with reasons and suggestions
+        """
+        from .match_result import ErrorType, MatchResult
+
+        # Default implementation for backward compatibility
+        if cls.match_json(llm_family, llm_spec, quantization):
+            return MatchResult.success()
+        else:
+            # Get basic reason based on common failure patterns
+            if not cls.check_lib():
+                return MatchResult.failure(
+                    reason=f"Required library for {cls.__name__} is not available",
+                    error_type=ErrorType.DEPENDENCY_MISSING,
+                )
+            else:
+                return MatchResult.failure(
+                    reason=f"Model configuration is not compatible with {cls.__name__}",
+                    error_type=ErrorType.MODEL_COMPATIBILITY,
+                )
+
     def prepare_parse_reasoning_content(
         self, reasoning_content: bool, enable_thinking: bool = True
     ):
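
Note: the call sites that consume these results live in other files of this commit and are not shown here. A hypothetical caller-side sketch, using only the attributes visible in these diffs (is_match, reason, error_type, technical_details), illustrates how engine selection can now report why each engine was rejected instead of returning a bare boolean.

# Hypothetical caller, not part of this commit's visible diff: collects per-engine
# reasons from match_json_with_reason() so a failed engine choice can be explained.
def explain_engine_matches(engine_classes, llm_family, llm_spec, quantization):
    matched, rejected = [], []
    for engine_cls in engine_classes:
        result = engine_cls.match_json_with_reason(llm_family, llm_spec, quantization)
        if result.is_match:
            matched.append(engine_cls.__name__)
        else:
            # error_type/technical_details may be None for engines using the default fallback.
            rejected.append(
                f"{engine_cls.__name__}: {result.reason}"
                + (f" [{result.error_type}]" if result.error_type else "")
            )
    return matched, rejected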

xinference/model/llm/llama_cpp/core.py

Lines changed: 56 additions & 3 deletions
@@ -84,14 +84,67 @@ def check_lib(cls) -> bool:
     def match_json(
         cls, llm_family: LLMFamilyV2, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
+        from ..match_result import MatchResult
+
+        result = cls.match_json_with_reason(llm_family, llm_spec, quantization)
+        return result.is_match
+
+    @classmethod
+    def match_json_with_reason(
+        cls, llm_family: LLMFamilyV2, llm_spec: LLMSpecV1, quantization: str
+    ) -> "MatchResult":
+        from ..match_result import ErrorType, MatchResult
+
+        # Check library availability
+        if not cls.check_lib():
+            return MatchResult.failure(
+                reason="llama.cpp library (xllamacpp) is not installed",
+                error_type=ErrorType.DEPENDENCY_MISSING,
+                technical_details="xllamacpp package not found in Python environment",
+            )
+
+        # Check model format compatibility
         if llm_spec.model_format not in ["ggufv2"]:
-            return False
+            return MatchResult.failure(
+                reason=f"llama.cpp only supports GGUF v2 format, got: {llm_spec.model_format}",
+                error_type=ErrorType.MODEL_FORMAT,
+                technical_details=f"Unsupported format: {llm_spec.model_format}, required: ggufv2",
+            )
+
+        # Check model abilities - llama.cpp supports both chat and generation
         if (
             "chat" not in llm_family.model_ability
             and "generate" not in llm_family.model_ability
         ):
-            return False
-        return True
+            return MatchResult.failure(
+                reason=f"llama.cpp requires 'chat' or 'generate' ability, model has: {llm_family.model_ability}",
+                error_type=ErrorType.ABILITY_MISMATCH,
+                technical_details=f"Model abilities: {llm_family.model_ability}",
+            )
+
+        # Check platform-specific issues
+        import platform
+
+        current_platform = platform.system()
+
+        # Check for ARM64 specific issues
+        if current_platform == "Darwin" and platform.machine() == "arm64":
+            # Apple Silicon specific checks could go here
+            pass
+        elif current_platform == "Windows":
+            # Windows specific checks could go here
+            pass
+
+        # Check memory requirements (basic heuristic)
+        model_size = float(str(llm_spec.model_size_in_billions))
+        if model_size > 70:  # Very large models
+            return MatchResult.failure(
+                reason=f"llama.cpp may struggle with very large models ({model_size}B parameters)",
+                error_type=ErrorType.MODEL_COMPATIBILITY,
+                technical_details=f"Large model size: {model_size}B parameters",
+            )
+
+        return MatchResult.success()
 
     def load(self):
         try:

xinference/model/llm/lmdeploy/core.py

Lines changed: 59 additions & 5 deletions
@@ -119,7 +119,22 @@ def check_lib(cls) -> bool:
     def match_json(
         cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        return False
+        from ..match_result import MatchResult
+
+        result = cls.match_json_with_reason(llm_family, llm_spec, quantization)
+        return result.is_match
+
+    @classmethod
+    def match_json_with_reason(
+        cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+    ) -> "MatchResult":
+        from ..match_result import ErrorType, MatchResult
+
+        return MatchResult.failure(
+            reason="LMDeploy base model does not support direct inference",
+            error_type=ErrorType.MODEL_COMPATIBILITY,
+            technical_details="LMDeploy base model class is not intended for direct use",
+        )
 
     def generate(
         self,

@@ -172,13 +187,52 @@ def load(self):
     def match_json(
         cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
+        from ..match_result import MatchResult
+
+        result = cls.match_json_with_reason(llm_family, llm_spec, quantization)
+        return result.is_match
+
+    @classmethod
+    def match_json_with_reason(
+        cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+    ) -> "MatchResult":
+        from ..match_result import ErrorType, MatchResult
+
+        # Check library availability first
+        if not LMDEPLOY_INSTALLED:
+            return MatchResult.failure(
+                reason="LMDeploy library is not installed",
+                error_type=ErrorType.DEPENDENCY_MISSING,
+                technical_details="lmdeploy package not found in Python environment",
+            )
+
+        # Check model format compatibility and quantization
         if llm_spec.model_format == "awq":
-            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
+            # LMDeploy has specific AWQ quantization requirements
             if "4" not in quantization:
-                return False
+                return MatchResult.failure(
+                    reason=f"LMDeploy AWQ format requires 4-bit quantization, got: {quantization}",
+                    error_type=ErrorType.QUANTIZATION,
+                    technical_details=f"AWQ + {quantization} not supported by LMDeploy",
+                )
+
+        # Check model compatibility
         if llm_family.model_name not in LMDEPLOY_SUPPORTED_CHAT_MODELS:
-            return False
-        return LMDEPLOY_INSTALLED
+            return MatchResult.failure(
+                reason=f"Chat model not supported by LMDeploy: {llm_family.model_name}",
+                error_type=ErrorType.MODEL_COMPATIBILITY,
+                technical_details=f"Unsupported chat model: {llm_family.model_name}",
+            )
+
+        # Check model abilities - LMDeploy primarily supports chat models
+        if "chat" not in llm_family.model_ability:
+            return MatchResult.failure(
+                reason=f"LMDeploy Chat requires 'chat' ability, model has: {llm_family.model_ability}",
+                error_type=ErrorType.ABILITY_MISMATCH,
+                technical_details=f"Model abilities: {llm_family.model_ability}",
+            )
+
+        return MatchResult.success()
 
     async def async_chat(
         self,
