Skip to content

Commit 7a20a25

Browse files
author
Paweł Kędzia
committed
Merge branch 'features/fix_routing_api'
2 parents c52711e + 13fbca3 commit 7a20a25

File tree

7 files changed

+19
-14
lines changed

7 files changed

+19
-14
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@
1010
| 0.1.1 | Prometheus metrics logging. Workers/Threads/Worker class can be set via environment variables. Streaming fixes. Multi-providers for single model with default-balanced strategy. |
1111
| 0.2.0 | Add balancing strategies: `balanced`, `weighted`, `dynamic_weighted` and `first_available`, which work for streaming and non-streaming requests. Included Prometheus metrics logging via `/metrics` endpoint. First stage of `llm_router_lib` library, to simplify usage of `llm-router-api`. |
1212
| 0.2.1 | Fix stream: OpenAI->Ollama, Ollama->OpenAI. Add Redis caching of availability of model providers (when using `first_available` strategy). Add `llm_router_web` module with simple flask-based frontend to manage llm-router config files. |
13-
| 0.2.2 | Update dockerfile and requirements. |
13+
| 0.2.2 | Update dockerfile and requirements. Fix routing with vLLM. |

llm_router_api/core/api_types/dispatcher.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@ def _get_impl(cls, api_type: str) -> ApiTypesI:
103103
)
104104
return impl()
105105

106+
def get_proper_endpoint(self, api_type: str, endpoint_url: str) -> str:
107+
endpoint_url = endpoint_url.strip("/")
108+
if endpoint_url in ["chat/completions", "api/chat/completions"]:
109+
return self.chat_ep(api_type=api_type)
110+
111+
return self.completions_ep(api_type=api_type)
112+
106113
@classmethod
107114
def chat_ep(cls, api_type: str) -> str:
108115
"""

llm_router_api/core/api_types/ollama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def chat_method(self) -> str:
1818
return "POST"
1919

2020
def completions_ep(self) -> str:
21-
return "/api/generate"
21+
return self.chat_ep()
2222

2323
def completions_method(self) -> str:
2424
return "POST"

llm_router_api/core/api_types/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def chat_method(self) -> str:
3535
return "POST"
3636

3737
def completions_ep(self) -> str:
38-
return "/v1/completions"
38+
return self.chat_ep()
3939

4040
def completions_method(self) -> str:
4141
return "POST"

llm_router_api/core/api_types/vllm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def chat_method(self) -> str:
1717
return "POST"
1818

1919
def completions_ep(self) -> str:
20-
return "/v1/completions"
20+
return self.chat_ep()
2121

2222
def completions_method(self) -> str:
2323
return "POST"

llm_router_api/endpoints/builtin/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def __init__(
8080
prompt_handler=prompt_handler,
8181
model_handler=model_handler,
8282
dont_add_api_prefix=True,
83-
api_types=["openai", "lmstudio", "vllm"],
83+
api_types=["openai", "lmstudio"],
8484
direct_return=direct_return,
8585
method="POST",
8686
)

llm_router_api/endpoints/endpoint_i.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ def run_ep(
669669
map_prompt = params.pop("map_prompt", {})
670670
prompt_str_force = params.pop("prompt_str_force", "")
671671
prompt_str_postfix = params.pop("prompt_str_postfix", "")
672+
params.pop("response_time", "")
672673

673674
# self.logger.debug(json.dumps(params or {}, indent=2, ensure_ascii=False))
674675

@@ -712,12 +713,12 @@ def run_ep(
712713

713714
use_streaming = bool((params or {}).get("stream", False))
714715

715-
if simple_proxy and not use_streaming:
716-
ep_pref = ""
717-
if self.add_api_prefix and DEFAULT_API_PREFIX:
718-
ep_pref = DEFAULT_API_PREFIX.strip()
719-
ep_url = ep_pref.strip("/") + "/" + self.name.lstrip("/")
716+
# Prepare proper endpoint url
717+
ep_url = self._api_type_dispatcher.get_proper_endpoint(
718+
api_type=api_model_provider.api_type, endpoint_url=self.name
719+
)
720720

721+
if simple_proxy and not use_streaming:
721722
return self._return_response_or_rerun(
722723
api_model_provider=api_model_provider,
723724
ep_url=ep_url,
@@ -732,10 +733,6 @@ def run_ep(
732733
self.logger.debug(f" -> prompt_name: {prompt_name}")
733734
self.logger.debug(f" -> prompt_str: {str(prompt_str)[:40]}...")
734735

735-
ep_url = self._api_type_dispatcher.chat_ep(
736-
api_type=api_model_provider.api_type
737-
)
738-
739736
if api_model_provider.api_type in ["openai"]:
740737
params = self._filter_params_to_acceptable(
741738
api_type=api_model_provider.api_type, params=params
@@ -813,6 +810,7 @@ def _return_response_or_rerun(
813810
call_for_each_user_msg=self._call_for_each_user_msg,
814811
)
815812
except Exception as e:
813+
self.logger.error(e)
816814
status_code_force = 500
817815

818816
self.unset_model(

0 commit comments

Comments
 (0)