Skip to content

Commit 058628a

Browse files
committed
fix: 性能、系统工具不显示、rate limit不正常
1 parent 57ed325 commit 058628a

File tree

3 files changed

+131
-85
lines changed

3 files changed

+131
-85
lines changed

data/excluded_models.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,2 @@
1-
veo-2.0-generate-001
2-
imagen-4.0-fast-generate-001
3-
imagen-4.0-ultra-generate-001
4-
imagen-4.0-generate-001
51
gemini-2.5-flash-native-audio-preview-09-2025
6-
gemini-2.5-flash-image
72
gemini-3-pro-image-preview

src/gateway.py

Lines changed: 106 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import asyncio
44
import logging
5+
import time
56
from typing import Optional, AsyncGenerator
67
from fastapi import FastAPI, Request, HTTPException
78
from fastapi.responses import StreamingResponse, Response
@@ -14,134 +15,159 @@
1415
# Resolve project directories relative to this source file.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
DATA_DIR = os.path.join(PROJECT_ROOT, 'data')

# Manager process that owns worker lifecycle and rate-limit bookkeeping.
MANAGER_URL = "http://127.0.0.1:9000"
# Byte substrings whose presence marks an upstream rate-limit response.
RATE_LIMIT_KEYWORDS = [b"exceeded quota", b"out of free generations", b"rate limit"]

app = FastAPI(title="AIStudio2API Gateway")

# Process-wide shared state: one pooled HTTP session plus a small
# worker cache with round-robin cursor (single-process deployment).
_session: Optional[aiohttp.ClientSession] = None
_worker_cache = {"workers": [], "last_update": 0, "index": 0}
CACHE_TTL = 5  # seconds before the worker list is refreshed from the manager
2327

24-
def load_workers():
25-
global workers
26-
if os.path.exists(WORKERS_CONFIG_PATH):
27-
try:
28-
with open(WORKERS_CONFIG_PATH, 'r', encoding='utf-8') as f:
29-
config = json.load(f)
30-
workers = [w['port'] for w in config.get('workers', [])]
31-
logger.info(f"Loaded {len(workers)} workers: {workers}")
32-
except Exception as e:
33-
logger.error(f"Load workers failed: {e}")
28+
async def get_session() -> aiohttp.ClientSession:
    """Return the shared aiohttp session, lazily (re)creating it if closed."""
    global _session
    needs_new = _session is None or _session.closed
    if needs_new:
        _session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(limit=100, limit_per_host=20, keepalive_timeout=30)
        )
    return _session
3434

35-
def get_next_worker() -> Optional[int]:
36-
global current_index, workers
37-
if not workers:
35+
async def refresh_workers():
    """Refresh the cached list of running workers from the manager.

    A small TTL keeps concurrent requests from hammering the manager.
    On any failure the previous (possibly stale) cache is kept and a
    warning is logged.
    """
    cache = _worker_cache
    if time.time() - cache["last_update"] < CACHE_TTL and cache["workers"]:
        return
    try:
        session = await get_session()
        async with session.get(f"{MANAGER_URL}/api/workers", timeout=aiohttp.ClientTimeout(total=5)) as resp:
            # Fail fast on HTTP errors so an error payload is never
            # parsed and cached as a worker list.
            resp.raise_for_status()
            workers = await resp.json()
        cache["workers"] = [w for w in workers if w.get("status") == "running"]
        cache["last_update"] = time.time()
    except Exception as e:
        logger.warning(f"Refresh workers failed: {e}")
47+
48+
def get_next_worker(model: str = "") -> Optional[dict]:
    """Pick the next cached running worker round-robin.

    ``model`` is accepted for forward compatibility but not used in the
    selection here. Returns ``None`` when the cache holds no workers.
    """
    pool = _worker_cache["workers"]
    if not pool:
        return None
    selected = pool[_worker_cache["index"] % len(pool)]
    _worker_cache["index"] += 1
    return selected
56+
57+
async def report_rate_limit(worker_id: str, model: str):
    """Best-effort notification to the manager that ``worker_id`` hit a
    rate limit for ``model``.

    Never raises: reporting must not break the request path. Uses
    ``except Exception`` (not a bare ``except:``) so task cancellation
    and interpreter shutdown still propagate.
    """
    try:
        session = await get_session()
        await session.post(
            f"{MANAGER_URL}/api/workers/{worker_id}/rate-limit",
            json={"model": model},
            timeout=aiohttp.ClientTimeout(total=2),
        )
    except Exception as e:
        # Fire-and-forget; record the failure for debugging only.
        logger.debug(f"report_rate_limit failed for {worker_id}/{model}: {e}")
63+
64+
def check_rate_limit_in_response(content: bytes) -> bool:
    """Return True when ``content`` contains any known rate-limit marker
    (case-insensitive byte search)."""
    lowered = content.lower()
    for keyword in RATE_LIMIT_KEYWORDS:
        if keyword in lowered:
            return True
    return False
4267

4368
@app.on_event("startup")
4469
async def startup():
45-
load_workers()
46-
logger.info(f"Gateway started with {len(workers)} workers")
70+
await refresh_workers()
71+
logger.info(f"Gateway started")
72+
73+
@app.on_event("shutdown")
74+
async def shutdown():
75+
global _session
76+
if _session and not _session.closed:
77+
await _session.close()
4778

4879
@app.get("/")
4980
async def root():
50-
return {"status": "ok", "mode": "gateway", "workers": len(workers)}
81+
return {"status": "ok", "mode": "gateway", "workers": len(_worker_cache["workers"])}
5182

5283
@app.get("/v1/models")
5384
async def models():
54-
port = get_next_worker()
55-
if not port:
85+
await refresh_workers()
86+
worker = get_next_worker()
87+
if not worker:
5688
raise HTTPException(status_code=503, detail="No workers available")
5789

90+
port = worker["port"]
5891
url = f"http://127.0.0.1:{port}/v1/models"
59-
logger.info(f"GET /v1/models -> worker:{port}")
6092

61-
timeout = aiohttp.ClientTimeout(total=60)
62-
async with aiohttp.ClientSession(timeout=timeout) as session:
63-
try:
64-
async with session.get(url) as resp:
65-
content = await resp.read()
66-
return Response(content=content, status_code=resp.status, media_type=resp.content_type)
67-
except Exception as e:
68-
logger.error(f"Forward /v1/models failed: {e}")
69-
raise HTTPException(status_code=502, detail=str(e))
93+
session = await get_session()
94+
try:
95+
async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
96+
content = await resp.read()
97+
return Response(content=content, status_code=resp.status, media_type=resp.content_type)
98+
except Exception as e:
99+
logger.error(f"Forward /v1/models failed: {e}")
100+
raise HTTPException(status_code=502, detail=str(e))
70101

71102
@app.post("/v1/chat/completions")
72103
async def chat_completions(request: Request):
104+
await refresh_workers()
73105
body = await request.body()
74106
body_json = json.loads(body)
75107
is_stream = body_json.get("stream", False)
108+
model_id = body_json.get("model", "")
76109

77-
port = get_next_worker()
78-
if not port:
110+
worker = get_next_worker(model_id)
111+
if not worker:
79112
raise HTTPException(status_code=503, detail="No workers available")
80113

114+
port = worker["port"]
115+
worker_id = worker.get("id", "")
81116
url = f"http://127.0.0.1:{port}/v1/chat/completions"
82-
req_id = f"gw-{current_index}"
117+
req_id = f"gw-{worker_id}"
83118
logger.info(f"[{req_id}] POST -> worker:{port} (stream={is_stream})")
84119

85120
forward_headers = {'Content-Type': 'application/json'}
86121
for k, v in request.headers.items():
87-
k_lower = k.lower()
88-
if k_lower not in ('host', 'content-length', 'transfer-encoding', 'content-type'):
122+
if k.lower() not in ('host', 'content-length', 'transfer-encoding', 'content-type'):
89123
forward_headers[k] = v
90124

125+
session = await get_session()
126+
91127
if is_stream:
92128
async def stream_proxy() -> AsyncGenerator[bytes, None]:
93-
timeout = aiohttp.ClientTimeout(total=600, sock_read=300)
94-
connector = aiohttp.TCPConnector(force_close=True)
95-
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
96-
try:
97-
async with session.post(url, data=body, headers=forward_headers) as resp:
98-
logger.info(f"[{req_id}] Stream started, status={resp.status}")
99-
chunk_count = 0
100-
async for chunk in resp.content.iter_chunks():
101-
data, end_of_chunk = chunk
102-
if data:
103-
chunk_count += 1
104-
yield data
105-
logger.info(f"[{req_id}] Stream completed, chunks={chunk_count}")
106-
except asyncio.CancelledError:
107-
logger.warning(f"[{req_id}] Stream cancelled")
108-
except Exception as e:
109-
logger.error(f"[{req_id}] Stream error: {e}")
110-
111-
return StreamingResponse(
112-
stream_proxy(),
113-
media_type="text/event-stream",
114-
headers={
115-
"Cache-Control": "no-cache",
116-
"Connection": "keep-alive",
117-
"X-Accel-Buffering": "no",
118-
"Transfer-Encoding": "chunked"
119-
}
120-
)
121-
else:
122-
timeout = aiohttp.ClientTimeout(total=300)
123-
async with aiohttp.ClientSession(timeout=timeout) as session:
129+
rate_limited = False
130+
check_count = 0
124131
try:
125-
async with session.post(url, data=body, headers=forward_headers) as resp:
126-
content = await resp.read()
127-
logger.info(f"[{req_id}] Non-stream response, status={resp.status}, len={len(content)}")
128-
return Response(content=content, status_code=resp.status, media_type=resp.content_type)
132+
async with session.post(url, data=body, headers=forward_headers, timeout=aiohttp.ClientTimeout(total=600, sock_read=300)) as resp:
133+
async for chunk in resp.content.iter_chunks():
134+
data, _ = chunk
135+
if data:
136+
check_count += 1
137+
if check_count <= 5 and not rate_limited:
138+
if check_rate_limit_in_response(data):
139+
rate_limited = True
140+
yield data
141+
if rate_limited and worker_id and model_id:
142+
asyncio.create_task(report_rate_limit(worker_id, model_id))
143+
except asyncio.CancelledError:
144+
pass
129145
except Exception as e:
130-
logger.error(f"[{req_id}] Forward failed: {e}")
131-
raise HTTPException(status_code=502, detail=str(e))
146+
logger.error(f"[{req_id}] Stream error: {e}")
147+
148+
return StreamingResponse(stream_proxy(), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
149+
else:
150+
try:
151+
async with session.post(url, data=body, headers=forward_headers, timeout=aiohttp.ClientTimeout(total=300)) as resp:
152+
content = await resp.read()
153+
if check_rate_limit_in_response(content) and worker_id and model_id:
154+
asyncio.create_task(report_rate_limit(worker_id, model_id))
155+
return Response(content=content, status_code=resp.status, media_type=resp.content_type)
156+
except Exception as e:
157+
logger.error(f"[{req_id}] Forward failed: {e}")
158+
raise HTTPException(status_code=502, detail=str(e))
132159

133160
@app.get("/health")
134161
async def health():
135-
return {"status": "ok", "workers": workers}
162+
return {"status": "ok", "workers": len(_worker_cache["workers"])}
136163

137164
def main():
    """CLI entry point: parse ``--port`` and serve the gateway with uvicorn."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=2048)
    options = parser.parse_args()
    uvicorn.run(app, host="0.0.0.0", port=options.port, log_level="warning")
145170

146171
if __name__ == "__main__":
147172
main()
173+

src/manager.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,11 @@ async def check_all_ports():
527527

528528
if config.get('stream_port_enabled'):
529529
ports_to_check.append({"label": "流式代理", "port": config.get('stream_port', 3120)})
530+
531+
if config.get('worker_mode_enabled') and WORKER_POOL_AVAILABLE:
532+
for w in worker_pool.workers.values():
533+
ports_to_check.append({"label": f"Worker-{w.id} API", "port": w.port})
534+
ports_to_check.append({"label": f"Worker-{w.id} Camoufox", "port": w.camoufox_port})
530535

531536
results = []
532537
for item in ports_to_check:
@@ -710,6 +715,26 @@ async def save_workers_config():
710715
except Exception as e:
711716
return {"success": False, "error": str(e)}
712717

718+
@app.get("/api/workers/next")
719+
async def get_next_available_worker(model: str = ""):
720+
if not WORKER_POOL_AVAILABLE:
721+
raise HTTPException(status_code=503, detail="Worker pool not available")
722+
worker = worker_pool.get_worker_for_model(model)
723+
if worker:
724+
worker.request_count += 1
725+
return {"port": worker.port, "worker_id": worker.id}
726+
all_limited = all(w.is_model_limited(model) for w in worker_pool.workers.values() if w.status == "running")
727+
if all_limited:
728+
return {"error": "all_rate_limited", "message": f"All workers rate limited for model {model}"}
729+
return {"error": "no_workers", "message": "No available workers"}
730+
731+
@app.post("/api/workers/{worker_id}/rate-limit")
732+
async def mark_worker_rate_limited(worker_id: str, model: str = Body(..., embed=True)):
733+
if not WORKER_POOL_AVAILABLE:
734+
raise HTTPException(status_code=500, detail="Worker pool not available")
735+
worker_pool.mark_rate_limited(worker_id, model)
736+
return {"success": True}
737+
713738
@app.post("/api/workers/{worker_id}/start")
714739
async def start_worker_api(worker_id: str):
715740
if not WORKER_POOL_AVAILABLE:

0 commit comments

Comments
 (0)