@@ -176,6 +176,58 @@ async def stream_proxy() -> AsyncGenerator[bytes, None]:
async def health():
    """Return a liveness payload with the number of currently cached workers."""
    worker_count = len(_worker_cache["workers"])
    return {"status": "ok", "workers": worker_count}
178178
async def forward_media_request(request: Request, path: str):
    """Forward an incoming POST request to the next available local worker.

    Refreshes the worker list, picks a worker with ``get_next_worker()``,
    re-sends the request body and its end-to-end headers to
    ``http://127.0.0.1:<worker port><path>`` and relays the worker's reply.

    Args:
        request: Incoming request whose body and headers are forwarded.
        path: Path to call on the worker; it is appended directly after the
            port, so it is expected to start with ``/``.

    Returns:
        A ``Response`` carrying the worker's body, status code and content
        type.

    Raises:
        HTTPException: 503 when ``get_next_worker()`` yields no worker;
            502 when the call to the worker raises (including the 300 s
            total timeout).
    """
    # Headers that must not be copied to the upstream call: ``host`` and
    # ``content-length`` are recomputed by the HTTP client, the rest are
    # hop-by-hop fields that only describe the client<->proxy connection.
    skipped_headers = frozenset({
        "host",
        "content-length",
        "transfer-encoding",
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailer",
        "upgrade",
    })

    await refresh_workers()
    worker = get_next_worker()
    if not worker:
        raise HTTPException(status_code=503, detail="No workers available")

    port = worker["port"]
    # NOTE(review): the incoming request's query string is not forwarded —
    # confirm that is intended for these endpoints.
    url = f"http://127.0.0.1:{port}{path}"

    body = await request.body()
    forward_headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in skipped_headers
    }

    session = await get_session()
    try:
        async with session.post(
            url,
            data=body,
            headers=forward_headers,
            timeout=aiohttp.ClientTimeout(total=300),
        ) as resp:
            content = await resp.read()
            return Response(
                content=content,
                status_code=resp.status,
                media_type=resp.content_type,
            )
    except Exception as e:
        # Some exceptions (e.g. a bare asyncio.TimeoutError) stringify to "",
        # which would leave both the log line and the 502 detail empty.
        detail = str(e) or type(e).__name__
        logger.error(f"Forward {path} failed: {detail}")
        raise HTTPException(status_code=502, detail=detail) from e
202+
@app.post("/generate-speech")
async def generate_speech(request: Request):
    """Handle ``POST /generate-speech`` by delegating to ``forward_media_request``."""
    target_path = "/generate-speech"
    upstream_response = await forward_media_request(request, target_path)
    return upstream_response
206+
@app.post("/generate-image")
async def generate_image(request: Request):
    """Handle ``POST /generate-image`` by delegating to ``forward_media_request``."""
    target_path = "/generate-image"
    upstream_response = await forward_media_request(request, target_path)
    return upstream_response
210+
@app.post("/generate-video")
async def generate_video(request: Request):
    """Handle ``POST /generate-video`` by delegating to ``forward_media_request``."""
    target_path = "/generate-video"
    upstream_response = await forward_media_request(request, target_path)
    return upstream_response
214+
@app.post("/nano/generate")
async def nano_generate(request: Request):
    """Handle ``POST /nano/generate`` by delegating to ``forward_media_request``."""
    target_path = "/nano/generate"
    upstream_response = await forward_media_request(request, target_path)
    return upstream_response
218+
@app.post("/v1beta/models/{model}:generateContent")
async def v1beta_generate_content(request: Request, model: str):
    """Handle ``POST /v1beta/models/{model}:generateContent``.

    Args:
        request: Incoming request, handed on to ``forward_media_request``.
        model: Model name captured from the URL path.

    Returns:
        The result of ``forward_media_request`` for the rebuilt path.
    """
    # The placeholder is written without surrounding whitespace so the
    # forwarded path has the same shape as the route matched above.
    target_path = f"/v1beta/models/{model}:generateContent"
    return await forward_media_request(request, target_path)
222+
@app.post("/v1beta/models/{model}:predict")
async def v1beta_predict(request: Request, model: str):
    """Handle ``POST /v1beta/models/{model}:predict``.

    Args:
        request: Incoming request, handed on to ``forward_media_request``.
        model: Model name captured from the URL path.

    Returns:
        The result of ``forward_media_request`` for the rebuilt path.
    """
    # The placeholder is written without surrounding whitespace so the
    # forwarded path has the same shape as the route matched above.
    target_path = f"/v1beta/models/{model}:predict"
    return await forward_media_request(request, target_path)
226+
@app.post("/v1beta/models/{model}:predictLongRunning")
async def v1beta_predict_long_running(request: Request, model: str):
    """Handle ``POST /v1beta/models/{model}:predictLongRunning``.

    Args:
        request: Incoming request, handed on to ``forward_media_request``.
        model: Model name captured from the URL path.

    Returns:
        The result of ``forward_media_request`` for the rebuilt path.
    """
    # The placeholder is written without surrounding whitespace so the
    # forwarded path has the same shape as the route matched above.
    target_path = f"/v1beta/models/{model}:predictLongRunning"
    return await forward_media_request(request, target_path)
230+
179231def main ():
180232 import argparse
181233 parser = argparse .ArgumentParser ()
0 commit comments