@@ -292,6 +292,15 @@ async def create_stream_generator_from_helper(event_to_set: Event, task_to_cance
292292 continue
293293 elif isinstance (raw_data , dict ):
294294 data = raw_data
295+ if data .get ('error' ) == 'rate_limit' :
296+ logger .warning (f"[{ req_id } ] 🚨 接收到来自代理的速率限制信号: { data } " )
297+ try :
298+ error_chunk = {'id' : chat_completion_id , 'object' : 'chat.completion.chunk' , 'model' : model_name_for_stream , 'created' : created_timestamp , 'choices' : [{'index' : 0 , 'delta' : {'role' : 'assistant' , 'content' : f"\n \n [System: Rate Limit Exceeded - { data .get ('detail' , 'Quota exceeded' )} ]" }, 'finish_reason' : 'stop' , 'native_finish_reason' : 'stop' }]}
299+ yield f"data: { json .dumps (error_chunk , ensure_ascii = False , separators = (',' , ':' ))} \n \n "
300+ except : pass
301+ if not event_to_set .is_set ():
302+ event_to_set .set ()
303+ break
295304 else :
296305 logger .warning (f'[{ req_id } ] 未知的流数据类型: { type (raw_data )} ' )
297306 continue
@@ -339,6 +348,30 @@ async def create_stream_generator_from_helper(event_to_set: Event, task_to_cance
339348 choice_item = {'index' : 0 , 'delta' : {'role' : 'assistant' }, 'finish_reason' : 'stop' , 'native_finish_reason' : 'stop' }
340349 output = {'id' : chat_completion_id , 'object' : 'chat.completion.chunk' , 'model' : model_name_for_stream , 'created' : created_timestamp , 'choices' : [choice_item ]}
341350 yield f"data: { json .dumps (output , ensure_ascii = False , separators = (',' , ':' ))} \n \n "
351+
352+ # Late Rate Limit Check
353+ late_check_wait = 2.0 if len (full_body_content ) < 50 else 0.2
354+ if late_check_wait > 0.5 :
355+ logger .info (f"[{ req_id } ] 内容较短 ({ len (full_body_content )} ), 等待 { late_check_wait } s 检查延迟 Rate Limit" )
356+ await asyncio .sleep (late_check_wait )
357+ try :
358+ from server import STREAM_QUEUE
359+ import queue
360+ if STREAM_QUEUE :
361+ while True :
362+ try :
363+ msg = STREAM_QUEUE .get_nowait ()
364+ if isinstance (msg , dict ) and msg .get ('error' ) == 'rate_limit' :
365+ logger .warning (f"[{ req_id } ] 🚨 捕获到延迟的 Rate Limit 信号: { msg } " )
366+ try :
367+ error_chunk = {'id' : chat_completion_id , 'object' : 'chat.completion.chunk' , 'model' : model_name_for_stream , 'created' : created_timestamp , 'choices' : [{'index' : 0 , 'delta' : {'role' : 'assistant' , 'content' : f"\n \n [System: Rate Limit Exceeded - { msg .get ('detail' , 'Quota exceeded' )} ]" }, 'finish_reason' : 'stop' , 'native_finish_reason' : 'stop' }]}
368+ yield f"data: { json .dumps (error_chunk , ensure_ascii = False , separators = (',' , ':' ))} \n \n "
369+ except : pass
370+ except queue .Empty :
371+ break
372+ except Exception as e :
373+ logger .error (f"[{ req_id } ] Late check failed: { e } " )
374+
342375 except ClientDisconnectedError as disconnect_err :
343376 abort_handler = AbortSignalHandler ()
344377 disconnect_info = abort_handler .handle_error (disconnect_err , req_id )
@@ -427,6 +460,9 @@ async def create_stream_generator_from_helper(event_to_set: Event, task_to_cance
427460 continue
428461 elif isinstance (raw_data , dict ):
429462 data = raw_data
463+ if data .get ('error' ) == 'rate_limit' :
464+ logger .warning (f"[{ req_id } ] 🚨 非流式请求中接收到速率限制: { data } " )
465+ raise HTTPException (status_code = 429 , detail = f"Rate limit exceeded: { data .get ('detail' )} " )
430466 else :
431467 logger .warning (f'[{ req_id } ] 非流式未知数据类型: { type (raw_data )} ' )
432468 continue
0 commit comments