@@ -95,42 +95,31 @@ def generate(self, prompt):
 
         # Use the predefined max output tokens, or adjust if prompt is very long
         max_completion_tokens = min(self.max_output_tokens, self.max_tokens - prompt_tokens)
-
+
         if self.model in self.special_models:
-            # Non-streaming approach
             response = self.openai.chat.completions.create(
                 model=self.model,
                 messages=[{"role": "user", "content": prompt}],
                 max_completion_tokens=max_completion_tokens,
                 stream=True,
             )
-            for chunk in response:
-                if chunk.choices[0].delta.content:
-                    chunk_text = chunk.choices[0].delta.content
-                    full_response += chunk_text
-                    print(chunk_text, end="", flush=True)
-                    if "^^^end^^^" in full_response:
-                        break
-
-            print()  # Print a newline at the end
         else:
-            # Streaming approach
             response = self.openai.chat.completions.create(
                 model=self.model,
                 messages=[{"role": "user", "content": prompt}],
                 max_tokens=max_completion_tokens,
                 stream=True,
             )
 
-            for chunk in response:
-                if chunk.choices[0].delta.content:
-                    chunk_text = chunk.choices[0].delta.content
-                    full_response += chunk_text
-                    print(chunk_text, end="", flush=True)
-                    if "^^^end^^^" in full_response:
-                        break
+        for chunk in response:
+            if chunk.choices[0].delta.content:
+                chunk_text = chunk.choices[0].delta.content
+                full_response += chunk_text
+                print(chunk_text, end="", flush=True)
+                if "^^^end^^^" in full_response:
+                    break
 
-            print()  # Print a newline at the end
+        print()  # Print a newline at the end
 
         # Extract content between markers
         start_marker = "^^^start^^^"
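
The refactor above removes the duplicated per-branch loops: both branches request a streamed completion, and a single loop after the if/else consumes the chunks and stops once the "^^^end^^^" sentinel appears. A minimal standalone sketch of that pattern follows, assuming an OpenAI Python SDK (v1) client; the model name, prompt, and token limit are placeholders, and the special_models/max_output_tokens bookkeeping and marker extraction from the surrounding class are omitted.

# Sketch only: streams a chat completion and stops at the "^^^end^^^" marker.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

prompt = "Reply with ^^^start^^^hello^^^end^^^"
full_response = ""

response = client.chat.completions.create(
    model="gpt-4o-mini",   # placeholder model name
    messages=[{"role": "user", "content": prompt}],
    max_tokens=512,        # stands in for max_completion_tokens above
    stream=True,
)

for chunk in response:
    if chunk.choices[0].delta.content:
        chunk_text = chunk.choices[0].delta.content
        full_response += chunk_text
        print(chunk_text, end="", flush=True)
        if "^^^end^^^" in full_response:
            break  # stop reading once the end marker has been streamed

print()  # newline after the streamed output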