@@ -106,13 +106,9 @@ def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List
106106
107107 try :
108108 for chunk in current_request :
109- if not chunk .choices :
110- chunk_list .append (chunk )
111- continue
112-
113- delta = chunk .choices [0 ].delta
114- new_token = getattr (delta , "content" , None )
115- reasoning_content = getattr (delta , "reasoning_content" , None )
109+ new_token = chunk .choices [0 ].delta .content
110+ reasoning_content = getattr (
111+ chunk .choices [0 ].delta , 'reasoning_content' , None )
116112
117113 # Handle reasoning_content if it exists and is not null
118114 if reasoning_content is not None :
@@ -134,7 +130,7 @@ def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List
134130
135131 self .observer .add_model_new_token (new_token )
136132 token_join .append (new_token )
137- role = getattr ( delta , " role" , role )
133+ role = chunk . choices [ 0 ]. delta . role
138134
139135 chunk_list .append (chunk )
140136 if self .stop_event .is_set ():
@@ -144,19 +140,15 @@ def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List
144140 raise RuntimeError (
145141 "Model is interrupted by stop event" )
146142
147- if not chunk_list :
148- raise RuntimeError ("Empty completion stream" )
149-
150143 # Send end marker
151144 self .observer .flush_remaining_tokens ()
152145 model_output = "" .join (token_join )
153146
154147 # Extract token usage
155148 input_tokens = 0
156149 output_tokens = 0
157- usage_chunk = next ((c for c in reversed (chunk_list ) if getattr (c , "usage" , None ) is not None ), None )
158- if usage_chunk is not None :
159- usage = usage_chunk .usage
150+ if chunk_list and chunk_list [- 1 ].usage is not None :
151+ usage = chunk_list [- 1 ].usage
160152 input_tokens = usage .prompt_tokens
161153 output_tokens = usage .completion_tokens if hasattr (
162154 usage , 'completion_tokens' ) else usage .total_tokens
0 commit comments