11"""Streaming utilities for OpenAI-compatible server responses."""
22
33from collections .abc import AsyncGenerator
4+ from typing import Literal
45
56from mellea .core .base import ModelOutputThunk
67from mellea .core .utils import MelleaLogger
7- from mellea .helpers .openai_compatible_helpers import build_completion_usage
8+ from mellea .helpers .openai_compatible_helpers import (
9+ build_completion_usage ,
10+ build_tool_calls ,
11+ )
812
913from .models import (
1014 ChatCompletionChunk ,
1115 ChatCompletionChunkChoice ,
1216 ChatCompletionChunkDelta ,
17+ ChatCompletionMessageToolCall ,
1318 OpenAIError ,
1419 OpenAIErrorResponse ,
1520 StreamOptions ,
21+ ToolCallFunction ,
1622)
1723
1824
@@ -98,6 +104,46 @@ async def stream_chat_completion_chunks(
         )
         yield f"data: {chunk.model_dump_json()}\n\n"
 
+    # Extract tool calls from the ModelOutputThunk if available
+    tool_calls_list = build_tool_calls(output)
+
+    if tool_calls_list:
+        # Convert to ChatCompletionMessageToolCall objects
+        tool_calls = [
+            ChatCompletionMessageToolCall(
+                id=tc["id"],
+                type=tc["type"],
+                function=ToolCallFunction(
+                    name=tc["function"]["name"],
+                    arguments=tc["function"]["arguments"],
+                ),
+            )
+            for tc in tool_calls_list
+        ]
+
+        # Emit tool calls in a separate chunk before the final chunk
+        tool_call_chunk = ChatCompletionChunk(
+            id=completion_id,
+            model=model,
+            created=created,
+            choices=[
+                ChatCompletionChunkChoice(
+                    index=0,
+                    delta=ChatCompletionChunkDelta(tool_calls=tool_calls),
+                    finish_reason=None,
+                )
+            ],
+            object="chat.completion.chunk",
+            system_fingerprint=system_fingerprint,
+        )
+        yield f"data: {tool_call_chunk.model_dump_json()}\n\n"
+
+    # Determine finish_reason based on tool calls
+    finish_reason: (
+        Literal["stop", "length", "content_filter", "tool_calls", "function_call"]
+        | None
+    ) = "tool_calls" if tool_calls_list else "stop"
+
     # Include usage in final chunk only if explicitly requested via stream_options
     # Per OpenAI spec: usage is only included when stream_options.include_usage=True
     include_usage = stream_options is not None and stream_options.include_usage
@@ -112,7 +158,7 @@ async def stream_chat_completion_chunks(
             ChatCompletionChunkChoice(
                 index=0,
                 delta=ChatCompletionChunkDelta(content=None),
-                finish_reason="stop",
+                finish_reason=finish_reason,
             )
         ],
         object="chat.completion.chunk",
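
For illustration, here is a minimal, self-contained sketch of how a client might consume the stream produced above and recover the tool calls. The SSE payloads are hand-written stand-ins (the completion id, function name, and arguments are invented), but the `delta.tool_calls` shape and the final chunk with `finish_reason="tool_calls"` mirror what the new code emits:

```python
import json

# Hand-written stand-ins for the SSE lines the generator above would yield
# (ids, function name, and arguments are illustrative, not real output).
sse_lines = [
    'data: {"choices": [{"index": 0, "delta": {"tool_calls": '
    '[{"id": "call_1", "type": "function", "function": '
    '{"name": "get_weather", "arguments": "{\\"city\\": \\"Paris\\"}"}}]}, '
    '"finish_reason": null}]}',
    'data: {"choices": [{"index": 0, "delta": {"content": null}, '
    '"finish_reason": "tool_calls"}]}',
]

tool_calls: list[dict] = []
finish_reason = None
for line in sse_lines:
    # Strip the SSE framing and parse the chunk payload.
    payload = json.loads(line.removeprefix("data: "))
    choice = payload["choices"][0]
    # Accumulate any tool-call deltas; the final chunk carries none.
    tool_calls.extend(choice["delta"].get("tool_calls") or [])
    finish_reason = choice.get("finish_reason") or finish_reason

print(finish_reason)                                        # tool_calls
print(tool_calls[0]["function"]["name"])                    # get_weather
print(json.loads(tool_calls[0]["function"]["arguments"]))   # {'city': 'Paris'}
```

Emitting the tool calls in their own chunk and leaving the final chunk's delta empty keeps the two concerns separate for clients that key off `finish_reason`, which appears to be the intent of the change.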