Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c2113e3
feat: add tool calling support to m serve
markstur Apr 13, 2026
82a8fb7
fix: fixed the bug in m serve where finish_reason=tool_calls for empt…
markstur Apr 14, 2026
d897e41
fix: move message add to outside the loop in client_tool_calling.py e…
markstur Apr 14, 2026
128a9c6
fix: cli app.py loop variable tool_name is never used
markstur Apr 14, 2026
0e23d92
fix: fix test_mot_init_typing() hasattr was always true
markstur Apr 14, 2026
0f894d8
fix: update m_serve_example_tool_calling.py to use safer example tool
markstur Apr 14, 2026
7de99e4
fix: replace repeated hard-coded string with constant
markstur Apr 17, 2026
98ede72
fix: add TOOL_CHOICE to ModelOptions like TEMPERATURE not a sentinel
markstur Apr 17, 2026
6a812f2
fix: fix m serve tool-calling examples
markstur Apr 17, 2026
8bd74fd
fix: remove unused imports in example
markstur Apr 17, 2026
9a82f5f
feat: cli support for OpenAI API tool calling with streaming
markstur Apr 24, 2026
e12f2a7
fix: add required index field to streaming tool call deltas
markstur Apr 29, 2026
0177010
fix: move build_tool_calls invocation
markstur Apr 29, 2026
43cb8b8
test: add integration test for cli/serve using TestClient with stream…
markstur Apr 29, 2026
e68d50c
fix: use fallback for json.dumps in build_tool_calls
markstur Apr 29, 2026
f3c9d85
test: restore cli streaming tests to fix conflicts
markstur Apr 30, 2026
e46afd3
test: update output.usage -> output.generation.usage
markstur Apr 30, 2026
5b737ab
test: update tests usage -> generation.usage
markstur Apr 30, 2026
3aea0c3
refactor(serve): simplify tool call construction with Pydantic valida…
markstur May 5, 2026
2b6792e
fix: remove unused imports
markstur May 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions cli/serve/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
import time
import uuid
from typing import Literal

try:
import typer
Expand All @@ -21,11 +22,15 @@
) from e

from mellea.backends.model_options import ModelOption
from mellea.helpers.openai_compatible_helpers import build_completion_usage
from mellea.helpers.openai_compatible_helpers import (
build_completion_usage,
build_tool_calls,
)

from .models import (
ChatCompletion,
ChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionRequest,
Choice,
OpenAIError,
Expand Down Expand Up @@ -111,14 +116,14 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
"response_format", # Response format (json_object) - not yet implemented
"functions", # Legacy function calling - not yet implemented
"function_call", # Legacy function calling - not yet implemented
"tools", # Tool calling - not yet implemented
"tool_choice", # Tool choice - not yet implemented
}
openai_to_model_option = {
"temperature": ModelOption.TEMPERATURE,
"max_tokens": ModelOption.MAX_NEW_TOKENS,
"seed": ModelOption.SEED,
"stream": ModelOption.STREAM,
"tools": ModelOption.TOOLS,
"tool_choice": ModelOption.TOOL_CHOICE,
}

# Get all non-None fields
Expand Down Expand Up @@ -171,8 +176,6 @@ async def endpoint(request: ChatCompletionRequest):
model_options=model_options,
)

# system_fingerprint represents backend config hash, not model name
# The model name is already in response.model (line 73)
# Leave as None since we don't track backend config fingerprints yet
system_fingerprint = None

Expand All @@ -190,6 +193,24 @@ async def endpoint(request: ChatCompletionRequest):
media_type="text/event-stream",
)

tool_calls_list = build_tool_calls(output)
tool_calls = (
Comment thread
markstur marked this conversation as resolved.
[
ChatCompletionMessageToolCall.model_validate(tc)
for tc in tool_calls_list
]
if tool_calls_list
else None
)
Comment thread
markstur marked this conversation as resolved.

# Determine finish_reason based on tool calls
finish_reason: (
Literal[
"stop", "length", "content_filter", "tool_calls", "function_call"
]
| None
) = "tool_calls" if tool_calls else "stop"

return ChatCompletion(
id=completion_id,
model=request.model,
Expand All @@ -198,9 +219,11 @@ async def endpoint(request: ChatCompletionRequest):
Choice(
index=0,
message=ChatCompletionMessage(
content=output.value, role="assistant"
content=output.value,
role="assistant",
tool_calls=tool_calls,
),
finish_reason="stop",
finish_reason=finish_reason,
)
],
object="chat.completion", # type: ignore
Expand Down
72 changes: 72 additions & 0 deletions cli/serve/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,67 @@ class ChatCompletionRequest(BaseModel):
extra: dict[str, Any] = Field(default_factory=dict)


class ToolCallFunction(BaseModel):
    """The function payload of a completed (non-streaming) tool call.

    Mirrors the OpenAI ``Function`` object nested inside a chat-completion
    tool call: both fields are required and fully populated.
    """

    name: str
    """The name of the function to call."""

    arguments: str
    """The arguments to call the function with, as a JSON string."""


class ChatCompletionMessageToolCall(BaseModel):
    """A single tool call emitted by the model in a non-streaming response.

    Mirrors the OpenAI ``ChatCompletionMessageToolCall`` schema: every field
    is required because a non-streaming response carries the complete call.
    """

    id: str
    """The ID of the tool call."""

    type: Literal["function"]
    """The type of the tool. Currently, only 'function' is supported."""

    function: ToolCallFunction
    """The function that the model called."""
"""The function that the model called."""


class ToolCallFunctionDelta(BaseModel):
    """The function fragment carried by one streaming tool-call delta.

    A streaming response may split a function's name and its JSON-encoded
    arguments across several chunks, so every field defaults to ``None``.
    """

    name: str | None = None
    """The name of the function to call (may be None in delta chunks)."""

    arguments: str | None = None
    """The arguments fragment for this delta (may be None in delta chunks)."""


class ChatCompletionMessageToolCallDelta(BaseModel):
    """One tool-call fragment inside a streaming chat-completion chunk.

    Per the OpenAI streaming spec, ``index`` is mandatory on every delta so
    clients can stitch fragments of the same tool call back together; the
    remaining fields arrive incrementally and are therefore optional.
    """

    index: int
    """The index of this tool call in the tool_calls array.

    Required for delta reassembly in OpenAI SDK and compatible clients.
    """

    id: str | None = None
    """The ID of the tool call (may be None in subsequent delta chunks)."""

    type: Literal["function"] | None = None
    """The type of the tool (may be None in subsequent delta chunks)."""

    function: ToolCallFunctionDelta | None = None
    """The function delta for this chunk (may be None in some chunks)."""


# Taking this from OpenAI types https://github.com/openai/openai-python/blob/main/src/openai/types/chat/chat_completion.py,
class ChatCompletionMessage(BaseModel):
content: str | None = None
Expand All @@ -91,6 +152,9 @@ class ChatCompletionMessage(BaseModel):
role: Literal["assistant"]
"""The role of the author of this message."""

tool_calls: list[ChatCompletionMessageToolCall] | None = None
"""The tool calls generated by the model, such as function calls."""


class Choice(BaseModel):
index: int
Expand Down Expand Up @@ -144,6 +208,14 @@ class ChatCompletionChunkDelta(BaseModel):
refusal: str | None = None
"""The refusal message fragment, if any."""

tool_calls: list[ChatCompletionMessageToolCallDelta] | None = None
"""The tool call deltas in this chunk.

Each delta includes a required index field for reassembly by OpenAI SDK
and compatible clients. The id, type, and function fields are optional
since they may arrive incrementally across multiple chunks.
"""


class ChatCompletionChunkChoice(BaseModel):
"""A choice in a streaming chunk."""
Expand Down
41 changes: 39 additions & 2 deletions cli/serve/streaming.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
"""Streaming utilities for OpenAI-compatible server responses."""

from collections.abc import AsyncGenerator
from typing import Literal

from mellea.core.base import ModelOutputThunk
from mellea.core.utils import MelleaLogger
from mellea.helpers.openai_compatible_helpers import build_completion_usage
from mellea.helpers.openai_compatible_helpers import (
build_completion_usage,
build_tool_calls,
)

from .models import (
ChatCompletionChunk,
ChatCompletionChunkChoice,
ChatCompletionChunkDelta,
ChatCompletionMessageToolCallDelta,
OpenAIError,
OpenAIErrorResponse,
StreamOptions,
Expand Down Expand Up @@ -98,6 +103,38 @@ async def stream_chat_completion_chunks(
)
yield f"data: {chunk.model_dump_json()}\n\n"

tool_calls_list = build_tool_calls(output)

if tool_calls_list:
# Convert to ChatCompletionMessageToolCallDelta objects with required index
tool_calls = [
ChatCompletionMessageToolCallDelta.model_validate({**tc, "index": idx})
for idx, tc in enumerate(tool_calls_list)
]
Comment thread
markstur marked this conversation as resolved.

# Emit tool calls in a separate chunk before the final chunk
tool_call_chunk = ChatCompletionChunk(
id=completion_id,
model=model,
created=created,
choices=[
ChatCompletionChunkChoice(
index=0,
delta=ChatCompletionChunkDelta(tool_calls=tool_calls),
finish_reason=None,
Comment thread
markstur marked this conversation as resolved.
)
],
object="chat.completion.chunk",
system_fingerprint=system_fingerprint,
)
yield f"data: {tool_call_chunk.model_dump_json()}\n\n"

# Determine finish_reason based on tool calls
finish_reason: (
Literal["stop", "length", "content_filter", "tool_calls", "function_call"]
| None
) = "tool_calls" if tool_calls_list else "stop"

# Include usage in final chunk only if explicitly requested via stream_options
# Per OpenAI spec: usage is only included when stream_options.include_usage=True
include_usage = stream_options is not None and stream_options.include_usage
Expand All @@ -112,7 +149,7 @@ async def stream_chat_completion_chunks(
ChatCompletionChunkChoice(
index=0,
delta=ChatCompletionChunkDelta(content=None),
finish_reason="stop",
finish_reason=finish_reason,
)
],
object="chat.completion.chunk",
Expand Down
Loading
Loading