Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions docs/examples/as_generic_chat_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# pytest: unit
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we default to unit and don't put this in, but at the moment I'm not sure if that applies to docs/examples.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pytest: unit in docs/examples/as_generic_chat_history.py is correct per AGENTS.md — examples require this opt-in comment to be collected.

"""Convert a heterogeneous context to a generic chat history.

The as_generic_chat_history() function converts any Context into a list of
Messages, gracefully handling unknown component types by converting them to
strings. This is useful for working with mixed-type contexts or when you need
a more flexible interface than as_chat_history().
"""

from mellea.core import CBlock, ModelOutputThunk
from mellea.stdlib.components import Message, as_generic_chat_history
from mellea.stdlib.context import ChatContext


def basic_example() -> list[Message]:
    """Round-trip a plain two-turn conversation through the converter.

    Builds a ``ChatContext`` containing only ``Message`` entries, converts it
    with ``as_generic_chat_history()``, and checks that both turns come back
    with their original content.

    Returns:
        The converted chat history (two ``Message`` objects).
    """
    turns = [("user", "What is 2+2?"), ("assistant", "2+2 equals 4.")]

    ctx = ChatContext()
    for role, text in turns:
        ctx = ctx.add(Message(role, text))

    history = as_generic_chat_history(ctx)
    assert len(history) == 2
    # Content must come back unchanged and in insertion order.
    assert [msg.content for msg in history] == [text for _, text in turns]
    return history


def with_heterogeneous_components() -> list[Message]:
    """Convert a context that mixes three different entry types.

    The context holds a ``Message``, a plain ``CBlock`` and a
    ``ModelOutputThunk``. ``as_generic_chat_history()`` turns each of them
    into a ``Message`` instead of rejecting the non-Message entries.

    Returns:
        The converted chat history (three ``Message`` objects).
    """
    ctx = (
        ChatContext()
        .add(Message("user", "Summarize this"))
        .add(CBlock("Some inline content to process"))
        .add(ModelOutputThunk(value="The summary is..."))
    )

    history = as_generic_chat_history(ctx)
    assert len(history) == 3

    # Expected roles, in order: the Message keeps its own role, the CBlock
    # defaults to 'user', and the ModelOutputThunk defaults to 'assistant'.
    roles = [msg.role for msg in history]
    assert roles == ["user", "user", "assistant"]
    return history


def with_custom_formatter() -> list[Message]:
    """Supply a custom formatter for a thunk whose parsed output is not text.

    The ``formatter`` argument controls how ``as_generic_chat_history()``
    turns objects it cannot map directly (here a ``dict`` stored in
    ``parsed_repr``) into message content.

    Returns:
        The converted chat history (two ``Message`` objects).
    """

    def label_with_type(obj: object) -> str:
        # Replace the object with a short tag naming its type.
        type_name = type(obj).__name__
        return f"[Formatted: {type_name}]"

    ctx = ChatContext().add(Message("user", "Process this"))

    # A thunk whose parsed_repr is neither a Message nor a str, so the
    # converter has to hand it to the formatter.
    thunk = ModelOutputThunk(value="raw data")
    thunk.parsed_repr = {"type": "dict", "data": "structured"}
    ctx = ctx.add(thunk)

    history = as_generic_chat_history(ctx, formatter=label_with_type)
    assert len(history) == 2
    formatted_content = history[1].content
    assert "[Formatted:" in formatted_content
    return history


if __name__ == "__main__":
    # Each entry pairs the heading to print with the example that produces
    # the history shown under it; examples run in the listed order.
    demos = (
        ("Basic example:", basic_example),
        ("\nHeterogeneous example:", with_heterogeneous_components),
        ("\nCustom formatter example:", with_custom_formatter),
    )
    for heading, run_demo in demos:
        messages = run_demo()
        print(heading)
        for msg in messages:
            print(f" {msg.role}: {msg.content}")
3 changes: 2 additions & 1 deletion mellea/stdlib/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
TemplateRepresentation,
blockify,
)
from .chat import Message, ToolMessage, as_chat_history
from .chat import Message, ToolMessage, as_chat_history, as_generic_chat_history
from .docs.document import Document
from .instruction import Instruction
from .intrinsic import Intrinsic
Expand All @@ -36,6 +36,7 @@
"ToolMessage",
"Transform",
"as_chat_history",
"as_generic_chat_history",
"blockify",
"mify",
]
101 changes: 95 additions & 6 deletions mellea/stdlib/components/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
Defines ``Message``, the ``Component`` subtype used to represent a single turn in a
chat history with a ``role`` (``user``, ``assistant``, ``system``, or ``tool``),
text ``content``, and optional ``images`` and ``documents`` attachments. Also provides
``ToolMessage`` (a ``Message`` subclass that carries the tool name and arguments) and
the ``as_chat_history`` utility for converting a ``Context`` into a flat list of
``Message`` objects.
``ToolMessage`` (a ``Message`` subclass that carries the tool name and arguments), and
utilities for converting a ``Context`` into a flat list of ``Message`` objects:
``as_chat_history`` (strict typing) and ``as_generic_chat_history`` (flexible with
configurable formatter).
"""

from collections.abc import Iterable, Mapping
import logging
from collections.abc import Callable, Iterable, Mapping
from typing import Any, Literal

from ...core import (
Expand All @@ -22,6 +24,8 @@
)
from .docs.document import Document, _coerce_to_documents

_logger = logging.getLogger(__name__)


class Message(Component["Message"]):
"""A single Message in a Chat history.
Expand Down Expand Up @@ -250,7 +254,7 @@ def as_chat_history(ctx: Context) -> list[Message]:
List of ``Message`` objects in conversation order.

Raises:
Exception: If the context history is non-linear and cannot be cast to a
ValueError: If the context history is non-linear and cannot be cast to a
flat list.
AssertionError: If any entry in the context cannot be converted to a
``Message``.
Expand All @@ -271,8 +275,93 @@ def _to_msg(c: CBlock | Component | ModelOutputThunk) -> Message | None:

all_ctx_events = ctx.as_list()
if all_ctx_events is None:
raise Exception("Trying to cast a non-linear history into a chat history.")
raise ValueError("Trying to cast a non-linear history into a chat history.")
else:
history = [_to_msg(c) for c in all_ctx_events]
assert None not in history, "Could not render this context as a chat history."
return history # type: ignore


def _default_formatter(obj: object) -> str:
    """Fallback formatter used when the caller does not supply one.

    Emits a warning that names the object's type, then returns ``str(obj)``.

    Args:
        obj: The object that has to be rendered as message text.

    Returns:
        The result of ``str(obj)``.
    """
    type_name = type(obj).__name__
    _logger.warning(
        f"Unknown component type {type_name} in as_generic_chat_history; "
        "converting to string representation."
    )
    return str(obj)


def as_generic_chat_history(
ctx: Context, formatter: Callable[[object], str] | None = None
) -> list[Message]:
Copy link
Copy Markdown
Contributor

@planetf1 planetf1 May 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor / style: Small typing issue here — both _default_formatter and the formatter parameter use Any, which the project convention asks us to avoid unless an external library forces it (nothing does here). object is the right type: it accepts everything, is covariant, and keeps mypy happy with callers that pass typed formatters.

def _default_formatter(obj: object) -> str: ...
formatter: Callable[[object], str] | None = None

The test helpers already use obj: object, so it is just the library code that needs updating.

"""Returns a list of Messages corresponding to a Context, with flexible type handling.

This function is more permissive than ``as_chat_history()``, allowing arbitrary
component types. Unknown types are converted to strings using a configurable
formatter, making it suitable for general-purpose use where context composition
may be heterogeneous.

The formatter is applied to:
- ``ModelOutputThunk`` with non-Message ``parsed_repr``
- ``CBlock`` subclasses (subclasses only; plain ``CBlock`` is stringified)
- Other unknown component types

Existing ``Message`` objects are preserved as-is; their content is not formatted.
This design preserves Message fidelity while providing an escape hatch for unknown types.

Args:
ctx: A linear ``Context`` that may contain ``Message``, ``ModelOutputThunk``,
or other ``Component`` types.
formatter: Optional callable that converts unknown types to strings.
Defaults to ``_default_formatter`` which logs a warning and stringifies.

Returns:
List of ``Message`` objects in conversation order.

Raises:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also:
ValueError: If a ModelOutputThunk has neither value nor parsed_repr.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

ValueError: If the context history is non-linear and cannot be cast to a
flat list.
"""
if formatter is None:
formatter = _default_formatter

def _to_msg(c: CBlock | Component | ModelOutputThunk) -> Message:
match c:
case Message():
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as a naive user I expected to be able to format a Message.
I might be totally wrong about the use case, but hard-coding a "format=" to be so specialized seems odd. Why doesn't the formatter work on the prepared final message/content instead of only certain types of content.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formatting apply to the Message content itself would be a breaking change and might not be the intended use case.

return c
case ModelOutputThunk():
if isinstance(c.parsed_repr, Message):
return c.parsed_repr
if isinstance(c.parsed_repr, str):
return Message(role="assistant", content=c.parsed_repr)
# Use value if parsed_repr is None
if c.parsed_repr is None:
if c.value is None:
raise ValueError(
"ModelOutputThunk has no value and no parsed_repr — was it evaluated?"
)
content = str(c.value)
else:
_logger.warning(
f"ModelOutputThunk.parsed_repr is {type(c.parsed_repr).__name__}, "
f"not a Message; falling back to value."
)
content = formatter(c.parsed_repr)
return Message(role="assistant", content=content)
case CBlock():
if type(c) is not CBlock:
content = formatter(c)
else:
content = str(c)
return Message(role="user", content=content)
case _:
Copy link
Copy Markdown
Contributor

@planetf1 planetf1 May 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More significant: One tricky edge case: ImageBlock inherits from CBlock (see core/base.py:79), so it will fall into this arm and produce Message(role="user", content=str(image_block)) — meaning a raw base64 PNG string ends up in the chat history as a text message. This is a reachable code path if a context with image content is passed in, and the corruption would be invisible downstream.

A simple guard fixes it:

case CBlock():
    if type(c) is not CBlock:
        return Message(role="user", content=formatter(c))
    return Message(role="user", content=str(c))

That way any CBlock subclasses not explicitly handled go through the formatter instead.

content = formatter(c)
return Message(role="user", content=content)

all_ctx_events = ctx.as_list()
if all_ctx_events is None:
raise ValueError("Trying to cast a non-linear history into a chat history.")
return [_to_msg(c) for c in all_ctx_events]
Loading
Loading