-
Notifications
You must be signed in to change notification settings - Fork 115
feat: add as_generic_chat_history function to convert any Context to … #1007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
dce6efc
afbb19a
6a5b6ae
e055bdd
019cdd3
630d14d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| # pytest: unit | ||
| """Convert a heterogeneous context to a generic chat history. | ||
|
|
||
| The as_generic_chat_history() function converts any Context into a list of | ||
| Messages, gracefully handling unknown component types by converting them to | ||
| strings. This is useful for working with mixed-type contexts or when you need | ||
| a more flexible interface than as_chat_history(). | ||
| """ | ||
|
|
||
| from mellea.core import CBlock, ModelOutputThunk | ||
| from mellea.stdlib.components import Message, as_generic_chat_history | ||
| from mellea.stdlib.context import ChatContext | ||
|
|
||
|
|
||
| def basic_example() -> list[Message]: | ||
| """Convert a standard Message-based context to chat history.""" | ||
| ctx = ChatContext() | ||
| ctx = ctx.add(Message("user", "What is 2+2?")) | ||
| ctx = ctx.add(Message("assistant", "2+2 equals 4.")) | ||
|
|
||
| history = as_generic_chat_history(ctx) | ||
| assert len(history) == 2 | ||
| assert history[0].content == "What is 2+2?" | ||
| assert history[1].content == "2+2 equals 4." | ||
| return history | ||
|
|
||
|
|
||
| def with_heterogeneous_components() -> list[Message]: | ||
| """Handle mixed component types gracefully. | ||
|
|
||
| Unlike as_chat_history(), as_generic_chat_history() can handle any | ||
| component type by converting unknown types to strings. | ||
| """ | ||
| ctx = ChatContext() | ||
| ctx = ctx.add(Message("user", "Summarize this")) | ||
| ctx = ctx.add(CBlock("Some inline content to process")) | ||
| mot = ModelOutputThunk(value="The summary is...") | ||
| ctx = ctx.add(mot) | ||
|
|
||
| history = as_generic_chat_history(ctx) | ||
| assert len(history) == 3 | ||
| assert history[0].role == "user" | ||
| assert history[1].role == "user" # CBlock defaults to 'user' | ||
| assert history[2].role == "assistant" # MOT defaults to 'assistant' | ||
| return history | ||
|
|
||
|
|
||
| def with_custom_formatter() -> list[Message]: | ||
| """Use a custom formatter for ModelOutputThunk with unparsed content. | ||
|
|
||
| You can provide a formatter function to customize how unparsed outputs | ||
| or other unknown types are converted to strings. | ||
| """ | ||
|
|
||
| def my_formatter(obj: object) -> str: | ||
| return f"[Formatted: {type(obj).__name__}]" | ||
|
|
||
| ctx = ChatContext() | ||
| ctx = ctx.add(Message("user", "Process this")) | ||
| # Add a ModelOutputThunk with a non-Message parsed_repr | ||
| mot = ModelOutputThunk(value="raw data") | ||
| mot.parsed_repr = {"type": "dict", "data": "structured"} | ||
| ctx = ctx.add(mot) | ||
|
|
||
| history = as_generic_chat_history(ctx, formatter=my_formatter) | ||
| assert len(history) == 2 | ||
| assert "[Formatted:" in history[1].content | ||
| return history | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| basic = basic_example() | ||
| print("Basic example:") | ||
| for msg in basic: | ||
| print(f" {msg.role}: {msg.content}") | ||
|
|
||
| heterogeneous = with_heterogeneous_components() | ||
| print("\nHeterogeneous example:") | ||
| for msg in heterogeneous: | ||
| print(f" {msg.role}: {msg.content}") | ||
|
|
||
| custom = with_custom_formatter() | ||
| print("\nCustom formatter example:") | ||
| for msg in custom: | ||
| print(f" {msg.role}: {msg.content}") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,12 +3,14 @@ | |
| Defines ``Message``, the ``Component`` subtype used to represent a single turn in a | ||
| chat history with a ``role`` (``user``, ``assistant``, ``system``, or ``tool``), | ||
| text ``content``, and optional ``images`` and ``documents`` attachments. Also provides | ||
| ``ToolMessage`` (a ``Message`` subclass that carries the tool name and arguments) and | ||
| the ``as_chat_history`` utility for converting a ``Context`` into a flat list of | ||
| ``Message`` objects. | ||
| ``ToolMessage`` (a ``Message`` subclass that carries the tool name and arguments), and | ||
| utilities for converting a ``Context`` into a flat list of ``Message`` objects: | ||
| ``as_chat_history`` (strict typing) and ``as_generic_chat_history`` (flexible with | ||
| configurable formatter). | ||
| """ | ||
|
|
||
| from collections.abc import Iterable, Mapping | ||
| import logging | ||
| from collections.abc import Callable, Iterable, Mapping | ||
| from typing import Any, Literal | ||
|
|
||
| from ...core import ( | ||
|
|
@@ -22,6 +24,8 @@ | |
| ) | ||
| from .docs.document import Document, _coerce_to_documents | ||
|
|
||
| _logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class Message(Component["Message"]): | ||
| """A single Message in a Chat history. | ||
|
|
@@ -250,7 +254,7 @@ def as_chat_history(ctx: Context) -> list[Message]: | |
| List of ``Message`` objects in conversation order. | ||
|
|
||
| Raises: | ||
| Exception: If the context history is non-linear and cannot be cast to a | ||
| ValueError: If the context history is non-linear and cannot be cast to a | ||
| flat list. | ||
| AssertionError: If any entry in the context cannot be converted to a | ||
| ``Message``. | ||
|
|
@@ -271,8 +275,93 @@ def _to_msg(c: CBlock | Component | ModelOutputThunk) -> Message | None: | |
|
|
||
| all_ctx_events = ctx.as_list() | ||
| if all_ctx_events is None: | ||
| raise Exception("Trying to cast a non-linear history into a chat history.") | ||
| raise ValueError("Trying to cast a non-linear history into a chat history.") | ||
| else: | ||
| history = [_to_msg(c) for c in all_ctx_events] | ||
| assert None not in history, "Could not render this context as a chat history." | ||
| return history # type: ignore | ||
|
|
||
|
|
||
| def _default_formatter(obj: object) -> str: | ||
| """Default formatter for unknown component types. | ||
|
|
||
| Logs a warning and converts the object to a string representation. | ||
| """ | ||
| _logger.warning( | ||
| f"Unknown component type {type(obj).__name__} in as_generic_chat_history; " | ||
| f"converting to string representation." | ||
| ) | ||
| return str(obj) | ||
|
|
||
|
|
||
| def as_generic_chat_history( | ||
| ctx: Context, formatter: Callable[[object], str] | None = None | ||
| ) -> list[Message]: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor / style: Small typing issue here — both
    def _default_formatter(obj: object) -> str: ...
    formatter: Callable[[object], str] | None = None
The test helpers already use |
||
| """Returns a list of Messages corresponding to a Context, with flexible type handling. | ||
|
|
||
| This function is more permissive than ``as_chat_history()``, allowing arbitrary | ||
| component types. Unknown types are converted to strings using a configurable | ||
| formatter, making it suitable for general-purpose use where context composition | ||
| may be heterogeneous. | ||
|
|
||
| The formatter is applied to: | ||
| - ``ModelOutputThunk`` with non-Message ``parsed_repr`` | ||
| - ``CBlock`` subclasses (subclasses only; plain ``CBlock`` is stringified) | ||
| - Other unknown component types | ||
|
|
||
| Existing ``Message`` objects are preserved as-is; their content is not formatted. | ||
| This design preserves Message fidelity while providing an escape hatch for unknown types. | ||
|
|
||
| Args: | ||
| ctx: A linear ``Context`` that may contain ``Message``, ``ModelOutputThunk``, | ||
| or other ``Component`` types. | ||
| formatter: Optional callable that converts unknown types to strings. | ||
| Defaults to ``_default_formatter`` which logs a warning and stringifies. | ||
|
|
||
| Returns: | ||
| List of ``Message`` objects in conversation order. | ||
|
|
||
| Raises: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also:
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| ValueError: If the context history is non-linear and cannot be cast to a | ||
| flat list. | ||
| """ | ||
| if formatter is None: | ||
| formatter = _default_formatter | ||
|
|
||
| def _to_msg(c: CBlock | Component | ModelOutputThunk) -> Message: | ||
| match c: | ||
| case Message(): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As a naive user, I expected to be able to format a Message.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Applying the formatter to the Message content itself would be a breaking change and might not be the intended use case. |
||
| return c | ||
| case ModelOutputThunk(): | ||
| if isinstance(c.parsed_repr, Message): | ||
| return c.parsed_repr | ||
| if isinstance(c.parsed_repr, str): | ||
| return Message(role="assistant", content=c.parsed_repr) | ||
| # Use value if parsed_repr is None | ||
| if c.parsed_repr is None: | ||
| if c.value is None: | ||
| raise ValueError( | ||
| "ModelOutputThunk has no value and no parsed_repr — was it evaluated?" | ||
| ) | ||
| content = str(c.value) | ||
| else: | ||
| _logger.warning( | ||
| f"ModelOutputThunk.parsed_repr is {type(c.parsed_repr).__name__}, " | ||
| f"not a Message; falling back to value." | ||
| ) | ||
| content = formatter(c.parsed_repr) | ||
| return Message(role="assistant", content=content) | ||
| case CBlock(): | ||
| if type(c) is not CBlock: | ||
| content = formatter(c) | ||
| else: | ||
| content = str(c) | ||
| return Message(role="user", content=content) | ||
| case _: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. More significant: One tricky edge case: A simple guard fixes it:
    case CBlock():
        if type(c) is not CBlock:
            return Message(role="user", content=formatter(c))
        return Message(role="user", content=str(c))
That way any |
||
| content = formatter(c) | ||
| return Message(role="user", content=content) | ||
|
|
||
| all_ctx_events = ctx.as_list() | ||
| if all_ctx_events is None: | ||
| raise ValueError("Trying to cast a non-linear history into a chat history.") | ||
| return [_to_msg(c) for c in all_ctx_events] | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought we default to unit and don't put this in, but at the moment I'm not sure if that applies to docs/examples.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pytest: unit in docs/examples/as_generic_chat_history.py is correct per AGENTS.md — examples require this opt-in comment to be collected.