fix(stdlib): address second-round review feedback

planetf1 · planetf1 · commit da41a06a0ce0 · 2026-04-28T20:32:35.000+01:00
Three items from the second independent review:

cancel_generation(error=) — accept an optional Exception parameter.
When the orchestrator enters its `except Exception` path, it now passes
the caught exception to cancel_generation() so the backend telemetry
span records the real cause via set_span_error instead of a generic
RuntimeError("Generation cancelled"). The original exception still
surfaces to the consumer via astream()/acomplete(); this is purely an
OpenTelemetry (OTEL) accuracy fix. Backward-compatible: the default None
preserves the previous "Generation cancelled" message for the normal
fail path.

stream_with_chunking docstring — the "After the stream ends (naturally
or via early exit), validate() is called" wording overstated behaviour.
The orchestrator actually skips final validate() on early exit
(test_early_exit_on_fail verifies final_validations == []). Docstring
now correctly says final validate() runs only on natural completion.

test_exception_in_stream_validate_cancels_generation docstring — in this
test stream_validate raises on chunk 1, so the queue never actually
fills; the test verifies the cancel-on-exception path and the no-hang
guarantee but does not directly prove the worst-case "producer blocked
on full queue" scenario. The docstring now states what the test actually
covers and points at test/core/ for the cancel_generation drain logic.

Assisted-by: Claude Code
diff --git a/mellea/core/base.py b/mellea/core/base.py
@@ -364,7 +364,7 @@ def _record_ttfb(self) -> None:
             ).total_seconds() * 1000
             self._first_chunk_received = True
 
-    async def cancel_generation(self) -> None:
+    async def cancel_generation(self, error: Exception | None = None) -> None:
         """Cancel an in-progress streaming generation, drain the queue, and close any open telemetry span.
 
         Safe to call at any point during streaming. After this method returns,
@@ -375,6 +375,14 @@ async def cancel_generation(self) -> None:
         Draining the internal queue after cancellation is necessary to release
         any ``asyncio.Queue.put()`` call that the generation task was blocked on
         (queue maxsize=20).
+
+        Args:
+            error: Optional cause attributed to the open telemetry span.  When
+                provided, this exception is recorded via ``set_span_error`` so
+                the span reflects the actual reason for cancellation (e.g. the
+                requirement failure or an unhandled exception from a streaming
+                validator).  When ``None``, a generic
+                ``RuntimeError("Generation cancelled")`` is recorded.
         """
         if self._computed:
             return
@@ -414,7 +422,10 @@ def _drain() -> None:
         if span is not None:
             from ..telemetry import end_backend_span, set_span_error
 
-            set_span_error(span, RuntimeError("Generation cancelled"))
+            recorded: Exception = (
+                error if error is not None else RuntimeError("Generation cancelled")
+            )
+            set_span_error(span, recorded)
             end_backend_span(span)
             del self._meta["_telemetry_span"]
 
diff --git a/mellea/stdlib/streaming.py b/mellea/stdlib/streaming.py
@@ -247,8 +247,10 @@ async def _validate_and_emit(c: str) -> bool:
         # otherwise mot._async_queue (maxsize=20) fills and the feeder task
         # blocks indefinitely. The spec (#891, #901) calls this out for the
         # "fail" path; the same reasoning applies to any unplanned exit.
+        # Pass `exc` so the backend telemetry span records the real cause
+        # rather than a generic "Generation cancelled".
         try:
-            await mot.cancel_generation()
+            await mot.cancel_generation(error=exc)
         except Exception as cleanup_exc:
             # Never let cleanup mask the original exception: log loudly and
             # continue to surface `exc` to the consumer.
@@ -304,9 +306,12 @@ async def stream_with_chunking(
     same terms as the regular chunks.  On early exit, the trailing fragment
     is discarded because the generation was cancelled mid-token.
 
-    After the stream ends (naturally or via early exit), ``validate()`` is
-    called on all requirements that did not return ``"fail"``.  Requirements
-    are cloned (``copy(req)``) before use so originals are never mutated.
+    After the stream ends naturally, ``validate()`` is called on every
+    requirement that did not return ``"fail"`` — both ``"pass"`` and
+    ``"unknown"`` trigger final validation.  On early exit, no ``validate()``
+    call is made; :attr:`StreamChunkingResult.final_validations` remains
+    empty.  Requirements are cloned (``copy(req)``) before use so originals
+    are never mutated.
 
     Requirements that need context beyond the current chunk should
     accumulate it themselves across ``stream_validate`` calls (e.g.
diff --git a/test/stdlib/test_streaming.py b/test/stdlib/test_streaming.py
@@ -678,10 +678,12 @@ async def validate(
     call_count = 0
     real_cancel = ModelOutputThunk.cancel_generation
 
-    async def spy_cancel(self: ModelOutputThunk) -> None:
+    async def spy_cancel(
+        self: ModelOutputThunk, error: Exception | None = None
+    ) -> None:
         nonlocal call_count
         call_count += 1
-        await real_cancel(self)
+        await real_cancel(self, error)
 
     ModelOutputThunk.cancel_generation = spy_cancel  # type: ignore[method-assign]
     try:
@@ -702,10 +704,16 @@ async def spy_cancel(self: ModelOutputThunk) -> None:
 
 @pytest.mark.asyncio
 async def test_exception_in_stream_validate_cancels_generation() -> None:
-    """If stream_validate raises, the orchestrator must still call
-    cancel_generation() — otherwise the backend producer blocks on the
-    (maxsize=20) queue — and surface the exception to the consumer via
-    astream()/acomplete()."""
+    """Verifies the orchestrator's exception-path cleanup: if stream_validate
+    raises, cancel_generation() is called and the exception surfaces to the
+    consumer via astream()/acomplete() without hanging.
+
+    This covers the cancel-on-exception path and the no-hang guarantee.
+    It does not directly exercise the worst-case "producer already blocked on
+    full queue" scenario (here stream_validate raises on chunk 1, so the queue
+    never fills); the cancel_generation drain logic is covered by its own
+    tests in test/core/.
+    """
 
     from mellea.core.base import ModelOutputThunk
 
@@ -736,10 +744,12 @@ async def validate(
     call_count = 0
     real_cancel = ModelOutputThunk.cancel_generation
 
-    async def spy_cancel(self: ModelOutputThunk) -> None:
+    async def spy_cancel(
+        self: ModelOutputThunk, error: Exception | None = None
+    ) -> None:
         nonlocal call_count
         call_count += 1
-        await real_cancel(self)
+        await real_cancel(self, error)
 
     ModelOutputThunk.cancel_generation = spy_cancel  # type: ignore[method-assign]
     try: