Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyrit/executor/attack/multi_turn/crescendo.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,9 @@ async def _check_refusal_async(self, context: CrescendoAttackContext, objective:
objective=context.objective,
):
scores = await self._refusal_scorer.score_async(
message=context.last_response, objective=objective, skip_on_error_result=False
message=context.last_response,
objective=objective,
skip_on_error_result=False,
)
return scores[0]

Expand Down
10 changes: 10 additions & 0 deletions pyrit/executor/attack/multi_turn/tree_of_attacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,11 @@ def __init__(
corresponding score is assigned instead of invoking the scorer. This prevents
premature branch pruning when targets return blocked/filtered responses.
Defaults to {"blocked": 0.0}. Pass an empty dict to disable.

Note: This check runs before the scorer, so if ``score_blocked_content``
is set on the objective scorer, it will have no effect for error types
present in this map. To evaluate partial content from blocked responses,
pass ``error_score_map={}`` to disable the early-return.
"""
# Store configuration
self._objective_target = objective_target
Expand Down Expand Up @@ -1398,6 +1403,11 @@ def __init__(
content policy violations from image generation targets). Defaults to
{"blocked": 0.0}. Pass an empty dict to disable.

Note: This check runs before the scorer, so if ``score_blocked_content``
is set on the objective scorer, it will have no effect for error types
present in this map. To evaluate partial content from blocked responses,
pass ``error_score_map={}`` to disable the early-return.

Raises:
ValueError: If attack_scoring_config uses a non-FloatScaleThresholdScorer objective scorer,
if the adversarial target does not natively support the capabilities TAP needs,
Expand Down
20 changes: 20 additions & 0 deletions pyrit/prompt_target/openai/openai_chat_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,26 @@ def _check_content_filter(self, response: Any) -> bool:
pass
return False

def _extract_partial_content(self, response: Any) -> Optional[str]:
"""
Extract partial content from a Chat Completions response with finish_reason=content_filter.

When Azure Content Safety triggers mid-generation, the model may have produced partial
text in ``response.choices[0].message.content`` before being cut off.

Args:
response: A ChatCompletion object from the OpenAI SDK.

Returns:
The partial text content, or None if no content was generated.
"""
try:
if response.choices and response.choices[0].message and response.choices[0].message.content:
return response.choices[0].message.content
except (AttributeError, IndexError):
pass
return None

def _validate_response(self, response: Any, request: MessagePiece) -> Optional[Message]:
"""
Validate a Chat Completions API response for errors.
Expand Down
30 changes: 29 additions & 1 deletion pyrit/prompt_target/openai/openai_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ def _handle_content_filter_response(self, response: Any, request: MessagePiece)
"""
Handle content filter errors by creating a proper error Message.

If the subclass provides partial content via ``_extract_partial_content``,
it is attached to each response piece as ``prompt_metadata["partial_content"]``
so that scorers with ``score_blocked_content=True`` can evaluate it.

Args:
response: The response object from OpenAI SDK.
request: The original request message piece.
Expand All @@ -567,13 +571,37 @@ def _handle_content_filter_response(self, response: Any, request: MessagePiece)
Message object with error type indicating content was filtered.
"""
logger.warning("Output content filtered by content policy.")
return handle_bad_request_exception(

partial_content = self._extract_partial_content(response)

error_message = handle_bad_request_exception(
response_text=response.model_dump_json(),
request=request,
error_code=200,
is_content_filter=True,
)

if partial_content:
Comment thread
jsong468 marked this conversation as resolved.
for piece in error_message.message_pieces:
piece.prompt_metadata["partial_content"] = partial_content

return error_message

def _extract_partial_content(self, response: Any) -> Optional[str]:
"""
Extract any partial content the model generated before the content filter triggered.

Override this in subclasses to extract partial content from API-specific response
structures. The base implementation returns None (no partial content).

Args:
response: The response object from OpenAI SDK.

Returns:
The partial text content, or None if no content was generated.
"""
return None

def _validate_response(self, response: Any, request: MessagePiece) -> Optional[Message]:
"""
Validate the response and return error Message if needed.
Expand Down
12 changes: 11 additions & 1 deletion pyrit/score/conversation_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,17 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
# Only include user and assistant messages in the conversation text
if piece.api_role in ["user", "assistant", "tool"]:
role_display = "Assistant (simulated)" if piece.is_simulated else piece.api_role.capitalize()
conversation_text += f"{role_display}: {piece.converted_value}\n"
# For blocked pieces with partial content, use the partial content
# instead of the error JSON when score_blocked_content is enabled
if (
self.score_blocked_content
and piece.is_blocked()
and piece.prompt_metadata.get("partial_content")
):
text = str(piece.prompt_metadata["partial_content"])
else:
text = piece.converted_value
conversation_text += f"{role_display}: {text}\n"

# Create a new message with the concatenated conversation text
# Preserve the original message piece metadata
Expand Down
97 changes: 93 additions & 4 deletions pyrit/score/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ class Scorer(Identifiable, abc.ABC):

_identifier: Optional[ComponentIdentifier] = None

#: When True, blocked responses that contain partial content
#: (in prompt_metadata["partial_content"]) will be scored using that content
#: instead of being filtered out or short-circuited.
#: Set this on scorer instances before use. Defaults to False.
#:
#: Note: This attribute will only take effect if the target supports partial content extraction when content
#: filters are triggered (e.g., Chat Completions model via OpenAIChatTarget).
score_blocked_content: bool = False

def __init__(self, *, validator: ScorerPromptValidator, chat_target: Optional[PromptTarget] = None) -> None:
"""
Initialize the Scorer.
Expand Down Expand Up @@ -174,7 +183,9 @@ async def score_async(
role_filter (Optional[ChatMessageRole]): Only score messages with this exact stored role.
Use "assistant" to score only real assistant responses, or "simulated_assistant"
to score only simulated responses. Defaults to None (no filtering).
skip_on_error_result (bool): If True, skip scoring if the message contains an error. Defaults to False.
skip_on_error_result (bool): If True, skip scoring if the message contains an error.
When self.score_blocked_content is also True, blocked responses with partial content
will still be scored instead of skipping. Defaults to False.
infer_objective_from_request (bool): If True, infer the objective from the message's previous request
when objective is not provided. Defaults to False.

Expand All @@ -192,15 +203,25 @@ async def score_async(
return []

if skip_on_error_result and message.is_error():
logger.debug("Skipping scoring due to error in message and skip_on_error=True.")
return []
# When score_blocked_content is enabled and the message has partial content,
# don't skip — let _score_async handle the substitution.
has_partial = any(
p.prompt_metadata.get("partial_content") for p in message.message_pieces if p.is_blocked()
)
if not (self.score_blocked_content and has_partial):
logger.debug("Skipping scoring due to error in message and skip_on_error=True.")
return []

if infer_objective_from_request and (not objective):
objective = self._extract_objective_from_response(message)

# When score_blocked_content is enabled, create a modified message where blocked pieces
# with partial content are replaced with text-type substitutes (response_error="none").
scoring_message = self._apply_blocked_content_substitution(message) if self.score_blocked_content else message

try:
scores = await self._score_async(
message,
scoring_message,
objective=objective,
)
except PyritException as e:
Expand Down Expand Up @@ -253,6 +274,74 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
raise NotImplementedError

@staticmethod
def _create_text_piece_from_blocked(piece: MessagePiece) -> Optional[MessagePiece]:
    """
    Build a text-typed substitute for a blocked MessagePiece from its partial content.

    The copy keeps the original piece's id (so scores still link back), carries
    the partial content as converted_value with converted_value_data_type="text",
    and marks response_error="none" so blocked-response short-circuits (e.g. the
    refusal scorer's blocked check) do not fire on the substitute.

    Args:
        piece: A blocked MessagePiece with prompt_metadata["partial_content"].

    Returns:
        MessagePiece with text content, or None if partial content is empty.
    """
    text = str(piece.prompt_metadata.get("partial_content", ""))
    if not text:
        return None

    return MessagePiece(
        id=piece.id,
        role=piece.api_role,
        original_value=piece.original_value,
        converted_value=text,
        original_value_data_type=piece.original_value_data_type,
        converted_value_data_type="text",
        conversation_id=piece.conversation_id,
        sequence=piece.sequence,
        labels=piece.labels,
        prompt_metadata=piece.prompt_metadata,
        converter_identifiers=list(piece.converter_identifiers),  # type: ignore[arg-type]
        prompt_target_identifier=piece.prompt_target_identifier,
        attack_identifier=piece.attack_identifier,
        response_error="none",
        timestamp=piece.timestamp,
    )

def _apply_blocked_content_substitution(self, message: Message) -> Message:
    """
    Return a copy of ``message`` with blocked pieces swapped for their partial content.

    Any blocked piece carrying prompt_metadata["partial_content"] is replaced by a
    text-typed copy (response_error="none", converted_value set to the partial
    content). Everything else — non-blocked pieces and blocked pieces without
    usable partial content — passes through untouched.

    Args:
        message: The original message potentially containing blocked pieces.

    Returns:
        A new Message with substituted pieces, or the original if nothing changed.
    """
    replacements = 0
    pieces: list[MessagePiece] = []
    for original_piece in message.message_pieces:
        substitute = None
        if original_piece.is_blocked() and "partial_content" in original_piece.prompt_metadata:
            substitute = self._create_text_piece_from_blocked(original_piece)
        if substitute:
            pieces.append(substitute)
            replacements += 1
        else:
            # No substitution possible (or not blocked) — keep the piece as-is.
            pieces.append(original_piece)

    # Avoid allocating a new Message when nothing was actually replaced.
    return Message(message_pieces=pieces) if replacements else message

def _get_supported_pieces(self, message: Message) -> list[MessagePiece]:
"""
Get a list of supported message pieces for this scorer.
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/prompt_target/target/test_openai_chat_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -1596,6 +1596,62 @@ async def test_save_audio_response_async_pcm16_format(patch_central_database):
assert result == "/path/to/saved/audio.wav"


# ── _extract_partial_content tests ──────────────────────────────────────────


class TestExtractPartialContentChatTarget:
    """Unit tests for OpenAIChatTarget._extract_partial_content."""

    def test_extracts_partial_content_from_content_filter_response(self, target: OpenAIChatTarget):
        completion = create_mock_completion(
            content="Partial harmful content before cutoff", finish_reason="content_filter"
        )
        assert target._extract_partial_content(completion) == "Partial harmful content before cutoff"

    def test_returns_none_when_no_content(self, target: OpenAIChatTarget):
        completion = create_mock_completion(content=None, finish_reason="content_filter")
        assert target._extract_partial_content(completion) is None

    def test_returns_none_when_empty_content(self, target: OpenAIChatTarget):
        completion = create_mock_completion(content="", finish_reason="content_filter")
        assert target._extract_partial_content(completion) is None

    def test_returns_none_when_no_choices(self, target: OpenAIChatTarget):
        completion = MagicMock(spec=ChatCompletion)
        completion.choices = []
        assert target._extract_partial_content(completion) is None


class TestContentFilterPreservesPartialContent:
    """Integration-style tests: a 200 response with finish_reason=content_filter
    must surface any partial model output via prompt_metadata["partial_content"].

    Note: this block contained stray review-thread artifacts ("Comment thread",
    "jsong468 marked this conversation as resolved.") pasted into the code; they
    have been removed so the class is syntactically valid.
    """

    async def test_200_content_filter_attaches_partial_content_metadata(self, target: OpenAIChatTarget):
        """Integration: 200 + content_filter response preserves partial content in metadata."""
        message = Message(
            message_pieces=[MessagePiece(role="user", conversation_id="test-convo", original_value="test prompt")]
        )
        mock_completion = create_mock_completion(content="Harmful partial content here", finish_reason="content_filter")
        target._async_client.chat.completions.create = AsyncMock(return_value=mock_completion)  # type: ignore[method-assign]

        response = await target.send_prompt_async(message=message)

        assert response[0].message_pieces[0].response_error == "blocked"
        assert response[0].message_pieces[0].prompt_metadata["partial_content"] == "Harmful partial content here"

    async def test_200_content_filter_no_metadata_when_no_content(self, target: OpenAIChatTarget):
        """200 + content_filter with no content doesn't attach metadata."""
        message = Message(
            message_pieces=[MessagePiece(role="user", conversation_id="test-convo", original_value="test prompt")]
        )
        mock_completion = create_mock_completion(content=None, finish_reason="content_filter")
        target._async_client.chat.completions.create = AsyncMock(return_value=mock_completion)  # type: ignore[method-assign]

        response = await target.send_prompt_async(message=message)

        assert response[0].message_pieces[0].response_error == "blocked"
        assert "partial_content" not in response[0].message_pieces[0].prompt_metadata


async def test_save_audio_response_async_flac_format(patch_central_database):
"""Test saving audio response with flac format."""
audio_config = OpenAIChatAudioConfig(voice="alloy", audio_format="flac")
Expand Down
Loading
Loading