diff --git a/.env_example b/.env_example index fdf9d715e1..b925fb097c 100644 --- a/.env_example +++ b/.env_example @@ -79,6 +79,11 @@ ADVERSARIAL_CHAT_ENDPOINT="https://xxxxx.openai.azure.com/openai/v1" ADVERSARIAL_CHAT_KEY="xxxxx" ADVERSARIAL_CHAT_MODEL="deployment-name" +# Objective Scorer chat target (used in scorers in scenarios) +OBJECTIVE_SCORER_CHAT_ENDPOINT="https://xxxxx.openai.azure.com/openai/v1" +OBJECTIVE_SCORER_CHAT_KEY="xxxxx" +OBJECTIVE_SCORER_CHAT_MODEL="deployment-name" + AZURE_FOUNDRY_DEEPSEEK_ENDPOINT="https://xxxxx.eastus2.models.ai.azure.com" AZURE_FOUNDRY_DEEPSEEK_KEY="xxxxx" AZURE_FOUNDRY_DEEPSEEK_MODEL="" diff --git a/pyrit/scenario/core/__init__.py b/pyrit/scenario/core/__init__.py index 06304d3715..b1d3247857 100644 --- a/pyrit/scenario/core/__init__.py +++ b/pyrit/scenario/core/__init__.py @@ -10,9 +10,9 @@ from pyrit.scenario.core.dataset_configuration import EXPLICIT_SEED_GROUPS_KEY, DatasetConfiguration from pyrit.scenario.core.scenario import Scenario from pyrit.scenario.core.scenario_strategy import ScenarioCompositeStrategy, ScenarioStrategy +from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target, get_default_scorer_target from pyrit.scenario.core.scenario_techniques import ( SCENARIO_TECHNIQUES, - get_default_adversarial_target, register_scenario_techniques, ) @@ -28,6 +28,7 @@ "ScenarioCompositeStrategy", "ScenarioStrategy", "ScorerOverridePolicy", - "get_default_adversarial_target", "register_scenario_techniques", + "get_default_scorer_target", + "get_default_adversarial_target", ] diff --git a/pyrit/scenario/core/scenario.py b/pyrit/scenario/core/scenario.py index 089bed4267..95ee8dd227 100644 --- a/pyrit/scenario/core/scenario.py +++ b/pyrit/scenario/core/scenario.py @@ -16,6 +16,7 @@ import uuid from abc import ABC, abstractmethod from collections.abc import Sequence +from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast, get_origin from tqdm.auto 
import tqdm @@ -27,14 +28,20 @@ from pyrit.memory.memory_models import ScenarioResultEntry from pyrit.models import AttackResult, SeedAttackGroup from pyrit.models.scenario_result import ScenarioIdentifier, ScenarioResult -from pyrit.prompt_target import OpenAIChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget from pyrit.prompt_target.common.target_requirements import TargetRequirements -from pyrit.registry import ScorerRegistry +from pyrit.registry.object_registries.scorer_registry import ScorerRegistry from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.attack_technique import AttackTechnique from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario_strategy import ScenarioStrategy -from pyrit.score import Scorer, SelfAskRefusalScorer, TrueFalseInverterScorer, TrueFalseScorer +from pyrit.scenario.core.scenario_target_defaults import get_default_scorer_target +from pyrit.score import Scorer, TrueFalseScorer +from pyrit.score.true_false.self_ask_refusal_scorer import SelfAskRefusalScorer +from pyrit.score.true_false.self_ask_true_false_scorer import SelfAskTrueFalseScorer +from pyrit.score.true_false.true_false_composite_scorer import TrueFalseCompositeScorer +from pyrit.score.true_false.true_false_inverter_scorer import TrueFalseInverterScorer +from pyrit.score.true_false.true_false_score_aggregator import TrueFalseScoreAggregator if TYPE_CHECKING: from pyrit.executor.attack.core.attack_config import AttackScoringConfig @@ -107,6 +114,20 @@ class Scenario(ABC): #: what the scenario needs. Validated in ``initialize_async`` once the target is supplied. TARGET_REQUIREMENTS: ClassVar[TargetRequirements] = TargetRequirements() + @classmethod + def get_override_composite_scorer_questions_path(cls) -> Sequence[Path]: + """ + Override to provide true/false question prompt paths for objective scoring. 
def _get_default_objective_scorer(self) -> TrueFalseScorer:
    """
    Build the objective scorer used when the caller did not supply one.

    Resolution order:
        1. If ``get_override_composite_scorer_questions_path`` returns a non-empty
           sequence, build a ``TrueFalseCompositeScorer`` that ANDs one
           ``SelfAskTrueFalseScorer`` per path with ``NOT(SelfAskRefusalScorer)``.
        2. Otherwise, if the ``ScorerRegistry`` holds a ``TrueFalseScorer`` tagged
           ``DEFAULT_OBJECTIVE_SCORER``, return that instance unchanged.
        3. Otherwise return ``NOT(SelfAskRefusalScorer)`` as a last-resort fallback.

    Scorers built in steps 1 and 3 use the registry scorer's own chat target when it
    exposes one, and ``get_default_scorer_target()`` otherwise. The chat target is
    resolved only on those two paths, so returning the registry scorer as-is (step 2)
    never constructs a fallback target or logs a missing-target warning.

    Returns:
        TrueFalseScorer: The resolved default objective scorer.
    """
    # Deferred import to avoid circular dependency.
    from pyrit.setup.initializers.components.scorers import ScorerInitializerTags

    # Look for a registered default objective scorer; only the first tagged entry is considered.
    registry_default_scorer: TrueFalseScorer | None = None
    entries = ScorerRegistry.get_registry_singleton().get_by_tag(tag=ScorerInitializerTags.DEFAULT_OBJECTIVE_SCORER)
    if entries and isinstance(entries[0].instance, TrueFalseScorer):
        registry_default_scorer = entries[0].instance
        logger.info(f"The registry contains default objective scorer: {type(registry_default_scorer).__name__}")

    composite_scorer_questions_paths = type(self).get_override_composite_scorer_questions_path()

    # No scenario-specific questions and a registered scorer exists: use it untouched.
    # This exit comes before any chat-target resolution on purpose.
    if not composite_scorer_questions_paths and registry_default_scorer is not None:
        logger.info(f"Using registry default objective scorer: {type(registry_default_scorer).__name__}")
        return registry_default_scorer

    # From here on a new scorer is built, so a chat target is actually required.
    chat_target = registry_default_scorer.get_chat_target() if registry_default_scorer is not None else None
    if chat_target is None:
        chat_target = get_default_scorer_target()

    if composite_scorer_questions_paths:
        path_scorers: list[TrueFalseScorer] = [
            SelfAskTrueFalseScorer(chat_target=chat_target, true_false_question_path=path)
            for path in composite_scorer_questions_paths
        ]
        # Backstop: a refusal must never count as the objective being achieved.
        backstop_scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))
        scorer = TrueFalseCompositeScorer(
            aggregator=TrueFalseScoreAggregator.AND,
            scorers=[*path_scorers, backstop_scorer],
        )
        logger.info(f"Using composite default objective scorer: {type(scorer).__name__}")
        return scorer

    scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))
    logger.warning(f"Using fallback default objective scorer: {type(scorer).__name__}")
    return scorer
def get_default_adversarial_target() -> PromptChatTarget:
    """
    Return the chat target used on the adversarial side of an attack.

    Delegates to ``_get_default_chat_target`` with the ``"adversarial_chat"``
    registry key, requires ``CapabilityName.MULTI_TURN``, and requests
    ``temperature=1.2`` for the fallback ``OpenAIChatTarget``.

    Returns:
        PromptChatTarget: The resolved adversarial chat target.

    Raises:
        ValueError: If the registered target does not support multi-turn or is
            not a ``PromptChatTarget``.
    """
    multi_turn_only = {CapabilityName.MULTI_TURN}
    return _get_default_chat_target(
        preferred_target_key="adversarial_chat",
        required_capabilities=multi_turn_only,
        fallback_temperature=1.2,
    )


def _get_default_chat_target(
    *,
    preferred_target_key: str,
    required_capabilities: set[CapabilityName] | None = None,
    fallback_temperature: float | None = None,
) -> PromptChatTarget:
    """
    Look up a chat target in ``TargetRegistry``, falling back to ``OpenAIChatTarget``.

    When the registry has an entry under ``preferred_target_key`` it is validated
    (capabilities first, then type) and returned. When it has none, a warning is
    logged and a new ``OpenAIChatTarget`` is constructed with ``fallback_temperature``.

    Args:
        preferred_target_key (str): Registry key to look up.
        required_capabilities (set[CapabilityName] | None): Capabilities the
            registered target must report; ``None`` or empty skips the check.
        fallback_temperature (float | None): Temperature passed to the fallback
            ``OpenAIChatTarget``.

    Returns:
        PromptChatTarget: The registered target, or the fallback target.

    Raises:
        ValueError: If the registered target lacks a required capability.
        ValueError: If the registered target is not a ``PromptChatTarget``.
    """
    registered = TargetRegistry.get_registry_singleton().get(preferred_target_key)

    # Nothing registered under this key: warn and hand back a fresh default target.
    if registered is None:
        logger.warning(
            f"TargetRegistry entry '{preferred_target_key}' not found. Falling back to default OpenAIChatTarget."
        )
        return OpenAIChatTarget(temperature=fallback_temperature)

    # Capability validation runs before the type check; the first missing one raises.
    for needed in required_capabilities or ():
        if registered.capabilities.includes(capability=needed):
            continue
        raise ValueError(f"Registry entry '{preferred_target_key}' must support {needed.value}.")

    if isinstance(registered, PromptChatTarget):
        return registered

    raise ValueError(
        f"Registry entry '{preferred_target_key}' must be a PromptChatTarget, but got {type(registered).__name__}"
    )
--------------------------------------------------------------------------- -# Default adversarial target -# --------------------------------------------------------------------------- - - -def get_default_adversarial_target() -> PromptChatTarget: - """ - Resolve the default adversarial chat target. - - First checks the ``TargetRegistry`` for an ``"adversarial_chat"`` entry - (populated by ``TargetInitializer`` from ``ADVERSARIAL_CHAT_*`` env vars). - Falls back to a plain ``OpenAIChatTarget(temperature=1.2)`` using - ``@apply_defaults`` resolution. - - Returns: - PromptChatTarget: The resolved adversarial chat target. - - Raises: - ValueError: If the registered target does not support multi-turn. - """ - registry = TargetRegistry.get_registry_singleton() - if "adversarial_chat" in registry: - target = registry.get("adversarial_chat") - if target: - if not target.capabilities.includes(capability=CapabilityName.MULTI_TURN): - raise ValueError( - f"Registry entry 'adversarial_chat' must support multi-turn conversations, " - f"but {type(target).__name__} does not." 
- ) - return target - - return OpenAIChatTarget(temperature=1.2) - - # --------------------------------------------------------------------------- # Runtime spec builder # --------------------------------------------------------------------------- diff --git a/pyrit/scenario/scenarios/airt/cyber.py b/pyrit/scenario/scenarios/airt/cyber.py index 7ff874be1b..4f865507da 100644 --- a/pyrit/scenario/scenarios/airt/cyber.py +++ b/pyrit/scenario/scenarios/airt/cyber.py @@ -8,20 +8,14 @@ from pyrit.common import apply_defaults from pyrit.common.path import SCORER_SEED_PROMPT_PATH -from pyrit.prompt_target import OpenAIChatTarget from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario -from pyrit.score import ( - SelfAskRefusalScorer, - SelfAskTrueFalseScorer, - TrueFalseCompositeScorer, - TrueFalseInverterScorer, - TrueFalseScoreAggregator, - TrueFalseScorer, -) if TYPE_CHECKING: + from pathlib import Path + from pyrit.scenario.core.scenario_strategy import ScenarioStrategy + from pyrit.score import TrueFalseScorer logger = logging.getLogger(__name__) @@ -66,6 +60,16 @@ class Cyber(Scenario): VERSION: int = 2 _cached_strategy_class: ClassVar[type[ScenarioStrategy] | None] = None + @classmethod + def get_override_composite_scorer_questions_path(cls) -> list[Path]: + """ + Override true/false question paths for cyber objective scoring. + + Returns: + Sequence[Path]: Paths to true/false question paths for cyber objective scoring. + """ + return [SCORER_SEED_PROMPT_PATH / "true_false_question" / "malware.yaml"] + @classmethod def get_strategy_class(cls) -> type[ScenarioStrategy]: """ @@ -128,32 +132,3 @@ def __init__( include_default_baseline=include_baseline, scenario_result_id=scenario_result_id, ) - - def _get_default_objective_scorer(self) -> TrueFalseCompositeScorer: - """ - Retrieve the default objective scorer. - - The scoring mechanism is as follows: - 1. 
Detect for the presence of malware using a SelfAskTrueFalseScorer. - 2. Detect for NON-refusal as a backstop. (If the target model refuses to produce malware, we want that - to register as a negative score). - - The chat target is resolved from the base class default scorer if available, - otherwise falls back to OpenAIChatTarget(). - - Returns: - TrueFalseCompositeScorer: Default objective scorer with backstop and malware evaluation. - """ - base_scorer = super()._get_default_objective_scorer() - chat_target = base_scorer.get_chat_target() or OpenAIChatTarget() - - presence_of_malware = SelfAskTrueFalseScorer( - chat_target=chat_target, - true_false_question_path=SCORER_SEED_PROMPT_PATH / "true_false_question" / "malware.yaml", - ) - - backstop = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target)) - - return TrueFalseCompositeScorer( - aggregator=TrueFalseScoreAggregator.AND, scorers=[presence_of_malware, backstop] - ) diff --git a/pyrit/scenario/scenarios/airt/jailbreak.py b/pyrit/scenario/scenarios/airt/jailbreak.py index 1870fdc0cf..69673e766c 100644 --- a/pyrit/scenario/scenarios/airt/jailbreak.py +++ b/pyrit/scenario/scenarios/airt/jailbreak.py @@ -1,11 +1,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import os from pathlib import Path from typing import Any, Optional, Union -from pyrit.auth import get_azure_openai_auth from pyrit.common import apply_defaults from pyrit.datasets import TextJailBreak from pyrit.executor.attack.core.attack_config import ( @@ -20,12 +18,13 @@ from pyrit.models import SeedAttackGroup from pyrit.prompt_converter import TextJailbreakConverter from pyrit.prompt_normalizer import PromptConverterConfiguration -from pyrit.prompt_target import OpenAIChatTarget +from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.attack_technique import AttackTechnique from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario from pyrit.scenario.core.scenario_strategy import ScenarioStrategy +from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target from pyrit.score import ( TrueFalseScorer, ) @@ -162,7 +161,7 @@ def __init__( self._num_templates = num_templates self._num_attempts = num_attempts - self._adversarial_target: Optional[OpenAIChatTarget] = None + self._adversarial_target: Optional[PromptChatTarget] = None # Note that num_templates and jailbreak_names are mutually exclusive. # If self._num_templates is None, then this returns all discoverable jailbreak templates. @@ -191,33 +190,18 @@ def __init__( # Will be resolved in _get_atomic_attacks_async self._seed_groups: Optional[list[SeedAttackGroup]] = None - def _create_adversarial_target(self) -> OpenAIChatTarget: - """ - Create a new adversarial target instance. - - Returns: - OpenAIChatTarget: A fresh adversarial target using an unfiltered endpoint. 
def _get_or_create_adversarial_target(self) -> PromptChatTarget:
    """
    Return the scenario's shared adversarial chat target, resolving it lazily.

    The first call stores the result of ``get_default_adversarial_target()`` on
    ``self._adversarial_target``; every later call returns that same instance.

    Returns:
        PromptChatTarget: The shared adversarial target.
    """
    cached = self._adversarial_target
    if cached is not None:
        return cached

    # First access: resolve once and remember it for subsequent callers.
    resolved = get_default_adversarial_target()
    self._adversarial_target = resolved
    return resolved
@classmethod
def get_override_composite_scorer_questions_path(cls) -> list[Path]:
    """
    Supply the true/false question prompt used for leakage objective scoring.

    Returns:
        list[Path]: A single-element list with the path to ``leakage.yaml`` in the
            ``true_false_question`` directory under ``SCORER_SEED_PROMPT_PATH``.
    """
    question_dir = SCORER_SEED_PROMPT_PATH / "true_false_question"
    return [question_dir / "leakage.yaml"]
- """ - base_scorer = super()._get_default_objective_scorer() - chat_target = base_scorer.get_chat_target() or OpenAIChatTarget() - - presence_of_leakage = SelfAskTrueFalseScorer( - chat_target=chat_target, - true_false_question_path=SCORER_SEED_PROMPT_PATH / "true_false_question" / "leakage.yaml", - ) - - backstop = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target)) - - return TrueFalseCompositeScorer( - aggregator=TrueFalseScoreAggregator.AND, scorers=[presence_of_leakage, backstop] - ) - def _get_attack_technique_factories(self) -> dict[str, AttackTechniqueFactory]: """ Return core + leakage-specific attack technique factories. diff --git a/pyrit/scenario/scenarios/airt/psychosocial.py b/pyrit/scenario/scenarios/airt/psychosocial.py index 8a0fc924b9..308e6db50e 100644 --- a/pyrit/scenario/scenarios/airt/psychosocial.py +++ b/pyrit/scenario/scenarios/airt/psychosocial.py @@ -2,14 +2,12 @@ # Licensed under the MIT license. import logging -import os import pathlib from dataclasses import dataclass from typing import Any, Optional, TypeVar import yaml -from pyrit.auth import get_azure_openai_auth from pyrit.common import apply_defaults from pyrit.common.path import DATASETS_PATH from pyrit.executor.attack import ( @@ -27,7 +25,7 @@ from pyrit.prompt_normalizer.prompt_converter_configuration import ( PromptConverterConfiguration, ) -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import PromptChatTarget from pyrit.prompt_target.common.target_capabilities import CapabilityName from pyrit.prompt_target.common.target_requirements import TargetRequirements from pyrit.scenario.core.atomic_attack import AtomicAttack @@ -37,6 +35,7 @@ from pyrit.scenario.core.scenario_strategy import ( ScenarioStrategy, ) +from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target, get_default_scorer_target from pyrit.score import ( FloatScaleScorer, FloatScaleThresholdScorer, @@ -253,7 
+252,7 @@ def __init__( "objectives is deprecated and will be removed in a future version. " "Use dataset_config in initialize_async instead." ) - self._adversarial_chat = adversarial_chat if adversarial_chat else self._get_default_adversarial_target() + self._adversarial_chat = adversarial_chat if adversarial_chat else get_default_adversarial_target() # Merge user-provided configs with defaults (user-provided takes precedence) self._subharm_configs = {**self.DEFAULT_SUBHARM_CONFIGS, **(subharm_configs or {})} @@ -356,21 +355,6 @@ def _filter_by_harm_category( filtered_groups.append(SeedAttackGroup(seeds=filtered_seeds)) return filtered_groups - def _get_default_adversarial_target(self) -> OpenAIChatTarget: - """ - Create default adversarial chat target for multi-turn attacks. - - Returns: - OpenAIChatTarget: Default adversarial target, using an unfiltered endpoint. - """ - endpoint = os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT") - return OpenAIChatTarget( - endpoint=endpoint, - api_key=get_azure_openai_auth(endpoint or ""), - model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"), - temperature=0.7, - ) - def _get_scorer(self, subharm: Optional[str] = None) -> FloatScaleThresholdScorer: """ Create scorer for psychosocial harms evaluation. 
@@ -403,16 +387,11 @@ def _get_scorer(self, subharm: Optional[str] = None) -> FloatScaleThresholdScore # Extract the 'value' field which contains the actual rubric text psychosocial_harm_rubric = yaml_data["value"] - endpoint = os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT") - azure_openai_chat_target = OpenAIChatTarget( - endpoint=endpoint, - api_key=get_azure_openai_auth(endpoint or ""), - model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"), - ) + scorer_target = get_default_scorer_target() # Create the base crisis scorer psych_scorer = SelfAskGeneralFloatScaleScorer( - chat_target=azure_openai_chat_target, + chat_target=scorer_target, system_prompt_format_string=psychosocial_harm_rubric, rationale_output_key="reasoning", # Match the YAML JSON schema key category="psychosocial_harm", diff --git a/pyrit/scenario/scenarios/airt/scam.py b/pyrit/scenario/scenarios/airt/scam.py index e714b73a57..a3c9f45de6 100644 --- a/pyrit/scenario/scenarios/airt/scam.py +++ b/pyrit/scenario/scenarios/airt/scam.py @@ -2,11 +2,9 @@ # Licensed under the MIT license. 
@classmethod
def get_override_composite_scorer_questions_path(cls) -> list[Path]:
    """
    Supply the true/false question prompt used for scam objective scoring.

    Returns:
        list[Path]: A single-element list with the path to ``scams.yaml`` in the
            ``true_false_question`` directory under ``SCORER_SEED_PROMPT_PATH``.
    """
    question_dir = SCORER_SEED_PROMPT_PATH / "true_false_question"
    return [question_dir / "scams.yaml"]
- """ - endpoint = os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT") - scam_materials = SelfAskTrueFalseScorer( - chat_target=OpenAIChatTarget( - endpoint=endpoint, - api_key=get_azure_openai_auth(endpoint or ""), - model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"), - temperature=0.9, - ), - true_false_question_path=SCORER_SEED_PROMPT_PATH / "true_false_question" / "scams.yaml", - ) - - backstop = TrueFalseInverterScorer( - scorer=SelfAskRefusalScorer( - chat_target=OpenAIChatTarget( - endpoint=endpoint, - api_key=get_azure_openai_auth(endpoint or ""), - model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"), - ) - ) - ) - - return TrueFalseCompositeScorer(aggregator=TrueFalseScoreAggregator.AND, scorers=[scam_materials, backstop]) - - def _get_default_adversarial_target(self) -> OpenAIChatTarget: - """ - Provide an OpenAI target for the role-play rephrasing step. - - Returns: - OpenAIChatTarget: Target that supplies the persuasion script rephrasing. - """ - endpoint = os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT") - return OpenAIChatTarget( - endpoint=endpoint, - api_key=get_azure_openai_auth(endpoint or ""), - model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"), - temperature=1.2, - ) - def _resolve_seed_groups(self) -> list[SeedAttackGroup]: """ Resolve seed groups from dataset configuration. 
diff --git a/pyrit/scenario/scenarios/foundry/red_team_agent.py b/pyrit/scenario/scenarios/foundry/red_team_agent.py index a875e186a5..6b8f271d9b 100644 --- a/pyrit/scenario/scenarios/foundry/red_team_agent.py +++ b/pyrit/scenario/scenarios/foundry/red_team_agent.py @@ -10,13 +10,11 @@ """ import logging -import os from collections.abc import Sequence from dataclasses import dataclass, field from inspect import signature from typing import TYPE_CHECKING, Any, Optional, TypeVar, cast -from pyrit.auth import get_azure_openai_auth from pyrit.common import REQUIRED_VALUE, apply_defaults from pyrit.datasets import TextJailBreak from pyrit.executor.attack import ( @@ -62,12 +60,12 @@ ) from pyrit.prompt_target import PromptTarget from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget -from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.attack_technique import AttackTechnique from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario from pyrit.scenario.core.scenario_strategy import ScenarioCompositeStrategy, ScenarioStrategy +from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target if TYPE_CHECKING: from pyrit.executor.attack.core.attack_strategy import AttackStrategy @@ -270,7 +268,7 @@ def __init__( Raises: ValueError: If attack_strategies is empty or contains unsupported strategies. 
""" - self._adversarial_chat = adversarial_chat if adversarial_chat else self._get_default_adversarial_target() + self._adversarial_chat = adversarial_chat if adversarial_chat else get_default_adversarial_target() if not attack_scoring_config: attack_scoring_config = AttackScoringConfig(objective_scorer=self._get_default_objective_scorer()) self._attack_scoring_config = attack_scoring_config @@ -426,15 +424,6 @@ async def _get_atomic_attacks_async(self) -> list[AtomicAttack]: return [self._get_attack_from_strategy(composition) for composition in self._scenario_composites] - def _get_default_adversarial_target(self) -> OpenAIChatTarget: - endpoint = os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT") - return OpenAIChatTarget( - endpoint=endpoint, - api_key=get_azure_openai_auth(endpoint or ""), - model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"), - temperature=1.2, - ) - def _get_attack_from_strategy(self, composite: FoundryComposite) -> AtomicAttack: """ Get an atomic attack for the specified FoundryComposite. diff --git a/pyrit/setup/initializers/components/scorers.py b/pyrit/setup/initializers/components/scorers.py index 3b2d3aee6c..92c4899313 100644 --- a/pyrit/setup/initializers/components/scorers.py +++ b/pyrit/setup/initializers/components/scorers.py @@ -76,6 +76,8 @@ class ScorerInitializerTags(str, Enum): # Target registry names used by scorer configurations. +MAIN_SCORER_TARGET: str = "objective_scorer_chat" +FALLBACK_SCORER_TARGET: str = "openai_chat" GPT4O_TARGET: str = "azure_openai_gpt4o" GPT4O_TEMP0_TARGET: str = "azure_openai_gpt4o_temp0" GPT4O_TEMP9_TARGET: str = "azure_openai_gpt4o_temp9" @@ -86,6 +88,8 @@ class ScorerInitializerTags(str, Enum): GPT5_1_TARGET: str = "azure_openai_gpt5_1" # Scorer registry names. 
+MAIN: str = "main" +FALLBACK: str = "fallback" REFUSAL_GPT4O_OBJECTIVE_STRICT: str = "refusal_gpt4o_objective_strict" REFUSAL_GPT4O_OBJECTIVE_LENIENT: str = "refusal_gpt4o_objective_lenient" REFUSAL_GPT4O_NO_OBJECTIVE_STRICT: str = "refusal_gpt4o_no_objective_strict" @@ -173,6 +177,7 @@ async def initialize_async(self) -> None: "Ensure TargetInitializer is included in the initializers list." ) + self._register_fallback_scorers() self._register_refusal_scorers() self._register_scale_scorers() self._register_acs_threshold_scorers() @@ -187,6 +192,27 @@ async def initialize_async(self) -> None: # Core scorer registration # --------------------------------------------------------------------------- + def _register_fallback_scorers(self) -> None: + """ + Register scorers used as fallback in scenarios. + """ + main = self._get_chat_target(MAIN_SCORER_TARGET) + fallback = self._get_chat_target(FALLBACK_SCORER_TARGET) + self._try_register( + name=MAIN, + factory=lambda: TrueFalseInverterScorer( + scorer=SelfAskRefusalScorer(chat_target=self._require_dependency(main, name=MAIN_SCORER_TARGET)) + ), + required_targets=[main], + ) + self._try_register( + name=FALLBACK, + factory=lambda: TrueFalseInverterScorer( + scorer=SelfAskRefusalScorer(chat_target=self._require_dependency(fallback, name=FALLBACK_SCORER_TARGET)) + ), + required_targets=[fallback], + ) + def _register_refusal_scorers(self) -> None: """ Register base refusal scorer variants and tag the best one. 
diff --git a/pyrit/setup/initializers/components/targets.py b/pyrit/setup/initializers/components/targets.py index 055e5c40d1..3a8049ca3a 100644 --- a/pyrit/setup/initializers/components/targets.py +++ b/pyrit/setup/initializers/components/targets.py @@ -180,6 +180,15 @@ class TargetConfig: temperature=1.2, tags=[TargetInitializerTags.DEFAULT, TargetInitializerTags.ADVERSARIAL], ), + TargetConfig( + registry_name="objective_scorer_chat", + target_class=OpenAIChatTarget, + endpoint_var="OBJECTIVE_SCORER_CHAT_ENDPOINT", + key_var="OBJECTIVE_SCORER_CHAT_KEY", + model_var="OBJECTIVE_SCORER_CHAT_MODEL", + underlying_model_var="OBJECTIVE_SCORER_CHAT_UNDERLYING_MODEL", + tags=[TargetInitializerTags.DEFAULT, TargetInitializerTags.SCORER], + ), TargetConfig( registry_name="azure_foundry_deepseek", target_class=OpenAIChatTarget, diff --git a/tests/unit/scenario/test_foundry.py b/tests/unit/scenario/test_foundry.py index ab1995e947..7811bf4144 100644 --- a/tests/unit/scenario/test_foundry.py +++ b/tests/unit/scenario/test_foundry.py @@ -91,18 +91,29 @@ def mock_float_threshold_scorer(): return mock -@pytest.mark.usefixtures("patch_central_database") -class TestFoundryInitialization: - """Tests for RedTeamAgent initialization.""" - - @patch.dict( +@pytest.fixture +def mock_runtime_env(): + with patch.dict( "os.environ", { "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", + "OPENAI_CHAT_ENDPOINT": "https://test.openai.azure.com/", + "OPENAI_CHAT_KEY": "test-key", + "OPENAI_CHAT_MODEL": "gpt-4", }, - ) + ): + yield + + +FIXTURES = ["patch_central_database", "mock_runtime_env"] + + +@pytest.mark.usefixtures(*FIXTURES) +class TestFoundryInitialization: + """Tests for RedTeamAgent initialization.""" + async def test_init_with_single_strategy( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -120,14 +131,6 
@@ async def test_init_with_single_strategy( assert scenario.atomic_attack_count > 0 assert scenario.name == "RedTeamAgent" - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_init_with_multiple_strategies( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -150,14 +153,6 @@ async def test_init_with_multiple_strategies( ) assert scenario.atomic_attack_count >= len(strategies) - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) def test_init_with_custom_adversarial_target( self, mock_objective_target, mock_adversarial_target, mock_objective_scorer ): @@ -169,14 +164,6 @@ def test_init_with_custom_adversarial_target( assert scenario._adversarial_chat == mock_adversarial_target - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) def test_init_with_custom_scorer(self, mock_objective_target, mock_objective_scorer): """Test initialization with custom objective scorer.""" scenario = RedTeamAgent( @@ -185,14 +172,6 @@ def test_init_with_custom_scorer(self, mock_objective_target, mock_objective_sco assert scenario._attack_scoring_config.objective_scorer == mock_objective_scorer - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_init_with_memory_labels( self, mock_objective_target, mock_objective_scorer, 
mock_memory_seed_groups, mock_dataset_config ): @@ -215,14 +194,6 @@ async def test_init_with_memory_labels( assert scenario._memory_labels == memory_labels @patch("pyrit.scenario.core.scenario.Scenario._get_default_objective_scorer") - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) def test_init_creates_default_scorer_when_not_provided( self, mock_get_scorer, mock_objective_target, mock_memory_seed_groups ): @@ -240,14 +211,6 @@ def test_init_creates_default_scorer_when_not_provided( # seed_groups are resolved lazily during _get_atomic_attacks_async assert scenario._attack_scoring_config.objective_scorer == mock_scorer_instance - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_init_raises_exception_when_no_datasets_available(self, mock_objective_target, mock_objective_scorer): """Test that initialization raises ValueError when datasets are not available in memory.""" # Don't mock _resolve_seed_groups, let it try to load from empty memory @@ -258,18 +221,10 @@ async def test_init_raises_exception_when_no_datasets_available(self, mock_objec await scenario.initialize_async(objective_target=mock_objective_target) -@pytest.mark.usefixtures("patch_central_database") +@pytest.mark.usefixtures(*FIXTURES) class TestFoundryStrategyNormalization: """Tests for attack strategy normalization.""" - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_normalize_easy_strategies( self, mock_objective_target, mock_objective_scorer, 
mock_memory_seed_groups, mock_dataset_config ): @@ -287,14 +242,6 @@ async def test_normalize_easy_strategies( # EASY should expand to multiple attack strategies assert scenario.atomic_attack_count > 1 - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_normalize_moderate_strategies( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -312,14 +259,6 @@ async def test_normalize_moderate_strategies( # MODERATE should expand to moderate attack strategies (currently only 1: Tense) assert scenario.atomic_attack_count >= 1 - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_normalize_difficult_strategies( self, mock_objective_target, mock_float_threshold_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -338,14 +277,6 @@ async def test_normalize_difficult_strategies( # DIFFICULT should expand to multiple attack strategies assert scenario.atomic_attack_count > 1 - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_normalize_mixed_difficulty_levels( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -363,14 +294,6 @@ async def test_normalize_mixed_difficulty_levels( # Combined difficulty levels should expand to multiple strategies assert scenario.atomic_attack_count > 5 # EASY has 20, MODERATE has 1, combined should have more - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": 
"https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_normalize_with_specific_and_difficulty_levels( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -392,18 +315,10 @@ async def test_normalize_with_specific_and_difficulty_levels( assert scenario.atomic_attack_count >= 20 -@pytest.mark.usefixtures("patch_central_database") +@pytest.mark.usefixtures(*FIXTURES) class TestFoundryAttackCreation: """Tests for attack creation from strategies.""" - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_get_attack_from_single_turn_strategy( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -426,14 +341,6 @@ async def test_get_attack_from_single_turn_strategy( assert isinstance(atomic_attack, AtomicAttack) assert atomic_attack.seed_groups == mock_memory_seed_groups - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_get_attack_from_multi_turn_strategy( self, mock_objective_target, @@ -463,18 +370,10 @@ async def test_get_attack_from_multi_turn_strategy( assert atomic_attack.seed_groups == mock_memory_seed_groups -@pytest.mark.usefixtures("patch_central_database") +@pytest.mark.usefixtures(*FIXTURES) class TestFoundryGetAttack: """Tests for the _get_attack method.""" - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def 
test_get_attack_single_turn_with_converters( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -497,14 +396,6 @@ async def test_get_attack_single_turn_with_converters( assert isinstance(attack, PromptSendingAttack) - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_get_attack_multi_turn_with_adversarial_target( self, mock_objective_target, @@ -534,18 +425,10 @@ async def test_get_attack_multi_turn_with_adversarial_target( assert isinstance(attack, CrescendoAttack) -@pytest.mark.usefixtures("patch_central_database") +@pytest.mark.usefixtures(*FIXTURES) class TestFoundryAllStrategies: """Tests that all strategies can be instantiated.""" - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) @pytest.mark.parametrize( "strategy", [ @@ -592,14 +475,6 @@ async def test_all_single_turn_strategies_create_attack_runs( atomic_attack = scenario._get_attack_from_strategy(composite_strategy) assert isinstance(atomic_attack, AtomicAttack) - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) @pytest.mark.parametrize( "strategy", [ @@ -635,18 +510,10 @@ async def test_all_multi_turn_strategies_create_attack_runs( assert isinstance(atomic_attack, AtomicAttack) -@pytest.mark.usefixtures("patch_central_database") +@pytest.mark.usefixtures(*FIXTURES) class TestFoundryProperties: """Tests for RedTeamAgent properties and attributes.""" - @patch.dict( - "os.environ", - { - 
"AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_scenario_composites_set_after_initialize( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -672,14 +539,6 @@ async def test_scenario_composites_set_after_initialize( assert len(scenario._scenario_composites) == len(strategies) assert scenario.atomic_attack_count == len(strategies) - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) def test_scenario_version_is_set(self, mock_objective_target, mock_objective_scorer): """Test that scenario version is properly set.""" scenario = RedTeamAgent( @@ -688,14 +547,6 @@ def test_scenario_version_is_set(self, mock_objective_target, mock_objective_sco assert scenario.VERSION == 1 - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_scenario_atomic_attack_count_matches_strategies( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -719,14 +570,6 @@ async def test_scenario_atomic_attack_count_matches_strategies( # Should have at least as many runs as specific strategies provided assert scenario.atomic_attack_count >= len(strategies) - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_initialize_with_foundry_composite_directly( self, mock_objective_target, mock_objective_scorer, 
mock_memory_seed_groups, mock_dataset_config ): @@ -750,14 +593,6 @@ async def test_initialize_with_foundry_composite_directly( assert result.converters == [FoundryStrategy.Base64] assert result.name == "ComposedStrategy(crescendo, base64)" - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) async def test_initialize_with_mixed_composites_and_strategies( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config ): @@ -780,14 +615,6 @@ async def test_initialize_with_mixed_composites_and_strategies( assert scenario._scenario_composites[1].attack is None assert scenario._scenario_composites[1].converters == [FoundryStrategy.ROT13] - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) @pytest.mark.filterwarnings("ignore::DeprecationWarning") async def test_initialize_converts_scenario_composite_strategy_to_foundry_composite( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config @@ -811,14 +638,6 @@ async def test_initialize_converts_scenario_composite_strategy_to_foundry_compos assert result.attack == FoundryStrategy.Crescendo assert result.converters == [FoundryStrategy.Base64] - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) @pytest.mark.filterwarnings("ignore::DeprecationWarning") async def test_initialize_converts_converter_first_composite_strategy( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config @@ -841,14 +660,6 @@ async 
def test_initialize_converts_converter_first_composite_strategy( assert result.attack == FoundryStrategy.Crescendo assert result.converters == [FoundryStrategy.Base64] - @patch.dict( - "os.environ", - { - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT": "https://test.openai.azure.com/", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY": "test-key", - "AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL": "gpt-4", - }, - ) @pytest.mark.filterwarnings("ignore::DeprecationWarning") async def test_initialize_converts_converter_only_composite_strategy( self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups, mock_dataset_config diff --git a/tests/unit/scenario/test_jailbreak.py b/tests/unit/scenario/test_jailbreak.py index c873465c6b..1ef7c2090b 100644 --- a/tests/unit/scenario/test_jailbreak.py +++ b/tests/unit/scenario/test_jailbreak.py @@ -15,7 +15,7 @@ from pyrit.executor.attack.single_turn.skeleton_key import SkeletonKeyAttack from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedGroup, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget from pyrit.scenario.scenarios.airt.jailbreak import Jailbreak, JailbreakStrategy from pyrit.score.true_false.true_false_inverter_scorer import TrueFalseInverterScorer @@ -447,11 +447,13 @@ async def test_no_target_duplication_async( class TestJailbreakAdversarialTarget: """Tests for adversarial target creation and caching.""" - def test_create_adversarial_target_returns_openai_chat_target(self) -> None: - """Test that _create_adversarial_target returns a new OpenAIChatTarget.""" + def test_get_or_create_adversarial_target_returns_prompt_chat_target(self) -> None: + """Test that _get_or_create_adversarial_target returns a PromptChatTarget.""" + from pyrit.prompt_target import PromptChatTarget + scenario = Jailbreak() - target = scenario._create_adversarial_target() - assert isinstance(target, OpenAIChatTarget) + target = 
scenario._get_or_create_adversarial_target() + assert isinstance(target, PromptChatTarget) def test_get_or_create_adversarial_target_reuses_instance(self) -> None: """Test that _get_or_create_adversarial_target returns the same instance on repeated calls.""" diff --git a/tests/unit/scenario/test_rapid_response.py b/tests/unit/scenario/test_rapid_response.py index 0c53e4ac9f..985e80dbf2 100644 --- a/tests/unit/scenario/test_rapid_response.py +++ b/tests/unit/scenario/test_rapid_response.py @@ -656,7 +656,7 @@ def test_get_default_adversarial_target_capability_check(self): mock_target = MagicMock(spec=PromptTarget) mock_target.capabilities.includes.return_value = False target_registry.register(name="adversarial_chat", instance=mock_target) - with pytest.raises(ValueError, match="must support multi-turn"): + with pytest.raises(ValueError, match="must support"): get_default_adversarial_target() diff --git a/tests/unit/scenario/test_scenario.py b/tests/unit/scenario/test_scenario.py index bbead38407..303bc168f1 100644 --- a/tests/unit/scenario/test_scenario.py +++ b/tests/unit/scenario/test_scenario.py @@ -854,12 +854,16 @@ def test_returns_registry_scorer_when_tagged(self, mock_registry_cls) -> None: mock_registry.get_by_tag.return_value = [mock_entry] mock_registry_cls.get_registry_singleton.return_value = mock_registry - result = Scenario._get_default_objective_scorer(MagicMock()) + # Mock self with get_override_composite_scorer_questions_path returning empty sequence + mock_self = MagicMock() + type(mock_self).get_override_composite_scorer_questions_path = classmethod(lambda cls: []) + + result = Scenario._get_default_objective_scorer(mock_self) assert result is mock_scorer - @patch("pyrit.scenario.core.scenario.OpenAIChatTarget") + @patch("pyrit.scenario.core.scenario.get_default_scorer_target") @patch("pyrit.scenario.core.scenario.ScorerRegistry") - def test_returns_fallback_when_registry_empty(self, mock_registry_cls, mock_oai_target) -> None: + def 
test_returns_fallback_when_registry_empty(self, mock_registry_cls, mock_get_scorer_target) -> None: """Test fallback to TrueFalseInverterScorer when no tagged scorer exists.""" from pyrit.score import TrueFalseInverterScorer @@ -867,7 +871,11 @@ def test_returns_fallback_when_registry_empty(self, mock_registry_cls, mock_oai_ mock_registry.get_by_tag.return_value = [] mock_registry_cls.get_registry_singleton.return_value = mock_registry - result = Scenario._get_default_objective_scorer(MagicMock()) + # Mock self with get_override_composite_scorer_questions_path returning empty sequence + mock_self = MagicMock() + type(mock_self).get_override_composite_scorer_questions_path = classmethod(lambda cls: []) + + result = Scenario._get_default_objective_scorer(mock_self) assert isinstance(result, TrueFalseInverterScorer) diff --git a/tests/unit/setup/test_scenarios_initializer.py b/tests/unit/setup/test_scenarios_initializer.py index 4d9cfa841f..d56414279b 100644 --- a/tests/unit/setup/test_scenarios_initializer.py +++ b/tests/unit/setup/test_scenarios_initializer.py @@ -235,12 +235,11 @@ async def test_idempotent(self, mock_adversarial_target): @pytest.mark.asyncio async def test_falls_back_to_default_target_when_registry_empty(self): """With no 'adversarial_chat' in TargetRegistry, the fallback constructs an OpenAIChatTarget.""" - # Patch OpenAIChatTarget at the import site inside scenario_techniques - # (which is what get_default_adversarial_target calls), so the test does - # not depend on OPENAI_CHAT_MODEL or any other env var being set. + # Patch OpenAIChatTarget at the fallback construction site so the test + # does not depend on OPENAI_CHAT_MODEL or any other env var being set. fallback_target = MagicMock(spec=PromptChatTarget) with patch( - "pyrit.scenario.core.scenario_techniques.OpenAIChatTarget", + "pyrit.scenario.core.scenario_target_defaults.OpenAIChatTarget", return_value=fallback_target, ) as mock_openai: init = ScenarioTechniqueInitializer()