Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env_example
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ ADVERSARIAL_CHAT_ENDPOINT="https://xxxxx.openai.azure.com/openai/v1"
ADVERSARIAL_CHAT_KEY="xxxxx"
ADVERSARIAL_CHAT_MODEL="deployment-name"

# Objective Scorer chat target (used in scorers in scenarios)
Comment thread
behnam-o marked this conversation as resolved.
OBJECTIVE_SCORER_CHAT_ENDPOINT="https://xxxxx.openai.azure.com/openai/v1"
OBJECTIVE_SCORER_CHAT_KEY="xxxxx"
OBJECTIVE_SCORER_CHAT_MODEL="deployment-name"

AZURE_FOUNDRY_DEEPSEEK_ENDPOINT="https://xxxxx.eastus2.models.ai.azure.com"
AZURE_FOUNDRY_DEEPSEEK_KEY="xxxxx"
AZURE_FOUNDRY_DEEPSEEK_MODEL=""
Expand Down
5 changes: 3 additions & 2 deletions pyrit/scenario/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from pyrit.scenario.core.dataset_configuration import EXPLICIT_SEED_GROUPS_KEY, DatasetConfiguration
from pyrit.scenario.core.scenario import Scenario
from pyrit.scenario.core.scenario_strategy import ScenarioCompositeStrategy, ScenarioStrategy
from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target, get_default_scorer_target
from pyrit.scenario.core.scenario_techniques import (
SCENARIO_TECHNIQUES,
get_default_adversarial_target,
register_scenario_techniques,
)

Expand All @@ -28,6 +28,7 @@
"ScenarioCompositeStrategy",
"ScenarioStrategy",
"ScorerOverridePolicy",
"get_default_adversarial_target",
"register_scenario_techniques",
"get_default_scorer_target",
"get_default_adversarial_target",
]
63 changes: 55 additions & 8 deletions pyrit/scenario/core/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import uuid
from abc import ABC, abstractmethod
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast, get_origin

from tqdm.auto import tqdm
Expand All @@ -27,14 +28,20 @@
from pyrit.memory.memory_models import ScenarioResultEntry
from pyrit.models import AttackResult, SeedAttackGroup
from pyrit.models.scenario_result import ScenarioIdentifier, ScenarioResult
from pyrit.prompt_target import OpenAIChatTarget, PromptTarget
from pyrit.prompt_target import PromptTarget
from pyrit.prompt_target.common.target_requirements import TargetRequirements
from pyrit.registry import ScorerRegistry
from pyrit.registry.object_registries.scorer_registry import ScorerRegistry
from pyrit.scenario.core.atomic_attack import AtomicAttack
from pyrit.scenario.core.attack_technique import AttackTechnique
from pyrit.scenario.core.dataset_configuration import DatasetConfiguration
from pyrit.scenario.core.scenario_strategy import ScenarioStrategy
from pyrit.score import Scorer, SelfAskRefusalScorer, TrueFalseInverterScorer, TrueFalseScorer
from pyrit.scenario.core.scenario_target_defaults import get_default_scorer_target
from pyrit.score import Scorer, TrueFalseScorer
from pyrit.score.true_false.self_ask_refusal_scorer import SelfAskRefusalScorer
from pyrit.score.true_false.self_ask_true_false_scorer import SelfAskTrueFalseScorer
from pyrit.score.true_false.true_false_composite_scorer import TrueFalseCompositeScorer
from pyrit.score.true_false.true_false_inverter_scorer import TrueFalseInverterScorer
from pyrit.score.true_false.true_false_score_aggregator import TrueFalseScoreAggregator

if TYPE_CHECKING:
from pyrit.executor.attack.core.attack_config import AttackScoringConfig
Expand Down Expand Up @@ -107,6 +114,20 @@ class Scenario(ABC):
#: what the scenario needs. Validated in ``initialize_async`` once the target is supplied.
TARGET_REQUIREMENTS: ClassVar[TargetRequirements] = TargetRequirements()

@classmethod
def get_override_composite_scorer_questions_path(cls) -> Sequence[Path]:
    """
    Supply true/false question prompt files for a scenario-specific objective scorer.

    Subclasses override this hook to opt in to composite objective scoring. A
    non-empty result makes the default objective scorer one
    ``SelfAskTrueFalseScorer`` per path, AND-ed together with
    ``NOT(SelfAskRefusalScorer)``, instead of the scenario-level default.
    The base implementation opts out.

    Returns:
        Sequence[Path]: True/false question prompt paths; empty means "use the default scorer".
    """
    # Fresh list per call so callers can never share (and mutate) a common default.
    no_override: list[Path] = []
    return no_override

def __init__(
self,
*,
Expand Down Expand Up @@ -310,16 +331,42 @@ def _build_display_group(self, *, technique_name: str, seed_group_name: str) ->
return technique_name

def _get_default_objective_scorer(self) -> TrueFalseScorer:
    """
    Build the objective scorer used when the caller does not supply one.

    Resolution order:
        1. If ``get_override_composite_scorer_questions_path`` returns paths: a
           ``TrueFalseCompositeScorer`` (AND) of one ``SelfAskTrueFalseScorer`` per
           path plus an inverted ``SelfAskRefusalScorer`` backstop.
        2. Otherwise, the ``TrueFalseScorer`` registered under
           ``ScorerInitializerTags.DEFAULT_OBJECTIVE_SCORER``, if there is one.
        3. Otherwise, an inverted ``SelfAskRefusalScorer``.

    Scorers constructed here reuse the registered default scorer's chat target
    when it exposes one, and fall back to ``get_default_scorer_target()``.

    Returns:
        TrueFalseScorer: The resolved default objective scorer.
    """
    # Deferred import to avoid circular dependency.
    from pyrit.setup.initializers.components.scorers import ScorerInitializerTags

    # First check whether the registry has a default objective scorer; if so,
    # either the scorer itself or its chat target is used below.
    chat_target: PromptTarget | None = None
    registry_default_scorer: TrueFalseScorer | None = None
    entries = ScorerRegistry.get_registry_singleton().get_by_tag(tag=ScorerInitializerTags.DEFAULT_OBJECTIVE_SCORER)
    if entries and isinstance(entries[0].instance, TrueFalseScorer):
        registry_default_scorer = entries[0].instance
        chat_target = registry_default_scorer.get_chat_target()
        logger.info(f"The registry contains default objective scorer: {type(registry_default_scorer).__name__}")

    composite_scorer_questions_paths = type(self).get_override_composite_scorer_questions_path()

    # The registered scorer is returned as-is when the scenario has no question
    # overrides. Decide this before resolving a fallback chat target: that
    # resolution warns and constructs an OpenAIChatTarget when nothing is
    # registered, which is wasted (and misleading) work on this path.
    if not composite_scorer_questions_paths and registry_default_scorer:
        logger.info(f"Using registry default objective scorer: {type(registry_default_scorer).__name__}")
        return registry_default_scorer

    # From here on a scorer is always constructed, so a chat target is required.
    chat_target = chat_target or get_default_scorer_target()

    if composite_scorer_questions_paths:
        path_scorers: list[TrueFalseScorer] = [
            SelfAskTrueFalseScorer(chat_target=chat_target, true_false_question_path=path)
            for path in composite_scorer_questions_paths
        ]
        # Backstop: a refusal must count as a failed objective even if a question scorer says "true".
        backstop_scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))
        scorer = TrueFalseCompositeScorer(
            aggregator=TrueFalseScoreAggregator.AND,
            scorers=[*path_scorers, backstop_scorer],
        )
        logger.info(f"Using composite default objective scorer: {type(scorer).__name__}")
        return scorer

    scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))
    logger.warning(f"Using fallback default objective scorer: {type(scorer).__name__}")
    return scorer

def set_params_from_args(self, *, args: dict[str, Any]) -> None:
Expand Down
98 changes: 98 additions & 0 deletions pyrit/scenario/core/scenario_target_defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget
from pyrit.prompt_target.common.target_capabilities import CapabilityName
from pyrit.registry import TargetRegistry

logger = logging.getLogger(__name__)


def get_default_scorer_target() -> PromptChatTarget:
    """
    Resolve the default objective scorer chat target.

    First checks the ``TargetRegistry`` for an ``"objective_scorer_chat"`` entry
    (populated by ``TargetInitializer`` from ``OBJECTIVE_SCORER_CHAT_*`` env vars).
    Falls back to a plain ``OpenAIChatTarget`` when no such entry is registered.

    Returns:
        PromptChatTarget: The resolved objective scorer chat target.

    Raises:
        ValueError: If the registered target is not a ``PromptChatTarget``.
    """
    # No capabilities are requested here, so the only validation applied is the type check.
    return _get_default_chat_target(preferred_target_key="objective_scorer_chat")

Comment thread
behnam-o marked this conversation as resolved.

def get_default_adversarial_target() -> PromptChatTarget:
    """
    Resolve the default adversarial chat target.

    First checks the ``TargetRegistry`` for an ``"adversarial_chat"`` entry
    (populated by ``TargetInitializer`` from ``ADVERSARIAL_CHAT_*`` env vars).
    Falls back to ``OpenAIChatTarget(temperature=1.2)`` when no such entry is registered.

    Returns:
        PromptChatTarget: The resolved adversarial chat target.

    Raises:
        ValueError: If the registered target does not support multi-turn, or is
            not a ``PromptChatTarget``.
    """
    return _get_default_chat_target(
        preferred_target_key="adversarial_chat",
        # Adversarial chat drives a back-and-forth conversation, so multi-turn is mandatory.
        required_capabilities={CapabilityName.MULTI_TURN},
        fallback_temperature=1.2,
    )


def _get_default_chat_target(
    *,
    preferred_target_key: str,
    required_capabilities: set[CapabilityName] | None = None,
    fallback_temperature: float | None = None,
) -> PromptChatTarget:
    """
    Look up a chat target in ``TargetRegistry``, falling back to ``OpenAIChatTarget``.

    A registered entry is validated before it is returned: first against
    ``required_capabilities``, then for being a ``PromptChatTarget``. When the
    key is not registered, a warning is logged and a new ``OpenAIChatTarget``
    is built with ``fallback_temperature``.

    Args:
        preferred_target_key (str): TargetRegistry key to look up.
        required_capabilities (set[CapabilityName] | None): Capabilities a
            registered target must support; ``None`` or empty skips the check.
        fallback_temperature (float | None): Temperature passed to the fallback
            ``OpenAIChatTarget``.

    Returns:
        PromptChatTarget: The registered target, or the fallback target.

    Raises:
        ValueError: If the registered target lacks a required capability, or is
            not a ``PromptChatTarget``.
    """
    registered = TargetRegistry.get_registry_singleton().get(preferred_target_key)

    # Nothing registered under this key: hand back the generic fallback.
    if registered is None:
        logger.warning(
            f"TargetRegistry entry '{preferred_target_key}' not found. Falling back to default OpenAIChatTarget."
        )
        return OpenAIChatTarget(temperature=fallback_temperature)

    # Capabilities are validated before the type so a capability mismatch is reported first.
    for needed in required_capabilities or ():
        if not registered.capabilities.includes(capability=needed):
            raise ValueError(f"Registry entry '{preferred_target_key}' must support {needed.value}.")

    if isinstance(registered, PromptChatTarget):
        return registered

    raise ValueError(
        f"Registry entry '{preferred_target_key}' must be a PromptChatTarget, but got {type(registered).__name__}"
    )
41 changes: 5 additions & 36 deletions pyrit/scenario/core/scenario_techniques.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import inspect
import logging
from pathlib import Path
from typing import TYPE_CHECKING

from pyrit.common.path import EXECUTOR_SEED_PROMPT_PATH
from pyrit.executor.attack import (
Expand All @@ -35,13 +36,15 @@
)
from pyrit.models import SeedAttackTechniqueGroup, SeedSimulatedConversation
from pyrit.models.seeds.seed_simulated_conversation import NextMessageSystemPromptPaths
from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget
from pyrit.prompt_target.common.target_capabilities import CapabilityName
from pyrit.registry import TargetRegistry
from pyrit.registry.object_registries.attack_technique_registry import (
AttackTechniqueRegistry,
AttackTechniqueSpec,
)
from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target

if TYPE_CHECKING:
from pyrit.prompt_target import PromptChatTarget

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -104,40 +107,6 @@
]


# ---------------------------------------------------------------------------
# Default adversarial target
# ---------------------------------------------------------------------------


def get_default_adversarial_target() -> PromptChatTarget:
"""
Resolve the default adversarial chat target.

First checks the ``TargetRegistry`` for an ``"adversarial_chat"`` entry
(populated by ``TargetInitializer`` from ``ADVERSARIAL_CHAT_*`` env vars).
Falls back to a plain ``OpenAIChatTarget(temperature=1.2)`` using
``@apply_defaults`` resolution.

Returns:
PromptChatTarget: The resolved adversarial chat target.

Raises:
ValueError: If the registered target does not support multi-turn.
"""
registry = TargetRegistry.get_registry_singleton()
if "adversarial_chat" in registry:
target = registry.get("adversarial_chat")
if target:
if not target.capabilities.includes(capability=CapabilityName.MULTI_TURN):
raise ValueError(
f"Registry entry 'adversarial_chat' must support multi-turn conversations, "
f"but {type(target).__name__} does not."
)
return target

return OpenAIChatTarget(temperature=1.2)


# ---------------------------------------------------------------------------
# Runtime spec builder
# ---------------------------------------------------------------------------
Expand Down
51 changes: 13 additions & 38 deletions pyrit/scenario/scenarios/airt/cyber.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,14 @@

from pyrit.common import apply_defaults
from pyrit.common.path import SCORER_SEED_PROMPT_PATH
from pyrit.prompt_target import OpenAIChatTarget
from pyrit.scenario.core.dataset_configuration import DatasetConfiguration
from pyrit.scenario.core.scenario import Scenario
from pyrit.score import (
SelfAskRefusalScorer,
SelfAskTrueFalseScorer,
TrueFalseCompositeScorer,
TrueFalseInverterScorer,
TrueFalseScoreAggregator,
TrueFalseScorer,
)

if TYPE_CHECKING:
from pathlib import Path

from pyrit.scenario.core.scenario_strategy import ScenarioStrategy
from pyrit.score import TrueFalseScorer

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,6 +60,16 @@ class Cyber(Scenario):
VERSION: int = 2
_cached_strategy_class: ClassVar[type[ScenarioStrategy] | None] = None

@classmethod
def get_override_composite_scorer_questions_path(cls) -> list[Path]:
    """
    Provide the true/false question prompt used for cyber objective scoring.

    Returns:
        list[Path]: A single-element list holding the path to the malware
            true/false question prompt (``true_false_question/malware.yaml``
            under ``SCORER_SEED_PROMPT_PATH``).
    """
    return [SCORER_SEED_PROMPT_PATH / "true_false_question" / "malware.yaml"]

@classmethod
def get_strategy_class(cls) -> type[ScenarioStrategy]:
"""
Expand Down Expand Up @@ -128,32 +132,3 @@ def __init__(
include_default_baseline=include_baseline,
scenario_result_id=scenario_result_id,
)

def _get_default_objective_scorer(self) -> TrueFalseCompositeScorer:
"""
Retrieve the default objective scorer.

The scoring mechanism is as follows:
1. Detect for the presence of malware using a SelfAskTrueFalseScorer.
2. Detect for NON-refusal as a backstop. (If the target model refuses to produce malware, we want that
to register as a negative score).

The chat target is resolved from the base class default scorer if available,
otherwise falls back to OpenAIChatTarget().

Returns:
TrueFalseCompositeScorer: Default objective scorer with backstop and malware evaluation.
"""
base_scorer = super()._get_default_objective_scorer()
chat_target = base_scorer.get_chat_target() or OpenAIChatTarget()

presence_of_malware = SelfAskTrueFalseScorer(
chat_target=chat_target,
true_false_question_path=SCORER_SEED_PROMPT_PATH / "true_false_question" / "malware.yaml",
)

backstop = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))

return TrueFalseCompositeScorer(
aggregator=TrueFalseScoreAggregator.AND, scorers=[presence_of_malware, backstop]
)
Loading
Loading