Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 100 additions & 17 deletions pyrit/score/float_scale/self_ask_likert_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,36 +166,59 @@ def __init__(
self,
*,
chat_target: PromptChatTarget,
likert_scale: LikertScalePaths,
likert_scale: Optional[LikertScalePaths] = None,
custom_likert_path: Optional[Path] = None,
custom_system_prompt_path: Optional[Path] = None,
validator: Optional[ScorerPromptValidator] = None,
) -> None:
"""
Initialize the SelfAskLikertScorer.

Args:
chat_target (PromptChatTarget): The chat target to use for scoring.
likert_scale (LikertScalePaths): The Likert scale configuration to use for scoring.
likert_scale (Optional[LikertScalePaths]): The Likert scale configuration to use for scoring.
custom_likert_path (Optional[Path]): Path to a custom YAML file containing the Likert scale definition.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe this is cleaner here but wondering why we can't use the generic self ask scorer since you can add custom scales already there? this can take custom scales & scale definitions as a yaml or string

This allows users to use their own Likert scales without modifying the code, as long as
the YAML file follows the expected format. Only one of `likert_scale` or `custom_likert_path`
should be provided. Defaults to None.
custom_system_prompt_path (Optional[Path]): Path to a custom system prompt file. This allows users to
provide their own system prompt without modifying the code. Defaults to None.
validator (Optional[ScorerPromptValidator]): Custom validator for the scorer. Defaults to None.

Raises:
ValueError: If both `likert_scale` and `custom_likert_path` are provided, if neither is provided,
or if the provided Likert scale or system prompt YAML file is improperly formatted.
"""
super().__init__(validator=validator or self._DEFAULT_VALIDATOR)

self._prompt_target = chat_target
self._likert_scale = likert_scale

# Auto-set evaluation file mapping from the LikertScalePaths enum
if likert_scale.evaluation_files is not None:
from pyrit.score.scorer_evaluation.scorer_evaluator import (
ScorerEvalDatasetFiles,
)
if likert_scale is not None and custom_likert_path is not None:
raise ValueError("Only one of 'likert_scale' or 'custom_likert_path' should be provided, not both.")
if likert_scale is None and custom_likert_path is None:
raise ValueError("One of 'likert_scale' or 'custom_likert_path' must be provided.")
if custom_system_prompt_path is not None:
self._validate_custom_system_prompt_path(custom_system_prompt_path)
self._scoring_instructions_template = SeedPrompt.from_yaml_file(custom_system_prompt_path)
if likert_scale is not None:
# Auto-set evaluation file mapping from the LikertScalePaths enum
if likert_scale.evaluation_files is not None:
from pyrit.score.scorer_evaluation.scorer_evaluator import (
ScorerEvalDatasetFiles,
)

eval_files = likert_scale.evaluation_files
self.evaluation_file_mapping = ScorerEvalDatasetFiles(
human_labeled_datasets_files=eval_files.human_labeled_datasets_files,
result_file=eval_files.result_file,
harm_category=eval_files.harm_category,
)
eval_files = likert_scale.evaluation_files
self.evaluation_file_mapping = ScorerEvalDatasetFiles(
human_labeled_datasets_files=eval_files.human_labeled_datasets_files,
result_file=eval_files.result_file,
harm_category=eval_files.harm_category,
)

self._set_likert_scale_system_prompt(likert_scale_path=likert_scale.path)
self._set_likert_scale_system_prompt(likert_scale_path=likert_scale.path)
elif custom_likert_path is not None:
self._validate_custom_likert_path(custom_likert_path)
self._set_likert_scale_system_prompt(likert_scale_path=custom_likert_path)

def _build_identifier(self) -> ComponentIdentifier:
"""
Expand Down Expand Up @@ -268,9 +291,12 @@ def _set_likert_scale_system_prompt(self, likert_scale_path: Path) -> None:
f"but only a single unique value was found: {self._max_scale_value}."
)

self._scoring_instructions_template = SeedPrompt.from_yaml_file(
SCORER_LIKERT_PATH / "likert_system_prompt.yaml"
)
# Only load the default system prompt template if a custom one wasn't already
# set via custom_system_prompt_path in __init__.
if not hasattr(self, "_scoring_instructions_template"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we set to None & avoid using hasattr()?

ie if self._scoring_instructions_template is None:
or pass in as a parameter?

self._scoring_instructions_template = SeedPrompt.from_yaml_file(
SCORER_LIKERT_PATH / "likert_system_prompt.yaml"
)

self._system_prompt = self._scoring_instructions_template.render_template_value(
likert_scale=likert_scale_str,
Expand Down Expand Up @@ -337,6 +363,63 @@ def _likert_scale_description_to_string(self, descriptions: list[dict[str, str]]

return likert_scale_description

@staticmethod
def _validate_custom_system_prompt_path(custom_system_prompt_path: Path) -> None:
"""
Validate the custom system prompt path.

Checks that the file exists, has a YAML extension, and contains the required
template parameters (category, likert_scale, min_scale_value, max_scale_value)
that the Likert scorer needs to render the system prompt.

Args:
custom_system_prompt_path (Path): Path to the custom system prompt YAML file.

Raises:
FileNotFoundError: If the file does not exist.
ValueError: If the file is not a YAML file or is missing required template parameters.
"""
if not custom_system_prompt_path.exists():
raise FileNotFoundError(f"Custom system prompt file not found: '{custom_system_prompt_path}'")
if custom_system_prompt_path.suffix not in (".yaml", ".yml"):
raise ValueError(
f"Custom system prompt file must be a YAML file (.yaml or .yml), "
f"got '{custom_system_prompt_path.suffix}'."
)

# Validate the template contains all required parameters used by the Likert scorer.
SeedPrompt.from_yaml_with_required_parameters(
template_path=custom_system_prompt_path,
required_parameters=["category", "likert_scale", "min_scale_value", "max_scale_value"],
error_message=(
"Custom system prompt YAML must define parameters: "
"category, likert_scale, min_scale_value, max_scale_value"
),
)

@staticmethod
def _validate_custom_likert_path(custom_likert_path: Path) -> None:
"""
Validate the custom Likert scale path.

Performs basic path checks (existence and YAML extension). Deeper content
validation (category, scale_descriptions structure, score values) is handled
by ``_set_likert_scale_system_prompt`` when the file is actually parsed.

Args:
custom_likert_path (Path): Path to the custom Likert scale YAML file.

Raises:
FileNotFoundError: If the file does not exist.
ValueError: If the file is not a YAML file.
"""
if not custom_likert_path.exists():
raise FileNotFoundError(f"Custom Likert scale file not found: '{custom_likert_path}'")
if custom_likert_path.suffix not in (".yaml", ".yml"):
raise ValueError(
f"Custom Likert scale file must be a YAML file (.yaml or .yml), got '{custom_likert_path.suffix}'."
)

async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
"""
Score the given message_piece using "self-ask" for the chat target.
Expand Down
102 changes: 102 additions & 0 deletions tests/unit/score/test_self_ask_likert.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,3 +403,105 @@ def test_likert_scale_missing_score_value_key_rejected(tmp_path: Path):
chat_target=chat_target,
likert_scale=LikertScalePaths.CYBER_SCALE,
)


# ---------------------------------------------------------------------------
# custom_likert_path and custom_system_prompt_path tests
# ---------------------------------------------------------------------------


def _make_custom_system_prompt_yaml(tmp_path: Path, *, include_all_params: bool = True) -> Path:
"""Create a custom system prompt YAML file for testing."""
params = ["category", "likert_scale", "min_scale_value", "max_scale_value"] if include_all_params else ["category"]
prompt_data = {
"name": "custom test prompt",
"description": "test",
"parameters": params,
"data_type": "text",
"value": "Custom prompt for {{category}} with scale {{likert_scale}} "
"from {{min_scale_value}} to {{max_scale_value}}."
if include_all_params
else "Only {{category}}.",
}
yaml_file = tmp_path / "custom_system_prompt.yaml"
yaml_file.write_text(yaml.safe_dump(prompt_data), encoding="utf-8")
return yaml_file


def test_custom_likert_path_creates_scorer(tmp_path: Path):
"""Verify that passing custom_likert_path (instead of a LikertScalePaths enum) works."""
memory = MagicMock(MemoryInterface)
with patch.object(CentralMemory, "get_memory_instance", return_value=memory):
chat_target = MagicMock()
chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

custom_path = _make_custom_scale_yaml(tmp_path, category="custom_cat", min_val=0, max_val=3)
scorer = SelfAskLikertScorer(chat_target=chat_target, custom_likert_path=custom_path)

assert scorer._min_scale_value == 0
assert scorer._max_scale_value == 3
assert scorer._score_category == "custom_cat"


def test_custom_likert_path_file_not_found():
"""Verify that a non-existent custom_likert_path raises FileNotFoundError."""
memory = MagicMock(MemoryInterface)
with patch.object(CentralMemory, "get_memory_instance", return_value=memory):
chat_target = MagicMock()
chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

with pytest.raises(FileNotFoundError, match="Custom Likert scale file not found"):
SelfAskLikertScorer(chat_target=chat_target, custom_likert_path=Path("/does/not/exist.yaml"))


def test_custom_likert_path_non_yaml_rejected(tmp_path: Path):
"""Verify that a non-YAML custom_likert_path raises ValueError."""
bad_file = tmp_path / "scale.txt"
bad_file.write_text("not yaml", encoding="utf-8")

memory = MagicMock(MemoryInterface)
with patch.object(CentralMemory, "get_memory_instance", return_value=memory):
chat_target = MagicMock()
chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

with pytest.raises(ValueError, match="must be a YAML file"):
SelfAskLikertScorer(chat_target=chat_target, custom_likert_path=bad_file)


def test_custom_system_prompt_path_used_in_system_prompt(tmp_path: Path):
"""Verify that a custom system prompt template is rendered instead of the default."""
memory = MagicMock(MemoryInterface)
with patch.object(CentralMemory, "get_memory_instance", return_value=memory):
chat_target = MagicMock()
chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

custom_prompt_path = _make_custom_system_prompt_yaml(tmp_path)
custom_likert_path = _make_custom_scale_yaml(tmp_path, category="test_cat", min_val=1, max_val=5)

scorer = SelfAskLikertScorer(
chat_target=chat_target,
custom_likert_path=custom_likert_path,
custom_system_prompt_path=custom_prompt_path,
)

# The system prompt should come from the custom template, not the default one
assert "Custom prompt for test_cat" in scorer._system_prompt
assert "from 1 to 5" in scorer._system_prompt

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: maybe add unit tests for all the conditions of the scale/yaml - ie both likert & custom provided gives error, test missing both parameters gives error, file not found, non yaml/malformed?


def test_custom_system_prompt_missing_params_rejected(tmp_path: Path):
"""Verify that a custom system prompt missing required parameters raises ValueError."""
memory = MagicMock(MemoryInterface)
with patch.object(CentralMemory, "get_memory_instance", return_value=memory):
chat_target = MagicMock()
chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

bad_prompt_path = _make_custom_system_prompt_yaml(tmp_path, include_all_params=False)
custom_likert_path = _make_custom_scale_yaml(tmp_path)

with pytest.raises(ValueError, match="Custom system prompt YAML must define parameters"):
SelfAskLikertScorer(
chat_target=chat_target,
custom_likert_path=custom_likert_path,
custom_system_prompt_path=bad_prompt_path,
)
Loading