-
Notifications
You must be signed in to change notification settings - Fork 699
MAINT: Allow custom Likert system prompt and scale #1514
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -166,36 +166,59 @@ def __init__( | |
| self, | ||
| *, | ||
| chat_target: PromptChatTarget, | ||
| likert_scale: LikertScalePaths, | ||
| likert_scale: Optional[LikertScalePaths] = None, | ||
| custom_likert_path: Optional[Path] = None, | ||
| custom_system_prompt_path: Optional[Path] = None, | ||
| validator: Optional[ScorerPromptValidator] = None, | ||
| ) -> None: | ||
| """ | ||
| Initialize the SelfAskLikertScorer. | ||
|
|
||
| Args: | ||
| chat_target (PromptChatTarget): The chat target to use for scoring. | ||
| likert_scale (LikertScalePaths): The Likert scale configuration to use for scoring. | ||
| likert_scale (Optional[LikertScalePaths]): The Likert scale configuration to use for scoring. | ||
| custom_likert_path (Optional[Path]): Path to a custom YAML file containing the Likert scale definition. | ||
| This allows users to use their own Likert scales without modifying the code, as long as | ||
| the YAML file follows the expected format. Only one of `likert_scale` or `custom_likert_path` | ||
| should be provided. Defaults to None. | ||
| custom_system_prompt_path (Optional[Path]): Path to a custom system prompt file. This allows users to | ||
| provide their own system prompt without modifying the code. Defaults to None. | ||
| validator (Optional[ScorerPromptValidator]): Custom validator for the scorer. Defaults to None. | ||
|
|
||
| Raises: | ||
| ValueError: If both `likert_scale` and `custom_likert_path` are provided, if neither is provided, | ||
| or if the provided Likert scale or system prompt YAML file is improperly formatted. | ||
| """ | ||
| super().__init__(validator=validator or self._DEFAULT_VALIDATOR) | ||
|
|
||
| self._prompt_target = chat_target | ||
| self._likert_scale = likert_scale | ||
|
|
||
| # Auto-set evaluation file mapping from the LikertScalePaths enum | ||
| if likert_scale.evaluation_files is not None: | ||
| from pyrit.score.scorer_evaluation.scorer_evaluator import ( | ||
| ScorerEvalDatasetFiles, | ||
| ) | ||
| if likert_scale is not None and custom_likert_path is not None: | ||
| raise ValueError("Only one of 'likert_scale' or 'custom_likert_path' should be provided, not both.") | ||
| if likert_scale is None and custom_likert_path is None: | ||
| raise ValueError("One of 'likert_scale' or 'custom_likert_path' must be provided.") | ||
| if custom_system_prompt_path is not None: | ||
| self._validate_custom_system_prompt_path(custom_system_prompt_path) | ||
| self._scoring_instructions_template = SeedPrompt.from_yaml_file(custom_system_prompt_path) | ||
| if likert_scale is not None: | ||
| # Auto-set evaluation file mapping from the LikertScalePaths enum | ||
| if likert_scale.evaluation_files is not None: | ||
| from pyrit.score.scorer_evaluation.scorer_evaluator import ( | ||
| ScorerEvalDatasetFiles, | ||
| ) | ||
|
|
||
| eval_files = likert_scale.evaluation_files | ||
| self.evaluation_file_mapping = ScorerEvalDatasetFiles( | ||
| human_labeled_datasets_files=eval_files.human_labeled_datasets_files, | ||
| result_file=eval_files.result_file, | ||
| harm_category=eval_files.harm_category, | ||
| ) | ||
| eval_files = likert_scale.evaluation_files | ||
| self.evaluation_file_mapping = ScorerEvalDatasetFiles( | ||
| human_labeled_datasets_files=eval_files.human_labeled_datasets_files, | ||
| result_file=eval_files.result_file, | ||
| harm_category=eval_files.harm_category, | ||
| ) | ||
|
|
||
| self._set_likert_scale_system_prompt(likert_scale_path=likert_scale.path) | ||
| self._set_likert_scale_system_prompt(likert_scale_path=likert_scale.path) | ||
| elif custom_likert_path is not None: | ||
| self._validate_custom_likert_path(custom_likert_path) | ||
| self._set_likert_scale_system_prompt(likert_scale_path=custom_likert_path) | ||
|
|
||
| def _build_identifier(self) -> ComponentIdentifier: | ||
| """ | ||
|
|
@@ -268,9 +291,12 @@ def _set_likert_scale_system_prompt(self, likert_scale_path: Path) -> None: | |
| f"but only a single unique value was found: {self._max_scale_value}." | ||
| ) | ||
|
|
||
| self._scoring_instructions_template = SeedPrompt.from_yaml_file( | ||
| SCORER_LIKERT_PATH / "likert_system_prompt.yaml" | ||
| ) | ||
| # Only load the default system prompt template if a custom one wasn't already | ||
| # set via custom_system_prompt_path in __init__. | ||
| if not hasattr(self, "_scoring_instructions_template"): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can we set to None & avoid using hasattr()? ie if self._scoring_instructions_template is None: |
||
| self._scoring_instructions_template = SeedPrompt.from_yaml_file( | ||
| SCORER_LIKERT_PATH / "likert_system_prompt.yaml" | ||
| ) | ||
|
|
||
| self._system_prompt = self._scoring_instructions_template.render_template_value( | ||
| likert_scale=likert_scale_str, | ||
|
|
@@ -337,6 +363,63 @@ def _likert_scale_description_to_string(self, descriptions: list[dict[str, str]] | |
|
|
||
| return likert_scale_description | ||
|
|
||
| @staticmethod | ||
| def _validate_custom_system_prompt_path(custom_system_prompt_path: Path) -> None: | ||
| """ | ||
| Validate the custom system prompt path. | ||
|
|
||
| Checks that the file exists, has a YAML extension, and contains the required | ||
| template parameters (category, likert_scale, min_scale_value, max_scale_value) | ||
| that the Likert scorer needs to render the system prompt. | ||
|
|
||
| Args: | ||
| custom_system_prompt_path (Path): Path to the custom system prompt YAML file. | ||
|
|
||
| Raises: | ||
| FileNotFoundError: If the file does not exist. | ||
| ValueError: If the file is not a YAML file or is missing required template parameters. | ||
| """ | ||
| if not custom_system_prompt_path.exists(): | ||
| raise FileNotFoundError(f"Custom system prompt file not found: '{custom_system_prompt_path}'") | ||
| if custom_system_prompt_path.suffix not in (".yaml", ".yml"): | ||
| raise ValueError( | ||
| f"Custom system prompt file must be a YAML file (.yaml or .yml), " | ||
| f"got '{custom_system_prompt_path.suffix}'." | ||
| ) | ||
|
|
||
| # Validate the template contains all required parameters used by the Likert scorer. | ||
| SeedPrompt.from_yaml_with_required_parameters( | ||
| template_path=custom_system_prompt_path, | ||
| required_parameters=["category", "likert_scale", "min_scale_value", "max_scale_value"], | ||
| error_message=( | ||
| "Custom system prompt YAML must define parameters: " | ||
| "category, likert_scale, min_scale_value, max_scale_value" | ||
| ), | ||
| ) | ||
|
|
||
| @staticmethod | ||
| def _validate_custom_likert_path(custom_likert_path: Path) -> None: | ||
| """ | ||
| Validate the custom Likert scale path. | ||
|
|
||
| Performs basic path checks (existence and YAML extension). Deeper content | ||
| validation (category, scale_descriptions structure, score values) is handled | ||
| by ``_set_likert_scale_system_prompt`` when the file is actually parsed. | ||
|
|
||
| Args: | ||
| custom_likert_path (Path): Path to the custom Likert scale YAML file. | ||
|
|
||
| Raises: | ||
| FileNotFoundError: If the file does not exist. | ||
| ValueError: If the file is not a YAML file. | ||
| """ | ||
| if not custom_likert_path.exists(): | ||
| raise FileNotFoundError(f"Custom Likert scale file not found: '{custom_likert_path}'") | ||
| if custom_likert_path.suffix not in (".yaml", ".yml"): | ||
| raise ValueError( | ||
| f"Custom Likert scale file must be a YAML file (.yaml or .yml), got '{custom_likert_path.suffix}'." | ||
| ) | ||
|
|
||
| async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]: | ||
| """ | ||
| Score the given message_piece using "self-ask" for the chat target. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -403,3 +403,105 @@ def test_likert_scale_missing_score_value_key_rejected(tmp_path: Path): | |
| chat_target=chat_target, | ||
| likert_scale=LikertScalePaths.CYBER_SCALE, | ||
| ) | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # custom_likert_path and custom_system_prompt_path tests | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| def _make_custom_system_prompt_yaml(tmp_path: Path, *, include_all_params: bool = True) -> Path: | ||
| """Create a custom system prompt YAML file for testing.""" | ||
| params = ["category", "likert_scale", "min_scale_value", "max_scale_value"] if include_all_params else ["category"] | ||
| prompt_data = { | ||
| "name": "custom test prompt", | ||
| "description": "test", | ||
| "parameters": params, | ||
| "data_type": "text", | ||
| "value": "Custom prompt for {{category}} with scale {{likert_scale}} " | ||
| "from {{min_scale_value}} to {{max_scale_value}}." | ||
| if include_all_params | ||
| else "Only {{category}}.", | ||
| } | ||
| yaml_file = tmp_path / "custom_system_prompt.yaml" | ||
| yaml_file.write_text(yaml.safe_dump(prompt_data), encoding="utf-8") | ||
| return yaml_file | ||
|
|
||
|
|
||
| def test_custom_likert_path_creates_scorer(tmp_path: Path): | ||
| """Verify that passing custom_likert_path (instead of a LikertScalePaths enum) works.""" | ||
| memory = MagicMock(MemoryInterface) | ||
| with patch.object(CentralMemory, "get_memory_instance", return_value=memory): | ||
| chat_target = MagicMock() | ||
| chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") | ||
|
|
||
| custom_path = _make_custom_scale_yaml(tmp_path, category="custom_cat", min_val=0, max_val=3) | ||
| scorer = SelfAskLikertScorer(chat_target=chat_target, custom_likert_path=custom_path) | ||
|
|
||
| assert scorer._min_scale_value == 0 | ||
| assert scorer._max_scale_value == 3 | ||
| assert scorer._score_category == "custom_cat" | ||
|
|
||
|
|
||
| def test_custom_likert_path_file_not_found(): | ||
| """Verify that a non-existent custom_likert_path raises FileNotFoundError.""" | ||
| memory = MagicMock(MemoryInterface) | ||
| with patch.object(CentralMemory, "get_memory_instance", return_value=memory): | ||
| chat_target = MagicMock() | ||
| chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") | ||
|
|
||
| with pytest.raises(FileNotFoundError, match="Custom Likert scale file not found"): | ||
| SelfAskLikertScorer(chat_target=chat_target, custom_likert_path=Path("/does/not/exist.yaml")) | ||
|
|
||
|
|
||
| def test_custom_likert_path_non_yaml_rejected(tmp_path: Path): | ||
| """Verify that a non-YAML custom_likert_path raises ValueError.""" | ||
| bad_file = tmp_path / "scale.txt" | ||
| bad_file.write_text("not yaml", encoding="utf-8") | ||
|
|
||
| memory = MagicMock(MemoryInterface) | ||
| with patch.object(CentralMemory, "get_memory_instance", return_value=memory): | ||
| chat_target = MagicMock() | ||
| chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") | ||
|
|
||
| with pytest.raises(ValueError, match="must be a YAML file"): | ||
| SelfAskLikertScorer(chat_target=chat_target, custom_likert_path=bad_file) | ||
|
|
||
|
|
||
| def test_custom_system_prompt_path_used_in_system_prompt(tmp_path: Path): | ||
| """Verify that a custom system prompt template is rendered instead of the default.""" | ||
| memory = MagicMock(MemoryInterface) | ||
| with patch.object(CentralMemory, "get_memory_instance", return_value=memory): | ||
| chat_target = MagicMock() | ||
| chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") | ||
|
|
||
| custom_prompt_path = _make_custom_system_prompt_yaml(tmp_path) | ||
| custom_likert_path = _make_custom_scale_yaml(tmp_path, category="test_cat", min_val=1, max_val=5) | ||
|
|
||
| scorer = SelfAskLikertScorer( | ||
| chat_target=chat_target, | ||
| custom_likert_path=custom_likert_path, | ||
| custom_system_prompt_path=custom_prompt_path, | ||
| ) | ||
|
|
||
| # The system prompt should come from the custom template, not the default one | ||
| assert "Custom prompt for test_cat" in scorer._system_prompt | ||
| assert "from 1 to 5" in scorer._system_prompt | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: maybe add unit tests for all the conditions of the scale/yaml - ie both likert & custom provided gives error, test missing both parameters gives error, file not found, non yaml/malformed? |
||
|
|
||
| def test_custom_system_prompt_missing_params_rejected(tmp_path: Path): | ||
| """Verify that a custom system prompt missing required parameters raises ValueError.""" | ||
| memory = MagicMock(MemoryInterface) | ||
| with patch.object(CentralMemory, "get_memory_instance", return_value=memory): | ||
| chat_target = MagicMock() | ||
| chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") | ||
|
|
||
| bad_prompt_path = _make_custom_system_prompt_yaml(tmp_path, include_all_params=False) | ||
| custom_likert_path = _make_custom_scale_yaml(tmp_path) | ||
|
|
||
| with pytest.raises(ValueError, match="Custom system prompt YAML must define parameters"): | ||
| SelfAskLikertScorer( | ||
| chat_target=chat_target, | ||
| custom_likert_path=custom_likert_path, | ||
| custom_system_prompt_path=bad_prompt_path, | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe this is cleaner here but wondering why we can't use the generic self ask scorer since you can add custom scales already there? this can take custom scales & scale definitions as a yaml or string