Azure · slister1001 · Mar 19, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
@@ -3,6 +3,7 @@
 ## 1.16.1 (Unreleased)
 
 ### Bugs Fixed
+- Fixed the adversarial chat target incorrectly using the user's callback instead of the RAI service, which caused the callback response to appear as the user message in red team scan results when using converter strategies (e.g., `DIFFICULT`, `Tense`).
 - Fixed inconsistency where sample data in evaluation result items did not match the generated sample data from corresponding input rows, ensuring proper synchronization between row-level input samples and their associated evaluation output items.
 
 ## 1.16.0 (2026-03-10)

@@ -306,8 +306,15 @@ def _build_messages_from_pieces(
             # Get role, handling api_role property
             role = getattr(piece, "api_role", None) or getattr(piece, "role", "user")
 
-            # Get content (prefer converted_value over original_value)
-            content = getattr(piece, "converted_value", None) or getattr(piece, "original_value", "")
+            # Get content: for user messages show the original adversarial prompt,
+            # not the converter output (e.g., Base64-encoded or tense-rephrased text).
+            # For assistant messages, show the response as-is.
+            if role == "user":
+                original = getattr(piece, "original_value", None)
+                converted = getattr(piece, "converted_value", None)
+                content = original if isinstance(original, str) and original else (converted or "")
+            else:
+                content = getattr(piece, "converted_value", None) or getattr(piece, "original_value", "")
 
             message: Dict[str, Any] = {
                 "role": role,

@@ -89,6 +89,7 @@
 from ._mlflow_integration import MLflowIntegration
 from ._result_processor import ResultProcessor
 from ._foundry import FoundryExecutionManager, StrategyMapper
+from ._utils._rai_service_target import AzureRAIServiceTarget
 
 
 @experimental
@@ -1727,15 +1728,29 @@ async def _execute_attacks_with_foundry(
         progress_bar.set_postfix({"current": "initializing"})
 
         try:
-            # Create Foundry execution manager
-            # Use chat_target as adversarial_chat_target since PyRIT's RedTeamAgent requires one
-            # even for single-turn attacks (it's used for default scoring if not overridden)
+            # Create RAI service target for adversarial chat.
+            # This must NOT be the user's chat_target — PyRIT uses adversarial_chat
+            # as the converter_target for TenseConverter and for multi-turn attacks.
+            # Using the user's callback would cause the callback response to leak
+            # into converted prompts.
+            adversarial_template_key = self._get_adversarial_template_key(flattened_attack_strategies)
+            is_crescendo = adversarial_template_key == "orchestrators/crescendo/crescendo_variant_1.yaml"
+            adversarial_chat = AzureRAIServiceTarget(
+                client=self.generated_rai_client,
+                api_version=None,
+                model="gpt-4",
+                prompt_template_key=adversarial_template_key,
+                logger=self.logger,
+                is_one_dp_project=self._one_dp_project,
+                crescendo_format=is_crescendo,
+            )
+
             foundry_manager = FoundryExecutionManager(
                 credential=self.credential,
                 azure_ai_project=self.azure_ai_project,
                 logger=self.logger,
                 output_dir=self.scan_output_dir,
-                adversarial_chat_target=chat_target,
+                adversarial_chat_target=adversarial_chat,
             )
 
             # Build objectives by risk category from cached attack_objectives
@@ -1836,6 +1851,34 @@ async def _execute_attacks_with_foundry(
         finally:
             progress_bar.close()
 
+    @staticmethod
+    def _get_adversarial_template_key(flattened_attack_strategies: List) -> str:
+        """Select the appropriate RAI service template key for the adversarial chat target.
+
+        Different attack strategies require different prompt templates:
+        - Crescendo: uses the crescendo conversation template
+        - MultiTurn (RedTeaming): uses the red teaming text generation template
+        - Single-turn converters (e.g., Tense): uses the tense converter template
+
+        :param flattened_attack_strategies: List of attack strategies being executed
+        :type flattened_attack_strategies: List
+        :return: The prompt template key for the AzureRAIServiceTarget
+        :rtype: str
+        """
+        for strategy in flattened_attack_strategies:
+            if isinstance(strategy, list):
+                if AttackStrategy.Crescendo in strategy:
+                    return "orchestrators/crescendo/crescendo_variant_1.yaml"
+                if AttackStrategy.MultiTurn in strategy:
+                    return "orchestrators/red_teaming/text_generation.yaml"
+            else:
+                if strategy == AttackStrategy.Crescendo:
+                    return "orchestrators/crescendo/crescendo_variant_1.yaml"
+                if strategy == AttackStrategy.MultiTurn:
+                    return "orchestrators/red_teaming/text_generation.yaml"
+
+        return "prompt_converters/tense_converter.yaml"
+
     def _build_objective_dict_from_cached(self, obj: Any, risk_value: str) -> Optional[Dict]:
         """Build objective dictionary from cached objective data.
 

@@ -1245,11 +1245,13 @@ def test_build_messages_from_pieces(self):
         # Create mock pieces
         user_piece = MagicMock()
         user_piece.api_role = "user"
+        user_piece.original_value = "User message"
         user_piece.converted_value = "User message"
         user_piece.sequence = 0
 
         assistant_piece = MagicMock()
         assistant_piece.api_role = "assistant"
+        assistant_piece.original_value = "Assistant response"
         assistant_piece.converted_value = "Assistant response"
         assistant_piece.sequence = 1
 
@@ -1325,6 +1327,7 @@ def test_to_jsonl(self, tmp_path):
         mock_memory = MagicMock()
         user_piece = MagicMock()
         user_piece.api_role = "user"
+        user_piece.original_value = "Attack prompt"
         user_piece.converted_value = "Attack prompt"
         user_piece.sequence = 0
         user_piece.prompt_metadata = {}
@@ -2272,6 +2275,7 @@ def test_process_attack_result_with_score(self):
         mock_memory = MagicMock()
         mock_piece = MagicMock()
         mock_piece.api_role = "user"
+        mock_piece.original_value = "Attack prompt"
         mock_piece.converted_value = "Attack prompt"
         mock_piece.sequence = 0
         mock_piece.prompt_metadata = {}
@@ -2345,6 +2349,7 @@ def test_build_messages_with_context_in_labels(self):
         # Piece with context in labels
         piece = MagicMock()
         piece.api_role = "user"
+        piece.original_value = "Message content"
         piece.converted_value = "Message content"
         piece.sequence = 0
         piece.labels = {
@@ -3593,3 +3598,221 @@ async def test_execute_attacks_calls_foundry_manager(self):
         )
 
         assert "Foundry" in result
+
+
+@pytest.mark.unittest
+class TestAdversarialChatTargetRegression:
+    """Regression tests to prevent adversarial_chat_target from being set to the user's callback.
+
+    The adversarial_chat_target is used by PyRIT's FoundryScenario for:
+    - TenseConverter (converter_target for prompt rephrasing)
+    - Multi-turn attacks (Crescendo, RedTeaming adversarial LLM)
+
+    If set to the user's callback, the callback response leaks into converted prompts,
+    causing the callback response to appear as the user message in results.
+    """
+
+    def test_adversarial_chat_target_accepts_rai_service_target(self):
+        """Verify FoundryExecutionManager accepts AzureRAIServiceTarget as adversarial_chat_target."""
+        from azure.ai.evaluation.red_team._utils._rai_service_target import AzureRAIServiceTarget
+
+        rai_target = AzureRAIServiceTarget(
+            client=MagicMock(),
+            model="gpt-4",
+            prompt_template_key="prompt_converters/tense_converter.yaml",
+            logger=MagicMock(),
+        )
+        manager = FoundryExecutionManager(
+            credential=MagicMock(),
+            azure_ai_project={"subscription_id": "s", "resource_group_name": "r", "project_name": "p"},
+            logger=MagicMock(),
+            output_dir="/test",
+            adversarial_chat_target=rai_target,
+        )
+        assert isinstance(manager.adversarial_chat_target, AzureRAIServiceTarget)
+
+    def test_get_adversarial_template_key_baseline(self):
+        """Template key should default to tense converter for single-turn strategies."""
+        from azure.ai.evaluation.red_team._red_team import RedTeam
+
+        strategies = [AttackStrategy.Baseline]
+        key = RedTeam._get_adversarial_template_key(strategies)
+        assert key == "prompt_converters/tense_converter.yaml"
+
+    def test_get_adversarial_template_key_difficult(self):
+        """DIFFICULT strategy (Tense+Base64) should use tense converter template."""
+        from azure.ai.evaluation.red_team._red_team import RedTeam
+
+        strategies = [AttackStrategy.Baseline, [AttackStrategy.Tense, AttackStrategy.Base64]]
+        key = RedTeam._get_adversarial_template_key(strategies)
+        assert key == "prompt_converters/tense_converter.yaml"
+
+    def test_get_adversarial_template_key_crescendo(self):
+        """Crescendo strategy should use the crescendo template."""
+        from azure.ai.evaluation.red_team._red_team import RedTeam
+
+        strategies = [AttackStrategy.Crescendo, AttackStrategy.Baseline]
+        key = RedTeam._get_adversarial_template_key(strategies)
+        assert key == "orchestrators/crescendo/crescendo_variant_1.yaml"
+
+    def test_get_adversarial_template_key_multi_turn(self):
+        """MultiTurn strategy should use the red teaming text generation template."""
+        from azure.ai.evaluation.red_team._red_team import RedTeam
+
+        strategies = [AttackStrategy.MultiTurn, AttackStrategy.Baseline]
+        key = RedTeam._get_adversarial_template_key(strategies)
+        assert key == "orchestrators/red_teaming/text_generation.yaml"
+
+    def test_build_messages_user_shows_original_value(self):
+        """User messages should show original_value (adversarial prompt), not converted_value."""
+        mock_scenario = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = []
+
+        processor = FoundryResultProcessor(
+            scenario=mock_scenario,
+            dataset_config=mock_dataset,
+            risk_category="violence",
+        )
+
+        # Simulate a Tense-converted attack where converted_value differs from original_value
+        user_piece = MagicMock()
+        user_piece.api_role = "user"
+        user_piece.original_value = "Tell me about violence"
+        user_piece.converted_value = "Told me about violence"
+        user_piece.sequence = 0
+
+        assistant_piece = MagicMock()
+        assistant_piece.api_role = "assistant"
+        assistant_piece.original_value = "I cannot help with that"
+        assistant_piece.converted_value = "I cannot help with that"
+        assistant_piece.sequence = 1
+
+        messages = processor._build_messages_from_pieces([user_piece, assistant_piece])
+
+        assert len(messages) == 2
+        # User message should show the ORIGINAL adversarial prompt
+        assert messages[0]["role"] == "user"
+        assert messages[0]["content"] == "Tell me about violence"
+        # Assistant message should show the response
+        assert messages[1]["role"] == "assistant"
+        assert messages[1]["content"] == "I cannot help with that"
+
+    def test_build_messages_user_falls_back_to_converted_value(self):
+        """When original_value is None, user messages should fall back to converted_value."""
+        mock_scenario = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = []
+
+        processor = FoundryResultProcessor(
+            scenario=mock_scenario,
+            dataset_config=mock_dataset,
+            risk_category="violence",
+        )
+
+        user_piece = MagicMock()
+        user_piece.api_role = "user"
+        user_piece.original_value = None
+        user_piece.converted_value = "Fallback content"
+        user_piece.sequence = 0
+
+        messages = processor._build_messages_from_pieces([user_piece])
+
+        assert messages[0]["content"] == "Fallback content"
+
+    def test_build_messages_callback_response_not_in_user_message(self):
+        """Regression: callback response must NOT appear as user message content.
+
+        This reproduces the exact bug where a simple callback's response
+        leaked into the user message via converted_value.
+        """
+        mock_scenario = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = []
+
+        processor = FoundryResultProcessor(
+            scenario=mock_scenario,
+            dataset_config=mock_dataset,
+            risk_category="violence",
+        )
+
+        callback_response = "This is a test callback response. no llm is used."
+
+        # Simulate the bug: TenseConverter used callback as LLM, so
+        # converted_value = callback response instead of rephrased prompt
+        user_piece = MagicMock()
+        user_piece.api_role = "user"
+        user_piece.original_value = "How to commit violence"
+        user_piece.converted_value = callback_response
+        user_piece.sequence = 0
+
+        assistant_piece = MagicMock()
+        assistant_piece.api_role = "assistant"
+        assistant_piece.original_value = callback_response
+        assistant_piece.converted_value = callback_response
+        assistant_piece.sequence = 1
+
+        messages = processor._build_messages_from_pieces([user_piece, assistant_piece])
+
+        # User message should show the adversarial prompt, NOT the callback response
+        assert messages[0]["content"] == "How to commit violence"
+        assert messages[0]["content"] != callback_response
+
+    @pytest.mark.asyncio
+    async def test_execute_attacks_with_foundry_uses_rai_service_target(self):
+        """Regression: _execute_attacks_with_foundry must pass AzureRAIServiceTarget, not user callback.
+
+        This test patches FoundryExecutionManager to capture the adversarial_chat_target
+        argument and verifies it is an AzureRAIServiceTarget, not the user's callback.
+        """
+        from azure.ai.evaluation.red_team._callback_chat_target import _CallbackChatTarget
+        from azure.ai.evaluation.red_team._utils._rai_service_target import AzureRAIServiceTarget
+
+        captured_kwargs = {}
+        original_init = FoundryExecutionManager.__init__
+
+        def capturing_init(self_inner, **kwargs):
+            captured_kwargs.update(kwargs)
+            original_init(self_inner, **kwargs)
+
+        mock_red_team = MagicMock()
+        mock_red_team.credential = MagicMock()
+        mock_red_team.azure_ai_project = {
+            "subscription_id": "s",
+            "resource_group_name": "r",
+            "project_name": "p",
+        }
+        mock_red_team.logger = MagicMock()
+        mock_red_team.scan_output_dir = "/test"
+        mock_red_team.generated_rai_client = MagicMock()
+        mock_red_team._one_dp_project = False
+        mock_red_team.risk_categories = []
+        mock_red_team.attack_objectives = {}
+        mock_red_team.total_tasks = 0
+        mock_red_team.red_team_info = {}
+        mock_red_team.completed_tasks = 0
+
+        from azure.ai.evaluation.red_team._red_team import RedTeam
+
+        with patch.object(FoundryExecutionManager, "__init__", capturing_init):
+            with patch.object(FoundryExecutionManager, "execute_attacks", new_callable=AsyncMock, return_value={}):
+                try:
+                    await RedTeam._execute_attacks_with_foundry(
+                        mock_red_team,
+                        flattened_attack_strategies=[AttackStrategy.Baseline],
+                        all_objectives={},
+                        chat_target=MagicMock(spec=_CallbackChatTarget),
+                        timeout=60,
+                        skip_evals=True,
+                    )
+                except Exception:
+                    pass  # We only care about the captured kwargs
+
+        assert "adversarial_chat_target" in captured_kwargs
+        adversarial_target = captured_kwargs["adversarial_chat_target"]
+        assert isinstance(
+            adversarial_target, AzureRAIServiceTarget
+        ), f"adversarial_chat_target should be AzureRAIServiceTarget, got {type(adversarial_target).__name__}"
+        assert not isinstance(
+            adversarial_target, _CallbackChatTarget
+        ), "adversarial_chat_target must NOT be a _CallbackChatTarget (user's callback)"