Azure · slister1001 · Mar 18, 2026 · Mar 19, 2026 · Copilot · Mar 19, 2026
@@ -114,6 +114,8 @@ async def execute_attacks(
         has_indirect = StrategyMapper.has_indirect_attack(attack_strategies)
 
         red_team_info: Dict[str, Dict[str, Any]] = {}
+        consecutive_config_failures = 0
+        _MAX_CONSECUTIVE_CONFIG_FAILURES = 2
-        _MAX_CONSECUTIVE_CONFIG_FAILURES = 2
+        max_consecutive_config_failures = 2
-        _MAX_CONSECUTIVE_CONFIG_FAILURES = 2
+        max_consecutive_config_failures = 2
 
         try:
             # Process each risk category
@@ -169,7 +171,11 @@ async def execute_attacks(
                     try:
                         partial_results = orchestrator.get_attack_results()
                     except Exception:
-                        self.logger.debug("Failed to recover partial results for %s", risk_value, exc_info=True)
+                        self.logger.debug(
+                            "Failed to recover partial results for %s",
+                            risk_value,
+                            exc_info=True,
+                        )
 
                     if partial_results:
                         self.logger.warning(
@@ -198,6 +204,37 @@ async def execute_attacks(
                             "error": str(e),
                             "asr": 0.0,
                         }
+
+                        # Track consecutive failures to detect systemic issues
+                        # (e.g., unavailable model, bad credentials)
+                        if self._is_configuration_error(e):
+                            consecutive_config_failures += 1
+                            if consecutive_config_failures >= _MAX_CONSECUTIVE_CONFIG_FAILURES:
+                                remaining = [
+                                    rc.value
+                                    for rc in risk_categories
+                                    if rc.value
+                                    not in {rv for rd in red_team_info.values() if isinstance(rd, dict) for rv in rd}
+                                ]
+                                if remaining:
+                                    abort_msg = (
+                                        f"Aborting remaining {len(remaining)} risk categories "
+                                        f"after {consecutive_config_failures} consecutive configuration errors. "
+                                        f"Root cause: {e}"
+                                    )
+                                    self.logger.error(abort_msg)
+                                    for rv in remaining:
+                                        if "Foundry" not in red_team_info:
+                                            red_team_info["Foundry"] = {}
+                                        red_team_info["Foundry"][rv] = {
+                                            "data_file": "",
+                                            "status": "failed",
+                                            "error": str(e),
+                                            "asr": 0.0,
+                                        }
+                                    break
+                        else:
+                            consecutive_config_failures = 0
                         continue
 
                 # Process results (handles both full success and partial recovery)
@@ -229,6 +266,9 @@ async def execute_attacks(
                     if strategy_name not in red_team_info:
                         red_team_info[strategy_name] = {}
                     red_team_info[strategy_name][risk_value] = strategy_data
+
+                # Reset consecutive failure counter on success
+                consecutive_config_failures = 0
         finally:
             # Clean up all builder temp directories
             for builder in self._builders:
@@ -237,6 +277,31 @@ async def execute_attacks(
 
         return red_team_info
 
+    @staticmethod
+    def _is_configuration_error(exception: Exception) -> bool:
+        """Decide whether ``exception`` points at a systemic configuration problem.
+
+        Failures of this kind (bad model name, rejected credentials, etc.) are
+        expected to hit every risk category the same way, so there is no value
+        in attempting the remaining categories.
+
+        :param exception: Exception raised while processing a risk category.
+        :return: When the exception exposes ``response.status_code``, True only
+                 for HTTP 400, 401 or 403 (the message text is not consulted in
+                 that case). Otherwise True when the lowercased message contains
+                 one of the known configuration keywords.
+        """
+        absent = object()
+        status_code = getattr(getattr(exception, "response", absent), "status_code", absent)
+        if status_code is not absent:
+            # An HTTP status is available (OpenAI / Azure style errors): it alone decides.
+            return status_code in (400, 401, 403)
+
+        # No status code: keyword heuristics for wrapped or chained errors.
+        lowered = str(exception).lower()
+        for keyword in (
+            "unavailable_model",
+            "bad request",
+            "unauthorized",
+            "forbidden",
+            "authentication",
+            "permission denied",
+            "invalid_api_key",
+        ):
+            if keyword in lowered:
+                return True
+        return False
+
+
     def _build_dataset_config(
         self,
         risk_category: str,

@@ -494,6 +494,9 @@ class RedTeamRun(TypedDict, total=False):
     :type created_at: int
     :param status: Status of the run (e.g., "completed", "failed", "in_progress")
     :type status: str
+    :param error: Run-level error information when the scan failed (e.g., target model unavailable,
+                  authentication failure). Contains ``code`` and ``message`` keys.
+    :type error: Optional[Dict[str, Any]]
     :param name: Display name for the run
     :type name: str
     :param report_url: URL to view the run report in Azure AI Studio
@@ -519,6 +522,7 @@ class RedTeamRun(TypedDict, total=False):
     eval_id: str
     created_at: int
     status: str
+    error: Optional[Dict[str, Any]]
     name: str
     report_url: Optional[str]
     data_source: DataSource

@@ -1654,11 +1654,66 @@ def _determine_run_status(
                     if not isinstance(details, dict):
                         continue
                     status = details.get("status", "").lower()
-                    if status in ("incomplete", "failed", "timeout", "pending", "running"):
+                    if status in (
+                        "incomplete",
+                        "failed",
+                        "timeout",
+                        "pending",
+                        "running",
+                    ):
                         return "failed"
 
         return "completed"
 
+    @staticmethod
+    def _aggregate_run_errors(
+        red_team_info: Optional[Dict],
+    ) -> Optional[Dict[str, Any]]:
+        """Collect error messages from failed risk categories into a run-level error.
+
+        Scans red_team_info for tasks whose status is failed, incomplete, timeout,
+        pending or running and gathers their ``error`` values as
+        ``"<risk_category>: <error>"`` entries. Repeated identical entries (the
+        same category reporting the same error under several strategies) are
+        collapsed while preserving first-seen order.
+
+        :param red_team_info: Dictionary of strategy -> risk_category -> tracking data
+        :return: None when ``red_team_info`` is not a dict. Otherwise an error dict
+                 with ``code`` and ``message``: the single collected entry, a summary
+                 quoting the first entry when several distinct entries exist, or a
+                 generic message when no task carried an error message.
+        """
+        if not isinstance(red_team_info, dict):
+            return None
+
+        failure_statuses = ("failed", "incomplete", "timeout", "pending", "running")
+        errors: List[str] = []
+
+        for risk_data in red_team_info.values():
+            if not isinstance(risk_data, dict):
+                continue
+            for risk_value, details in risk_data.items():
+                if not isinstance(details, dict):
+                    continue
+                # A missing, None or non-string status must not crash aggregation;
+                # treat it as "no failure status recorded".
+                raw_status = details.get("status")
+                task_status = raw_status.lower() if isinstance(raw_status, str) else ""
+                if task_status not in failure_statuses:
+                    continue
+                error_msg = details.get("error")
+                if error_msg:
+                    errors.append(f"{risk_value}: {error_msg}")
+
+        if not errors:
+            # Generic fallback: no task carried an error message to surface.
+            return {
+                "code": "scan_failed",
+                "message": "One or more risk categories failed during the scan.",
+            }
+
+        # dict.fromkeys removes duplicates while keeping first-seen order.
+        unique_errors = list(dict.fromkeys(errors))
+        if len(unique_errors) == 1:
+            return {"code": "scan_failed", "message": unique_errors[0]}
+        return {
+            "code": "scan_failed",
+            "message": f"{len(unique_errors)} risk categories failed. First error: {unique_errors[0]}",
+        }
+
+
     def _build_results_payload(
         self,
         redteam_result: RedTeamResult,
@@ -1753,6 +1808,7 @@ def _build_results_payload(
         data_source = self._build_data_source_section(parameters, red_team_info)
         status = self._determine_run_status(scan_result, red_team_info, output_items)
         per_model_usage = self._compute_per_model_usage(output_items)
+        error = self._aggregate_run_errors(red_team_info) if status == "failed" else None
 
         list_wrapper: OutputItemsList = {
             "object": "list",
@@ -1765,6 +1821,7 @@ def _build_results_payload(
             "eval_id": eval_id,
             "created_at": created_at,
             "status": status,
+            "error": error,
             "name": run_name,
             "report_url": scan_result.get("studio_url") or self.ai_studio_url,
             "data_source": data_source,

@@ -104,7 +104,9 @@ def categorize_exception(self, exception: Exception) -> ErrorCategory:
         # HTTP status code specific errors
         if hasattr(exception, "response") and hasattr(exception.response, "status_code"):
             status_code = exception.response.status_code
-            if 500 <= status_code < 600:
+            if status_code == 400:
+                return ErrorCategory.CONFIGURATION
+            elif 500 <= status_code < 600:
                 return ErrorCategory.NETWORK
             elif status_code == 401:
                 return ErrorCategory.AUTHENTICATION