-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Surface red team scan errors in run results #45772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -114,6 +114,8 @@ async def execute_attacks( | |
| has_indirect = StrategyMapper.has_indirect_attack(attack_strategies) | ||
|
|
||
| red_team_info: Dict[str, Dict[str, Any]] = {} | ||
| consecutive_config_failures = 0 | ||
| _MAX_CONSECUTIVE_CONFIG_FAILURES = 2 | ||
|
|
||
| try: | ||
| # Process each risk category | ||
|
|
@@ -169,7 +171,11 @@ async def execute_attacks( | |
| try: | ||
| partial_results = orchestrator.get_attack_results() | ||
| except Exception: | ||
| self.logger.debug("Failed to recover partial results for %s", risk_value, exc_info=True) | ||
| self.logger.debug( | ||
| "Failed to recover partial results for %s", | ||
| risk_value, | ||
| exc_info=True, | ||
| ) | ||
|
|
||
| if partial_results: | ||
| self.logger.warning( | ||
|
|
@@ -198,6 +204,37 @@ async def execute_attacks( | |
| "error": str(e), | ||
| "asr": 0.0, | ||
| } | ||
|
|
||
| # Track consecutive failures to detect systemic issues | ||
| # (e.g., unavailable model, bad credentials) | ||
| if self._is_configuration_error(e): | ||
| consecutive_config_failures += 1 | ||
| if consecutive_config_failures >= _MAX_CONSECUTIVE_CONFIG_FAILURES: | ||
| remaining = [ | ||
| rc.value | ||
| for rc in risk_categories | ||
| if rc.value | ||
| not in {rv for rd in red_team_info.values() if isinstance(rd, dict) for rv in rd} | ||
| ] | ||
| if remaining: | ||
| abort_msg = ( | ||
| f"Aborting remaining {len(remaining)} risk categories " | ||
| f"after {consecutive_config_failures} consecutive configuration errors. " | ||
| f"Root cause: {e}" | ||
| ) | ||
| self.logger.error(abort_msg) | ||
| for rv in remaining: | ||
| if "Foundry" not in red_team_info: | ||
| red_team_info["Foundry"] = {} | ||
| red_team_info["Foundry"][rv] = { | ||
| "data_file": "", | ||
| "status": "failed", | ||
| "error": str(e), | ||
| "asr": 0.0, | ||
| } | ||
| break | ||
|
Comment on lines
+208
to
+235
|
||
| else: | ||
| consecutive_config_failures = 0 | ||
| continue | ||
|
|
||
| # Process results (handles both full success and partial recovery) | ||
|
|
@@ -229,6 +266,9 @@ async def execute_attacks( | |
| if strategy_name not in red_team_info: | ||
| red_team_info[strategy_name] = {} | ||
| red_team_info[strategy_name][risk_value] = strategy_data | ||
|
|
||
| # Reset consecutive failure counter on success | ||
| consecutive_config_failures = 0 | ||
| finally: | ||
| # Clean up all builder temp directories | ||
| for builder in self._builders: | ||
|
|
@@ -237,6 +277,31 @@ async def execute_attacks( | |
|
|
||
| return red_team_info | ||
|
|
||
| @staticmethod | ||
| def _is_configuration_error(exception: Exception) -> bool: | ||
| """Check if an exception indicates a systemic configuration problem. | ||
|
|
||
| Configuration errors (bad model name, auth failures, etc.) will | ||
| affect all risk categories identically, so there is no value in | ||
| retrying subsequent categories. | ||
| """ | ||
| # HTTP 400 / 401 / 403 from OpenAI / Azure | ||
| if hasattr(exception, "response") and hasattr(exception.response, "status_code"): | ||
| return exception.response.status_code in (400, 401, 403) | ||
|
|
||
| # Keyword heuristics for wrapped or chained errors | ||
| msg = str(exception).lower() | ||
| config_keywords = ( | ||
| "unavailable_model", | ||
| "bad request", | ||
| "unauthorized", | ||
| "forbidden", | ||
| "authentication", | ||
| "permission denied", | ||
| "invalid_api_key", | ||
| ) | ||
| return any(kw in msg for kw in config_keywords) | ||
|
|
||
| def _build_dataset_config( | ||
| self, | ||
| risk_category: str, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1654,11 +1654,66 @@ def _determine_run_status( | |
| if not isinstance(details, dict): | ||
| continue | ||
| status = details.get("status", "").lower() | ||
| if status in ("incomplete", "failed", "timeout", "pending", "running"): | ||
| if status in ( | ||
| "incomplete", | ||
| "failed", | ||
| "timeout", | ||
| "pending", | ||
| "running", | ||
| ): | ||
| return "failed" | ||
|
|
||
| return "completed" | ||
|
|
||
| @staticmethod | ||
| def _aggregate_run_errors( | ||
| red_team_info: Optional[Dict], | ||
| ) -> Optional[Dict[str, Any]]: | ||
| """Collect error messages from failed risk categories into a run-level error. | ||
|
|
||
| Scans red_team_info for tasks with failed/incomplete/timeout status and | ||
| extracts their error messages. Deduplicates identical errors (common when | ||
| a config issue like an unavailable model affects all categories). | ||
|
Comment on lines
+1668
to
+1676
|
||
|
|
||
| :param red_team_info: Dictionary of strategy -> risk_category -> tracking data | ||
| :return: Error dict with ``code`` and ``message``, or None if no errors found | ||
| """ | ||
| errors: List[str] = [] | ||
| if not isinstance(red_team_info, dict): | ||
| return None | ||
|
|
||
| for strategy_name, risk_data in red_team_info.items(): | ||
| if not isinstance(risk_data, dict): | ||
| continue | ||
| for risk_value, details in risk_data.items(): | ||
| if not isinstance(details, dict): | ||
| continue | ||
| task_status = details.get("status", "").lower() | ||
| if task_status in ( | ||
| "failed", | ||
| "incomplete", | ||
| "timeout", | ||
| "pending", | ||
| "running", | ||
| ): | ||
| error_msg = details.get("error") | ||
| if error_msg: | ||
| errors.append(f"{risk_value}: {error_msg}") | ||
|
|
||
|
Comment on lines
+1699
to
+1702
|
||
| if not errors: | ||
| return { | ||
| "code": "scan_failed", | ||
| "message": "One or more risk categories failed during the scan.", | ||
| } | ||
|
|
||
| unique_errors = list(dict.fromkeys(errors)) | ||
| if len(unique_errors) == 1: | ||
| return {"code": "scan_failed", "message": unique_errors[0]} | ||
| return { | ||
| "code": "scan_failed", | ||
| "message": f"{len(unique_errors)} risk categories failed. First error: {unique_errors[0]}", | ||
| } | ||
|
|
||
| def _build_results_payload( | ||
| self, | ||
| redteam_result: RedTeamResult, | ||
|
|
@@ -1753,6 +1808,7 @@ def _build_results_payload( | |
| data_source = self._build_data_source_section(parameters, red_team_info) | ||
| status = self._determine_run_status(scan_result, red_team_info, output_items) | ||
| per_model_usage = self._compute_per_model_usage(output_items) | ||
| error = self._aggregate_run_errors(red_team_info) if status == "failed" else None | ||
|
|
||
| list_wrapper: OutputItemsList = { | ||
| "object": "list", | ||
|
|
@@ -1765,6 +1821,7 @@ def _build_results_payload( | |
| "eval_id": eval_id, | ||
| "created_at": created_at, | ||
| "status": status, | ||
| "error": error, | ||
| "name": run_name, | ||
| "report_url": scan_result.get("studio_url") or self.ai_studio_url, | ||
| "data_source": data_source, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -104,7 +104,9 @@ def categorize_exception(self, exception: Exception) -> ErrorCategory: | |
| # HTTP status code specific errors | ||
| if hasattr(exception, "response") and hasattr(exception.response, "status_code"): | ||
| status_code = exception.response.status_code | ||
| if 500 <= status_code < 600: | ||
| if status_code == 400: | ||
| return ErrorCategory.CONFIGURATION | ||
| elif 500 <= status_code < 600: | ||
| return ErrorCategory.NETWORK | ||
| elif status_code == 401: | ||
| return ErrorCategory.AUTHENTICATION | ||
|
Comment on lines
104
to
112
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`_MAX_CONSECUTIVE_CONFIG_FAILURES` is a function-local variable but is named like a module constant (uppercase with leading underscore). For readability/consistency, consider either making it a class-level constant (e.g., `MAX_CONSECUTIVE_CONFIG_FAILURES`) or renaming it to a local variable style (e.g., `max_consecutive_config_failures`).