-
Notifications
You must be signed in to change notification settings - Fork 2k
fix(backend): tag BYOK LLM errors by source #7321
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| import os | ||
| import sys | ||
| import types | ||
| from unittest.mock import MagicMock, patch | ||
|
|
||
| os.environ.setdefault('OPENAI_API_KEY', 'sk-test-fake-for-unit-tests') | ||
| os.environ.setdefault('ANTHROPIC_API_KEY', 'ant-test-fake-for-unit-tests') | ||
| os.environ.setdefault('ENCRYPTION_SECRET', 'omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv') | ||
|
|
||
| sys.modules.setdefault('database._client', MagicMock()) | ||
| llm_usage_stub = types.ModuleType('database.llm_usage') | ||
| llm_usage_stub.record_llm_usage = MagicMock() | ||
| sys.modules.setdefault('database.llm_usage', llm_usage_stub) | ||
|
|
||
|
|
||
| class _HTTPError(Exception): | ||
| def __init__(self, message: str, status_code: int): | ||
| super().__init__(message) | ||
| self.status_code = status_code | ||
|
|
||
|
|
||
| def test_classify_byok_llm_error_authentication(): | ||
| from utils.llm.byok_errors import classify_byok_llm_error | ||
|
|
||
| assert classify_byok_llm_error(_HTTPError("bad api key", 401)) == 'invalid' | ||
|
|
||
|
|
||
| def test_classify_byok_llm_error_permission(): | ||
| from utils.llm.byok_errors import classify_byok_llm_error | ||
|
|
||
| assert classify_byok_llm_error(_HTTPError("project denied", 403)) == 'permission' | ||
|
|
||
|
|
||
| def test_classify_byok_llm_error_insufficient_quota(): | ||
| from utils.llm.byok_errors import classify_byok_llm_error | ||
|
|
||
| assert classify_byok_llm_error(_HTTPError("insufficient_quota", 429)) == 'quota' | ||
|
|
||
|
|
||
| def test_classify_byok_llm_error_ignores_transient_rate_limit(): | ||
| from utils.llm.byok_errors import classify_byok_llm_error | ||
|
|
||
| assert classify_byok_llm_error(_HTTPError("rate limit reached, retry later", 429)) is None | ||
|
|
||
|
|
||
| @patch('utils.llm.byok_errors.get_byok_uid', return_value='user-1') | ||
| @patch('utils.llm.byok_errors.get_byok_key', return_value='sk-user') | ||
| def test_handle_llm_error_logs_byok_source(mock_get_key, mock_get_uid): | ||
| from utils.llm.byok_errors import handle_llm_error | ||
|
|
||
| with patch('utils.llm.byok_errors.logger.error') as mock_log: | ||
| handle_llm_error(_HTTPError("insufficient_quota", 429), 'openai', feature='memories', model='gpt-test') | ||
|
|
||
| log_args = mock_log.call_args.args | ||
| assert 'LLM error source=%s' in log_args[0] | ||
| assert log_args[1] == 'byok' | ||
| assert log_args[2] == 'openai' | ||
| assert log_args[8] == 'quota' | ||
|
|
||
|
|
||
| @patch('utils.llm.byok_errors.get_byok_uid', return_value='user-1') | ||
| @patch('utils.llm.byok_errors.get_byok_key', return_value=None) | ||
| def test_handle_llm_error_logs_platform_source(mock_get_key, mock_get_uid): | ||
| from utils.llm.byok_errors import handle_llm_error | ||
|
|
||
| with patch('utils.llm.byok_errors.logger.error') as mock_log: | ||
| handle_llm_error(_HTTPError("server error", 500), 'openai', feature='memories', model='gpt-test') | ||
|
|
||
| assert mock_log.call_args.args[1] == 'platform' | ||
| assert mock_log.call_args.args[8] == 'unknown' | ||
|
|
||
|
|
||
| def test_validate_byok_request_records_current_uid(): | ||
| from utils.byok import get_byok_uid, validate_byok_request | ||
|
|
||
| with patch('utils.byok._check_byok_validity', return_value=None): | ||
| validate_byok_request('user-1') | ||
|
|
||
| assert get_byok_uid() == 'user-1' | ||
|
|
||
|
|
||
| def test_llm_error_callback_uses_provider_context(): | ||
| from utils.llm.clients import _LLMErrorCallback | ||
|
|
||
| callback = _LLMErrorCallback('openai', model='gpt-test', feature='memories') | ||
| error = _HTTPError('bad key', 401) | ||
|
|
||
| with patch('utils.llm.clients.handle_llm_error') as mock_handle: | ||
| callback.on_llm_error(error) | ||
|
|
||
| mock_handle.assert_called_once() | ||
| assert mock_handle.call_args.args[:2] == (error, 'openai') |
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,90 @@ | ||||||||||||||
| import asyncio | ||||||||||||||
| import logging | ||||||||||||||
| from typing import Optional | ||||||||||||||
|
|
||||||||||||||
| from utils.byok import get_byok_key, get_byok_uid | ||||||||||||||
| from utils.executors import storage_executor, submit_with_context | ||||||||||||||
| from utils.log_sanitizer import sanitize | ||||||||||||||
|
|
||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||
|
|
||||||||||||||
| _QUOTA_ERROR_NAMES = frozenset({'RateLimitError'}) | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def get_llm_error_source(provider: Optional[str]) -> str: | ||||||||||||||
| """Return platform/byok for the current request and provider.""" | ||||||||||||||
| if provider and get_byok_key(provider): | ||||||||||||||
| return 'byok' | ||||||||||||||
| return 'platform' | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def classify_byok_llm_error(error: Exception) -> Optional[str]: | ||||||||||||||
| """Classify user-actionable BYOK failures for structured logging.""" | ||||||||||||||
| status_code = _get_status_code(error) | ||||||||||||||
| error_name = type(error).__name__ | ||||||||||||||
| error_text = sanitize(str(error)).lower() | ||||||||||||||
|
|
||||||||||||||
| if status_code == 401 or error_name == 'AuthenticationError': | ||||||||||||||
| return 'invalid' | ||||||||||||||
| if status_code == 403 or error_name == 'PermissionDeniedError': | ||||||||||||||
| return 'permission' | ||||||||||||||
| if status_code == 429 or error_name in _QUOTA_ERROR_NAMES: | ||||||||||||||
| if 'insufficient_quota' in error_text or 'quota' in error_text: | ||||||||||||||
| return 'quota' | ||||||||||||||
|
Comment on lines
+31
to
+33
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||
| return None | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def handle_llm_error( | ||||||||||||||
| error: Exception, | ||||||||||||||
| provider: Optional[str], | ||||||||||||||
| feature: Optional[str] = None, | ||||||||||||||
| model: Optional[str] = None, | ||||||||||||||
| operation: str = 'chat', | ||||||||||||||
| ) -> None: | ||||||||||||||
| """Log LLM failures with source context.""" | ||||||||||||||
| source = get_llm_error_source(provider) | ||||||||||||||
| reason = classify_byok_llm_error(error) if source == 'byok' else None | ||||||||||||||
| uid = get_byok_uid() | ||||||||||||||
| status_code = _get_status_code(error) | ||||||||||||||
|
|
||||||||||||||
| logger.error( | ||||||||||||||
| 'LLM error source=%s provider=%s feature=%s model=%s operation=%s uid=%s status_code=%s reason=%s ' | ||||||||||||||
| 'error_type=%s error=%s', | ||||||||||||||
| source, | ||||||||||||||
| provider or 'unknown', | ||||||||||||||
| feature or 'unknown', | ||||||||||||||
| model or 'unknown', | ||||||||||||||
| operation, | ||||||||||||||
| uid or 'unknown', | ||||||||||||||
| status_code or 'unknown', | ||||||||||||||
| reason or 'unknown', | ||||||||||||||
| type(error).__name__, | ||||||||||||||
| sanitize(str(error)), | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| async def handle_llm_error_async( | ||||||||||||||
| error: Exception, | ||||||||||||||
| provider: Optional[str], | ||||||||||||||
| feature: Optional[str] = None, | ||||||||||||||
| model: Optional[str] = None, | ||||||||||||||
| operation: str = 'chat', | ||||||||||||||
| ) -> None: | ||||||||||||||
| """Run LLM error handling off the event loop while preserving BYOK context.""" | ||||||||||||||
| future = submit_with_context(storage_executor, handle_llm_error, error, provider, feature, model, operation) | ||||||||||||||
| try: | ||||||||||||||
| await asyncio.wrap_future(future) | ||||||||||||||
| except Exception as e: | ||||||||||||||
| logger.error('Async LLM error handler failed provider=%s feature=%s: %s', provider, feature, e) | ||||||||||||||
|
Comment on lines
+66
to
+78
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def _get_status_code(error: Exception) -> Optional[int]: | ||||||||||||||
| status_code = getattr(error, 'status_code', None) | ||||||||||||||
| if isinstance(status_code, int): | ||||||||||||||
| return status_code | ||||||||||||||
|
|
||||||||||||||
| response = getattr(error, 'response', None) | ||||||||||||||
| response_status = getattr(response, 'status_code', None) | ||||||||||||||
| if isinstance(response_status, int): | ||||||||||||||
| return response_status | ||||||||||||||
| return None | ||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_QUOTA_ERROR_NAMESfrozenset is named after quota errors but containsRateLimitError, which is a superset of quota failures. More critically, Anthropic's credit-exhaustion 429 response body typically reads "Your credit balance is too low…" — the word "quota" never appears. As a result, an Anthropic BYOK user who runs out of credits will logreason='unknown'instead ofreason='quota', defeating the classification for the one provider wired to this path viahandle_llm_error_async. Adding a'credit'/'balance'keyword check (and optionally renaming the frozenset) would fix the gap.