2 changes: 2 additions & 0 deletions common/config_models.py
@@ -19,8 +19,10 @@ class DefaultLlm(BaseModel):
"""Default LLM configuration."""

default_model: str
fallback_model: str | None = None
default_temperature: float
default_max_tokens: int
default_request_timeout: int = 120


class RetryConfig(BaseModel):
2 changes: 2 additions & 0 deletions common/global_config.yaml
@@ -9,8 +9,10 @@ example_parent:
########################################################
default_llm:
default_model: gemini/gemini-2.0-flash
fallback_model: null
default_temperature: 0.5
default_max_tokens: 100000
default_request_timeout: 300

llm_config:
cache_enabled: false
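
For orientation, here is a minimal sketch of how the two new settings round-trip from the YAML into the Pydantic model. The `yaml.safe_load` call and the `example_parent` nesting are assumptions inferred from the hunk header, not code shown in this diff; the project presumably exposes the parsed result through `common.global_config` rather than loading it ad hoc like this.

```python
import yaml
from pydantic import BaseModel


class DefaultLlm(BaseModel):
    """Mirror of the model in common/config_models.py after this change."""

    default_model: str
    fallback_model: str | None = None      # new: optional fallback model
    default_temperature: float
    default_max_tokens: int
    default_request_timeout: int = 120     # new: per-request timeout in seconds


with open("common/global_config.yaml") as f:
    raw = yaml.safe_load(f)

cfg = DefaultLlm(**raw["example_parent"]["default_llm"])
print(cfg.fallback_model)           # None until a fallback model is configured
print(cfg.default_request_timeout)  # 300 — the YAML value overrides the 120 default
```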
14 changes: 7 additions & 7 deletions tests/healthcheck/test_env_var_loading.py
@@ -31,7 +31,7 @@ def test_env_var_loading_precedence(monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "system_openai_key")

# 2. Create a temporary .env file
dot_env_content = "DEV_ENV=dotenv\n" "OPENAI_API_KEY=dotenv_openai_key\n"
dot_env_content = "DEV_ENV=dotenv\nOPENAI_API_KEY=dotenv_openai_key\n"
with open(dot_env_path, "w") as f:
f.write(dot_env_content)

@@ -41,12 +41,12 @@ def test_env_var_loading_precedence(monkeypatch):

# 4. Assert that the variables are loaded with the correct precedence
assert reloaded_config.DEV_ENV == "dotenv", "Should load from .env first"
assert (
reloaded_config.ANTHROPIC_API_KEY == "system_anthropic_key"
), "Should fall back to system env"
assert (
reloaded_config.OPENAI_API_KEY == "dotenv_openai_key"
), "Should load from .env"
assert reloaded_config.ANTHROPIC_API_KEY == "system_anthropic_key", (
"Should fall back to system env"
)
assert reloaded_config.OPENAI_API_KEY == "dotenv_openai_key", (
"Should load from .env"
)

finally:
# Clean up and restore the original .env file if it existed
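
The precedence these assertions encode — values in `.env` win over already-set system variables, with the system environment as a fallback — matches python-dotenv's `override=True` behavior. A minimal sketch, assuming the config module loads `.env` that way (the actual loader is not shown in this diff):

```python
import os
from dotenv import load_dotenv

os.environ["OPENAI_API_KEY"] = "system_openai_key"
os.environ["ANTHROPIC_API_KEY"] = "system_anthropic_key"

# With override=True, values from .env replace existing environment variables;
# anything absent from .env falls back to the system environment.
load_dotenv(".env", override=True)

print(os.environ["OPENAI_API_KEY"])     # "dotenv_openai_key"  (from .env)
print(os.environ["ANTHROPIC_API_KEY"])  # "system_anthropic_key"  (system fallback)
```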
62 changes: 31 additions & 31 deletions tests/healthcheck/test_pydantic_type_coercion.py
@@ -39,51 +39,51 @@ def test_pydantic_type_coercion(monkeypatch):
config = common_module.global_config

# Verify integer coercion
assert isinstance(
config.default_llm.default_max_tokens, int
), "default_max_tokens should be int"
assert (
config.default_llm.default_max_tokens == 50000
), "default_max_tokens should be 50000"

assert isinstance(
config.llm_config.retry.max_attempts, int
), "max_attempts should be int"
assert isinstance(config.default_llm.default_max_tokens, int), (
"default_max_tokens should be int"
)
assert config.default_llm.default_max_tokens == 50000, (
"default_max_tokens should be 50000"
)

assert isinstance(config.llm_config.retry.max_attempts, int), (
"max_attempts should be int"
)
assert config.llm_config.retry.max_attempts == 5, "max_attempts should be 5"

assert isinstance(
config.llm_config.retry.min_wait_seconds, int
), "min_wait_seconds should be int"
assert isinstance(config.llm_config.retry.min_wait_seconds, int), (
"min_wait_seconds should be int"
)
assert config.llm_config.retry.min_wait_seconds == 2, "min_wait_seconds should be 2"

assert isinstance(
config.llm_config.retry.max_wait_seconds, int
), "max_wait_seconds should be int"
assert (
config.llm_config.retry.max_wait_seconds == 10
), "max_wait_seconds should be 10"
assert isinstance(config.llm_config.retry.max_wait_seconds, int), (
"max_wait_seconds should be int"
)
assert config.llm_config.retry.max_wait_seconds == 10, (
"max_wait_seconds should be 10"
)

# Verify float coercion
assert isinstance(
config.default_llm.default_temperature, float
), "default_temperature should be float"
assert (
config.default_llm.default_temperature == 0.7
), "default_temperature should be 0.7"
assert isinstance(config.default_llm.default_temperature, float), (
"default_temperature should be float"
)
assert config.default_llm.default_temperature == 0.7, (
"default_temperature should be 0.7"
)

# Verify boolean coercion
assert isinstance(
config.llm_config.cache_enabled, bool
), "cache_enabled should be bool"
assert isinstance(config.llm_config.cache_enabled, bool), (
"cache_enabled should be bool"
)
assert config.llm_config.cache_enabled is True, "cache_enabled should be True"

assert isinstance(config.logging.verbose, bool), "verbose should be bool"
assert config.logging.verbose is False, "verbose should be False"

assert isinstance(config.logging.format.show_time, bool), "show_time should be bool"
assert (
config.logging.format.show_time is True
), "show_time should be True (from '1')"
assert config.logging.format.show_time is True, (
"show_time should be True (from '1')"
)

assert isinstance(config.logging.levels.debug, bool), "debug should be bool"
assert config.logging.levels.debug is True, "debug should be True"
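
The coercions asserted above are standard Pydantic behavior: values that arrive as strings (for example from environment-variable overrides) are converted to the declared field types in non-strict mode. A small self-contained illustration, using stand-in models rather than the project's real config classes:

```python
from pydantic import BaseModel


class Retry(BaseModel):
    max_attempts: int
    min_wait_seconds: int


class LogFormat(BaseModel):
    show_time: bool


# String inputs are coerced to the declared numeric types.
r = Retry(max_attempts="5", min_wait_seconds="2")
assert r.max_attempts == 5 and isinstance(r.max_attempts, int)

# "1" coerces to True, matching the "show_time should be True (from '1')" assertion.
assert LogFormat(show_time="1").show_time is True
```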
142 changes: 142 additions & 0 deletions tests/utils/llm/test_dspy_inference_robustness.py
@@ -0,0 +1,142 @@
import asyncio
from unittest.mock import AsyncMock, patch
from tests.test_template import TestTemplate
from utils.llm.dspy_inference import DSPYInference
from litellm.exceptions import RateLimitError, ServiceUnavailableError, Timeout
import dspy

Comment on lines +1 to +7
syntax: Missing __init__.py files in tests/utils/ and tests/utils/llm/ directories. Per .cursor/rules/tests.mdc, new test directories require __init__.py files so Python can recognize them.

Create:

  • tests/utils/__init__.py
  • tests/utils/llm/__init__.py

Context Used: Context from dashboard - On how to write tests (source)

class MockSignature(dspy.Signature):
"""A mock DSPy signature for testing purposes."""
input = dspy.InputField()
output = dspy.OutputField()

class TestDSPYInferenceRobustness(TestTemplate):
"""Tests for DSPYInference robustness features (retry, fallback, timeout)."""

def test_retry_on_rate_limit(self):
"""
Tests that the DSPYInference class correctly retries operations when encountering
a RateLimitError from the LLM provider.
"""
async def _test():
with patch("common.global_config.global_config.llm_api_key", return_value="fake-key"), \
patch("common.global_config.global_config.default_llm.default_request_timeout", 1):

inference = DSPYInference(pred_signature=MockSignature, observe=False)

error = RateLimitError("Rate limit", llm_provider="openai", model="gpt-4")

mock_method = AsyncMock(side_effect=[
error,
dspy.Prediction(output="Success")
])
inference.inference_module_async = mock_method
logic: Test bypasses the actual retry logic by mocking inference_module_async directly. The @retry decorator on _run_inference is never executed since you're replacing the method it calls.

Mock _run_inference instead to test the actual retry behavior:

mock_method = AsyncMock(side_effect=[
    error,
    dspy.Prediction(output="Success")
])
inference._run_inference = mock_method

Context Used: Context from dashboard - On how to write tests (source)


result = await inference.run(input="test")

assert result.output == "Success"
assert mock_method.call_count == 2

asyncio.run(_test())

def test_retry_on_timeout(self):
"""
Tests that the DSPYInference class correctly retries operations when encountering
a Timeout error.
"""
async def _test():
with patch("common.global_config.global_config.llm_api_key", return_value="fake-key"), \
patch("common.global_config.global_config.default_llm.default_request_timeout", 1):

inference = DSPYInference(pred_signature=MockSignature, observe=False)

error = Timeout("Timeout", llm_provider="openai", model="gpt-4")

mock_method = AsyncMock(side_effect=[
error,
dspy.Prediction(output="Success")
])
inference.inference_module_async = mock_method
logic: Same issue - test bypasses retry logic. Mock _run_inference instead of inference_module_async to test actual retry behavior with exponential backoff.

Context Used: Context from dashboard - On how to write tests (source)


result = await inference.run(input="test")

assert result.output == "Success"
assert mock_method.call_count == 2

asyncio.run(_test())

def test_fallback_logic(self):
"""
Tests the model fallback mechanism.
"""
async def _test():
with patch("common.global_config.global_config.llm_api_key", return_value="fake-key"):

# Setup with fallback
inference = DSPYInference(
pred_signature=MockSignature,
observe=False,
model_name="primary-model",
fallback_model="fallback-model"
)

async def side_effect(*args, **kwargs):
lm = kwargs.get('lm')
if lm.model == "primary-model":
raise ServiceUnavailableError("Down", llm_provider="openai", model="primary-model")
elif lm.model == "fallback-model":
return dspy.Prediction(output="Fallback Success")
else:
raise ValueError(f"Unknown model: {lm.model}")

inference.inference_module_async = AsyncMock(side_effect=side_effect)
logic: Mocking inference_module_async bypasses both retry and fallback logic. Mock _run_inference to properly test the retry-then-fallback flow.

Context Used: Context from dashboard - On how to write tests (source)


result = await inference.run(input="test")

assert result.output == "Fallback Success"

# Primary model should have been retried max_attempts times (default 3)
# Fallback model called once
# Total 4
assert inference.inference_module_async.call_count == 4

asyncio.run(_test())

def test_fallback_failure(self):
"""
Tests the scenario where both the primary and fallback models fail.
"""
async def _test():
with patch("common.global_config.global_config.llm_api_key", return_value="fake-key"):

# Setup with fallback where fallback also fails
inference = DSPYInference(
pred_signature=MockSignature,
observe=False,
model_name="primary-model",
fallback_model="fallback-model"
)

async def side_effect(*args, **kwargs):
lm = kwargs.get('lm')
if lm.model == "primary-model":
raise ServiceUnavailableError("Down", llm_provider="openai", model="primary-model")
elif lm.model == "fallback-model":
raise ServiceUnavailableError("Also Down", llm_provider="openai", model="fallback-model")
else:
raise ValueError(f"Unknown model: {lm.model}")

inference.inference_module_async = AsyncMock(side_effect=side_effect)
logic: Same mocking issue. Test should mock _run_inference to verify the actual retry and fallback behavior.

Context Used: Context from dashboard - On how to write tests (source)


# Execute and expect exception
try:
await inference.run(input="test")
assert False, "Should have raised ServiceUnavailableError"
except ServiceUnavailableError:
pass

# Primary called 3 times, Fallback called 3 times
# Total 6
assert inference.inference_module_async.call_count == 6

asyncio.run(_test())
49 changes: 42 additions & 7 deletions utils/llm/dspy_inference.py
@@ -10,7 +10,7 @@
retry_if_exception_type,
)
from utils.llm.dspy_langfuse import LangFuseDSPYCallback
from litellm.exceptions import ServiceUnavailableError
from litellm.exceptions import ServiceUnavailableError, RateLimitError, Timeout
from langfuse.decorators import observe # type: ignore


@@ -21,8 +21,10 @@ def __init__(
tools: list[Callable[..., Any]] | None = None,
observe: bool = True,
model_name: str = global_config.default_llm.default_model,
fallback_model: str | None = global_config.default_llm.fallback_model,
temperature: float = global_config.default_llm.default_temperature,
max_tokens: int = global_config.default_llm.default_max_tokens,
request_timeout: int = global_config.default_llm.default_request_timeout,
max_iters: int = 5,
) -> None:
if tools is None:
@@ -35,7 +37,26 @@ def __init__(
cache=global_config.llm_config.cache_enabled,
temperature=temperature,
max_tokens=max_tokens,
timeout=request_timeout,
)

self.fallback_lm = None
if fallback_model:
try:
fallback_api_key = global_config.llm_api_key(fallback_model)
self.fallback_lm = dspy.LM(
model=fallback_model,
api_key=fallback_api_key,
cache=global_config.llm_config.cache_enabled,
temperature=temperature,
max_tokens=max_tokens,
timeout=request_timeout,
)
except Exception as e:
log.warning(
f"Failed to initialize fallback model {fallback_model}: {e}"
)

if observe:
# Initialize a LangFuseDSPYCallback and configure the LM instance for generation tracing
self.callback = LangFuseDSPYCallback(pred_signature)
@@ -56,26 +77,40 @@ def __init__(
self.inference_module
)

@observe()
@retry(
retry=retry_if_exception_type(ServiceUnavailableError),
retry=retry_if_exception_type(
(ServiceUnavailableError, RateLimitError, Timeout)
),
stop=stop_after_attempt(global_config.llm_config.retry.max_attempts),
wait=wait_exponential(
multiplier=global_config.llm_config.retry.min_wait_seconds,
max=global_config.llm_config.retry.max_wait_seconds,
),
before_sleep=lambda retry_state: log.warning(
f"Retrying due to ServiceUnavailableError. Attempt {retry_state.attempt_number}"
f"Retrying due to LLM Error ({retry_state.outcome.exception()}). Attempt {retry_state.attempt_number}"
),
reraise=True,
)
async def _run_inference(self, lm, **kwargs) -> Any:
return await self.inference_module_async(**kwargs, lm=lm)

@observe()
async def run(
self,
**kwargs: Any,
) -> Any:
try:
# user_id is passed if the pred_signature requires it.
result = await self.inference_module_async(**kwargs, lm=self.lm)
result = await self._run_inference(lm=self.lm, **kwargs)
except Exception as e:
log.error(f"Error in run: {str(e)}")
raise
if self.fallback_lm:
log.warning(f"Primary model failed: {e}. Switching to fallback model.")
try:
result = await self._run_inference(lm=self.fallback_lm, **kwargs)
except Exception as fallback_error:
log.error(f"Fallback model also failed: {fallback_error}")
raise fallback_error
else:
log.error(f"Error in run: {str(e)}")
raise
return result
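
Taken together, the changes give the primary model a tenacity-managed retry loop and then a single switch to the fallback LM, which gets its own retry loop. The standalone sketch below reproduces that retry-then-fallback flow with stand-in names and shrunken wait times (it is not the project's module); it also shows where the test's expected call counts come from — three attempts on the primary plus one successful fallback call.

```python
import asyncio
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)


class ServiceUnavailable(Exception):
    """Stand-in for litellm's ServiceUnavailableError."""


calls = {"primary-model": 0, "fallback-model": 0}


@retry(
    retry=retry_if_exception_type(ServiceUnavailable),
    stop=stop_after_attempt(3),                        # max_attempts
    wait=wait_exponential(multiplier=0.01, max=0.05),  # tiny waits so the sketch runs instantly
    reraise=True,
)
async def run_inference(model: str) -> str:
    calls[model] += 1
    if model == "primary-model":
        raise ServiceUnavailable("primary is down")
    return f"prediction from {model}"


async def run_with_fallback() -> str:
    try:
        return await run_inference("primary-model")
    except Exception:
        # Primary exhausted its retries; the fallback gets its own retry loop.
        return await run_inference("fallback-model")


print(asyncio.run(run_with_fallback()))  # prediction from fallback-model
print(calls)                             # {'primary-model': 3, 'fallback-model': 1}
```

If the fallback fails as well, both loops run to exhaustion and the final exception propagates, which is the 3 + 3 = 6 call count asserted in `test_fallback_failure`.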