Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions src/google/adk/cli/adk_web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from pydantic import Field
from pydantic import ValidationError
from starlette.types import Lifespan
from typing_extensions import deprecated
from typing_extensions import override
from watchdog.observers import Observer

Expand All @@ -66,6 +67,7 @@
from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
from ..evaluation.eval_metrics import MetricInfo
from ..evaluation.eval_result import EvalSetResult
from ..evaluation.eval_set import EvalSet
from ..evaluation.eval_set_results_manager import EvalSetResultsManager
from ..evaluation.eval_sets_manager import EvalSetsManager
from ..events.event import Event
Expand Down Expand Up @@ -197,6 +199,10 @@ class GetEventGraphResult(common.BaseModel):
dot_src: str


# Request body accepted by the `create_eval_set` endpoint
# (POST /apps/{app_name}/eval-sets).
class CreateEvalSetRequest(common.BaseModel):
    # Eval set to create. The `create_eval_set` handler forwards only its
    # `eval_set_id` to the eval sets manager.
    eval_set: EvalSet


class AdkWebServer:
"""Helper class for setting up and running the ADK web server on FastAPI.

Expand Down Expand Up @@ -466,23 +472,45 @@ async def delete_session(
)

@app.post(
"/apps/{app_name}/eval_sets/{eval_set_id}",
"/apps/{app_name}/eval-sets",
response_model_exclude_none=True,
tags=[TAG_EVALUATION],
)
async def create_eval_set(
app_name: str,
eval_set_id: str,
):
"""Creates an eval set, given the id."""
app_name: str, create_eval_set_request: CreateEvalSetRequest
) -> EvalSet:
try:
self.eval_sets_manager.create_eval_set(app_name, eval_set_id)
return self.eval_sets_manager.create_eval_set(
app_name=app_name,
eval_set_id=create_eval_set_request.eval_set.eval_set_id,
)
except ValueError as ve:
raise HTTPException(
status_code=400,
detail=str(ve),
) from ve

@deprecated(
    "Please use create_eval_set instead. This will be removed in future"
    " releases."
)
@app.post(
    "/apps/{app_name}/eval_sets/{eval_set_id}",
    response_model_exclude_none=True,
    tags=[TAG_EVALUATION],
    # `@deprecated` above only wraps the name bound in this scope; the route
    # is registered by `app.post` with the undecorated function. Flag the
    # path operation itself so the generated OpenAPI schema marks it as
    # deprecated for HTTP clients.
    deprecated=True,
)
async def create_eval_set_legacy(
    app_name: str,
    eval_set_id: str,
):
    """Creates an eval set, given the id.

    Deprecated: kept for clients that still pass the eval set id in the URL
    path. Use the `create_eval_set` endpoint instead.
    """
    # Wrap the path parameter in the request model expected by
    # `create_eval_set`, starting from an empty list of eval cases.
    legacy_request = CreateEvalSetRequest(
        eval_set=EvalSet(eval_set_id=eval_set_id, eval_cases=[])
    )
    # Delegate so both endpoints share the same creation and error handling.
    # The created EvalSet is not returned, so this endpoint responds with an
    # empty (null) body.
    await create_eval_set(
        app_name=app_name,
        create_eval_set_request=legacy_request,
    )

@app.get(
"/apps/{app_name}/eval_sets",
response_model_exclude_none=True,
Expand Down
14 changes: 11 additions & 3 deletions src/google/adk/evaluation/eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from abc import abstractmethod
from typing import Optional

from ..errors.not_found_error import NotFoundError
from .eval_case import EvalCase
from .eval_set import EvalSet

Expand All @@ -31,8 +30,17 @@ def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
"""Returns an EvalSet identified by an app_name and eval_set_id."""

@abstractmethod
def create_eval_set(self, app_name: str, eval_set_id: str):
"""Creates an empty EvalSet given the app_name and eval_set_id."""
def create_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
"""Creates and returns an empty EvalSet given the app_name and eval_set_id.

Raises:
ValueError: If eval set id is not valid or an eval set already exists. A
valid eval set id is a string that has one or more of the following characters:
- Lower case characters
- Upper case characters
- 0-9
- Underscore
"""

@abstractmethod
def list_eval_sets(self, app_name: str) -> list[str]:
Expand Down
9 changes: 7 additions & 2 deletions src/google/adk/evaluation/gcs_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,12 @@ def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
return self._load_eval_set_from_blob(eval_set_blob_name)

@override
def create_eval_set(self, app_name: str, eval_set_id: str):
"""Creates an empty EvalSet and saves it to GCS."""
def create_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
"""Creates an empty EvalSet and saves it to GCS.

Raises:
ValueError: If eval set id is not valid or an eval set already exists.
"""
self._validate_id(id_name="Eval Set Id", id_value=eval_set_id)
new_eval_set_blob_name = self._get_eval_set_blob_name(app_name, eval_set_id)
if self.bucket.blob(new_eval_set_blob_name).exists():
Expand All @@ -115,6 +119,7 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
creation_timestamp=time.time(),
)
self._write_eval_set_to_blob(new_eval_set_blob_name, new_eval_set)
return new_eval_set

@override
def list_eval_sets(self, app_name: str) -> list[str]:
Expand Down
1 change: 1 addition & 0 deletions src/google/adk/evaluation/in_memory_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
)
self._eval_sets[app_name][eval_set_id] = new_eval_set
self._eval_cases[app_name][eval_set_id] = {}
return new_eval_set

@override
def list_eval_sets(self, app_name: str) -> list[str]:
Expand Down
13 changes: 11 additions & 2 deletions src/google/adk/evaluation/local_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,12 @@ def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
return None

@override
def create_eval_set(self, app_name: str, eval_set_id: str):
"""Creates an empty EvalSet given the app_name and eval_set_id."""
def create_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
"""Creates and returns an empty EvalSet given the app_name and eval_set_id.

Raises:
ValueError: If eval set id is not valid or an eval set already exists.
"""
self._validate_id(id_name="Eval Set Id", id_value=eval_set_id)

# Define the file path
Expand All @@ -224,6 +228,11 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
creation_timestamp=time.time(),
)
self._write_eval_set_to_path(new_eval_set_path, new_eval_set)
return new_eval_set

raise ValueError(
f"EvalSet {eval_set_id} already exists for app {app_name}."
)

@override
def list_eval_sets(self, app_name: str) -> list[str]:
Expand Down
18 changes: 11 additions & 7 deletions tests/unittests/evaluation/test_gcs_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,21 @@ def test_gcs_eval_sets_manager_create_eval_set_success(
app_name, eval_set_id
)

gcs_eval_sets_manager.create_eval_set(app_name, eval_set_id)
created_eval_set = gcs_eval_sets_manager.create_eval_set(
app_name, eval_set_id
)

expected_eval_set = EvalSet(
eval_set_id=eval_set_id,
name=eval_set_id,
eval_cases=[],
creation_timestamp=mocked_time,
)
mock_write_eval_set_to_blob.assert_called_once_with(
eval_set_blob_name,
EvalSet(
eval_set_id=eval_set_id,
name=eval_set_id,
eval_cases=[],
creation_timestamp=mocked_time,
),
expected_eval_set,
)
assert created_eval_set == expected_eval_set

def test_gcs_eval_sets_manager_create_eval_set_invalid_id(
self, gcs_eval_sets_manager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ def eval_case_id():


def test_create_eval_set(manager, app_name, eval_set_id):
manager.create_eval_set(app_name, eval_set_id)
eval_set = manager.get_eval_set(app_name, eval_set_id)
eval_set = manager.create_eval_set(app_name, eval_set_id)
assert eval_set is not None
assert eval_set.eval_set_id == eval_set_id
assert eval_set.eval_cases == []
Expand Down
32 changes: 25 additions & 7 deletions tests/unittests/evaluation/test_local_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,16 +370,21 @@ def test_local_eval_sets_manager_create_eval_set_success(
eval_set_id + _EVAL_SET_FILE_EXTENSION,
)

local_eval_sets_manager.create_eval_set(app_name, eval_set_id)
created_eval_set = local_eval_sets_manager.create_eval_set(
app_name, eval_set_id
)

expected_eval_set = EvalSet(
eval_set_id=eval_set_id,
name=eval_set_id,
eval_cases=[],
creation_timestamp=mocked_time,
)
mock_write_eval_set_to_path.assert_called_once_with(
eval_set_file_path,
EvalSet(
eval_set_id=eval_set_id,
name=eval_set_id,
eval_cases=[],
creation_timestamp=mocked_time,
),
expected_eval_set,
)
assert created_eval_set == expected_eval_set

def test_local_eval_sets_manager_create_eval_set_invalid_id(
self, local_eval_sets_manager
Expand All @@ -390,6 +395,19 @@ def test_local_eval_sets_manager_create_eval_set_invalid_id(
with pytest.raises(ValueError, match="Invalid Eval Set Id"):
local_eval_sets_manager.create_eval_set(app_name, eval_set_id)

def test_local_eval_sets_manager_create_eval_set_already_exists(
    self, local_eval_sets_manager, mocker
):
    """Creating an eval set that is reported as existing raises ValueError."""
    # Imported locally so this test does not depend on the module's import
    # block.
    import re

    app_name = "test_app"
    eval_set_id = "existing_eval_set_id"
    # Make every `os.path.exists` check report True for the duration of the
    # test.
    mocker.patch("os.path.exists", return_value=True)

    expected_message = (
        f"EvalSet {eval_set_id} already exists for app {app_name}."
    )
    # `match` is applied as a regular-expression search, so the message is
    # escaped to make the trailing "." compare literally instead of acting as
    # a wildcard.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        local_eval_sets_manager.create_eval_set(app_name, eval_set_id)

def test_local_eval_sets_manager_list_eval_sets_success(
self, local_eval_sets_manager, mocker
):
Expand Down