tests/unit/vertexai/genai/replays/test_run_inference.py (205 additions, 0 deletions)
@@ -95,6 +95,211 @@ def test_inference_with_eval_cases_multi_turn_agent_data(client):
assert "agent_data" in inference_result.eval_dataset_df.columns


def test_inference_with_eval_cases_agent_engine_agent_data(client):
"""Tests N+1 inference with agent_data via remote Agent Engine."""
agent_engine = client.agent_engines.get(
name="projects/977012026409/locations/us-central1"
"/reasoningEngines/7188347537655332864"
)

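    # The trace below ends with a user event, so run_inference should treat
    # it as incomplete and generate the agent's next (N+1) response.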
eval_case = types.EvalCase(
agent_data=types.evals.AgentData(
turns=[
types.evals.ConversationTurn(
turn_index=0,
events=[
types.evals.AgentEvent(
author="user",
content=genai_types.Content(
role="user",
parts=[genai_types.Part(text="My name is Bob.")],
),
),
types.evals.AgentEvent(
author="model",
content=genai_types.Content(
role="model",
parts=[
genai_types.Part(text="Hi Bob! Nice to meet you.")
],
),
),
],
),
types.evals.ConversationTurn(
turn_index=1,
events=[
types.evals.AgentEvent(
author="user",
content=genai_types.Content(
role="user",
parts=[genai_types.Part(text="What is my name?")],
),
),
],
),
],
),
)
eval_dataset = types.EvaluationDataset(eval_cases=[eval_case])

inference_result = client.evals.run_inference(
agent=agent_engine,
src=eval_dataset,
)
assert isinstance(inference_result, types.EvaluationDataset)
assert inference_result.eval_dataset_df is not None
assert "agent_data" in inference_result.eval_dataset_df.columns


def test_inference_with_prompt_column_local_agent(client):
"""Tests run_inference with a prompt column and a local ADK agent.

Verifies the existing prompt-based inference path: a DataFrame with
a 'prompt' column is passed alongside a local LlmAgent. The agent
should respond to the prompt normally.
"""
import pandas as pd

agent = LlmAgent(
name="prompt_agent",
model="gemini-2.5-flash",
instruction="You are a helpful assistant. Answer questions concisely.",
)

prompt_df = pd.DataFrame(
{
"prompt": ["What is the capital of France?"],
}
)
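    # Each prompt row is expected to yield one corresponding row in the
    # result DataFrame.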
eval_dataset = types.EvaluationDataset(eval_dataset_df=prompt_df)

inference_result = client.evals.run_inference(
agent=agent,
src=eval_dataset,
)
assert isinstance(inference_result, types.EvaluationDataset)
result_df = inference_result.eval_dataset_df
assert result_df is not None
assert "response" in result_df.columns
# The response should be a non-empty string (actual model answer).
response_val = result_df["response"].iloc[0]
assert response_val is not None
assert isinstance(response_val, str)
assert len(response_val) > 0


def test_inference_with_completed_and_incomplete_agent_data(client):
"""Tests run_inference with a mix of completed and N+1 agent traces.

Provides two eval_cases:
- Row 0: completed trace (last event from agent) — BYOD, no inference.
- Row 1: incomplete trace (last event from user) — N+1 inference.

The completed row should return the existing agent response without
making any API calls. The N+1 row should run inference normally.
"""
agent = LlmAgent(
name="mixed_agent",
model="gemini-2.5-flash",
instruction="You are a helpful assistant. Answer questions concisely.",
)

# Row 0: Completed trace — last event is from the agent.
completed_case = types.EvalCase(
agent_data=types.evals.AgentData(
turns=[
types.evals.ConversationTurn(
turn_index=0,
events=[
types.evals.AgentEvent(
author="user",
content=genai_types.Content(
role="user",
parts=[genai_types.Part(text="What color is the sky?")],
),
),
types.evals.AgentEvent(
author="mixed_agent",
content=genai_types.Content(
role="model",
parts=[genai_types.Part(text="The sky is blue.")],
),
),
],
),
],
),
)

# Row 1: N+1 trace — last event is from the user.
n_plus_1_case = types.EvalCase(
agent_data=types.evals.AgentData(
turns=[
types.evals.ConversationTurn(
turn_index=0,
events=[
types.evals.AgentEvent(
author="user",
content=genai_types.Content(
role="user",
parts=[
genai_types.Part(text="My favorite number is 7.")
],
),
),
types.evals.AgentEvent(
author="mixed_agent",
content=genai_types.Content(
role="model",
parts=[genai_types.Part(text="Got it, 7!")],
),
),
],
),
types.evals.ConversationTurn(
turn_index=1,
events=[
types.evals.AgentEvent(
author="user",
content=genai_types.Content(
role="user",
parts=[
genai_types.Part(text="What is my favorite number?")
],
),
),
],
),
],
),
)

    eval_dataset = types.EvaluationDataset(
        eval_cases=[completed_case, n_plus_1_case]
    )

inference_result = client.evals.run_inference(
agent=agent,
src=eval_dataset,
)
assert isinstance(inference_result, types.EvaluationDataset)
result_df = inference_result.eval_dataset_df
assert result_df is not None
assert len(result_df) == 2

# Row 0 (completed trace): response should contain the existing
# agent answer — "The sky is blue."
row0_response = result_df["response"].iloc[0]
assert row0_response is not None
assert "blue" in row0_response.lower()

# Row 1 (N+1 inference): response should be a non-empty string
# from the model (actual inference).
row1_response = result_df["response"].iloc[1]
assert row1_response is not None
assert isinstance(row1_response, str)
assert len(row1_response) > 0

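
# Hypothetical helpers (a sketch, not part of the SDK) showing how the
# verbose AgentData traces above could be built more compactly; the names
# _event and _turn are assumptions introduced only for illustration.
def _event(author: str, role: str, text: str) -> types.evals.AgentEvent:
    """Builds an AgentEvent with a single text part."""
    return types.evals.AgentEvent(
        author=author,
        content=genai_types.Content(
            role=role,
            parts=[genai_types.Part(text=text)],
        ),
    )


def _turn(
    index: int, *events: types.evals.AgentEvent
) -> types.evals.ConversationTurn:
    """Builds a ConversationTurn from an ordered sequence of events."""
    return types.evals.ConversationTurn(turn_index=index, events=list(events))


# Example:
#     _turn(0, _event("user", "user", "Hi"), _event("model", "model", "Hello"))
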

pytestmark = pytest_helper.setup(
file=__file__,
globals_for_file=globals(),