12 changes: 6 additions & 6 deletions src/components/Sidebar.astro
@@ -94,18 +94,18 @@ const isApiTab = activeTab?.tab === 'API';

function inferApiMethod(title: string): { method: string; css: string } | null {
  const t = title.toLowerCase();
  if (/\b(delete|remove)\b/.test(t)) {
    return { method: 'DELETE', css: 'api-method-delete' };
  }
  if (/\b(update|edit|apply|restore)\b/.test(t)) {
    return { method: 'PATCH', css: 'api-method-patch' };
  }
  if (/\b(list|get|retrieve|health|find|export|progress|analytics|agreement|compare|stats|summary|voices|tts)\b/.test(t)) {
    return { method: 'GET', css: 'api-method-get' };
  }
  if (/\b(create|add|generate|execute|submit|assign|bulk|complete|skip|release|pause|unpause|check|upload|start|duplicate|fetch|run|rerun|cancel|clone|merge)\b/.test(t)) {
    return { method: 'POST', css: 'api-method-post' };
  }
  if (/\b(delete|remove)\b/.test(t)) {
    return { method: 'DEL', css: 'api-method-delete' };
  }
  if (/\b(update|edit|apply|restore)\b/.test(t)) {
    return { method: 'PATCH', css: 'api-method-patch' };
  }
  return null;
}
---
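Read as a unified diff without +/- markers, this hunk appears to move the delete/remove and update/edit checks ahead of the broad GET/POST keyword lists and to rename the badge from 'DEL' to 'DELETE'. A minimal sketch of why the ordering matters, assuming the top block is the added one (the function name and trimmed keyword lists below are illustrative only):

```ts
// Hypothetical check of the precedence change: with delete/remove tested first,
// a title that also contains a GET keyword ("stats") still maps to DELETE.
function inferApiMethodNew(title: string): string | null {
  const t = title.toLowerCase();
  if (/\b(delete|remove)\b/.test(t)) return 'DELETE';           // now checked first
  if (/\b(update|edit|apply|restore)\b/.test(t)) return 'PATCH';
  if (/\b(list|get|retrieve|stats|summary)\b/.test(t)) return 'GET';   // abbreviated keyword list
  if (/\b(create|add|generate|execute)\b/.test(t)) return 'POST';      // abbreviated keyword list
  return null;
}

console.log(inferApiMethodNew('Remove cached stats'));
// "DELETE" — under the old ordering, "stats" would have matched the GET list first.
```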
15 changes: 0 additions & 15 deletions src/lib/api-navigation.ts
@@ -61,11 +61,6 @@ export const apiNavigation: ApiNavGroup[] = [
"title": "Add columns to a scenario",
"href": "/docs/api/scenarios/addcolumns",
"method": "POST"
},
{
"title": "Add empty rows to a scenario",
"href": "/docs/api/scenarios/addemptyrowstodataset",
"method": "POST"
}
]
},
@@ -187,11 +182,6 @@ export const apiNavigation: ApiNavGroup[] = [
"href": "/docs/api/run-tests/executeruntest",
"method": "POST"
},
{
"title": "Update test run components",
"href": "/docs/api/run-tests/updatetestcomponents",
"method": "PATCH"
},
{
"title": "Get test executions",
"href": "/docs/api/run-tests/gettestexecutions",
@@ -202,11 +192,6 @@ export const apiNavigation: ApiNavGroup[] = [
"href": "/docs/api/run-tests/gettestscenarios",
"method": "GET"
},
{
"title": "Get call executions for a test run",
"href": "/docs/api/run-tests/getcallexecutions",
"method": "GET"
},
{
"title": "Get evaluation summary",
"href": "/docs/api/run-tests/getevalsummary",
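For context, each removed entry follows the per-endpoint item shape used throughout `api-navigation.ts`. A rough sketch of those types, reconstructed from the visible entries (field names beyond `title`, `href`, and `method` are guesses):

```ts
// Assumed shapes; the real definitions in api-navigation.ts may carry extra fields.
type ApiNavItem = {
  title: string;                                  // e.g. "Add columns to a scenario"
  href: string;                                   // e.g. "/docs/api/scenarios/addcolumns"
  method: 'GET' | 'POST' | 'PATCH' | 'DELETE';
};

type ApiNavGroup = {
  title: string;
  items: ApiNavItem[];                            // field name is a guess
};
```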
3 changes: 0 additions & 3 deletions src/lib/navigation.ts
@@ -1015,7 +1015,6 @@ export const tabNavigation: NavTab[] = [
{ title: 'Delete Scenario', href: '/docs/api/scenarios/deletescenario' },
{ title: 'Add Rows with AI', href: '/docs/api/scenarios/addscenariorowswithai' },
{ title: 'Add Columns', href: '/docs/api/scenarios/addcolumns' },
{ title: 'Add Empty Rows', href: '/docs/api/scenarios/addemptyrowstodataset' },
]
},
{
@@ -1056,10 +1055,8 @@ export const tabNavigation: NavTab[] = [
{ title: 'Get Test Run Details', href: '/docs/api/run-tests/getruntestdetails' },
{ title: 'Delete Test Run', href: '/docs/api/run-tests/deleteruntest' },
{ title: 'Execute Run Test', href: '/docs/api/run-tests/executeruntest' },
{ title: 'Update Components', href: '/docs/api/run-tests/updatetestcomponents' },
{ title: 'Get Test Executions', href: '/docs/api/run-tests/gettestexecutions' },
{ title: 'Get Test Scenarios', href: '/docs/api/run-tests/gettestscenarios' },
{ title: 'Get Call Executions', href: '/docs/api/run-tests/getcallexecutions' },
{ title: 'Get Eval Summary', href: '/docs/api/run-tests/getevalsummary' },
{ title: 'Compare Eval Summaries', href: '/docs/api/run-tests/compareevalsummaries' },
{ title: 'Add Eval Configs', href: '/docs/api/run-tests/addevalconfigs' },
1 change: 0 additions & 1 deletion src/lib/redirects.ts
@@ -17,7 +17,6 @@ export const redirectMap: Record<string, string> = {
'/api-reference/prompt-workbench/get-prompt-version-by-name': '/docs/api',
'/api-reference/run-tests/create-a-new-test-run': '/docs/api/run-tests/createruntest',
'/api-reference/run-tests/execute-a-test-run': '/docs/api/run-tests/executeruntest',
'/api-reference/scenarios/add-empty-rows-to-a-scenario': '/docs/api/scenarios/addemptyrowstodataset',
'/api-reference/scenarios/add-rows-to-a-scenario-using-ai': '/docs/api/scenarios/addscenariorowswithai',
'/api-reference/scenarios/edit-a-scenario': '/docs/api/scenarios/editscenario',
'/api-reference/scenarios/generate-or-create-a-scenario': '/docs/api/scenarios/createscenario',
61 changes: 49 additions & 12 deletions src/pages/docs/api/run-tests/addevalconfigs.mdx
@@ -10,10 +10,27 @@ description: "Adds evaluation configurations to a test run."
parameters={[
{"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run to add evaluation configurations to.", "type": "string"}
]}
requestBody={{"evaluationsConfig": [{"templateId": "your-template-id", "name": "My Eval Config", "config": {}, "mapping": {}, "errorLocalizer": false, "model": "turing_large"}]}}
responseExample={{"message": "Evaluation configs added successfully"}}
responseStatus={200}
responseStatusText="OK"
requestBody={{"evaluations_config": [{"template_id": "your-template-id", "name": "My Eval Config", "config": {}, "mapping": {}, "filters": {}, "error_localizer": false, "model": "turing_large"}]}}
responseExample={{
message: "Successfully added 1 evaluation config(s) to run test",
created_eval_configs: [
{
id: "ec1b2c3d-e5f6-7890-abcd-ef1234567890",
name: "My Eval Config",
config: {},
mapping: {},
filters: {},
error_localizer: false,
model: "turing_large",
status: null,
eval_group: null,
template_id: "your-template-uuid"
}
],
run_test_id: "f7a8b9c0-d1e2-3456-789a-bcdef0123456"
}}
responseStatus={201}
responseStatusText="Created"
/>

<ApiSection title="Authentication">
@@ -32,40 +49,60 @@ description: "Adds evaluation configurations to a test run."
</ApiSection>

<ApiSection title="Request body">
<ParamField body="evaluationsConfig" type="array of objects" required>
<ParamField body="evaluations_config" type="array of objects" required>
Array of evaluation configuration objects. Each object supports the following fields:

- **`templateId`** (string, UUID, required) -- UUID of the evaluation template to use.
- **`template_id`** (string, UUID, required) -- UUID of the evaluation template to use.

- **`name`** (string, required) -- Name for this evaluation configuration. Must be unique within the test run.
- **`name`** (string, optional) -- Name for this evaluation configuration. Defaults to `Eval-<template_id>` if omitted. Must be unique within the test run.

- **`config`** (object, optional) -- Template-specific configuration parameters.

- **`mapping`** (object, optional) -- Maps test execution data fields to the evaluation template's expected inputs.

- **`filters`** (object, optional) -- Filter criteria to restrict which test results are evaluated.

- **`errorLocalizer`** (boolean, optional) -- Enables granular error localization on evaluation failures. Defaults to `false`.
- **`error_localizer`** (boolean, optional) -- Enables granular error localization on evaluation failures. Defaults to `false`.

- **`model`** (string, optional) -- Model to use for running this evaluation.
</ParamField>
</ApiSection>
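
A minimal request sketch for this endpoint, assuming the `X-Api-Key`/`X-Secret-Key` headers described under Errors and using placeholder values throughout (copy the actual URL from the reference block at the top of the page):

```ts
// Hypothetical example: add one evaluation config to a test run.
// The URL is a placeholder; the snake_case body fields follow the spec above.
const addEvalConfigsUrl = "<endpoint from the reference block above>";

const res = await fetch(addEvalConfigsUrl, {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "X-Api-Key": process.env.FI_API_KEY ?? "",      // header names per the 401 error description
    "X-Secret-Key": process.env.FI_SECRET_KEY ?? "",
  },
  body: JSON.stringify({
    evaluations_config: [
      {
        template_id: "your-template-uuid",          // required
        name: "My Eval Config",                     // optional; defaults to Eval-<template_id>
        config: {},
        mapping: {},
        filters: {},
        error_localizer: false,
        model: "turing_large",
      },
    ],
  }),
});

if (res.status === 201) {
  const { created_eval_configs } = await res.json();
  console.log(created_eval_configs[0].id);
}
```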

<ApiSection title="Response" status={200} statusText="OK">
<ResponseField name="message" type="string">Confirmation of successful addition.</ResponseField>
<ApiSection title="Response" status={201} statusText="Created">
<ResponseField name="message" type="string">Confirmation message indicating how many evaluation configs were added.</ResponseField>
<ResponseField name="created_eval_configs" type="array of objects">
Array of created evaluation configuration objects. Each object contains: `id`, `name`, `config`, `mapping`, `filters`, `error_localizer`, `model`, `status`, `eval_group`, and `template_id`.
</ResponseField>
<ResponseField name="run_test_id" type="string">UUID of the parent test run.</ResponseField>
<ResponseField name="warnings" type="array of strings">Non-fatal issues encountered while processing individual configs. Only present if partial failures occurred.</ResponseField>
</ApiSection>

<ApiSection title="Errors">
<ParamField name="400" type="Bad Request">
Invalid or missing fields such as a non-existent `templateId`, duplicate `name`, or malformed `config`/`mapping`.
Validation error. Common causes: empty `evaluations_config`, duplicate `name` within request, name already exists in test run, non-existent `template_id`.
```json
{
"evaluations_config": ["Duplicate eval name 'My Eval Config' found in the request. Each evaluation config must have a unique name."]
}
```
Or, when the name conflicts with a config that already exists in the test run:
```json
{"error": "An evaluation config with the name 'My Eval Config' already exists in this run test. Please use a different name."}
```
</ParamField>
<ParamField name="401" type="Unauthorized">
Missing or invalid `X-Api-Key` or `X-Secret-Key` headers.
</ParamField>
<ParamField name="404" type="Not Found">
No test run found with the specified `run_test_id`.
```json
{"detail": "No RunTest matches the given query."}
```
</ParamField>
<ParamField name="500" type="Internal Server Error">
Unexpected server error. Retry later or contact support.
Unexpected server error.
```json
{"error": "Failed to add evaluation configs: <message>"}
```
</ParamField>
</ApiSection>
35 changes: 30 additions & 5 deletions src/pages/docs/api/run-tests/compareevalsummaries.mdx
@@ -11,7 +11,10 @@ description: "Compares evaluation summaries across multiple test executions."
{"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run containing the executions to compare.", "type": "string"},
{"name": "execution_ids", "in": "query", "required": false, "description": "JSON-encoded array of test execution UUIDs to compare.", "type": "string"}
]}
responseExample={{"execution-uuid-1": {"evaluations": [{"name": "Tone Check", "average_score": 0.85}]}, "execution-uuid-2": {"evaluations": [{"name": "Tone Check", "average_score": 0.92}]}}}
responseExample={{
"execution-uuid-1": [{"name": "Tone Check", "average_score": 0.85, "total_runs": 10, "passed": 8, "failed": 2}],
"execution-uuid-2": [{"name": "Tone Check", "average_score": 0.92, "total_runs": 10, "passed": 9, "failed": 1}]
}}
responseStatus={200}
responseStatusText="OK"
/>
@@ -33,25 +36,47 @@ description: "Compares evaluation summaries across multiple test executions."

<ApiSection title="Query parameters">
<ParamField query="execution_ids" type="string" required>
JSON-encoded array of test execution UUIDs to compare. Must be URL-encoded.
JSON-encoded array of test execution UUIDs to compare. Must be URL-encoded. Example: `["uuid1","uuid2"]`.
</ParamField>
</ApiSection>
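
Because `execution_ids` is a JSON array that must then be URL-encoded, a small sketch of building the query string (the UUIDs are placeholders):

```ts
// Hypothetical helper: JSON-encode the execution IDs, then URL-encode the result
// before appending it as the execution_ids query parameter.
const executionIds = ["execution-uuid-1", "execution-uuid-2"]; // placeholder UUIDs

const query = `execution_ids=${encodeURIComponent(JSON.stringify(executionIds))}`;
// -> execution_ids=%5B%22execution-uuid-1%22%2C%22execution-uuid-2%22%5D

// Append `?${query}` to the compare endpoint shown in the reference block above.
```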

<ApiSection title="Response" status={200} statusText="OK">
<ResponseField name="comparison" type="object">Dictionary keyed by execution ID, each mapping to its evaluation summary metrics.</ResponseField>
<ResponseField name="(execution_id)" type="object">
Dictionary keyed by execution UUID. Each value is an array of evaluation summary objects for that execution.
</ResponseField>
<ApiCollapsible title="Show evaluation summary object properties">
<ResponseField name="name" type="string">Name of the evaluation configuration.</ResponseField>
<ResponseField name="average_score" type="number">Average score across all evaluated calls.</ResponseField>
<ResponseField name="total_runs" type="integer">Total evaluation runs for this config.</ResponseField>
<ResponseField name="passed" type="integer">Number of passing evaluations.</ResponseField>
<ResponseField name="failed" type="integer">Number of failing evaluations.</ResponseField>
</ApiCollapsible>
</ApiSection>

<ApiSection title="Errors">
<ParamField name="400" type="Bad Request">
Missing, malformed, or invalid `execution_ids` parameter.
Missing, malformed, or empty `execution_ids` parameter.
```json
{"execution_ids": ["execution_ids must be valid JSON"]}
```
Or, when the list is empty:
```json
{"execution_ids": ["execution_ids list is required"]}
```
</ParamField>
<ParamField name="401" type="Unauthorized">
Missing or invalid `X-Api-Key` or `X-Secret-Key` headers.
</ParamField>
<ParamField name="404" type="Not Found">
No test run found with the specified `run_test_id`.
```json
{"error": "RunTest not found."}
```
</ParamField>
<ParamField name="500" type="Internal Server Error">
Unexpected server error. Retry later or contact support.
Unexpected server error.
```json
{"error": "Unable to fetch eval summary"}
```
</ParamField>
</ApiSection>
67 changes: 56 additions & 11 deletions src/pages/docs/api/run-tests/createruntest.mdx
@@ -7,8 +7,8 @@ description: "Creates a new test run."
method="POST"
endpoint="/simulate/run-tests/create/"
baseUrl="https://api.futureagi.com"
requestBody={{"name": "your-name", "description": "your-description", "scenarioIds": [], "agentDefinitionId": "your-agentDefinitionId", "agentVersion": "your-agentVersion", "evalConfigIds": [], "evaluationsConfig": [], "datasetRowIds": [], "enableToolEvaluation": true}}
responseExample={{"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "your-name", "description": "your-description", "agent_definition": "your-agentDefinitionId", "scenarios": [], "enable_tool_evaluation": true, "created_at": "2026-04-04T12:00:00Z", "updated_at": "2026-04-04T12:00:00Z"}}
requestBody={{"name": "your-name", "description": "your-description", "scenario_ids": [], "agent_definition_id": "your-agent-definition-id", "eval_config_ids": [], "evaluations_config": [], "dataset_row_ids": [], "enable_tool_evaluation": true}}
responseExample={{"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "your-name", "description": "your-description", "agent_definition": "your-agent-definition-id", "agent_version": null, "agent_definition_detail": null, "source_type": "agent_definition", "source_type_display": "Agent Definition", "scenarios": [], "scenarios_detail": [], "dataset_row_ids": [], "simulator_agent": null, "simulator_agent_detail": null, "simulate_eval_configs": [], "simulate_eval_configs_detail": [], "evals_detail": [], "organization": "org-uuid", "enable_tool_evaluation": true, "created_at": "2026-04-04T12:00:00Z", "updated_at": "2026-04-04T12:00:00Z", "last_run_at": null, "deleted": false, "deleted_at": null}}
responseStatus={201}
responseStatusText="Created"
/>
@@ -29,27 +29,27 @@ description: "Creates a new test run."
<ParamField body="description" type="string">
Optional free-text description of the test run.
</ParamField>
<ParamField body="scenarioIds" type="array of string" required>
<ParamField body="scenario_ids" type="array of string" required>
Array of scenario UUIDs to execute against. Must contain at least one valid scenario ID.
</ParamField>
<ParamField body="agentDefinitionId" type="string" required>
<ParamField body="agent_definition_id" type="string" required>
UUID of the agent definition to evaluate.
</ParamField>
<ParamField body="agentVersion" type="string">
UUID of a specific agent version to test against. Defaults to the currently active version if omitted.
</ParamField>
<ParamField body="evalConfigIds" type="array of string">
<ParamField body="eval_config_ids" type="array of string">
Array of existing evaluation configuration UUIDs to associate with this test run.
</ParamField>
<ParamField body="evaluationsConfig" type="array of objects">
<ParamField body="evaluations_config" type="array of objects">
Array of inline evaluation configuration objects to create and associate. Each object must include `template_id`, `name`, `config`, and `mapping`.
</ParamField>
<ParamField body="datasetRowIds" type="array of string">
<ParamField body="dataset_row_ids" type="array of string">
Array of dataset row UUIDs to restrict execution to specific data entries. If omitted, all rows are included.
</ParamField>
<ParamField body="enableToolEvaluation" type="boolean">
<ParamField body="enable_tool_evaluation" type="boolean">
When `true`, evaluates correctness of tool calls made by the agent. Defaults to `false`.
</ParamField>
<ParamField body="replay_session_id" type="string">
Optional UUID of a session to replay. When provided, execution replays the specified session.
</ParamField>
</ApiSection>
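
A minimal sketch of the snake_case request body and call, using the base URL and endpoint shown in the reference block above; all IDs are placeholders and the auth header names are taken from the 401 error description:

```ts
// Hypothetical example body for creating a test run; every UUID is a placeholder.
const body = {
  name: "regression-suite-v2",
  description: "Nightly regression against the active agent version",
  scenario_ids: ["scenario-uuid-1"],            // required, at least one
  agent_definition_id: "agent-definition-uuid", // required
  eval_config_ids: [],                          // optional: reuse existing eval configs
  evaluations_config: [],                       // optional: create eval configs inline
  dataset_row_ids: [],                          // optional: restrict to specific rows
  enable_tool_evaluation: true,                 // optional, defaults to false
};

const res = await fetch("https://api.futureagi.com/simulate/run-tests/create/", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "X-Api-Key": process.env.FI_API_KEY ?? "",
    "X-Secret-Key": process.env.FI_SECRET_KEY ?? "",
  },
  body: JSON.stringify(body),
});

console.log(res.status); // 201 on success
```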

<ApiSection title="Response" status={201} statusText="Created">
@@ -65,9 +65,45 @@ description: "Creates a new test run."
<ResponseField name="agent_definition" type="string">
UUID of the associated agent definition.
</ResponseField>
<ResponseField name="agent_version" type="string">
UUID of the specific agent version, or `null` if using the active version.
</ResponseField>
<ResponseField name="agent_definition_detail" type="object">
Detailed agent definition object, or `null`.
</ResponseField>
<ResponseField name="source_type" type="string">
Source type identifier (e.g. `"agent_definition"`).
</ResponseField>
<ResponseField name="source_type_display" type="string">
Human-readable source type label (e.g. `"Agent Definition"`).
</ResponseField>
<ResponseField name="scenarios" type="array of string">
Array of linked scenario UUIDs.
</ResponseField>
<ResponseField name="scenarios_detail" type="array of objects">
Array of detailed scenario objects.
</ResponseField>
<ResponseField name="dataset_row_ids" type="array of string">
Array of dataset row UUIDs associated with this test run.
</ResponseField>
<ResponseField name="simulator_agent" type="string">
UUID of the simulator agent, or `null`.
</ResponseField>
<ResponseField name="simulator_agent_detail" type="object">
Detailed simulator agent object, or `null`.
</ResponseField>
<ResponseField name="simulate_eval_configs" type="array of string">
Array of evaluation configuration UUIDs.
</ResponseField>
<ResponseField name="simulate_eval_configs_detail" type="array of objects">
Array of detailed evaluation configuration objects.
</ResponseField>
<ResponseField name="evals_detail" type="array of objects">
Array of detailed evaluation result objects.
</ResponseField>
<ResponseField name="organization" type="string">
UUID of the owning organization.
</ResponseField>
<ResponseField name="enable_tool_evaluation" type="boolean">
Whether tool evaluation is enabled.
</ResponseField>
@@ -77,6 +113,15 @@ description: "Creates a new test run."
<ResponseField name="updated_at" type="string">
ISO 8601 last-modified timestamp.
</ResponseField>
<ResponseField name="last_run_at" type="string">
ISO 8601 timestamp of the most recent execution, or `null`.
</ResponseField>
<ResponseField name="deleted" type="boolean">
Whether the test run has been soft-deleted.
</ResponseField>
<ResponseField name="deleted_at" type="string">
ISO 8601 timestamp of soft-deletion, or `null`.
</ResponseField>
</ApiSection>

<ApiSection title="Errors">
11 changes: 10 additions & 1 deletion src/pages/docs/api/run-tests/deleteevalconfig.mdx
@@ -41,14 +41,23 @@ description: "Deletes an evaluation configuration from a test run."
<ApiSection title="Errors">
<ParamField name="400" type="Bad Request">
Cannot delete the last remaining evaluation configuration in the test run.
```json
{"error": "Cannot delete the last evaluation config. At least one evaluation config must remain."}
```
</ParamField>
<ParamField name="401" type="Unauthorized">
Missing or invalid `X-Api-Key` or `X-Secret-Key` headers.
</ParamField>
<ParamField name="404" type="Not Found">
Test run or evaluation configuration not found.
```json
{"error": "Evaluation config not found"}
```
</ParamField>
<ParamField name="500" type="Internal Server Error">
Unexpected server error. Retry later or contact support.
Unexpected server error.
```json
{"error": "Failed to delete evaluation config: <message>"}
```
</ParamField>
</ApiSection>
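
A small sketch of issuing the delete and handling the last-config guard, with the URL left as a placeholder to be copied from the reference block above:

```ts
// Hypothetical deletion call; the URL is a placeholder.
const deleteEvalConfigUrl = "<endpoint from the reference block above>";

const res = await fetch(deleteEvalConfigUrl, {
  method: "DELETE",
  headers: {
    "X-Api-Key": process.env.FI_API_KEY ?? "",
    "X-Secret-Key": process.env.FI_SECRET_KEY ?? "",
  },
});

if (res.status === 400) {
  // e.g. "Cannot delete the last evaluation config. At least one evaluation config must remain."
  const { error } = await res.json();
  console.warn(error);
}
```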