Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 155 additions & 8 deletions genkit-tools/cli/src/commands/dev-test-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import {
GenerateRequestData,
GenerateResponseChunkData,
GenerateResponseData,
GenerateResponseSchema,
Part,
Expand All @@ -40,6 +41,7 @@ type TestCase = {
name: string;
input: GenerateRequestData;
validators: string[];
stream?: boolean;
};

type TestSuite = {
Expand Down Expand Up @@ -68,7 +70,11 @@ const imageBase64 =

const VALIDATORS: Record<
string,
(response: GenerateResponseData, arg?: string) => void
(
response: GenerateResponseData,
arg?: string,
chunks?: GenerateResponseChunkData[]
) => void
> = {
'has-tool-request': (response, toolName) => {
const content = getMessageContent(response);
Expand Down Expand Up @@ -137,6 +143,53 @@ const VALIDATORS: Record<
);
}
},
/**
 * Validator `stream-text-includes[:expected]`.
 *
 * Asserts that at least one streamed chunk was received and, when `expected`
 * is non-empty, that the text reassembled from the chunks contains it.
 *
 * @param response Final response; not inspected by this validator.
 * @param expected Substring that must appear in the streamed text (optional).
 * @param chunks Chunks collected from the streaming callback.
 * @throws Error when no chunks were received or the substring is missing.
 */
'stream-text-includes': (response, expected, chunks) => {
  if (!chunks || chunks.length === 0) {
    throw new Error('Streaming expected but no chunks were received');
  }

  // Join every text part of every chunk in arrival order. Reading only the
  // first text part of a chunk would drop text carried by later parts.
  const streamedText = chunks
    .map((c) => (c.content ?? []).map((p: any) => p.text || '').join(''))
    .join('');

  if (expected && !streamedText.includes(expected)) {
    // Quote the expected value on both sides (the message previously ended
    // with a stray, unbalanced apostrophe).
    throw new Error(`Streaming response did not include '${expected}'`);
  }
},
/**
 * Validator `stream-has-tool-request[:toolName]`.
 *
 * Requires that chunks were streamed and that at least one of them carries a
 * tool request part. When `toolName` is given, the final response is then
 * checked by delegating to the `has-tool-request` validator.
 */
'stream-has-tool-request': (response, toolName, chunks) => {
  if (chunks == null || chunks.length === 0) {
    throw new Error('Streaming expected but no chunks were received');
  }

  // Scan chunks until the first one containing a toolRequest part.
  let sawToolRequest = false;
  for (const chunk of chunks) {
    if (chunk.content?.some((part: any) => !!part.toolRequest)) {
      sawToolRequest = true;
      break;
    }
  }
  if (!sawToolRequest) {
    throw new Error('No tool request found in the streamed chunks');
  }

  // Without a tool name there is nothing further to verify.
  if (!toolName) {
    return;
  }
  VALIDATORS['has-tool-request'](response, toolName);
},
/**
 * Validator `stream-valid-json`.
 *
 * Asserts that chunks were streamed, that their combined text is non-blank,
 * and that the combined text parses as JSON.
 *
 * @param response Final response; not inspected by this validator.
 * @param arg Unused validator argument.
 * @param chunks Chunks collected from the streaming callback.
 * @throws Error when no chunks were received, no text was streamed, or the
 *   streamed text is not valid JSON.
 */
'stream-valid-json': (response, arg, chunks) => {
  if (!chunks || chunks.length === 0) {
    throw new Error('Streaming expected but no chunks were received');
  }

  // Join every text part of every chunk in arrival order. Reading only the
  // first text part of a chunk could truncate the JSON payload when a chunk
  // carries more than one text part.
  const streamedText = chunks
    .map((c) => (c.content ?? []).map((p: any) => p.text || '').join(''))
    .join('');

  if (!streamedText.trim()) {
    throw new Error('Streamed response contained no text');
  }
  try {
    JSON.parse(streamedText);
  } catch (e) {
    // Report the offending text itself so the failure is diagnosable.
    throw new Error(`Streamed text is not valid JSON: ${streamedText}`);
  }
},
'text-starts-with': (response, expected) => {
const text = getMessageText(response);
if (!text || (expected && !text.trim().startsWith(expected))) {
Expand Down Expand Up @@ -184,6 +237,18 @@ const VALIDATORS: Record<
}
}
},
/**
 * Validator `reasoning`.
 *
 * Passes only when the response's message content is an array that holds at
 * least one part with a truthy `reasoning` field.
 */
reasoning: (response) => {
  const parts = getMessageContent(response);

  // A missing (or non-array) content value cannot contain reasoning parts.
  if (!Array.isArray(parts)) {
    throw new Error(`Response is missing message content`);
  }

  const noReasoning = parts.every((part: any) => !part.reasoning);
  if (noReasoning) {
    throw new Error(`reasoning content not found`);
  }
},
};

const TEST_CASES: Record<string, TestCase> = {
Expand Down Expand Up @@ -247,6 +312,71 @@ const TEST_CASES: Record<string, TestCase> = {
},
validators: ['text-includes:Genkit'],
},
// Streaming multiturn case: a three-message conversation run with
// `stream: true`; the streamed text must include "Genkit".
'streaming-multiturn': {
name: 'Multiturn Conformance with streaming',
stream: true,
input: {
messages: [
{ role: 'user', content: [{ text: 'My name is Genkit.' }] },
{ role: 'model', content: [{ text: 'Hello Genkit.' }] },
{ role: 'user', content: [{ text: 'What is my name?' }] },
],
},
validators: ['stream-text-includes:Genkit'],
},
// Streaming tool-request case: offers a single `weather` tool (requires a
// `city` string) and checks via `stream-has-tool-request:weather`.
'streaming-tool-request': {
name: 'Tool Request Conformance with streaming',
stream: true,
input: {
messages: [
{
role: 'user',
content: [
{ text: 'What is the weather in New York? Use the weather tool' },
],
},
],
tools: [
{
name: 'weather',
description: 'Get the weather for a city',
inputSchema: {
type: 'object',
properties: {
city: { type: 'string' },
},
required: ['city'],
},
},
],
},
validators: ['stream-has-tool-request:weather'],
},
// Streaming structured-output case: requests constrained JSON output with
// required `name` (string) and `rating` (number) fields; validated with
// `stream-valid-json`.
'streaming-structured-output': {
name: 'Structured Output Conformance with streaming',
stream: true,
input: {
messages: [
{
role: 'user',
content: [{ text: 'Generate a movie review for John Wick' }],
},
],
output: {
format: 'json',
schema: {
type: 'object',
properties: {
name: { type: 'string' },
rating: { type: 'number' },
},
required: ['name', 'rating'],
},
constrained: true,
},
},
validators: ['stream-valid-json'],
},
'system-role': {
name: 'System Role Conformance',
input: {
Expand Down Expand Up @@ -363,19 +493,35 @@ async function runTest(
try {
// Adjust model name if needed (e.g. /model/ prefix)
const modelKey = model.startsWith('/') ? model : `/model/${model}`;
const actionResponse = await manager.runAction({
key: modelKey,
input: testCase.input,
});
const shouldStream = !!testCase.stream;

let chunks = 0;
const collectedChunks: any[] = [];

const actionResponse = await manager.runAction(
{
key: modelKey,
input: testCase.input,
},
shouldStream
? (chunk) => {
collectedChunks.push(chunk);
chunks++;
}
: undefined
);

if (shouldStream && chunks === 0) {
throw new Error('Streaming requested but no chunks received.');
}
const response = GenerateResponseSchema.parse(actionResponse.result);

for (const v of testCase.validators) {
const [valName, ...args] = v.split(':');
const arg = args.join(':');
const validator = VALIDATORS[valName];
if (!validator) throw new Error(`Unknown validator: ${valName}`);
validator(response, arg);
validator(response, arg, collectedChunks);
}

logger.info(`✅ Passed: ${testCase.name}`);
Expand Down Expand Up @@ -424,6 +570,7 @@ async function runTestSuite(
name: test.name || 'Custom Test',
input: test.input,
validators: test.validators || [],
stream: test.stream,
};
promises.push(runTest(manager, suite.model, customTestCase));
}
Expand All @@ -442,8 +589,8 @@ export const devTestModel = new Command('dev:test-model')
.argument('[args...]', 'Command arguments')
.option(
'--supports <list>',
'Comma-separated list of supported capabilities (tool-request, structured-output, multiturn, system-role, input-image-base64, input-image-url, input-video-youtube, output-audio, output-image)',
'tool-request,structured-output,multiturn,system-role,input-image-base64,input-image-url'
'Comma-separated list of supported capabilities (tool-request, structured-output, multiturn, system-role, input-image-base64, input-image-url, input-video-youtube, output-audio, output-image, streaming-multiturn, reasoning)',
'tool-request,structured-output,multiturn,system-role,input-image-base64,input-image-url,streaming-multiturn,streaming-tool-request,streaming-structured-output'
)
.option('--from-file <file>', 'Path to a file containing test payloads')
.action(
Expand Down
23 changes: 23 additions & 0 deletions js/plugins/google-genai/tests/model-tests-tts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,28 @@
- input-image-url
- input-video-youtube
tests:
# Custom test: sends a short arithmetic word problem with a thinkingConfig
# (thinkingBudget 1024, includeThoughts true) and runs the `reasoning` validator.
- name: Reasoning conformance
input:
messages:
- role: user
content:
- text: A banana farmer harvest 10 bananas but he eats 3 and sells 4. How many bananas are remaining?
config:
thinkingConfig:
thinkingBudget: 1024
includeThoughts: true
validators:
- reasoning

# Custom test with `stream: true`: sends a single user prompt and runs the
# `text-not-empty` validator.
- name: Streaming conformance
stream: true
input:
messages:
- role: user
content: [{ text: 'Count to 10' }]
validators:
- text-not-empty

- name: Tool Response Conformance
input:
messages:
Expand Down Expand Up @@ -79,6 +101,7 @@
- city
validators:
- text-includes:21

- model: googleai/gemini-2.5-flash
supports:
- tool-request
Expand Down
Loading