Skip to content

Commit d6ec995

Browse files
jahooma and charleslien authored
Sonnet 4 5 (#324)
Co-authored-by: Charles Lien <charleslien97@gmail.com>
1 parent b75c35f commit d6ec995

File tree

5 files changed

+23
-3
lines changed

5 files changed

+23
-3
lines changed

.agents/base.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ import type { SecretAgentDefinition } from './types/secret-agent-definition'
66
const definition: SecretAgentDefinition = {
77
id: 'base',
88
publisher,
9-
...base('anthropic/claude-4-sonnet-20250522', 'normal'),
9+
...base('anthropic/claude-4.5-sonnet', 'normal'),
1010
}
1111

1212
export default definition

backend/src/llm-apis/openrouter.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,11 @@ const providerOrder = {
1212
'Anthropic',
1313
'Amazon Bedrock',
1414
],
15+
[models.openrouter_claude_sonnet_4_5]: [
16+
'Google',
17+
'Anthropic',
18+
'Amazon Bedrock',
19+
],
1520
[models.openrouter_claude_opus_4]: ['Google', 'Anthropic'],
1621
} as const
1722

backend/src/tools/definitions/tool/end-turn.ts

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,15 @@ Only use this tool to hand control back to the user.
1313
- Before calling: finish all pending steps, resolve tool results, and include any outputs the user needs to review.
1414
- Effect: Signals the UI to wait for the user's reply; any pending tool results will be ignored.
1515
16-
Correct usage:
16+
*INCORRECT USAGE*:
17+
${getToolCallString('some_tool_that_produces_results', { query: 'some example search term' }, false)}
18+
1719
${getToolCallString(toolName, {})}
20+
21+
*CORRECT USAGE*:
22+
All done! Would you like some more help with xyz?
23+
24+
${getToolCallString(toolName, {})}
25+
1826
`.trim(),
1927
} satisfies ToolDescription

common/src/old-constants.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,7 @@ export const geminiModels = {
194194
export type GeminiModel = (typeof geminiModels)[keyof typeof geminiModels]
195195

196196
export const openrouterModels = {
197+
openrouter_claude_sonnet_4_5: 'anthropic/claude-4.5-sonnet',
197198
openrouter_claude_sonnet_4: 'anthropic/claude-4-sonnet-20250522',
198199
openrouter_claude_opus_4: 'anthropic/claude-opus-4.1',
199200
openrouter_claude_3_5_haiku: 'anthropic/claude-3.5-haiku-20241022',
@@ -259,6 +260,7 @@ export const shortModelNames = {
259260
'gemini-2.5-pro': models.openrouter_gemini2_5_pro_preview,
260261
'flash-2.5': models.openrouter_gemini2_5_flash,
261262
'opus-4': models.openrouter_claude_opus_4,
263+
'sonnet-4.5': models.openrouter_claude_sonnet_4_5,
262264
'sonnet-4': models.openrouter_claude_sonnet_4,
263265
'sonnet-3.7': models.openrouter_claude_sonnet_4,
264266
'sonnet-3.6': models.openrouter_claude_3_5_sonnet,

evals/git-evals/run-git-evals.ts

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import path from 'path'
44

55
import { disableLiveUserInputCheck } from '@codebuff/backend/live-user-inputs'
66
import { promptAiSdkStructured } from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
7+
import { errorToObject } from '@codebuff/common/util/object'
78
import { withTimeout } from '@codebuff/common/util/promise'
89
import { generateCompactId } from '@codebuff/common/util/string'
910
import { cloneDeep } from 'lodash'
@@ -247,7 +248,11 @@ Explain your reasoning in detail.`,
247248
return {
248249
...evalRun,
249250
judging_results: {
250-
analysis: 'Judging failed due to error',
251+
analysis: `Judging failed due to error:\n${JSON.stringify(
252+
judgingError instanceof Error
253+
? errorToObject(judgingError)
254+
: judgingError,
255+
)}`,
251256
strengths: [],
252257
weaknesses: ['Judging process encountered an error'],
253258
metrics: {

0 commit comments

Comments
 (0)