Sonnet 4 5 (#324)

jahooma · charleslien · web-flow · commit d6ec9959cbb4 · 2025-09-29T14:54:19.000-05:00
Co-authored-by: Charles Lien <charleslien97@gmail.com>
diff --git a/.agents/base.ts b/.agents/base.ts
@@ -6,7 +6,7 @@ import type { SecretAgentDefinition } from './types/secret-agent-definition'
 const definition: SecretAgentDefinition = {
   id: 'base',
   publisher,
-  ...base('anthropic/claude-4-sonnet-20250522', 'normal'),
+  ...base('anthropic/claude-4.5-sonnet', 'normal'),
 }
 
 export default definition
diff --git a/backend/src/llm-apis/openrouter.ts b/backend/src/llm-apis/openrouter.ts
@@ -12,6 +12,11 @@ const providerOrder = {
     'Anthropic',
     'Amazon Bedrock',
   ],
+  [models.openrouter_claude_sonnet_4_5]: [
+    'Google',
+    'Anthropic',
+    'Amazon Bedrock',
+  ],
   [models.openrouter_claude_opus_4]: ['Google', 'Anthropic'],
 } as const
 
diff --git a/backend/src/tools/definitions/tool/end-turn.ts b/backend/src/tools/definitions/tool/end-turn.ts
@@ -13,7 +13,15 @@ Only use this tool to hand control back to the user.
 - Before calling: finish all pending steps, resolve tool results, and include any outputs the user needs to review.
 - Effect: Signals the UI to wait for the user's reply; any pending tool results will be ignored.
 
-Correct usage:
+*INCORRECT USAGE*:
+${getToolCallString('some_tool_that_produces_results', { query: 'some example search term' }, false)}
+
 ${getToolCallString(toolName, {})}
+
+*CORRECT USAGE*:
+All done! Would you like some more help with xyz?
+
+${getToolCallString(toolName, {})}
+
     `.trim(),
 } satisfies ToolDescription
diff --git a/common/src/old-constants.ts b/common/src/old-constants.ts
@@ -194,6 +194,7 @@ export const geminiModels = {
 export type GeminiModel = (typeof geminiModels)[keyof typeof geminiModels]
 
 export const openrouterModels = {
+  openrouter_claude_sonnet_4_5: 'anthropic/claude-4.5-sonnet',
   openrouter_claude_sonnet_4: 'anthropic/claude-4-sonnet-20250522',
   openrouter_claude_opus_4: 'anthropic/claude-opus-4.1',
   openrouter_claude_3_5_haiku: 'anthropic/claude-3.5-haiku-20241022',
@@ -259,6 +260,7 @@ export const shortModelNames = {
   'gemini-2.5-pro': models.openrouter_gemini2_5_pro_preview,
   'flash-2.5': models.openrouter_gemini2_5_flash,
   'opus-4': models.openrouter_claude_opus_4,
+  'sonnet-4.5': models.openrouter_claude_sonnet_4_5,
   'sonnet-4': models.openrouter_claude_sonnet_4,
   'sonnet-3.7': models.openrouter_claude_sonnet_4,
   'sonnet-3.6': models.openrouter_claude_3_5_sonnet,
diff --git a/evals/git-evals/run-git-evals.ts b/evals/git-evals/run-git-evals.ts
@@ -4,6 +4,7 @@ import path from 'path'
 
 import { disableLiveUserInputCheck } from '@codebuff/backend/live-user-inputs'
 import { promptAiSdkStructured } from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
+import { errorToObject } from '@codebuff/common/util/object'
 import { withTimeout } from '@codebuff/common/util/promise'
 import { generateCompactId } from '@codebuff/common/util/string'
 import { cloneDeep } from 'lodash'
@@ -247,7 +248,11 @@ Explain your reasoning in detail.`,
     return {
       ...evalRun,
       judging_results: {
-        analysis: 'Judging failed due to error',
+        analysis: `Judging failed due to error:\n${JSON.stringify(
+          judgingError instanceof Error
+            ? errorToObject(judgingError)
+            : judgingError,
+        )}`,
         strengths: [],
         weaknesses: ['Judging process encountered an error'],
         metrics: {

Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@ import type { SecretAgentDefinition } from './types/secret-agent-definition'`
`6`	`6`	`const definition: SecretAgentDefinition = {`
`7`	`7`	`id: 'base',`
`8`	`8`	`publisher,`
`9`		`- ...base('anthropic/claude-4-sonnet-20250522', 'normal'),`
	`9`	`+ ...base('anthropic/claude-4.5-sonnet', 'normal'),`
`10`	`10`	`}`
`11`	`11`
`12`	`12`	`export default definition`