Skip to content

Commit adb75a8

Browse files
committed
eval-planner: load local agents
1 parent daefd57 commit adb75a8

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

evals/subagents/eval-planner.ts

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ import { CodebuffClient } from '../../sdk/src/client'
88
import { AgentDefinition } from '../../sdk/src'
99
import { getUserCredentials } from '@codebuff/npm-app/credentials'
1010
import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
11-
import implementationPlannerAgent from '../../.agents/implementation-planner/implementation-planner'
11+
import { loadLocalAgents } from '@codebuff/npm-app/agents/load-agents'
1212

1313
/**
1414
* Helper function to manage test repository lifecycle
@@ -78,6 +78,7 @@ const getPreviousCommitSha = (commitSha: string, repoDir: string): string => {
7878
}
7979

8080
export const evalPlannerAgent = async (params: {
81+
agentId: string
8182
spec: string
8283
repoUrl: string
8384
commitSha: string
@@ -88,30 +89,33 @@ export const evalPlannerAgent = async (params: {
8889
postContent: string
8990
}>
9091
}) => {
91-
const { spec, repoUrl, commitSha, initCommand, fileStates } = params
92+
const { agentId, spec, repoUrl, commitSha, initCommand, fileStates } = params
9293
const getLocalAuthToken = () => {
9394
return getUserCredentials()?.authToken
9495
}
9596
const client = new CodebuffClient({
9697
apiKey: process.env[API_KEY_ENV_VAR] || getLocalAuthToken(),
9798
})
99+
100+
const agentsPath = path.join(__dirname, '../../.agents')
101+
const localAgentDefinitions = Object.values(
102+
await loadLocalAgents({
103+
agentsPath,
104+
}),
105+
)
106+
98107
const result = await withTestRepo(
99108
{ repoUrl, commitSha, initCommand, checkoutPrevious: true },
100109
async (cwd) => {
101110
// Run the agent with the test repository as cwd
102-
console.log(
103-
`Running agent ${implementationPlannerAgent.id} with prompt: ${spec}...`,
104-
)
111+
console.log(`Running agent ${agentId} with prompt: ${spec}...`)
105112
return await client.run({
106-
agent: implementationPlannerAgent.id,
113+
agent: agentId,
107114
prompt: `Please plan a full implementation of the following spec: ${spec}`,
108115
cwd,
109-
agentDefinitions: [implementationPlannerAgent],
116+
agentDefinitions: localAgentDefinitions,
110117
handleEvent: (event) => {
111-
console.log(
112-
implementationPlannerAgent.id,
113-
JSON.stringify(event, null, 2),
114-
)
118+
console.log(agentId, JSON.stringify(event, null, 2))
115119
},
116120
})
117121
},
@@ -200,7 +204,7 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
200204
const judgeAgent: AgentDefinition = {
201205
id: 'eval-judge',
202206
displayName: 'Eval Judge',
203-
model: 'x-ai/grok-4-fast',
207+
model: 'openai/gpt-5',
204208
toolNames: ['set_output'],
205209
inputSchema: {
206210
prompt: { type: 'string', description: 'The prompt to judge' },
@@ -276,6 +280,7 @@ async function main() {
276280

277281
try {
278282
const result = await evalPlannerAgent({
283+
agentId: 'implementation-planner',
279284
spec,
280285
repoUrl,
281286
commitSha: sha,

0 commit comments

Comments
 (0)