Skip to content

Commit b38d63f

Browse files
[feat] benchify integration (#312)
Co-authored-by: Cole Vick <cole.d.vick@gmail.com>
1 parent 1fed4df commit b38d63f

File tree

12 files changed: +1331 additions, -42 deletions

.github/workflows/npm-app-release-staging.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ jobs:
134134
new-version: ${{ needs.prepare-and-commit-staging.outputs.new_version }}
135135
artifact-name: updated-staging-package
136136
checkout-ref: ${{ github.event.pull_request.head.sha }}
137-
env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}'
137+
env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-312-3hui.onrender.com"}'
138138
secrets: inherit
139139

140140
# Create GitHub prerelease with all binaries

backend/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
},
2525
"dependencies": {
2626
"@ai-sdk/google-vertex": "3.0.6",
27+
"benchify": "^0.1.0-alpha.41",
2728
"@ai-sdk/openai": "2.0.11",
2829
"@codebuff/billing": "workspace:*",
2930
"@codebuff/common": "workspace:*",

backend/src/__tests__/process-str-replace.test.ts

Lines changed: 182 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,22 @@
1-
import { describe, expect, it } from 'bun:test'
1+
import { describe, expect, it, spyOn, beforeEach, afterEach, mock } from 'bun:test'
22
import { applyPatch } from 'diff'
33

4+
// Mock the benchify module so tests never call the real Benchify API; runFixer resolves to an empty result
5+
mock.module('benchify', () => ({
6+
Benchify: class MockBenchify {
7+
constructor() {}
8+
runFixer() {
9+
return Promise.resolve([])
10+
}
11+
}
12+
}))
13+
414
import { processStrReplace } from '../process-str-replace'
15+
import { mockFileContext } from './test-utils'
16+
import {
17+
executeBatchStrReplaces,
18+
benchifyCanFixLanguage,
19+
} from '../tools/batch-str-replace'
520

621
describe('processStrReplace', () => {
722
it('should replace exact string matches', async () => {
@@ -213,6 +228,25 @@ describe('processStrReplace', () => {
213228
}
214229
})
215230

231+
it('should handle replacement where old string equals new string', async () => {
232+
const initialContent = 'const x = 1;\nconst y = 2;\n'
233+
const oldStr = 'const y = 2;'
234+
const newStr = 'const y = 2;' // Same as old string
235+
236+
const result = await processStrReplace(
237+
'test.ts',
238+
[{ old: oldStr, new: newStr, allowMultiple: false }],
239+
Promise.resolve(initialContent),
240+
)
241+
242+
expect(result).not.toBeNull()
243+
expect('content' in result).toBe(true)
244+
if ('content' in result) {
245+
expect(result.content).toBe('const x = 1;\nconst y = 2;\n')
246+
expect(result.messages).toEqual([])
247+
}
248+
})
249+
216250
// New comprehensive tests for allowMultiple functionality
217251
describe('allowMultiple functionality', () => {
218252
it('should error when multiple occurrences exist and allowMultiple is false', async () => {
@@ -417,3 +451,150 @@ function test3() {
417451
)
418452
})
419453
})
454+
455+
// Tests for Benchify resilience
456+
describe('Benchify resilience', () => {
457+
describe('happy path', () => {
458+
it('should identify Benchify-supported file types correctly', () => {
459+
const testCases = [
460+
{ path: 'component.tsx', expected: true },
461+
{ path: 'utils.ts', expected: true },
462+
{ path: 'script.js', expected: true },
463+
{ path: 'styles.jsx', expected: true },
464+
{ path: 'README.md', expected: false },
465+
{ path: 'config.json', expected: false },
466+
{ path: 'styles.css', expected: false },
467+
{ path: 'index.html', expected: false },
468+
{ path: 'test.py', expected: false },
469+
]
470+
471+
for (const { path, expected } of testCases) {
472+
const result = benchifyCanFixLanguage(path)
473+
expect(result).toBe(expected)
474+
}
475+
})
476+
477+
it('should handle file extensions case sensitivity', () => {
478+
expect(benchifyCanFixLanguage('Component.TSX')).toBe(false) // Wrong case
479+
expect(benchifyCanFixLanguage('component.tsx')).toBe(true) // Correct case
480+
expect(benchifyCanFixLanguage('utils.TS')).toBe(false) // Wrong case
481+
expect(benchifyCanFixLanguage('utils.ts')).toBe(true) // Correct case
482+
})
483+
484+
it('should handle file paths with multiple dots', () => {
485+
expect(benchifyCanFixLanguage('component.test.tsx')).toBe(true)
486+
expect(benchifyCanFixLanguage('utils.spec.ts')).toBe(true)
487+
expect(benchifyCanFixLanguage('config.local.js')).toBe(true)
488+
expect(benchifyCanFixLanguage('styles.module.css')).toBe(false)
489+
})
490+
491+
it('should handle files without extensions', () => {
492+
expect(benchifyCanFixLanguage('Dockerfile')).toBe(false)
493+
expect(benchifyCanFixLanguage('Makefile')).toBe(false)
494+
expect(benchifyCanFixLanguage('README')).toBe(false)
495+
})
496+
})
497+
498+
it('should fall back gracefully when Benchify is disabled', async () => {
499+
// Temporarily remove BENCHIFY_API_KEY from process.env to simulate a missing key
500+
const originalEnv = process.env.BENCHIFY_API_KEY
501+
delete process.env.BENCHIFY_API_KEY
502+
503+
try {
504+
const result = await executeBatchStrReplaces({
505+
deferredStrReplaces: [
506+
{
507+
toolCall: {
508+
toolName: 'str_replace' as const,
509+
toolCallId: 'test-call',
510+
input: {
511+
path: 'test.ts',
512+
replacements: [
513+
{ old: 'old', new: 'new', allowMultiple: false },
514+
],
515+
},
516+
},
517+
},
518+
],
519+
toolCalls: [],
520+
toolResults: [],
521+
ws: {} as any,
522+
fileContext: mockFileContext,
523+
agentStepId: 'test-step',
524+
clientSessionId: 'test-session',
525+
userInputId: 'test-input',
526+
onResponseChunk: () => {},
527+
state: { messages: [] },
528+
userId: 'test-user',
529+
})
530+
531+
// Should complete without error even when Benchify is unavailable
532+
expect(result).toBeUndefined() // Function returns void
533+
} finally {
534+
// Restore the original environment variable
535+
if (originalEnv !== undefined) {
536+
process.env.BENCHIFY_API_KEY = originalEnv
537+
}
538+
}
539+
})
540+
541+
describe('Batch str_replace integration tests', () => {
542+
it('should handle empty deferred list without error', async () => {
543+
// Simple test that doesn't require complex mocking
544+
expect(
545+
executeBatchStrReplaces({
546+
deferredStrReplaces: [],
547+
toolCalls: [],
548+
toolResults: [],
549+
ws: {} as any,
550+
fileContext: mockFileContext,
551+
agentStepId: 'test-step',
552+
clientSessionId: 'test-session',
553+
userInputId: 'test-input',
554+
onResponseChunk: () => {},
555+
state: { messages: [] },
556+
userId: 'test-user',
557+
}),
558+
).resolves.toBeUndefined() // Should complete without throwing
559+
})
560+
})
561+
562+
it('should identify Benchify-supported file types correctly', () => {
563+
const testCases = [
564+
{ path: 'component.tsx', expected: true },
565+
{ path: 'utils.ts', expected: true },
566+
{ path: 'script.js', expected: true },
567+
{ path: 'styles.jsx', expected: true },
568+
{ path: 'README.md', expected: false },
569+
{ path: 'config.json', expected: false },
570+
{ path: 'styles.css', expected: false },
571+
{ path: 'index.html', expected: false },
572+
{ path: 'test.py', expected: false },
573+
]
574+
575+
for (const { path, expected } of testCases) {
576+
const result = benchifyCanFixLanguage(path)
577+
expect(result).toBe(expected)
578+
}
579+
})
580+
581+
it('should handle executeBatchStrReplaces with empty list', async () => {
582+
// Simple test that doesn't require complex mocking
583+
const result = await executeBatchStrReplaces({
584+
deferredStrReplaces: [],
585+
toolCalls: [],
586+
toolResults: [],
587+
ws: {} as any,
588+
fileContext: mockFileContext,
589+
agentStepId: 'test-step',
590+
clientSessionId: 'test-session',
591+
userInputId: 'test-input',
592+
onResponseChunk: () => {},
593+
state: { messages: [] },
594+
userId: 'test-user',
595+
})
596+
597+
// Should complete without throwing an error
598+
expect(result).toBeUndefined() // Function returns void
599+
})
600+
})

backend/src/process-str-replace.ts

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ export async function processStrReplace(
3535
let currentContent = initialContent
3636
let messages: string[] = []
3737
const lineEnding = currentContent.includes('\r\n') ? '\r\n' : '\n'
38+
let anyReplacementSuccessful = false
3839

3940
for (const { old: oldStr, new: newStr, allowMultiple } of replacements) {
4041
// Regular case: require oldStr for replacements
@@ -59,6 +60,7 @@ export async function processStrReplace(
5960

6061
if (match.success) {
6162
updatedOldStr = match.oldStr
63+
anyReplacementSuccessful = true
6264
} else {
6365
messages.push(match.error)
6466
updatedOldStr = null
@@ -72,15 +74,15 @@ export async function processStrReplace(
7274

7375
currentContent = currentContent.replaceAll('\n', lineEnding)
7476

75-
if (initialContent === currentContent) {
77+
// If no successful replacements occurred, return error
78+
if (!anyReplacementSuccessful) {
7679
logger.debug(
7780
{
7881
path,
7982
initialContent,
8083
},
81-
`processStrReplace: No change to ${path}`,
84+
`processStrReplace: No successful replacements for ${path}`,
8285
)
83-
messages.push('No change to the file.')
8486
return {
8587
tool: 'str_replace' as const,
8688
path,

backend/src/run-agent-step.ts

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,6 @@ export const runAgentStep = async (
305305
state,
306306
fullResponse: fullResponseAfterStream,
307307
fullResponseChunks,
308-
messageId,
309308
} = await processStreamWithTools({
310309
stream,
311310
ws,
@@ -414,7 +413,7 @@ export const runAgentStep = async (
414413
agentState,
415414
fullResponse,
416415
shouldEndTurn,
417-
messageId,
416+
messageId: null,
418417
}
419418
}
420419

0 commit comments

Comments
 (0)