Skip to content

Commit 28311ab

Browse files
committed
Retry sdk/agent-runtime client fetches
1 parent 261ba15 commit 28311ab

File tree

2 files changed

+217
-88
lines changed

2 files changed

+217
-88
lines changed

packages/agent-runtime/src/llm-api/codebuff-web-api.ts

Lines changed: 92 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ import type { ClientEnv, CiEnv } from '@codebuff/common/types/contracts/env'
33
import type { Logger } from '@codebuff/common/types/contracts/logger'
44

55
// Timeout in milliseconds handed to withTimeout() around each individual fetch call.
const FETCH_TIMEOUT_MS = 30_000
6+
// Upper bound of the 1-based attempt counter in callCodebuffV1, i.e. the total
// number of requests made (first try included), not the number of extra retries.
const MAX_RETRIES = 3
7+
// Base backoff in milliseconds; the wait before the next attempt is
// RETRY_BASE_DELAY_MS * 2^(attempt - 1).
const RETRY_BASE_DELAY_MS = 1000
8+
// HTTP status codes that callCodebuffV1 retries while attempts remain;
// any other non-OK status is returned to the caller as an error immediately.
const RETRYABLE_STATUS_CODES = new Set([408, 429, 500, 502, 503, 504])
69

710
interface CodebuffWebApiEnv {
811
clientEnv: ClientEnv
@@ -50,55 +53,103 @@ const callCodebuffV1 = async (params: {
5053
}
5154

5255
const url = `${baseUrl}${endpoint}`
56+
let lastError: string | undefined
5357

54-
try {
55-
const res = await withTimeout(
56-
fetch(url, {
57-
method: 'POST',
58-
headers: {
59-
'Content-Type': 'application/json',
60-
Authorization: `Bearer ${apiKey}`,
61-
'x-codebuff-api-key': apiKey,
62-
},
63-
body: JSON.stringify(payload),
64-
}),
65-
FETCH_TIMEOUT_MS,
66-
)
58+
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
59+
try {
60+
const res = await withTimeout(
61+
fetch(url, {
62+
method: 'POST',
63+
headers: {
64+
'Content-Type': 'application/json',
65+
Authorization: `Bearer ${apiKey}`,
66+
'x-codebuff-api-key': apiKey,
67+
},
68+
body: JSON.stringify(payload),
69+
}),
70+
FETCH_TIMEOUT_MS,
71+
)
6772

68-
const text = await res.text()
69-
const json = tryParseJson(text)
73+
const text = await res.text()
74+
const json = tryParseJson(text)
7075

71-
if (!res.ok) {
72-
const err =
73-
getStringField(json, 'error') ??
74-
getStringField(json, 'message') ??
75-
text ??
76-
'Request failed'
77-
logger.warn(
76+
if (!res.ok) {
77+
const err =
78+
getStringField(json, 'error') ??
79+
getStringField(json, 'message') ??
80+
text ??
81+
'Request failed'
82+
83+
// Retry on transient errors
84+
if (RETRYABLE_STATUS_CODES.has(res.status) && attempt < MAX_RETRIES) {
85+
const delay = RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1)
86+
logger.warn(
87+
{
88+
url,
89+
status: res.status,
90+
statusText: res.statusText,
91+
attempt,
92+
maxRetries: MAX_RETRIES,
93+
nextRetryDelayMs: delay,
94+
},
95+
`Web API ${requestName} request failed with retryable status, retrying...`,
96+
)
97+
await new Promise((resolve) => setTimeout(resolve, delay))
98+
lastError = err
99+
continue
100+
}
101+
102+
logger.warn(
103+
{
104+
url,
105+
status: res.status,
106+
statusText: res.statusText,
107+
body: text?.slice(0, 500),
108+
attempt,
109+
},
110+
`Web API ${requestName} request failed`,
111+
)
112+
return { error: err }
113+
}
114+
115+
return { json, creditsUsed: getNumberField(json, 'creditsUsed') }
116+
} catch (error) {
117+
lastError = error instanceof Error ? error.message : 'Network error'
118+
119+
// Retry on network errors
120+
if (attempt < MAX_RETRIES) {
121+
const delay = RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1)
122+
logger.warn(
123+
{
124+
error:
125+
error instanceof Error
126+
? { name: error.name, message: error.message }
127+
: error,
128+
attempt,
129+
maxRetries: MAX_RETRIES,
130+
nextRetryDelayMs: delay,
131+
},
132+
`Web API ${requestName} network error, retrying...`,
133+
)
134+
await new Promise((resolve) => setTimeout(resolve, delay))
135+
continue
136+
}
137+
138+
logger.error(
78139
{
79-
url,
80-
status: res.status,
81-
statusText: res.statusText,
82-
body: text?.slice(0, 500),
140+
error:
141+
error instanceof Error
142+
? { name: error.name, message: error.message, stack: error.stack }
143+
: error,
144+
attempt,
83145
},
84-
`Web API ${requestName} request failed`,
146+
`Web API ${requestName} network error after all retries`,
85147
)
86-
return { error: err }
148+
return { error: lastError }
87149
}
88-
89-
return { json, creditsUsed: getNumberField(json, 'creditsUsed') }
90-
} catch (error) {
91-
logger.error(
92-
{
93-
error:
94-
error instanceof Error
95-
? { name: error.name, message: error.message, stack: error.stack }
96-
: error,
97-
},
98-
`Web API ${requestName} network error`,
99-
)
100-
return { error: error instanceof Error ? error.message : 'Network error' }
101150
}
151+
152+
return { error: lastError ?? 'Request failed after all retries' }
102153
}
103154

104155
export async function callWebSearchAPI(params: {

sdk/src/impl/database.ts

Lines changed: 125 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@ import {
99
createNetworkError,
1010
createServerError,
1111
createHttpError,
12+
isRetryableStatusCode,
1213
} from '../error-utils'
14+
import {
15+
MAX_RETRIES_PER_MESSAGE,
16+
RETRY_BACKOFF_BASE_DELAY_MS,
17+
RETRY_BACKOFF_MAX_DELAY_MS,
18+
} from '../retry-config'
1319

1420
import type {
1521
AddAgentStepFn,
@@ -37,6 +43,58 @@ const agentsResponseSchema = z.object({
3743
data: DynamicAgentTemplateSchema,
3844
})
3945

46+
/**
 * Fetch with retry logic for transient failures.
 *
 * A request is retried when `fetch` rejects (network-level error such as DNS
 * failure or connection refused) or when the response status is reported as
 * retryable by `isRetryableStatusCode` (502, 503, etc.). Between attempts the
 * function waits with exponential backoff: the delay starts at
 * `RETRY_BACKOFF_BASE_DELAY_MS` and doubles after every wait, capped at
 * `RETRY_BACKOFF_MAX_DELAY_MS`.
 *
 * Up to `MAX_RETRIES_PER_MESSAGE + 1` requests are made in total (the initial
 * attempt plus `MAX_RETRIES_PER_MESSAGE` retries). The same `options` object
 * is passed to every attempt.
 *
 * @param url - Request target, forwarded to `fetch` unchanged.
 * @param options - Request init forwarded to `fetch` on every attempt.
 * @param logger - Optional logger; `warn` is called once before each retry.
 * @returns The first response that is OK or not retryable, or the response of
 *   the final attempt even if its status is still a retryable error.
 * @throws The error of the final attempt when it fails at the network level,
 *   or immediately when `options.signal` has been aborted (non-`Error`
 *   rejections are wrapped in an `Error`).
 */
async function fetchWithRetry(
  url: URL | string,
  options: RequestInit,
  logger?: { warn: (obj: object, msg: string) => void },
): Promise<Response> {
  let lastError: Error | null = null
  let backoffDelay = RETRY_BACKOFF_BASE_DELAY_MS

  for (let attempt = 0; attempt <= MAX_RETRIES_PER_MESSAGE; attempt++) {
    const isLastAttempt = attempt === MAX_RETRIES_PER_MESSAGE

    try {
      const response = await fetch(url, options)

      // Hand the response back when it succeeded, when retrying cannot help,
      // or when no attempts are left (the caller inspects the error status).
      if (
        response.ok ||
        !isRetryableStatusCode(response.status) ||
        isLastAttempt
      ) {
        return response
      }

      logger?.warn(
        { status: response.status, attempt: attempt + 1, url: String(url) },
        `Retryable HTTP error, retrying in ${backoffDelay}ms`,
      )

      // This response is being discarded: cancel its body so the underlying
      // stream/connection is released instead of lingering until GC.
      // A failure to cancel must not turn into a spurious "network error".
      await response.body?.cancel().catch(() => undefined)
    } catch (error) {
      // Network-level error (DNS, connection refused, etc.)
      lastError = error instanceof Error ? error : new Error(String(error))

      // The caller cancelled the request; every further attempt would reject
      // immediately, so surface the abort without sleeping through backoffs.
      if (options.signal?.aborted) {
        throw lastError
      }

      if (isLastAttempt) {
        break
      }

      logger?.warn(
        { error: getErrorObject(lastError), attempt: attempt + 1, url: String(url) },
        `Network error, retrying in ${backoffDelay}ms`,
      )
    }

    // Shared backoff for both retry paths: wait, then double up to the cap.
    await new Promise((resolve) => setTimeout(resolve, backoffDelay))
    backoffDelay = Math.min(backoffDelay * 2, RETRY_BACKOFF_MAX_DELAY_MS)
  }

  // All retries exhausted - throw the last error
  throw lastError ?? new Error('Request failed after retries')
}
97+
4098
export async function getUserInfoFromApiKey<T extends UserColumn>(
4199
params: GetUserInfoFromApiKeyInput<T>,
42100
): GetUserInfoFromApiKeyOutput<T> {
@@ -70,12 +128,16 @@ export async function getUserInfoFromApiKey<T extends UserColumn>(
70128

71129
let response: Response
72130
try {
73-
response = await fetch(url, {
74-
method: 'GET',
75-
headers: {
76-
Authorization: `Bearer ${apiKey}`,
131+
response = await fetchWithRetry(
132+
url,
133+
{
134+
method: 'GET',
135+
headers: {
136+
Authorization: `Bearer ${apiKey}`,
137+
},
77138
},
78-
})
139+
logger,
140+
)
79141
} catch (error) {
80142
logger.error(
81143
{ error: getErrorObject(error), apiKey, fields },
@@ -161,12 +223,16 @@ export async function fetchAgentFromDatabase(
161223
)
162224

163225
try {
164-
const response = await fetch(url, {
165-
method: 'GET',
166-
headers: {
167-
Authorization: `Bearer ${apiKey}`,
226+
const response = await fetchWithRetry(
227+
url,
228+
{
229+
method: 'GET',
230+
headers: {
231+
Authorization: `Bearer ${apiKey}`,
232+
},
168233
},
169-
})
234+
logger,
235+
)
170236

171237
if (!response.ok) {
172238
logger.error({ response }, 'fetchAgentFromDatabase request failed')
@@ -240,17 +306,21 @@ export async function startAgentRun(
240306
const url = new URL(`/api/v1/agent-runs`, WEBSITE_URL)
241307

242308
try {
243-
const response = await fetch(url, {
244-
method: 'POST',
245-
headers: {
246-
Authorization: `Bearer ${apiKey}`,
309+
const response = await fetchWithRetry(
310+
url,
311+
{
312+
method: 'POST',
313+
headers: {
314+
Authorization: `Bearer ${apiKey}`,
315+
},
316+
body: JSON.stringify({
317+
action: 'START',
318+
agentId,
319+
ancestorRunIds,
320+
}),
247321
},
248-
body: JSON.stringify({
249-
action: 'START',
250-
agentId,
251-
ancestorRunIds,
252-
}),
253-
})
322+
logger,
323+
)
254324

255325
if (!response.ok) {
256326
logger.error({ response }, 'startAgentRun request failed')
@@ -290,20 +360,24 @@ export async function finishAgentRun(
290360
const url = new URL(`/api/v1/agent-runs`, WEBSITE_URL)
291361

292362
try {
293-
const response = await fetch(url, {
294-
method: 'POST',
295-
headers: {
296-
Authorization: `Bearer ${apiKey}`,
363+
const response = await fetchWithRetry(
364+
url,
365+
{
366+
method: 'POST',
367+
headers: {
368+
Authorization: `Bearer ${apiKey}`,
369+
},
370+
body: JSON.stringify({
371+
action: 'FINISH',
372+
runId,
373+
status,
374+
totalSteps,
375+
directCredits,
376+
totalCredits,
377+
}),
297378
},
298-
body: JSON.stringify({
299-
action: 'FINISH',
300-
runId,
301-
status,
302-
totalSteps,
303-
directCredits,
304-
totalCredits,
305-
}),
306-
})
379+
logger,
380+
)
307381

308382
if (!response.ok) {
309383
logger.error({ response }, 'finishAgentRun request failed')
@@ -336,21 +410,25 @@ export async function addAgentStep(
336410
const url = new URL(`/api/v1/agent-runs/${agentRunId}/steps`, WEBSITE_URL)
337411

338412
try {
339-
const response = await fetch(url, {
340-
method: 'POST',
341-
headers: {
342-
Authorization: `Bearer ${apiKey}`,
413+
const response = await fetchWithRetry(
414+
url,
415+
{
416+
method: 'POST',
417+
headers: {
418+
Authorization: `Bearer ${apiKey}`,
419+
},
420+
body: JSON.stringify({
421+
stepNumber,
422+
credits,
423+
childRunIds,
424+
messageId,
425+
status,
426+
errorMessage,
427+
startTime,
428+
}),
343429
},
344-
body: JSON.stringify({
345-
stepNumber,
346-
credits,
347-
childRunIds,
348-
messageId,
349-
status,
350-
errorMessage,
351-
startTime,
352-
}),
353-
})
430+
logger,
431+
)
354432

355433
const responseBody = await response.json()
356434
if (!response.ok) {

0 commit comments

Comments
 (0)