Skip to content

Commit fae9205

Browse files
committed
Fireworks: use custom deployment during business hours
1 parent da31b5c commit fae9205

File tree

2 files changed

+495
-13
lines changed

2 files changed

+495
-13
lines changed
Lines changed: 388 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,388 @@
1+
import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test'
2+
3+
import {
4+
createFireworksRequestWithFallback,
5+
DEPLOYMENT_COOLDOWN_MS,
6+
FireworksError,
7+
isDeploymentCoolingDown,
8+
isDeploymentHours,
9+
markDeploymentScalingUp,
10+
resetDeploymentCooldown,
11+
} from '../fireworks'
12+
13+
import type { Logger } from '@codebuff/common/types/contracts/logger'
14+
15+
/** Model id the tests expect in the request body when the standard Fireworks API is used. */
const STANDARD_MODEL_ID = 'accounts/fireworks/models/minimax-m2p5'
/** Model id the tests expect in the request body when the custom deployment is used. */
const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/qne3jo8v'
17+
18+
/**
 * Builds a `Logger` whose `info`, `warn`, `error` and `debug` methods are
 * no-op `bun:test` mocks, so a test can hand it to the code under test and
 * then inspect the calls that were recorded.
 *
 * @returns A fresh logger; no mock state is shared between calls.
 */
function createMockLogger(): Logger {
  return {
    info: mock(() => {}),
    warn: mock(() => {}),
    error: mock(() => {}),
    debug: mock(() => {}),
  }
}
26+
27+
/**
 * Test helper: returns the instant that is half past `hour` Eastern Time on
 * June 15, 2025.
 *
 * That date falls in EDT (UTC-4), so ET hour H corresponds to UTC hour H + 4.
 * `Date.UTC` normalises hour values of 24 or more into the following day, so
 * late-evening ET hours land on June 16 UTC without a hand-written wrap.
 *
 * @param hour - Hour of the day in Eastern Time (0-23).
 * @returns A `Date` at `hour`:30 ET on the fixed EDT date.
 */
function dateAtEtHour(hour: number): Date {
  const EDT_UTC_OFFSET_HOURS = 4
  // Month index 5 is June: Date.UTC months are zero-based.
  return new Date(Date.UTC(2025, 5, 15, hour + EDT_UTC_OFFSET_HOURS, 30))
}
37+
38+
describe('Fireworks deployment routing', () => {
39+
describe('isDeploymentHours', () => {
  // One row per test: the ET hour to probe and whether the deployment window
  // is expected to be open at that hour. Titles and order match the
  // individual tests this table replaces.
  const cases: ReadonlyArray<{ title: string; hour: number; expected: boolean }> = [
    { title: 'returns true at 10am ET (start of window)', hour: 10, expected: true },
    { title: 'returns true at 2pm ET (mid-day)', hour: 14, expected: true },
    { title: 'returns true at 7pm ET (19:00, near end of window)', hour: 19, expected: true },
    { title: 'returns false at 9am ET (before window)', hour: 9, expected: false },
    { title: 'returns false at 8pm ET (20:00, window closed)', hour: 20, expected: false },
    { title: 'returns false at midnight ET', hour: 0, expected: false },
    { title: 'returns false at 3am ET', hour: 3, expected: false },
    { title: 'returns false at 11pm ET', hour: 23, expected: false },
  ]

  for (const { title, hour, expected } of cases) {
    it(title, () => {
      expect(isDeploymentHours(dateAtEtHour(hour))).toBe(expected)
    })
  }
})
72+
73+
describe('deployment cooldown', () => {
  // Clear the cooldown on both sides of every test so the state set by one
  // test is never visible to another.
  beforeEach(() => {
    resetDeploymentCooldown()
  })

  afterEach(() => {
    resetDeploymentCooldown()
  })

  it('isDeploymentCoolingDown returns false initially', () => {
    expect(isDeploymentCoolingDown()).toBe(false)
  })

  it('isDeploymentCoolingDown returns true after markDeploymentScalingUp', () => {
    markDeploymentScalingUp()
    expect(isDeploymentCoolingDown()).toBe(true)
  })

  it('isDeploymentCoolingDown returns false after resetDeploymentCooldown', () => {
    markDeploymentScalingUp()
    // Confirm the cooldown is active first, so the reset below is observable.
    expect(isDeploymentCoolingDown()).toBe(true)
    resetDeploymentCooldown()
    expect(isDeploymentCoolingDown()).toBe(false)
  })

  it('DEPLOYMENT_COOLDOWN_MS is 2 minutes', () => {
    expect(DEPLOYMENT_COOLDOWN_MS).toBe(2 * 60 * 1000)
  })
})
102+
103+
describe('createFireworksRequestWithFallback', () => {
104+
// Reassigned before every test so recorded logger calls never carry over.
let logger: Logger

beforeEach(() => {
  resetDeploymentCooldown()
  logger = createMockLogger()
})

afterEach(() => {
  resetDeploymentCooldown()
})

// Request body shared by the tests below; they pass it with `as never`
// because it only carries the `model` and `messages` fields.
const minimalBody = {
  model: 'minimax/minimax-m2.5',
  messages: [{ role: 'user' as const, content: 'test' }],
}
119+
120+
/**
 * Forces the outcome of the deployment-hours check by patching
 * `Date.prototype.toLocaleString`.
 *
 * While the patch is installed, any call whose options carry
 * `timeZone: 'America/New_York'` and `hour: 'numeric'` returns a fixed hour
 * string: `'14'` when `inHours` is true, `'3'` otherwise. Every other call is
 * forwarded to the implementation that was installed before the patch.
 *
 * @param inHours - Whether the faked ET hour should be the in-window one.
 * @returns A handle whose `restore()` reinstates the previous
 *   `toLocaleString`. The patch is global, so callers must invoke it
 *   (typically in a `finally`).
 */
function spyDeploymentHours(inHours: boolean): { restore: () => void } {
  const original = Date.prototype.toLocaleString
  const fakedHour = inHours ? '14' : '3'

  Date.prototype.toLocaleString = function (
    this: Date,
    ...args: Parameters<Date['toLocaleString']>
  ) {
    const options = args[1] as Intl.DateTimeFormatOptions | undefined
    if (options?.timeZone === 'America/New_York' && options?.hour === 'numeric') {
      return fakedHour
    }
    return original.apply(this, args)
  }

  return {
    restore: () => {
      Date.prototype.toLocaleString = original
    },
  }
}
141+
142+
it('uses standard API outside deployment hours', async () => {
  const spy = spyDeploymentHours(false)
  // Model id of every request body handed to fetch, in call order.
  const fetchCalls: string[] = []

  const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
    const body = JSON.parse(init?.body as string)
    fetchCalls.push(body.model)
    return new Response(JSON.stringify({ ok: true }), { status: 200 })
  }) as unknown as typeof globalThis.fetch

  try {
    const response = await createFireworksRequestWithFallback({
      body: minimalBody as never,
      originalModel: 'minimax/minimax-m2.5',
      fetch: mockFetch,
      logger,
    })

    expect(response.status).toBe(200)
    expect(fetchCalls).toHaveLength(1)
    expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID)
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
167+
168+
it('tries custom deployment during deployment hours', async () => {
  const spy = spyDeploymentHours(true)
  // Model id of every request body handed to fetch, in call order.
  const fetchCalls: string[] = []

  const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
    const body = JSON.parse(init?.body as string)
    fetchCalls.push(body.model)
    return new Response(JSON.stringify({ ok: true }), { status: 200 })
  }) as unknown as typeof globalThis.fetch

  try {
    const response = await createFireworksRequestWithFallback({
      body: minimalBody as never,
      originalModel: 'minimax/minimax-m2.5',
      fetch: mockFetch,
      logger,
    })

    expect(response.status).toBe(200)
    expect(fetchCalls).toHaveLength(1)
    expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
193+
194+
it('falls back to standard API on 503 DEPLOYMENT_SCALING_UP', async () => {
  const spy = spyDeploymentHours(true)
  // Model id of every request body handed to fetch, in call order.
  const fetchCalls: string[] = []

  const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
    const body = JSON.parse(init?.body as string)
    fetchCalls.push(body.model)

    // Only the first request is answered with the scaling-up error; every
    // later request succeeds.
    if (fetchCalls.length === 1) {
      return new Response(
        JSON.stringify({
          error: {
            message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
            code: 'DEPLOYMENT_SCALING_UP',
            type: 'error',
          },
        }),
        { status: 503, statusText: 'Service Unavailable' },
      )
    }

    return new Response(JSON.stringify({ ok: true }), { status: 200 })
  }) as unknown as typeof globalThis.fetch

  try {
    const response = await createFireworksRequestWithFallback({
      body: minimalBody as never,
      originalModel: 'minimax/minimax-m2.5',
      fetch: mockFetch,
      logger,
    })

    expect(response.status).toBe(200)
    expect(fetchCalls).toHaveLength(2)
    expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
    expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
    // Verify cooldown was activated
    expect(isDeploymentCoolingDown()).toBe(true)
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
238+
239+
it('throws FireworksError on non-scaling 503 from deployment', async () => {
  const spy = spyDeploymentHours(true)

  // Every request gets a 503 whose error code is NOT DEPLOYMENT_SCALING_UP.
  const mockFetch = mock(async () => {
    return new Response(
      JSON.stringify({
        error: {
          message: 'Service temporarily unavailable',
          code: 'SERVICE_UNAVAILABLE',
          type: 'error',
        },
      }),
      { status: 503, statusText: 'Service Unavailable' },
    )
  }) as unknown as typeof globalThis.fetch

  try {
    await expect(
      createFireworksRequestWithFallback({
        body: minimalBody as never,
        originalModel: 'minimax/minimax-m2.5',
        fetch: mockFetch,
        logger,
      }),
    ).rejects.toBeInstanceOf(FireworksError)
  } finally {
    // Undo the global Date patch even when the assertion above throws.
    spy.restore()
  }
})
268+
269+
it('skips deployment during cooldown and goes straight to standard API', async () => {
  const spy = spyDeploymentHours(true)
  // Put the deployment into cooldown before the request is made.
  markDeploymentScalingUp()

  // Model id of every request body handed to fetch, in call order.
  const fetchCalls: string[] = []
  const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
    const body = JSON.parse(init?.body as string)
    fetchCalls.push(body.model)
    return new Response(JSON.stringify({ ok: true }), { status: 200 })
  }) as unknown as typeof globalThis.fetch

  try {
    const response = await createFireworksRequestWithFallback({
      body: minimalBody as never,
      originalModel: 'minimax/minimax-m2.5',
      fetch: mockFetch,
      logger,
    })

    expect(response.status).toBe(200)
    expect(fetchCalls).toHaveLength(1)
    expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID)
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
295+
296+
it('uses standard API for models without a custom deployment', async () => {
  const spy = spyDeploymentHours(true)
  // Model id of every request body handed to fetch, in call order.
  const fetchCalls: string[] = []

  const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
    const body = JSON.parse(init?.body as string)
    fetchCalls.push(body.model)
    return new Response(JSON.stringify({ ok: true }), { status: 200 })
  }) as unknown as typeof globalThis.fetch

  try {
    const response = await createFireworksRequestWithFallback({
      body: { ...minimalBody, model: 'some-other/model' } as never,
      originalModel: 'some-other/model',
      fetch: mockFetch,
      logger,
    })

    expect(response.status).toBe(200)
    expect(fetchCalls).toHaveLength(1)
    // Model without mapping falls through to the original model
    expect(fetchCalls[0]).toBe('some-other/model')
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
322+
323+
it('returns non-200 responses from deployment without fallback (non-503)', async () => {
  const spy = spyDeploymentHours(true)
  // Model id of every request body handed to fetch, in call order.
  const fetchCalls: string[] = []

  const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
    const body = JSON.parse(init?.body as string)
    fetchCalls.push(body.model)
    return new Response(
      JSON.stringify({ error: { message: 'Rate limited' } }),
      { status: 429, statusText: 'Too Many Requests' },
    )
  }) as unknown as typeof globalThis.fetch

  try {
    const response = await createFireworksRequestWithFallback({
      body: minimalBody as never,
      originalModel: 'minimax/minimax-m2.5',
      fetch: mockFetch,
      logger,
    })

    // Non-503 errors from deployment are returned as-is (caller handles them)
    expect(response.status).toBe(429)
    expect(fetchCalls).toHaveLength(1)
    expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
352+
353+
it('logs when trying deployment and when falling back', async () => {
  const spy = spyDeploymentHours(true)
  let callCount = 0

  // First request: 503 DEPLOYMENT_SCALING_UP. Any later request: 200.
  const mockFetch = mock(async () => {
    callCount++
    if (callCount === 1) {
      return new Response(
        JSON.stringify({
          error: {
            message: 'Scaling up',
            code: 'DEPLOYMENT_SCALING_UP',
            type: 'error',
          },
        }),
        { status: 503, statusText: 'Service Unavailable' },
      )
    }
    return new Response(JSON.stringify({ ok: true }), { status: 200 })
  }) as unknown as typeof globalThis.fetch

  try {
    await createFireworksRequestWithFallback({
      body: minimalBody as never,
      originalModel: 'minimax/minimax-m2.5',
      fetch: mockFetch,
      logger,
    })

    expect(logger.info).toHaveBeenCalledTimes(2)
  } finally {
    // Undo the global Date patch even when an assertion above throws.
    spy.restore()
  }
})
387+
})
388+
})

0 commit comments

Comments
 (0)