Skip to content

Commit 331d735

Browse files
fix(redis): drop cached client and restart PING loop after forced reconnect
1 parent ec5793f commit 331d735

3 files changed

Lines changed: 50 additions & 1 deletion

File tree

apps/sim/lib/core/config/redis.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,37 @@ describe('redis config', () => {
9090
expect(mockRedisInstance.disconnect).toHaveBeenCalledWith(true)
9191
})
9292

93+
it('should drop the cached client so the next getRedisClient() builds a fresh one', async () => {
94+
getRedisClient()
95+
const callsBefore = MockRedisConstructor.mock.calls.length
96+
97+
mockRedisInstance.ping.mockRejectedValue(new Error('ETIMEDOUT'))
98+
await vi.advanceTimersByTimeAsync(15_000)
99+
await vi.advanceTimersByTimeAsync(15_000)
100+
101+
expect(mockRedisInstance.disconnect).toHaveBeenCalledWith(true)
102+
103+
getRedisClient()
104+
expect(MockRedisConstructor.mock.calls.length).toBe(callsBefore + 1)
105+
})
106+
107+
it('should restart the PING health check against the new client', async () => {
108+
getRedisClient()
109+
110+
mockRedisInstance.ping.mockRejectedValue(new Error('ETIMEDOUT'))
111+
await vi.advanceTimersByTimeAsync(15_000)
112+
await vi.advanceTimersByTimeAsync(15_000)
113+
114+
expect(mockRedisInstance.disconnect).toHaveBeenCalledTimes(1)
115+
116+
getRedisClient()
117+
118+
await vi.advanceTimersByTimeAsync(15_000)
119+
await vi.advanceTimersByTimeAsync(15_000)
120+
121+
expect(mockRedisInstance.disconnect).toHaveBeenCalledTimes(2)
122+
})
123+
93124
it('should handle listener errors gracefully without breaking health check', async () => {
94125
const badListener = vi.fn(() => {
95126
throw new Error('listener crashed')

apps/sim/lib/core/config/redis.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ function startPingHealthCheck(redis: Redis): void {
4747
consecutiveFailures: pingFailures,
4848
})
4949
pingFailures = 0
50+
// Drop the cached client and stop this health check before disconnecting,
51+
// so the next getRedisClient() builds a fresh client and a fresh PING loop.
52+
// Listeners may call getRedisClient() and must observe the cleared global.
53+
globalRedisClient = null
54+
if (pingInterval) {
55+
clearInterval(pingInterval)
56+
pingInterval = null
57+
}
5058
for (const cb of reconnectListeners) {
5159
try {
5260
cb()

apps/sim/lib/execution/isolated-vm.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,10 +425,20 @@ async function releaseDistributedLease(ownerKey: string, leaseId: string): Promi
425425
return 1
426426
`
427427

428+
let deadlineTimer: NodeJS.Timeout | undefined
429+
const deadline = new Promise<never>((_, reject) => {
430+
deadlineTimer = setTimeout(
431+
() => reject(new Error(`Redis lease release timed out after ${LEASE_REDIS_DEADLINE_MS}ms`)),
432+
LEASE_REDIS_DEADLINE_MS
433+
)
434+
})
435+
428436
try {
429-
await redis.eval(script, 1, key, leaseId)
437+
await Promise.race([redis.eval(script, 1, key, leaseId), deadline])
430438
} catch (error) {
431439
logger.error('Failed to release distributed owner lease', { ownerKey, error })
440+
} finally {
441+
clearTimeout(deadlineTimer)
432442
}
433443
}
434444

0 commit comments

Comments
 (0)