Skip to content

Commit d541f02

Browse files
committed
fix(discord): improve advisory lock health check to verify lock ownership
- Change health check from SELECT 1 to querying pg_locks to verify lock is still held - Add checks for classid=0, objid=lockId, pid=pg_backend_pid(), granted=true - Reduce health check interval from 30s to 10s for faster detection - Add unit test to verify pg_locks query structure
1 parent 318d908 commit d541f02

File tree

2 files changed

+109
-7
lines changed

2 files changed

+109
-7
lines changed

packages/internal/src/db/__tests__/advisory-lock.test.ts

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ describe('advisory-lock', () => {
116116
const result = await tryAcquireAdvisoryLock(ADVISORY_LOCK_IDS.DISCORD_BOT)
117117

118118
expect(setIntervalSpy).toHaveBeenCalledTimes(1)
119-
expect(setIntervalSpy.mock.calls[0][1]).toBe(30_000) // 30 seconds
119+
expect(setIntervalSpy.mock.calls[0][1]).toBe(10_000) // 10 seconds
120120

121121
await result.handle?.release()
122122
})
@@ -384,9 +384,17 @@ describe('advisory-lock', () => {
384384
expect(lostCallback).toHaveBeenCalledTimes(1)
385385
})
386386

387-
it('should do nothing when health check succeeds', async () => {
388-
// All calls succeed
389-
mockConnection.tagged.mockResolvedValue([{ acquired: true }])
387+
it('should do nothing when health check succeeds and lock is still held', async () => {
388+
// First call acquires lock, subsequent calls check lock ownership
389+
let callCount = 0
390+
mockConnection.tagged.mockImplementation(() => {
391+
callCount++
392+
if (callCount === 1) {
393+
return Promise.resolve([{ acquired: true }])
394+
}
395+
// Health check returns that lock is still held
396+
return Promise.resolve([{ held: true }])
397+
})
390398

391399
let healthCheckCallback: (() => Promise<void>) | null = null
392400
setIntervalSpy.mockImplementation((callback: () => Promise<void>) => {
@@ -408,6 +416,84 @@ describe('advisory-lock', () => {
408416
// Clean up
409417
await result.handle?.release()
410418
})
419+
420+
it('should trigger onLost when lock is no longer held', async () => {
421+
// First call acquires lock, subsequent calls show lock is not held
422+
let callCount = 0
423+
mockConnection.tagged.mockImplementation(() => {
424+
callCount++
425+
if (callCount === 1) {
426+
return Promise.resolve([{ acquired: true }])
427+
}
428+
// Health check returns that lock is no longer held (e.g., another process took it)
429+
return Promise.resolve([{ held: false }])
430+
})
431+
432+
let healthCheckCallback: (() => Promise<void>) | null = null
433+
setIntervalSpy.mockImplementation((callback: () => Promise<void>) => {
434+
healthCheckCallback = callback
435+
return 123 as unknown as NodeJS.Timeout
436+
})
437+
438+
const result = await tryAcquireAdvisoryLock(ADVISORY_LOCK_IDS.DISCORD_BOT)
439+
440+
const lostCallback = mock(() => {})
441+
result.handle?.onLost(lostCallback)
442+
443+
// Trigger health check
444+
await healthCheckCallback!()
445+
446+
expect(lostCallback).toHaveBeenCalledTimes(1)
447+
expect(consoleErrorSpy).toHaveBeenCalledWith(
448+
'Advisory lock health check failed - lock no longer held',
449+
)
450+
})
451+
452+
it('should query pg_locks with correct structure in health check', async () => {
453+
// First call acquires lock, second call is the health check
454+
let callCount = 0
455+
mockConnection.tagged.mockImplementation(() => {
456+
callCount++
457+
if (callCount === 1) {
458+
return Promise.resolve([{ acquired: true }])
459+
}
460+
return Promise.resolve([{ held: true }])
461+
})
462+
463+
let healthCheckCallback: (() => Promise<void>) | null = null
464+
setIntervalSpy.mockImplementation((callback: () => Promise<void>) => {
465+
healthCheckCallback = callback
466+
return 123 as unknown as NodeJS.Timeout
467+
})
468+
469+
const result = await tryAcquireAdvisoryLock(ADVISORY_LOCK_IDS.DISCORD_BOT)
470+
471+
// Trigger health check
472+
await healthCheckCallback!()
473+
474+
// Verify the health check query was called (second call)
475+
expect(mockConnection.tagged).toHaveBeenCalledTimes(2)
476+
477+
// Get the health check query (second call)
478+
const [queryStrings, lockIdArg] = mockConnection.tagged.mock.calls[1]
479+
const fullQuery = queryStrings.join('')
480+
481+
// Verify the query checks pg_locks with all required conditions
482+
expect(fullQuery).toContain('SELECT EXISTS')
483+
expect(fullQuery).toContain('FROM pg_locks')
484+
expect(fullQuery).toContain("locktype = 'advisory'")
485+
expect(fullQuery).toContain('classid = 0')
486+
expect(fullQuery).toContain('objid =')
487+
expect(fullQuery).toContain('pid = pg_backend_pid()')
488+
expect(fullQuery).toContain('granted = true')
489+
expect(fullQuery).toContain('as held')
490+
491+
// Verify the lock ID is passed as a parameter
492+
expect(lockIdArg).toBe(ADVISORY_LOCK_IDS.DISCORD_BOT)
493+
494+
// Clean up
495+
await result.handle?.release()
496+
})
411497
})
412498

413499
describe('edge cases', () => {

packages/internal/src/db/advisory-lock.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export const ADVISORY_LOCK_IDS = {
1212

1313
export type AdvisoryLockId = (typeof ADVISORY_LOCK_IDS)[keyof typeof ADVISORY_LOCK_IDS]
1414

15-
const HEALTH_CHECK_INTERVAL_MS = 30_000 // 30 seconds
15+
const HEALTH_CHECK_INTERVAL_MS = 10_000 // 10 seconds
1616

1717
export interface LockHandle {
1818
/** Register a callback to be called if the lock is lost (connection dies) */
@@ -67,11 +67,27 @@ export async function tryAcquireAdvisoryLock(lockId: AdvisoryLockId): Promise<{
6767
}
6868
}
6969

70-
// Start health check interval
70+
// Start health check interval - verify we still hold the lock, not just connection liveness
7171
healthCheckTimer = setInterval(async () => {
7272
if (isReleased) return
7373
try {
74-
await connection`SELECT 1`
74+
// Query pg_locks to verify we still hold this specific advisory lock
75+
// This catches cases where the lock was lost but connection stayed alive
76+
const result = await connection`
77+
SELECT EXISTS (
78+
SELECT 1 FROM pg_locks
79+
WHERE locktype = 'advisory'
80+
AND classid = 0
81+
AND objid = ${lockId}
82+
AND pid = pg_backend_pid()
83+
AND granted = true
84+
) as held
85+
`
86+
const stillHeld = result[0]?.held === true
87+
if (!stillHeld) {
88+
console.error('Advisory lock health check failed - lock no longer held')
89+
triggerLost()
90+
}
7591
} catch {
7692
console.error('Advisory lock health check failed - connection lost')
7793
triggerLost()

0 commit comments

Comments
 (0)