Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion src/bot/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,14 @@ import { questionManager } from "../question/manager.js";
import { interactionManager } from "../interaction/manager.js";
import { clearAllInteractionState } from "../interaction/cleanup.js";
import { keyboardManager } from "../keyboard/manager.js";
import { stopEventListening, subscribeToEvents } from "../opencode/events.js";
import {
getActiveEventDirectory,
getConsecutiveReconnectAttempts,
getLastSseEventTime,
isEventListening,
stopEventListening,
subscribeToEvents,
} from "../opencode/events.js";
import { opencodeReadyLifecycle } from "../opencode/ready-lifecycle.js";
import { summaryAggregator } from "../summary/aggregator.js";
import { formatToolInfo } from "../summary/formatter.js";
Expand Down Expand Up @@ -121,6 +128,7 @@ let botInstance: Bot<Context> | null = null;
let chatIdInstance: number | null = null;
let commandsInitialized = false;
let heartbeatTimer: ReturnType<typeof setInterval> | null = null;
let sseWatchdogTimer: ReturnType<typeof setInterval> | null = null;
let unsubscribeReadyRestore: (() => void) | null = null;

const TELEGRAM_DOCUMENT_CAPTION_MAX_LENGTH = 1024;
Expand Down Expand Up @@ -1055,6 +1063,11 @@ export function createBot(): Bot<Context> {
heartbeatTimer = null;
}

if (sseWatchdogTimer) {
clearInterval(sseWatchdogTimer);
sseWatchdogTimer = null;
}

const botOptions = createTelegramBotOptions(config.telegram);

const bot = new Bot(config.telegram.token, botOptions);
Expand Down Expand Up @@ -1094,6 +1107,37 @@ export function createBot(): Bot<Context> {
}
}, 5000);

// SSE health-check watchdog. The for-await loop cannot detect a silently
// dead stream (no events arrive), so on each tick we look at the last
// event timestamp and the reconnect counter. If we have not seen an event
// for 30s or have piled up >=5 reconnect attempts, we forcibly tear down
// the subscription and re-establish it.
const SSE_STALE_THRESHOLD_MS = 30_000;
const SSE_RECONNECT_LIMIT = 5;
sseWatchdogTimer = setInterval(() => {
if (!isEventListening()) {
return;
}
const lastEventAt = getLastSseEventTime();
const reconnects = getConsecutiveReconnectAttempts();
const timeSinceLastEvent = Date.now() - lastEventAt;
const noEventsForTooLong = lastEventAt > 0 && timeSinceLastEvent > SSE_STALE_THRESHOLD_MS;
const tooManyReconnects = reconnects >= SSE_RECONNECT_LIMIT;

if (!noEventsForTooLong && !tooManyReconnects) {
return;
}

const directory = getActiveEventDirectory();
logger.warn(
`[SSE Watchdog] Restarting SSE subscription (noEvents=${timeSinceLastEvent}ms, reconnects=${reconnects}, directory=${directory ?? "<none>"})`,
);
stopEventListening();
if (directory) {
void ensureEventSubscription(directory);
}
}, 30_000);

// Log all API calls for diagnostics
let lastGetUpdatesTime = Date.now();
bot.api.config.use(async (prev, method, payload, signal) => {
Expand Down Expand Up @@ -1509,6 +1553,11 @@ export function cleanupBotRuntime(reason: string): void {
heartbeatTimer = null;
}

if (sseWatchdogTimer) {
clearInterval(sseWatchdogTimer);
sseWatchdogTimer = null;
}

botInstance = null;
chatIdInstance = null;
}
28 changes: 28 additions & 0 deletions src/opencode/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ let activeDirectory: string | null = null;
let streamAbortController: AbortController | null = null;
let listenerGeneration = 0;

// SSE health tracking — a silently dead stream cannot be detected from inside
// the for-await loop (no events arrive). Expose timestamps + reconnect counter
// so an external watchdog can decide when to forcibly restart the subscription.
let lastSseEventTime = 0;
let consecutiveReconnectAttempts = 0;

function getReconnectDelayMs(attempt: number): number {
const exponentialDelay = RECONNECT_BASE_DELAY_MS * Math.pow(2, Math.max(0, attempt - 1));
return Math.min(exponentialDelay, RECONNECT_MAX_DELAY_MS);
Expand Down Expand Up @@ -201,6 +207,10 @@ export async function subscribeToEvents(directory: string, callback: EventCallba
break;
}

// Mark stream as healthy: any event proves the SSE channel is alive.
lastSseEventTime = Date.now();
consecutiveReconnectAttempts = 0;

// CRITICAL: Explicitly yield to the event loop BEFORE processing the event
// This allows grammY to handle getUpdates between SSE events
await new Promise<void>((resolve) => setImmediate(resolve));
Expand Down Expand Up @@ -249,6 +259,7 @@ export async function subscribeToEvents(directory: string, callback: EventCallba
}

reconnectAttempt++;
consecutiveReconnectAttempts++;
const reconnectDelay = getReconnectDelayMs(reconnectAttempt);
logger.warn(
`Event stream ended for ${directory}, reconnecting in ${reconnectDelay}ms (attempt=${reconnectAttempt})`,
Expand All @@ -272,6 +283,7 @@ export async function subscribeToEvents(directory: string, callback: EventCallba
}

reconnectAttempt++;
consecutiveReconnectAttempts++;
const reconnectDelay = getReconnectDelayMs(reconnectAttempt);
if (isExpectedOpencodeUnavailableError(error)) {
logger.warn(
Expand Down Expand Up @@ -330,3 +342,19 @@ export function stopEventListening(): void {
activeDirectory = null;
logger.info("Event listener stopped");
}

export function getLastSseEventTime(): number {
return lastSseEventTime;
}

export function getConsecutiveReconnectAttempts(): number {
return consecutiveReconnectAttempts;
}

export function isEventListening(): boolean {
return isListening;
}

export function getActiveEventDirectory(): string | null {
return activeDirectory;
}
Loading