Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 13 additions & 22 deletions backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,39 +118,30 @@ app.use("/api/v1/pool", poolRoutes);
app.use("/api/v1/state", stateRoutes);
app.use("/api/v1/metrics", createMetricsRouter());

// Health check endpoint with database connectivity verification
// Enhanced health check endpoint with comprehensive diagnostics
app.get("/health", async (req: Request, res: Response) => {
const startTime = Date.now();
logger.debug("Health check requested");
logger.debug("Enhanced health check requested");

try {
// Ping DB to ensure it's alive
await prisma.$queryRaw`SELECT 1`;
const duration = Date.now() - startTime;
const healthResult = await performHealthCheck();

logger.info("Health check passed", {
status: "ok",
db: "connected",
duration,
});
const statusCode = healthResult.status === "healthy" ? 200 :
healthResult.status === "degraded" ? 200 : 503;

res.status(200).json({
status: "ok",
db: "connected",
timestamp: new Date().toISOString(),
uptime: process.uptime(),
logger.info("Health check completed", {
status: healthResult.status,
dbLatency: healthResult.database.latencyMs,
});

res.status(statusCode).json(healthResult);
} catch (error) {
const duration = Date.now() - startTime;
logger.error("Health check failed", {
logger.error("Health check failed critically", {
error: error instanceof Error ? error.message : String(error),
duration,
});

res.status(503).json({
status: "error",
db: "disconnected",
error: error instanceof Error ? error.message : "Unknown error",
status: "unhealthy",
error: "Health check system failed",
timestamp: new Date().toISOString(),
});
}
Expand Down
173 changes: 173 additions & 0 deletions backend/src/utils/health.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import { loadavg } from "os";

import { pool } from "../config/db";
import { trace } from "../config/tracing";
import { getRecoveryCronStats } from "./recovery-cron";

const logger = trace.getLogger("health");

// ---------------------------------------------------------------------------
// Health Check Result Types
// ---------------------------------------------------------------------------

/**
 * Aggregate report returned by performHealthCheck(): an overall status plus
 * one section per inspected component.
 */
export interface HealthCheckResult {
// Overall verdict computed by performHealthCheck() from the sections below.
status: "healthy" | "degraded" | "unhealthy";
// Result of the database connectivity probe.
database: DatabaseHealth;
// Snapshot of connection-pool counters.
pool: PoolHealth;
// Latest statistics reported for the recovery cron.
recovery: RecoveryHealth;
// Process-level resource and runtime information.
system: SystemHealth;
// ISO-8601 time at which the report was assembled (Date#toISOString()).
timestamp: string;
// Value of process.uptime() when the report was assembled.
uptime: number;
}

/** Outcome of the database connectivity probe run by checkDatabase(). */
export interface DatabaseHealth {
// True when the probe query completed without throwing.
connected: boolean;
// Probe duration in milliseconds, rounded to two decimals; 0 when the probe failed.
latencyMs: number;
// Message of the error that made the probe fail, or null on success.
lastError: string | null;
}

/** Connection-pool counters captured by checkPool(). */
export interface PoolHealth {
// Taken from pool.totalCount.
totalConnections: number;
// Taken from pool.idleCount.
idleConnections: number;
// Derived value: totalConnections minus idleConnections.
activeConnections: number;
// Taken from pool.waitingCount.
waitingRequests: number;
// Parsed from the POOL_MAX_CONNECTIONS environment variable ("20" when unset).
maxConnections: number;
// True when the pool currently holds at least one connection.
healthCheckOk: boolean;
}

/**
 * Recovery-cron section of the health report. Apart from cronRunning, every
 * field is copied from getRecoveryCronStats() by checkRecovery().
 */
export interface RecoveryHealth {
// NOTE(review): checkRecovery() currently always sets this to true.
cronRunning: boolean;
// Presumably a timestamp string of the most recent run, null if none — TODO confirm format.
lastRunAt: string | null;
// Whether the most recent run was reported as successful; false downgrades overall status.
lastRunOk: boolean;
// Error reported for the most recent run, or null.
lastError: string | null;
// Processed-record counter as reported by the cron statistics.
recordsProcessed: number;
// Abandoned-record counter as reported by the cron statistics.
recordsAbandoned: number;
}

/** Process and runtime information gathered by checkSystem(). */
export interface SystemHealth {
// Fields of process.memoryUsage(), each divided by 1024 * 1024 and rounded to a whole number.
memoryUsageMb: {
rss: number;
heapUsed: number;
heapTotal: number;
external: number;
};
// CPU load figures supplied by checkSystem().
cpuLoad: number[];
// Value of process.version.
nodeVersion: string;
// Value of process.platform.
platform: string;
}

// ---------------------------------------------------------------------------
// Individual Health Checks
// ---------------------------------------------------------------------------

/**
 * Probes database connectivity by checking a client out of the pool, running
 * `SELECT 1`, and timing the whole round trip.
 *
 * @returns On success, `connected: true` with the latency in milliseconds
 *   (rounded to two decimals). On any failure, `connected: false`,
 *   `latencyMs: 0` and the error message. Failures are reported in the
 *   result instead of being thrown.
 */
async function checkDatabase(): Promise<DatabaseHealth> {
  const startTime = process.hrtime();
  try {
    const client = await pool.connect();
    try {
      await client.query("SELECT 1");
    } finally {
      // Always hand the client back, even when the probe query throws;
      // otherwise each failed health check would leak a pooled connection.
      client.release();
    }
    const [seconds, nanoseconds] = process.hrtime(startTime);
    const latencyMs = seconds * 1000 + nanoseconds / 1_000_000;
    return {
      connected: true,
      latencyMs: Math.round(latencyMs * 100) / 100,
      lastError: null,
    };
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances, so narrow first.
    return {
      connected: false,
      latencyMs: 0,
      lastError: err instanceof Error ? err.message : String(err),
    };
  }
}

/**
 * Captures the connection pool's current counters.
 *
 * `maxConnections` is read from the `POOL_MAX_CONNECTIONS` environment
 * variable and falls back to 20 when the variable is unset, empty, or not a
 * number. Without that fallback a malformed value would yield `NaN` and
 * silently disable any comparison made against it.
 *
 * @returns Pool counters plus `healthCheckOk`, which is true when the pool
 *   currently holds at least one connection.
 */
function checkPool(): PoolHealth {
  const defaultMaxConnections = 20;
  const parsedMax = parseInt(process.env.POOL_MAX_CONNECTIONS || "", 10);
  const maxConnections = Number.isNaN(parsedMax) ? defaultMaxConnections : parsedMax;

  const { totalCount, idleCount, waitingCount } = pool;
  return {
    totalConnections: totalCount,
    idleConnections: idleCount,
    activeConnections: totalCount - idleCount,
    waitingRequests: waitingCount,
    maxConnections,
    healthCheckOk: totalCount > 0,
  };
}

/**
 * Builds the recovery section of the health report from the statistics
 * exposed by getRecoveryCronStats().
 *
 * NOTE(review): `cronRunning` is a constant `true` here and is not derived
 * from the statistics — confirm whether the cron exposes a real liveness flag.
 */
function checkRecovery(): RecoveryHealth {
  const {
    lastRunAt,
    lastRunOk,
    lastError,
    recordsProcessed,
    recordsAbandoned,
  } = getRecoveryCronStats();

  return {
    cronRunning: true,
    lastRunAt,
    lastRunOk,
    lastError,
    recordsProcessed,
    recordsAbandoned,
  };
}

/**
 * Collects process memory usage, host load averages, and runtime identifiers.
 *
 * `cpuLoad` holds the 1, 5 and 15 minute load averages from `os.loadavg()`,
 * matching the `number[]` declared on `SystemHealth`. (`process.cpuUsage()`
 * returns a `{ user, system }` object and does not fit that contract.)
 * On Windows `os.loadavg()` always reports `[0, 0, 0]`.
 *
 * @returns Memory figures in whole mebibytes, load averages, Node version and platform.
 */
function checkSystem(): SystemHealth {
  const toMb = (bytes: number): number => Math.round(bytes / 1024 / 1024);
  const { rss, heapUsed, heapTotal, external } = process.memoryUsage();

  return {
    memoryUsageMb: {
      rss: toMb(rss),
      heapUsed: toMb(heapUsed),
      heapTotal: toMb(heapTotal),
      external: toMb(external),
    },
    cpuLoad: loadavg(),
    nodeVersion: process.version,
    platform: process.platform,
  };
}

// ---------------------------------------------------------------------------
// Comprehensive Health Check
// ---------------------------------------------------------------------------

/**
 * Runs every component check and folds the results into a single report.
 *
 * Overall status, first matching rule wins:
 * - "unhealthy": the database probe could not connect.
 * - "degraded": database latency is above 1000 ms, OR the number of requests
 *   waiting on the pool exceeds half of `maxConnections`, OR the last
 *   recovery cron run was not OK.
 * - "healthy": none of the above.
 *
 * System metrics are included in the report for diagnostics only; they do
 * not currently influence the status.
 *
 * @returns The assembled report with an ISO-8601 timestamp and process uptime.
 */
export async function performHealthCheck(): Promise<HealthCheckResult> {
  const slowDatabaseThresholdMs = 1000;
  const poolSaturationRatio = 0.5;

  // The database probe is the only asynchronous check; the rest are synchronous snapshots.
  const database = await checkDatabase();
  const poolStats = checkPool();
  const recoveryStats = checkRecovery();
  const systemStats = checkSystem();

  let status: HealthCheckResult["status"] = "healthy";
  if (!database.connected) {
    status = "unhealthy";
  } else if (
    database.latencyMs > slowDatabaseThresholdMs ||
    poolStats.waitingRequests > poolStats.maxConnections * poolSaturationRatio ||
    !recoveryStats.lastRunOk
  ) {
    status = "degraded";
  }

  logger.info("Health check completed", {
    status,
    dbLatency: database.latencyMs,
    poolActive: poolStats.activeConnections,
    poolWaiting: poolStats.waitingRequests,
  });

  return {
    status,
    database,
    pool: poolStats,
    recovery: recoveryStats,
    system: systemStats,
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
  };
}
Loading
Loading