Skip to content
This repository was archived by the owner on Jun 30, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/dependabot-auto-approve-minor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ jobs:
matrix:
dependencyStartsWith:
- '@checkernetwork/prettier-config'
- '@sentry/'
- undici
- '@types/'
- typescript
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ $ journalctl -u checker.service

## Disclaimer

The CLI uses [Sentry](https://sentry.io) for error tracking.
[InfluxDB](https://www.influxdata.com/) is used for stats.

## Development
Expand Down
9 changes: 0 additions & 9 deletions bin/checker.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env node

import { checker } from '../commands/checker.js'
import * as Sentry from '@sentry/node'
import yargs from 'yargs/yargs'
import { hideBin } from 'yargs/helpers'
import fs from 'node:fs/promises'
Expand All @@ -10,14 +9,6 @@ import { maybeMigrateRuntimeState } from '../lib/migrate.js'

const pkg = JSON.parse(await fs.readFile(paths.packageJSON, 'utf8'))

// Configure the Sentry SDK at startup, before the runtime-state migration and
// the CLI argument parsing that follow.
Sentry.init({
dsn: 'https://775e0a9786704436a8c135d874657766@o1408530.ingest.us.sentry.io/4504792315199488',
// Both values are read from the package.json contents parsed into `pkg`.
release: pkg.version,
environment: pkg.sentryEnvironment,
// NOTE(review): presumably samples 10% of traces - confirm against Sentry docs.
tracesSampleRate: 0.1,
// Patterns matching permission (EACCES, EPERM), disk-space (ENOSPC) and
// broken-pipe (EPIPE) error codes are passed as errors to ignore.
ignoreErrors: [/EACCES/, /EPERM/, /ENOSPC/, /EPIPE/],
})

await maybeMigrateRuntimeState()

yargs(hideBin(process.argv))
Expand Down
2 changes: 0 additions & 2 deletions lib/contracts.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ async function getContractAddresses() {
revision = await Name.resolve(name)
} catch (err) {
reportW3NameError()
// These errors aren't actionable
err.reportToSentry = false
throw err
}
return revision.value.split('\n').filter(Boolean)
Expand Down
19 changes: 1 addition & 18 deletions lib/metrics.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { writeClient } from './telemetry.js'
import { Point } from '@influxdata/influxdb-client'
import EventEmitter from 'node:events'
import * as Sentry from '@sentry/node'

export class MetricsEvent {
/**
Expand Down Expand Up @@ -52,11 +51,7 @@ export class Metrics {
const diff =
metrics.totalJobsCompleted -
this.subnetMetrics.get(subnetName).totalJobsCompleted
if (diff < 0) {
this.maybeReportErrorToSentry(
new Error(`Negative jobs completed for ${subnetName}`),
)
} else if (diff > 0) {
if (diff > 0) {
writeClient.writePoint(
new Point('jobs-completed')
.tag('subnet', subnetName)
Expand Down Expand Up @@ -91,18 +86,6 @@ export class Metrics {
onUpdate(fn) {
this.#events.on('update', fn)
}

/** @param {unknown} err */
maybeReportErrorToSentry(err) {
const now = Date.now()
if (now - this.lastErrorReportedAt < 4 /* HOURS */ * 3600_000) return
this.lastErrorReportedAt = now

console.error(
'Reporting the problem to Sentry for inspection by the Checker team.',
)
Sentry.captureException(err)
}
}

export const metrics = new Metrics()
2 changes: 0 additions & 2 deletions lib/subnets.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ async function getLatestCID(ipnsKey) {
revision = await Name.resolve(name)
} catch (err) {
reportW3NameError()
// These errors aren't actionable
err.reportToSentry = false
throw err
}
// /ipfs/:cid
Expand Down
20 changes: 3 additions & 17 deletions lib/telemetry.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { InfluxDB, Point } from '@influxdata/influxdb-client'
import { createHash, randomUUID } from 'node:crypto'
import * as Sentry from '@sentry/node'
import assert from 'node:assert'
import { platform, arch, cpus, totalmem } from 'node:os'
import fs from 'node:fs/promises'
Expand Down Expand Up @@ -37,19 +36,6 @@ const writeClientMachines = client.getWriteApi(
's', // precision
)

// Error names and codes that are skipped instead of being sent to Sentry.
const unactionableErrors =
  /HttpError|getAddrInfo|RequestTimedOutError|ECONNRESET|EPIPE|ENETDOWN|ENOBUFS|EHOSTUNREACH|ERR_TLS_CERT_ALTNAME_INVALID|ETIMEDOUT|EPROTO|ENETUNREACH/i

/**
 * Rejection handler for write-client flushes.
 *
 * The error is dropped when either its string form or its string `code`
 * property matches `unactionableErrors`; anything else is captured by Sentry.
 *
 * @param {unknown} err The rejection reason from `flush()`.
 */
const handleFlushError = (err) => {
  // Short-circuit order mirrors the checks: string form first, then `code`.
  const isUnactionable =
    unactionableErrors.test(String(err)) ||
    (typeof err?.code === 'string' && unactionableErrors.test(err.code))

  if (!isUnactionable) {
    Sentry.captureException(err)
  }
}

export const runPingLoop = async ({ CHECKER_ID }) => {
assert(FIL_WALLET_ADDRESS)

Expand All @@ -67,7 +53,7 @@ export const runPingLoop = async ({ CHECKER_ID }) => {
point.tag('arch', arch())
point.tag('deployment_type', DEPLOYMENT_TYPE)
writeClient.writePoint(point)
writeClient.flush().catch(handleFlushError)
writeClient.flush().catch(() => {})
await timers.setTimeout(10 * 60 * 1000) // 10 minutes
}
}
Expand Down Expand Up @@ -98,7 +84,7 @@ export const runMachinesLoop = async ({ CHECKER_ID }) => {
point.tag('arch', arch())
point.intField('memory_total_b', totalmem())
writeClientMachines.writePoint(point)
writeClientMachines.flush().catch(handleFlushError)
writeClientMachines.flush().catch(() => {})
await timers.setTimeout(24 * 3600 * 1000) // 1 day
}
}
Expand All @@ -107,5 +93,5 @@ export const reportW3NameError = () => {
const point = new Point('w3name-error')
point.stringField('version', pkg.version)
writeClient.writePoint(point)
writeClient.flush().catch(handleFlushError)
writeClient.flush().catch(() => {})
}
53 changes: 2 additions & 51 deletions lib/zinnia.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { execa } from 'execa'
import * as Sentry from '@sentry/node'
import { installRuntime, getRuntimeExecutable } from './runtime.js'
import { updateSourceFiles } from './subnets.js'
import os from 'node:os'
Expand Down Expand Up @@ -52,38 +51,6 @@ export const install = () =>
],
})

// Timestamp (ms since epoch) of the last error actually sent to Sentry.
let lastErrorReportedAt = 0

/**
 * Report an error to Sentry, at most once every four hours.
 *
 * Object errors carrying `reportToSentry === false` are skipped, and they do
 * not start the cool-down window, so an opted-out error cannot silence a later
 * reportable one. Every object error that is reported gets marked with
 * `reportToSentry: false` so the same error is not reported a second time.
 *
 * String `details` (last 50 lines only) and string `subnetName` properties of
 * the error are attached to the Sentry event as extra data.
 *
 * @param {unknown} err The thrown value; may be a non-object, including `null`.
 */
const maybeReportErrorToSentry = (/** @type {unknown} */ err) => {
  const now = Date.now()
  if (now - lastErrorReportedAt < 4 /* HOURS */ * 3600_000) return

  /** @type {Parameters<Sentry.captureException>[1]} */
  const hint = { extra: {} }
  // `typeof null` is 'object' too, and the `in` operator throws on null,
  // so null has to be excluded explicitly.
  if (typeof err === 'object' && err !== null) {
    if ('reportToSentry' in err && err.reportToSentry === false) {
      return
    }
    Object.assign(err, { reportToSentry: false })
    if ('details' in err && typeof err.details === 'string') {
      // Quoting from https://develop.sentry.dev/sdk/data-handling/
      // > Messages are limited to 8192 characters.
      // > Individual extra data items are limited to 16kB. Total extra data is limited to 256kb.
      // Let's store the additional details (e.g. stdout && stderr) in an extra field
      const tail = err.details.split(/\n/g).slice(-50).join('\n')
      hint.extra.details = tail
    }
    if ('subnetName' in err && typeof err.subnetName === 'string') {
      hint.extra.subnetName = err.subnetName
    }
  }

  // Start the cool-down only once we know the error is really being sent.
  lastErrorReportedAt = now

  console.error(
    'Reporting the problem to Sentry for inspection by the Checker team.',
  )
  Sentry.captureException(err, hint)
}

const matchesSubnetFilter = (subnet) =>
SUBNET_FILTER === '' || subnet === SUBNET_FILTER

Expand Down Expand Up @@ -170,7 +137,6 @@ const runUpdateSourceFilesLoop = async ({
message: 'Failed to update subnet source code',
})
console.error(err)
maybeReportErrorToSentry(err)
}
}
}
Expand Down Expand Up @@ -207,9 +173,7 @@ const catchChildProcessExit = async ({

await Promise.race(tasks)
} catch (err) {
if (err.name === 'AbortError') {
Object.assign(err, { reportToSentry: false })
} else {
if (err.name !== 'AbortError') {
const subnetName = capitalize(err.subnetName ?? 'Zinnia')
const exitReason = err.exitReason ?? 'for unknown reason'
const message = `${subnetName} crashed ${exitReason}`
Expand All @@ -225,17 +189,10 @@ const catchChildProcessExit = async ({
) {
// These signal codes are triggered when somebody terminates the process from outside.
// It's not a problem in Zinnia, there is nothing we can do about this.
// Don't report this error to Sentry and don't print the stack trace to stderr,
// Don't print the stack trace to stderr,
// treat this as a regular exit (successful completion of the process).
// (Note that this event has been already logged via `onActivity()` call above.)
return
} else {
// Apply a custom rule to force Sentry to group all issues with the same subnet & exit code
// See https://docs.sentry.io/platforms/node/usage/sdk-fingerprinting/#basic-example
Sentry.withScope((scope) => {
scope.setFingerprint([message])
maybeReportErrorToSentry(subnetErr)
})
}
}
throw err
Expand Down Expand Up @@ -318,10 +275,6 @@ export async function run({
message: `${capitalize(subnet)} has been inactive for 5 minutes, restarting...`,
})

const err = new Error('Module inactive for 5 minutes')
Object.assign(err, { module })
maybeReportErrorToSentry(err)

controller.abort()
},
5 * 60 * 1000,
Expand All @@ -340,7 +293,6 @@ export async function run({
text: data,
}).catch((err) => {
console.error(err)
Sentry.captureException(err)
})
})
childProcess.stderr.setEncoding('utf-8')
Expand All @@ -367,7 +319,6 @@ export async function run({
console.error('Zinnia main loop aborted')
} else {
console.error('Zinnia main loop errored', err)
maybeReportErrorToSentry(err)
}
} finally {
controller.abort()
Expand Down
Loading