Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
1ea4e85
feat: add Mimir alerting integration
gsanchietti Apr 8, 2026
26763b2
feat: add alert history webhook
gsanchietti Apr 9, 2026
62458bc
feat: add HostDown alert monitor to collect service
gsanchietti Apr 9, 2026
9cb2dbf
refactor: separate stdout/stderr in Python scripts for jq compatibility
gsanchietti Apr 9, 2026
5d1802e
feat: ALPHA ui for alerting
gsanchietti Apr 9, 2026
508143e
fix: improve HostDown logic
gsanchietti Apr 10, 2026
611d392
fix: improve alert.py script for resolving alerts
gsanchietti Apr 10, 2026
573bead
fix: improve Mimir alerting integration consistency and add totals/tr…
edospadoni Apr 9, 2026
61cb1ec
test: add unit tests for alerting webhook, handlers, and repository
edospadoni Apr 9, 2026
4f2e983
feat: improve alert emails with localization, system link, and system…
edospadoni Apr 10, 2026
36db989
fix: rename alert_history migration from 018 to 019 to avoid conflict…
edospadoni Apr 10, 2026
2ee39e0
docs: consolidate alerting docs into mimir README and add user guide
edospadoni Apr 10, 2026
b4d8a23
refactor: remove unused dedup index and ON CONFLICT from alert_history
edospadoni Apr 10, 2026
168748c
feat(alerting): auto-provision org configs and enrich alert emails wi…
edospadoni Apr 10, 2026
d9f528b
fix(alerting): gracefully handle Mimir 404 and empty config
edospadoni Apr 10, 2026
cb2d388
refactor(frontend): align alerting API paths with backend /alerts naming
edospadoni Apr 10, 2026
3446649
feat(mimir): add Makefile for local development
edospadoni Apr 10, 2026
543fdae
fix: reduce log verbosity
gsanchietti Apr 10, 2026
3432056
fix (mimir scripts): fix small regression
gsanchietti Apr 10, 2026
77fb088
fix(mimir scripts): align alerting_config.py with /alerts API rename
edospadoni Apr 10, 2026
79aee3d
fix(mimir scripts): add required --tenant-id and --app-id arguments
gsanchietti Apr 10, 2026
894087e
docs(mimir scripts): update README with --tenant-id and --app-id argu…
gsanchietti Apr 10, 2026
3dce98f
fix(mimir scripts): make --app-id optional with sensible default
gsanchietti Apr 10, 2026
8e45c03
fix(mimir scripts): improve error messages and validation
gsanchietti Apr 10, 2026
6881870
fix: fix regressions after rebase
gsanchietti Apr 13, 2026
1439615
feat(ui): improve alerting draft
gsanchietti Apr 14, 2026
2b6df7a
fix(alerting): improve HostDown alert logic
gsanchietti Apr 14, 2026
c7ec2da
fix(ui): draft, cleanup alerting
gsanchietti Apr 14, 2026
133bfb1
fix(alerting): force system_key on server side
gsanchietti Apr 14, 2026
f7dc363
feat(backend): add API to manage alert silence from UI
gsanchietti Apr 14, 2026
13d5abe
feat(ui): draft implementation for silences
gsanchietti Apr 14, 2026
b0f5859
feat(backend): handle delete silence
gsanchietti Apr 14, 2026
f57c9b4
feat(ui): disable alert silence
gsanchietti Apr 14, 2026
2b1f116
fix(mimir): collect, restrict access to machine alerts and silences
gsanchietti Apr 15, 2026
178161d
feat: add golang template for annotations
gsanchietti Apr 16, 2026
b25c36e
feat: silence CRUD, end date picker, silences card in system alerts tab
gsanchietti Apr 16, 2026
6a5bc33
fix: improve label for hostdown
gsanchietti Apr 16, 2026
8c6e2fb
feat: rename HostDown alert to LinkFailed, update descriptions
gsanchietti Apr 16, 2026
d5404f0
fix: disable mail and webhook notifications on new organization creation
gsanchietti Apr 16, 2026
b0ed5d6
feat: alert history card, suppressed row grayout, silence alert name,…
gsanchietti Apr 16, 2026
331efc6
feat: add telegram integration
gsanchietti Apr 16, 2026
9677900
fix(ui): improve silence list
gsanchietti Apr 16, 2026
7e622f3
fix(ui): improve telegram config
gsanchietti Apr 16, 2026
298ed5f
fix(ui): telegram
gsanchietti Apr 16, 2026
1e15441
docs: add Telegram bot configuration guide to alerting docs
gsanchietti Apr 16, 2026
01ca11c
fix(alerting): switch Telegram parse_mode from MarkdownV2 to HTML
gsanchietti Apr 16, 2026
d230ebc
chore(alerting): enhance Telegram message templates with better forma…
gsanchietti Apr 16, 2026
09de3a0
fix: refactor link failed alert
gsanchietti Apr 16, 2026
aa2255a
fix: improve telegram template
gsanchietti Apr 16, 2026
485d179
fix: fix unmarshall of alert list from mimir
gsanchietti Apr 16, 2026
7bd892b
fix: improve LinkFailed logic
gsanchietti Apr 16, 2026
89ba753
chore(doc): improve telegram instructions
gsanchietti Apr 16, 2026
c8fbcd9
fix(collect): stop LinkFailed alert flapping with TTL-refresh model
gsanchietti Apr 16, 2026
5f05d92
fix(collect): explicit resolve on recovery + debug logging in LinkFai…
gsanchietti Apr 16, 2026
d5b8793
refactor(collect): drop explicit resolve from LinkFailed monitor
gsanchietti Apr 16, 2026
1bc1931
fix: improve linkfailed logic
gsanchietti Apr 17, 2026
1baa9dc
frontend: add alert details tooltip in system active alerts
gsanchietti Apr 17, 2026
380fe75
chore: add debug log for mimir success post
gsanchietti Apr 17, 2026
ccfe65a
fix: reduce linkfailed interval for resolution
gsanchietti Apr 17, 2026
024c937
chore: update doc
gsanchietti Apr 17, 2026
e9b1810
chore: remove test scripts
gsanchietti Apr 17, 2026
50088b0
chore(alerting): remove dead code and simplify handlers
gsanchietti Apr 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/ci-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ jobs:
context: proxy
- component: mimir
context: services/mimir
image_description: Mimir alertmanager backend for My Nethesis

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -175,7 +176,9 @@ jobs:
platforms: linux/amd64
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
labels: |
${{ steps.meta.outputs.labels }}
${{ matrix.image_description != '' && format('org.opencontainers.image.description={0}', matrix.image_description) || '' }}
cache-from: type=gha,scope=${{ matrix.component }}
cache-to: type=gha,mode=max,scope=${{ matrix.component }}
build-args: |
Expand Down
4 changes: 2 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ Source of truth for routes: `backend/main.go`. Source of truth for the API contr

### 3.2 Collect (`collect/`)

Inventory ingestion + Mimir proxy + HostDown cron.
Inventory ingestion + Mimir proxy + LinkFailed cron.

```
main.go
Expand All @@ -111,7 +111,7 @@ main.go
--> workers/ (InventoryWorker, DiffWorker, NotificationWorker, CleanupWorker,
QueueMonitorWorker, DelayedMessageWorker — all started by manager.go)
--> differ/ (YAML-configured JSON diff engine, severity/significance)
--> cron/ (heartbeat_monitor.go — alive/dead/zombie + HostDown alert poster)
--> cron/ (heartbeat_monitor.go — alive/dead/zombie + LinkFailed alert poster)
```

Key properties:
Expand Down
9 changes: 9 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@ REDIS_URL=redis://localhost:6379
# System configuration
#SYSTEM_TYPES=ns8,nsec

# ===========================================
# MIMIR CONFIGURATION (Optional)
# ===========================================
#MIMIR_URL=http://localhost:9009

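# Built-in history webhook: when set, Alertmanager always fires this webhook
# for every alert (non-bypassable). Leave unset to disable.
#ALERTING_HISTORY_WEBHOOK_URL=
# Optional token for the history webhook (read by the backend configuration).
#ALERTING_HISTORY_WEBHOOK_TOKEN=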

# ===========================================
# AUTO-DERIVED URLS (DO NOT SET MANUALLY)
# ===========================================
Expand Down
6 changes: 3 additions & 3 deletions backend/.render-build-trigger
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# This file is used to force Docker service rebuilds in PR previews
# Modify LAST_UPDATE to trigger rebuilds

LAST_UPDATE=2026-03-09T09:01:19Z
LAST_UPDATE=2026-04-07T13:40:58Z

# Instructions:
# 1. To force rebuild of Docker services in a PR, update LAST_UPDATE
# 2. Run: perl -i -pe "s/LAST_UPDATE=2026-03-09T09:01:19Z
# 2. Commit and push changes to trigger Docker rebuilds
# 2. Run: perl -i -pe "s/LAST_UPDATE=2026-04-07T13:40:58Z/LAST_UPDATE=$(date -u +%Y-%m-%dT%H:%M:%SZ)/" $f/.render-build-trigger
# 3. Commit and push changes to trigger Docker rebuilds
17 changes: 17 additions & 0 deletions backend/configuration/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ type Configuration struct {
SMTPFrom string `json:"smtp_from"`
SMTPFromName string `json:"smtp_from_name"`
SMTPTLS bool `json:"smtp_tls"`
// Mimir configuration
MimirURL string `json:"mimir_url"`
// Alerting configuration
AlertingHistoryWebhookURL string `json:"alerting_history_webhook_url"`
AlertingHistoryWebhookToken string `json:"alerting_history_webhook_token"`
}

var Config = Configuration{}
Expand Down Expand Up @@ -204,6 +209,18 @@ func Init() {
}
Config.SMTPTLS = parseBoolWithDefault("SMTP_TLS", true)

// Mimir configuration
if mimirURL := os.Getenv("MIMIR_URL"); mimirURL != "" {
Config.MimirURL = mimirURL
} else {
Config.MimirURL = "http://localhost:9009"
logger.LogConfigLoad("env", "MIMIR_URL", true, fmt.Errorf("MIMIR_URL variable is empty, using default http://localhost:9009"))
}

// Alerting configuration — optional, empty means no built-in history webhook
Config.AlertingHistoryWebhookURL = os.Getenv("ALERTING_HISTORY_WEBHOOK_URL")
Config.AlertingHistoryWebhookToken = os.Getenv("ALERTING_HISTORY_WEBHOOK_TOKEN")

// Log successful configuration load
logger.LogConfigLoad("env", "configuration", true, nil)
}
Expand Down
41 changes: 41 additions & 0 deletions backend/database/migrations/019_add_alert_history.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Migration 019: create the alert_history table
-- Holds resolved/inactive alerts delivered by Alertmanager webhooks.

CREATE TABLE IF NOT EXISTS alert_history (
    id          BIGSERIAL PRIMARY KEY,

    -- Owning system
    system_key  VARCHAR(255) NOT NULL,

    -- What the alert is
    alertname   VARCHAR(255) NOT NULL,
    severity    VARCHAR(50),
    status      VARCHAR(50) NOT NULL, -- resolved
    fingerprint VARCHAR(255) NOT NULL,

    -- When the alert started and ended
    starts_at   TIMESTAMP WITH TIME ZONE NOT NULL,
    ends_at     TIMESTAMP WITH TIME ZONE,

    -- Short human-readable text (annotations.summary)
    summary     TEXT,

    -- Raw alert labels and annotations; both default to an empty JSON object
    labels      JSONB NOT NULL DEFAULT '{}',
    annotations JSONB NOT NULL DEFAULT '{}',

    -- Alertmanager receiver that handled the alert
    receiver    VARCHAR(255),

    -- Row timestamp; defaults to NOW() when not supplied
    created_at  TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- Lookup indexes: per-system listing ordered by newest row, and ordering by alert start time
CREATE INDEX IF NOT EXISTS idx_alert_history_system_key_created_at
    ON alert_history (system_key, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_alert_history_starts_at
    ON alert_history (starts_at DESC);

-- Catalog documentation for the table and its less obvious columns
COMMENT ON TABLE alert_history
    IS 'Resolved and inactive alerts received from Alertmanager webhooks';
COMMENT ON COLUMN alert_history.system_key
    IS 'System key extracted from alert labels.system_key';
COMMENT ON COLUMN alert_history.fingerprint
    IS 'Alert fingerprint from Alertmanager (hash of labels)';
COMMENT ON COLUMN alert_history.status
    IS 'Alert status at time of receipt: resolved';
COMMENT ON COLUMN alert_history.ends_at
    IS 'NULL when end time is the zero time (0001-01-01)';
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Rollback migration 019: Drop alert_history table
-- Indexes defined on the table are removed together with it.

DROP TABLE IF EXISTS alert_history;
46 changes: 46 additions & 0 deletions backend/database/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,52 @@ CREATE INDEX IF NOT EXISTS idx_inventory_alerts_system_id_created_at ON inventor
CREATE INDEX IF NOT EXISTS idx_inventory_alerts_severity ON inventory_alerts(severity);
CREATE INDEX IF NOT EXISTS idx_inventory_alerts_resolved ON inventory_alerts(is_resolved) WHERE is_resolved = FALSE;

-- =============================================================================
-- ALERT HISTORY TABLE
-- =============================================================================
-- Holds resolved and inactive alerts delivered by Alertmanager webhooks

CREATE TABLE IF NOT EXISTS alert_history (
    id          BIGSERIAL PRIMARY KEY,

    -- Owning system
    system_key  VARCHAR(255) NOT NULL,

    -- What the alert is
    alertname   VARCHAR(255) NOT NULL,
    severity    VARCHAR(50),
    status      VARCHAR(50) NOT NULL, -- resolved
    fingerprint VARCHAR(255) NOT NULL,

    -- When the alert started and ended
    starts_at   TIMESTAMP WITH TIME ZONE NOT NULL,
    ends_at     TIMESTAMP WITH TIME ZONE,

    -- Short human-readable text (annotations.summary)
    summary     TEXT,

    -- Raw alert labels and annotations; both default to an empty JSON object
    labels      JSONB NOT NULL DEFAULT '{}',
    annotations JSONB NOT NULL DEFAULT '{}',

    -- Alertmanager receiver that handled the alert
    receiver    VARCHAR(255),

    -- Row timestamp; defaults to NOW() when not supplied
    created_at  TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- Performance indexes
CREATE INDEX IF NOT EXISTS idx_alert_history_system_key_created_at
    ON alert_history (system_key, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_alert_history_starts_at
    ON alert_history (starts_at DESC);

-- Table documentation
COMMENT ON TABLE alert_history
    IS 'Resolved and inactive alerts received from Alertmanager webhooks';
COMMENT ON COLUMN alert_history.system_key
    IS 'System key extracted from alert labels.system_key';
COMMENT ON COLUMN alert_history.fingerprint
    IS 'Alert fingerprint from Alertmanager (hash of labels)';
COMMENT ON COLUMN alert_history.status
    IS 'Alert status at time of receipt: resolved';
COMMENT ON COLUMN alert_history.ends_at
    IS 'NULL when end time is the zero time (0001-01-01)';


-- =============================================================================
-- SCHEMA MIGRATIONS TABLE
-- =============================================================================
Expand Down
Loading
Loading