Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ Standards-Version: 4.1.2

Package: performance-diagnostics
Architecture: any
Depends: python3-bpfcc, python3-minimal, python3-psutil, telegraf, docker-ce
Depends: python3-bpfcc, python3-minimal, python3-psutil, telegraf, docker-ce, influxdb2, curl
Description: eBPF-based Performance Diagnostic Tools
A collection of eBPF-based tools for diagnosing performance issues.
8 changes: 8 additions & 0 deletions debian/postinst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ if ! groups "$USER" | grep -q "\b$GROUP\b"; then
fi
fi

# Remove the influxdb2 package default config — we use influxdb.toml exclusively.
rm -f /etc/influxdb/config.toml
Comment thread
dbshah12 marked this conversation as resolved.

# Reload nginx to pick up the InfluxDB proxy location block.
# The reload is attempted only when the configuration passes `nginx -t` and
# the nginx service is currently active.
#
# Output is discarded with the POSIX form `>/dev/null 2>&1` instead of `&>`.
# `&>` is a bash extension: a POSIX /bin/sh such as dash parses
# `cmd &>/dev/null` as `cmd &` followed by a separate redirection, which
# backgrounds the config test and makes the condition always succeed. The
# portable form behaves the same under bash, so it is safe whichever
# interpreter runs this script.
if nginx -t -c /etc/nginx/nginx.conf >/dev/null 2>&1 && systemctl is-active --quiet nginx; then
	nginx -s reload
fi

#DEBHELPER#

exit 0
7 changes: 6 additions & 1 deletion debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
# need to rename a couple files, so do that here.
#
override_dh_auto_build:
mkdir -p build/cmd/
mkdir -p build/cmd/ build/influxdb/
cp cmd/estat.py build/cmd/estat
cp cmd/stbtrace.py build/cmd/stbtrace
cp cmd/nfs_threads.py build/cmd/nfs_threads
cp cmd/dsp.py build/cmd/dsp
cp influxdb/influxdb-nginx.conf build/influxdb/influxdb.conf

override_dh_auto_install:
dh_install build/cmd/* /usr/bin
Expand All @@ -26,3 +27,7 @@ override_dh_auto_install:
dh_install telegraf/delphix-telegraf-service telegraf/perf_playbook /usr/bin
dh_install telegraf/delphix-telegraf.service /lib/systemd/system
dh_install telegraf/telegraf* telegraf/*.sh /etc/telegraf
dh_install influxdb/delphix-influxdb-service influxdb/delphix-influxdb-init influxdb/perf_influxdb /usr/bin
dh_install influxdb/delphix-influxdb.service /lib/systemd/system
dh_install influxdb/influxdb.toml influxdb/influxdb-init.conf /etc/influxdb
dh_install build/influxdb/influxdb.conf /opt/delphix/server/etc/nginx/conf.d
218 changes: 218 additions & 0 deletions influxdb/delphix-influxdb-init
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#!/bin/bash -eu
#
# Copyright (c) 2026 by Delphix. All rights reserved.
#
# One-time InfluxDB initialization: creates org, bucket, admin token,
# a read-only token for DCT Smart Proxy, and writes the
# [[outputs.influxdb_v2]] stanza to /etc/telegraf/telegraf.outputs.influxdb,
# which is included by delphix-telegraf-service when INFLUXDB_ENABLED flag exists.
# Skips setup if InfluxDB is already initialized.
#
Comment thread
dbshah12 marked this conversation as resolved.

# Local InfluxDB endpoint used for every API call in this script.
INFLUXDB_URL="http://127.0.0.1:8086"

# Files kept under the InfluxDB configuration directory.
INFLUXDB_CONFIG_DIR="/etc/influxdb"
INFLUXDB_INIT_CONF="${INFLUXDB_CONFIG_DIR}/influxdb-init.conf"
INFLUXDB_META_FILE="${INFLUXDB_CONFIG_DIR}/influxdb_meta"
# Written right after /api/v2/setup so that an interrupted run can pick up
# where it left off before the metadata file has been fully written.
INFLUXDB_SETUP_STATE_FILE="${INFLUXDB_CONFIG_DIR}/influxdb_setup_state"

# Telegraf side: the generated output stanza and the flag file enabling it.
INFLUXDB_OUTPUT="/etc/telegraf/telegraf.outputs.influxdb"
INFLUXDB_FLAG="/etc/telegraf/INFLUXDB_ENABLED"

# Pull in the tunables (org, bucket, retention, wait parameters).
# shellcheck source=/etc/influxdb/influxdb-init.conf
# shellcheck disable=SC1091
. "${INFLUXDB_INIT_CONF}"

# Assigned after the config file is sourced, so these two always start out
# with the values below regardless of what the config file contains.
INFLUXDB_ADMIN_USER="admin"
INFLUXDB_ADMIN_PASSWORD=""

#
# Emit one line on stderr, prefixed with the current UTC time in the form
# [YYYY-MM-DDTHH:MM:SSZ]. All arguments are joined into that single line.
#
log() {
	printf '[%s] %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*" >&2
}

#
# Print one value taken from a JSON document, using python3.
#   $1 - the JSON text
#   $2 - a Python subscript expression applied to the parsed document,
#        for example "['org']['id']"
# Logs an error and returns 1 when python3 exits non-zero.
#
json_field() {
	local payload="$1"
	local selector="$2"
	local extractor="import json,sys; print(json.loads(sys.stdin.read())$selector)"

	if ! python3 -c "$extractor" <<<"$payload"; then
		log "ERROR: Failed to parse field '$selector' from JSON response."
		return 1
	fi
}

#
# POST a JSON payload to the InfluxDB HTTP API and print the response body.
#
# Arguments:
#   $1 - API endpoint path, appended to $INFLUXDB_URL (e.g. /api/v2/setup)
#   $2 - JSON request body
#   $3 - optional API token, sent as "Authorization: Token <token>"
# Outputs:
#   the response body on stdout; an error line through log() on failure
# Returns:
#   0 on success, 1 when curl fails (with -f this includes HTTP errors)
#
# The request body and the token can carry secrets (the admin password and
# the admin token), so neither is put on curl's command line, where any local
# user could read it from the process list. The body is streamed on stdin
# (--data-binary @-) and the Authorization header is read from a
# process-substitution file descriptor (-H @file, available in curl >= 7.55).
#
influx_post() {
	local endpoint="$1"
	local data="$2"
	local auth_header="${3:-}"

	local curl_args=(-sf -X POST "$INFLUXDB_URL$endpoint"
		-H 'Content-Type: application/json' --data-binary @-)

	local response
	local rc=0
	if [[ -n "$auth_header" ]]; then
		response=$(
			printf '%s' "$data" |
				curl "${curl_args[@]}" \
					-H @<(printf 'Authorization: Token %s\n' "$auth_header")
		) || rc=$?
	else
		response=$(printf '%s' "$data" | curl "${curl_args[@]}") || rc=$?
	fi

	if ((rc != 0)); then
		log "ERROR: HTTP POST to '$endpoint' failed."
		return 1
	fi
	printf '%s\n' "$response"
}

mkdir -p "$INFLUXDB_CONFIG_DIR"

# The metadata file is the last thing a successful run writes; if it is
# already there, nothing is left to do.
if [[ -f "$INFLUXDB_META_FILE" ]]; then
	log "InfluxDB already initialized, skipping."
	exit 0
fi

#
# Poll the /health endpoint until it answers, at most INFLUXDB_WAIT_RETRIES
# times, sleeping INFLUXDB_WAIT_INTERVAL seconds after each failed probe.
#
ready=false
attempt=1
while ((attempt <= INFLUXDB_WAIT_RETRIES)); do
	if curl -sf "$INFLUXDB_URL/health" >/dev/null 2>&1; then
		ready=true
		break
	fi
	sleep "$INFLUXDB_WAIT_INTERVAL"
	attempt=$((attempt + 1))
done

[[ "$ready" == "true" ]] || {
	log "ERROR: InfluxDB did not become ready after $((INFLUXDB_WAIT_RETRIES * INFLUXDB_WAIT_INTERVAL))s."
	exit 1
}

#
# Obtain the admin token and the org/bucket IDs.
# /api/v2/setup is a one-shot operation, so its results are written to the
# state file straight away. When the state file already exists (an earlier run
# was interrupted after setup), the values are loaded from it and the setup
# call is not repeated.
#
ADMIN_TOKEN=""
ORG_ID=""
BUCKET_ID=""

if [[ ! -f "$INFLUXDB_SETUP_STATE_FILE" ]]; then
	# First run: the admin password is generated only here, right before
	# the setup call that uses it.
	INFLUXDB_ADMIN_PASSWORD="$(openssl rand -hex 16)"
	setup_request=$(printf '{\n"username": "%s",\n"password": "%s",\n"org": "%s",\n"bucket": "%s",\n"retentionPeriodSeconds": %s\n}' \
		"$INFLUXDB_ADMIN_USER" "$INFLUXDB_ADMIN_PASSWORD" "$INFLUXDB_ORG" \
		"$INFLUXDB_BUCKET" "$INFLUXDB_RETENTION_SECONDS")
	SETUP_RESPONSE=$(influx_post "/api/v2/setup" "$setup_request") || exit 1

	ADMIN_TOKEN=$(json_field "$SETUP_RESPONSE" "['auth']['token']") || exit 1
	ORG_ID=$(json_field "$SETUP_RESPONSE" "['org']['id']") || exit 1
	BUCKET_ID=$(json_field "$SETUP_RESPONSE" "['bucket']['id']") || exit 1

	# Save token, IDs and password under a restrictive umask, writing to a
	# temp file first and renaming it into place, so the state file is
	# never visible half-written and the stored password matches the one
	# InfluxDB was set up with.
	saved_umask="$(umask)"
	umask 077
	state_tmp="$(mktemp "${INFLUXDB_SETUP_STATE_FILE}.XXXXXX")"
	{
		printf 'ADMIN_TOKEN=%s\n' "$ADMIN_TOKEN"
		printf 'ORG_ID=%s\n' "$ORG_ID"
		printf 'BUCKET_ID=%s\n' "$BUCKET_ID"
		printf 'INFLUXDB_ADMIN_PASSWORD=%s\n' "$INFLUXDB_ADMIN_PASSWORD"
	} >"$state_tmp"
	chmod 600 "$state_tmp"
	mv "$state_tmp" "$INFLUXDB_SETUP_STATE_FILE"
	umask "$saved_umask"
else
	# Resume: each line is KEY=VALUE; everything after the first '=' is
	# the value. Unknown keys are ignored.
	while IFS= read -r entry; do
		case "${entry%%=*}" in
		ADMIN_TOKEN) ADMIN_TOKEN="${entry#*=}" ;;
		ORG_ID) ORG_ID="${entry#*=}" ;;
		BUCKET_ID) BUCKET_ID="${entry#*=}" ;;
		INFLUXDB_ADMIN_PASSWORD) INFLUXDB_ADMIN_PASSWORD="${entry#*=}" ;;
		WRITE_TOKEN) WRITE_TOKEN="${entry#*=}" ;;
		READ_TOKEN) READ_TOKEN="${entry#*=}" ;;
		esac
	done <"$INFLUXDB_SETUP_STATE_FILE"
fi

# Tokens loaded from the state file (crash-resume) are kept as they are; a
# token is requested from InfluxDB only when none was loaded, so retries do
# not leave orphaned duplicates behind. Each new token is appended to the
# state file as soon as it has been obtained.
: "${WRITE_TOKEN:=}"
: "${READ_TOKEN:=}"

#
# Write-only token for Telegraf, scoped to the bucket.
#
if [[ -z "$WRITE_TOKEN" ]]; then
	write_token_request=$(printf '{\n"orgID": "%s",\n"description": "telegraf-write-token",\n"permissions": [\n{"action": "write", "resource": {"type": "buckets", "id": "%s", "orgID": "%s"}}\n]\n}' \
		"$ORG_ID" "$BUCKET_ID" "$ORG_ID")
	WRITE_TOKEN_RESPONSE=$(influx_post "/api/v2/authorizations" "$write_token_request" "$ADMIN_TOKEN") || exit 1
	WRITE_TOKEN=$(json_field "$WRITE_TOKEN_RESPONSE" "['token']") || exit 1
	printf 'WRITE_TOKEN=%s\n' "$WRITE_TOKEN" >>"$INFLUXDB_SETUP_STATE_FILE"
fi

#
# Read-only token for DCT Smart Proxy, scoped to the bucket.
#
if [[ -z "$READ_TOKEN" ]]; then
	read_token_request=$(printf '{\n"orgID": "%s",\n"description": "dct-read-token",\n"permissions": [\n{"action": "read", "resource": {"type": "buckets", "id": "%s", "orgID": "%s"}}\n]\n}' \
		"$ORG_ID" "$BUCKET_ID" "$ORG_ID")
	READ_TOKEN_RESPONSE=$(influx_post "/api/v2/authorizations" "$read_token_request" "$ADMIN_TOKEN") || exit 1
	READ_TOKEN=$(json_field "$READ_TOKEN_RESPONSE" "['token']") || exit 1
	printf 'READ_TOKEN=%s\n' "$READ_TOKEN" >>"$INFLUXDB_SETUP_STATE_FILE"
fi

#
# Write the [[outputs.influxdb_v2]] stanza to a dedicated telegraf output file
# and enable it via the INFLUXDB_ENABLED flag. The flag is read by
# delphix-telegraf-service to conditionally include this output.
#
# The file holds the write token. It is therefore created (or truncated)
# empty under a restrictive umask and set to 0640 *before* the token is
# written, so the token never sits in a file with looser permissions, not
# even for a moment and not even when a more permissive copy of the file
# already exists from an earlier run.
#
old_umask="$(umask)"
umask 027
: >"$INFLUXDB_OUTPUT"
umask "$old_umask"
chmod 640 "$INFLUXDB_OUTPUT"
cat >"$INFLUXDB_OUTPUT" <<EOF
[[outputs.influxdb_v2]]
urls = ["http://127.0.0.1:8086"]
token = "$WRITE_TOKEN"
organization = "$INFLUXDB_ORG"
bucket = "$INFLUXDB_BUCKET"
EOF
touch "$INFLUXDB_FLAG"

#
# Record org, bucket, admin credentials and both tokens in the metadata file
# so DE APIs can expose them to DCT and the admin can log in to the InfluxDB
# UI. The file ends up with mode 600.
#
# It is written to a temp file under umask 077 and then renamed into place,
# which avoids any window in which the tokens are readable by other users.
#
saved_umask="$(umask)"
umask 077
meta_tmp="$(mktemp "${INFLUXDB_META_FILE}.XXXXXX")"
{
	printf 'INFLUXDB_ORG=%s\n' "$INFLUXDB_ORG"
	printf 'INFLUXDB_BUCKET=%s\n' "$INFLUXDB_BUCKET"
	printf 'INFLUXDB_ADMIN_USER=%s\n' "$INFLUXDB_ADMIN_USER"
	printf 'INFLUXDB_ADMIN_PASSWORD=%s\n' "$INFLUXDB_ADMIN_PASSWORD"
	printf 'INFLUXDB_WRITE_TOKEN=%s\n' "$WRITE_TOKEN"
	printf 'INFLUXDB_READ_TOKEN=%s\n' "$READ_TOKEN"
} >"$meta_tmp"
chmod 600 "$meta_tmp"
mv "$meta_tmp" "$INFLUXDB_META_FILE"
umask "$saved_umask"

# The metadata file now marks initialization as complete, so the resume
# state is no longer needed.
rm -f "$INFLUXDB_SETUP_STATE_FILE"
log "InfluxDB initialized successfully."

# Restart Telegraf so it picks up the output stanza written earlier in this
# script. Later runs exit early once the metadata file exists, so this line is
# reached only by the run that completes initialization. A failed restart is
# deliberately ignored.
systemctl restart delphix-telegraf 2>/dev/null || true
23 changes: 23 additions & 0 deletions influxdb/delphix-influxdb-service
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
#
# Copyright (c) 2026 by Delphix. All rights reserved.
#
# Wrapper script to start InfluxDB 2.x and run first-time initialization.
#

INFLUXDB_CONFIG="/etc/influxdb/influxdb.toml"
INFLUXDB_INIT="/usr/bin/delphix-influxdb-init"

# Launch influxd as a background job and remember its PID.
# influxd does not support a --config-path flag; the config file location is
# handed over through the INFLUXD_CONFIG_PATH environment variable instead.
INFLUXD_CONFIG_PATH="${INFLUXDB_CONFIG}" /usr/bin/influxd &
INFLUXDB_PID=$!

# Run the initialization script, which itself waits for InfluxDB to become
# ready. If it fails, stop the influxd started above and exit with an error.
"${INFLUXDB_INIT}" || {
	printf '%s\n' "ERROR: delphix-influxdb-init failed, stopping influxd" >&2
	kill "${INFLUXDB_PID}" 2>/dev/null
	exit 1
}

# Stay in the foreground for as long as influxd runs; the script's exit status
# is the one returned by wait.
wait "${INFLUXDB_PID}"
16 changes: 16 additions & 0 deletions influxdb/delphix-influxdb.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[Unit]
Description=Delphix InfluxDB Time Series Database
Documentation=https://docs.influxdata.com/influxdb/v2/
PartOf=delphix.target
After=delphix-platform.service
Comment thread
dbshah12 marked this conversation as resolved.
PartOf=delphix-platform.service

[Service]
User=root
Comment thread
dbshah12 marked this conversation as resolved.
ExecStart=/usr/bin/delphix-influxdb-service
Restart=on-failure
RestartForceExitStatus=SIGPIPE
KillMode=control-group

[Install]
WantedBy=delphix.target
12 changes: 12 additions & 0 deletions influxdb/influxdb-init.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# Copyright (c) 2026 by Delphix. All rights reserved.
#
# Configuration for delphix-influxdb-init.
# Sourced by /usr/bin/delphix-influxdb-init at runtime.
#

# Organization and bucket passed to the /api/v2/setup call; the same names are
# written to the telegraf output stanza and to the influxdb_meta file.
INFLUXDB_ORG="delphix"
INFLUXDB_BUCKET="default"
# Bucket retention, sent as "retentionPeriodSeconds" in the setup request.
INFLUXDB_RETENTION_SECONDS=2592000 # 30 days (720h)
# Readiness polling: how many times /health is probed, and how many seconds
# the init script sleeps after each failed probe.
INFLUXDB_WAIT_RETRIES=30
INFLUXDB_WAIT_INTERVAL=2
17 changes: 17 additions & 0 deletions influxdb/influxdb-nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# Copyright (c) 2026 by Delphix. All rights reserved.
#
# Proxy InfluxDB 2.x API through nginx so external clients (DCT, Grafana)
# can reach it over HTTPS using the engine's existing TLS certificate.
# InfluxDB itself binds to 127.0.0.1:8086 (HTTP, localhost only).
#
# Requests under /influxdb/ are forwarded to the local InfluxDB listener.
location /influxdb/ {
# proxy_pass is given with a URI part ("/"), so nginx replaces the matched
# "/influxdb/" prefix with "/" before forwarding the request.
proxy_pass http://127.0.0.1:8086/;
# Hand the original Host header, client address and scheme to the upstream.
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_http_version 1.1;
# Very long read timeout and no response buffering.
# NOTE(review): presumably meant for long-running or streamed queries — confirm.
proxy_read_timeout 999d;
proxy_buffering off;
}
10 changes: 10 additions & 0 deletions influxdb/influxdb.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# Copyright 2026 Delphix. All rights reserved.
#
# InfluxDB 2.x Configuration
#

bolt-path = "/var/lib/influxdb/influxd.bolt"
engine-path = "/var/lib/influxdb/engine"
http-bind-address = "127.0.0.1:8086"
log-level = "warn"
47 changes: 47 additions & 0 deletions influxdb/perf_influxdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
#
# Copyright (c) 2026 by Delphix. All rights reserved.
#
# Script that enables and disables InfluxDB metric output for Telegraf.
#

# Flag file created by "enable" and removed by "disable".
INFLUXDB_FLAG="/etc/telegraf/INFLUXDB_ENABLED"
# Telegraf output stanza that must already exist before "enable" is allowed.
INFLUXDB_OUTPUT="/etc/telegraf/telegraf.outputs.influxdb"

#
# Print a timestamped error message on stderr and exit with status 1.
# The message is prefixed with the script's base name. That name is derived
# with a parameter expansion rather than an unquoted `basename $0`, so a
# script path containing spaces or glob characters is handled correctly.
# As before, backslash escapes in the message are interpreted (echo -e).
#
function die() {
	echo -e "$(date +%T:%N:%z): ${0##*/}: $*" >&2
	exit 1
}

# Everything this script does needs root, so refuse to run otherwise.
if [[ $EUID -ne 0 ]]; then
	die "must be run as root"
fi

#
# Print an optional message (stderr) followed by the usage line (stdout),
# then exit with status 2.
# The script's base name is derived with a parameter expansion rather than an
# unquoted `basename $0`, so a script path containing spaces or glob
# characters is handled correctly.
#
function usage() {
	local prog="${0##*/}"

	echo "$prog: $*" >&2
	echo "Usage: $prog [enable|disable]"
	exit 2
}

#
# Turn the InfluxDB output on: requires the generated output stanza to exist,
# creates the flag file and restarts delphix-telegraf.
#
function enable_influxdb() {
	date
	if [[ ! -f "$INFLUXDB_OUTPUT" ]]; then
		die "$INFLUXDB_OUTPUT not found. Run delphix-influxdb-init first."
	fi
	printf '%s\n' "Enabling InfluxDB Metric Output"
	touch "$INFLUXDB_FLAG"
	systemctl restart delphix-telegraf
}

#
# Turn the InfluxDB output off: removes the flag file (if present) and
# restarts delphix-telegraf.
#
function disable_influxdb() {
	date
	printf '%s\n' "Disabling InfluxDB Metric Output"
	rm -f "$INFLUXDB_FLAG"
	systemctl restart delphix-telegraf
}

# Exactly one argument is expected: the action to perform.
[[ $# -eq 1 ]] || usage

if [[ "$1" == "enable" ]]; then
	enable_influxdb
elif [[ "$1" == "disable" ]]; then
	disable_influxdb
else
	usage
fi
Loading
Loading