Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 65 additions & 8 deletions collector/ethtool_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"os"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"syscall"
Expand All @@ -49,6 +50,7 @@ type Ethtool interface {
DriverInfo(string) (ethtool.DrvInfo, error)
Stats(string) (map[string]uint64, error)
LinkInfo(string) (ethtool.EthtoolCmd, error)
ModuleEeprom(string) ([]byte, error)
}

type ethtoolLibrary struct {
Expand All @@ -69,15 +71,24 @@ func (e *ethtoolLibrary) LinkInfo(intf string) (ethtool.EthtoolCmd, error) {
return ethtoolCmd, err
}

// ModuleEeprom delegates to the wrapped ethtool handle's ModuleEeprom for intf
// and returns its result unchanged.
func (e *ethtoolLibrary) ModuleEeprom(intf string) ([]byte, error) {
	eeprom, err := e.ethtool.ModuleEeprom(intf)
	return eeprom, err
}

type ethtoolCollector struct {
fs sysfs.FS
entries map[string]*prometheus.Desc
entriesMutex sync.Mutex
ethtool Ethtool
deviceFilter deviceFilter
infoDesc *prometheus.Desc
metricsPattern *regexp.Regexp
logger *slog.Logger
fs sysfs.FS
entries map[string]*prometheus.Desc
entriesMutex sync.Mutex
ethtool Ethtool
deviceFilter deviceFilter
infoDesc *prometheus.Desc
moduleTemperatureDesc *prometheus.Desc
moduleVoltageDesc *prometheus.Desc
moduleTxBiasDesc *prometheus.Desc
moduleTxPowerDesc *prometheus.Desc
moduleRxPowerDesc *prometheus.Desc
metricsPattern *regexp.Regexp
logger *slog.Logger
}

// makeEthtoolCollector is the internal constructor for EthtoolCollector.
Expand Down Expand Up @@ -111,6 +122,31 @@ func makeEthtoolCollector(logger *slog.Logger) (*ethtoolCollector, error) {
deviceFilter: newDeviceFilter(*ethtoolDeviceExclude, *ethtoolDeviceInclude),
metricsPattern: regexp.MustCompile(*ethtoolIncludedMetrics),
logger: logger,
moduleTemperatureDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ethtool", "module_temperature_celsius"),
"Module temperature in degrees Celsius",
[]string{"device"}, nil,
),
moduleVoltageDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ethtool", "module_voltage_volts"),
"Module supply voltage in volts",
[]string{"device"}, nil,
),
moduleTxBiasDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ethtool", "module_tx_bias_milliamperes"),
"Module TX laser bias current in milliamperes",
[]string{"device", "lane"}, nil,
),
moduleTxPowerDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ethtool", "module_tx_power_milliwatts"),
"Module TX optical power in milliwatts",
[]string{"device", "lane"}, nil,
),
moduleRxPowerDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ethtool", "module_rx_power_milliwatts"),
"Module RX optical power in milliwatts",
[]string{"device", "lane"}, nil,
),
entries: map[string]*prometheus.Desc{
"rx_bytes": prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ethtool", "received_bytes_total"),
Expand Down Expand Up @@ -445,6 +481,27 @@ func (c *ethtoolCollector) Update(ch chan<- prometheus.Metric) error {
}
}

eepromData, err := c.ethtool.ModuleEeprom(device)
if err == nil {
modMetrics, parseErr := parseModuleEeprom(eepromData)
if parseErr == nil {
ch <- prometheus.MustNewConstMetric(c.moduleTemperatureDesc, prometheus.GaugeValue, modMetrics.temperature, device)
ch <- prometheus.MustNewConstMetric(c.moduleVoltageDesc, prometheus.GaugeValue, modMetrics.voltage, device)
for i, lane := range modMetrics.lanes {
laneStr := strconv.Itoa(i + 1)
ch <- prometheus.MustNewConstMetric(c.moduleTxBiasDesc, prometheus.GaugeValue, lane.txBias, device, laneStr)
ch <- prometheus.MustNewConstMetric(c.moduleTxPowerDesc, prometheus.GaugeValue, lane.txPower, device, laneStr)
ch <- prometheus.MustNewConstMetric(c.moduleRxPowerDesc, prometheus.GaugeValue, lane.rxPower, device, laneStr)
}
} else {
c.logger.Debug("ethtool module EEPROM parse error", "err", parseErr, "device", device)
}
} else if err != unix.EOPNOTSUPP {
c.logger.Error("ethtool module EEPROM error", "err", err, "device", device)
} else {
c.logger.Debug("ethtool module EEPROM error", "err", err, "device", device)
}

if len(stats) == 0 {
// No stats returned; device does not support ethtool stats.
continue
Expand Down
26 changes: 25 additions & 1 deletion collector/ethtool_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package collector

import (
"bufio"
"errors"
"fmt"
"io"
"log/slog"
Expand Down Expand Up @@ -257,6 +258,14 @@ func (e *EthtoolFixture) LinkInfo(intf string) (ethtool.EthtoolCmd, error) {
return res, err
}

// ModuleEeprom serves module EEPROM contents from the fixture tree by reading
// <fixturePath>/<intf>/module_eeprom. A missing fixture file is reported as
// unix.EOPNOTSUPP; any other read error is passed through together with
// whatever data was read.
func (e *EthtoolFixture) ModuleEeprom(intf string) ([]byte, error) {
	fixtureFile := filepath.Join(e.fixturePath, intf, "module_eeprom")
	contents, readErr := os.ReadFile(fixtureFile)
	switch {
	case readErr == nil:
		return contents, nil
	case errors.Is(readErr, os.ErrNotExist):
		return nil, unix.EOPNOTSUPP
	default:
		return contents, readErr
	}
}

func NewEthtoolTestCollector(logger *slog.Logger) (Collector, error) {
collector, err := makeEthtoolCollector(logger)
if err != nil {
Expand Down Expand Up @@ -288,7 +297,22 @@ func TestBuildEthtoolFQName(t *testing.T) {
}

func TestEthToolCollector(t *testing.T) {
testcase := `# HELP node_ethtool_align_errors Network interface align_errors
testcase := `# HELP node_ethtool_module_rx_power_milliwatts Module RX optical power in milliwatts
# TYPE node_ethtool_module_rx_power_milliwatts gauge
node_ethtool_module_rx_power_milliwatts{device="eth0",lane="1"} 0.5
# HELP node_ethtool_module_temperature_celsius Module temperature in degrees Celsius
# TYPE node_ethtool_module_temperature_celsius gauge
node_ethtool_module_temperature_celsius{device="eth0"} 25
# HELP node_ethtool_module_tx_bias_milliamperes Module TX laser bias current in milliamperes
# TYPE node_ethtool_module_tx_bias_milliamperes gauge
node_ethtool_module_tx_bias_milliamperes{device="eth0",lane="1"} 20
# HELP node_ethtool_module_tx_power_milliwatts Module TX optical power in milliwatts
# TYPE node_ethtool_module_tx_power_milliwatts gauge
node_ethtool_module_tx_power_milliwatts{device="eth0",lane="1"} 1
# HELP node_ethtool_module_voltage_volts Module supply voltage in volts
# TYPE node_ethtool_module_voltage_volts gauge
node_ethtool_module_voltage_volts{device="eth0"} 3.2976
# HELP node_ethtool_align_errors Network interface align_errors
# TYPE node_ethtool_align_errors untyped
node_ethtool_align_errors{device="eth0"} 0
# HELP node_ethtool_info A metric with a constant '1' value labeled by bus_info, device, driver, expansion_rom_version, firmware_version, version.
Expand Down
215 changes: 215 additions & 0 deletions collector/ethtool_sfp_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noethtool

// SFP/QSFP module EEPROM parsing for Digital Optical Monitoring (DOM) /
// Digital Diagnostic Monitoring (DDM) data.
//
// Standards:
// - SFF-8472: SFP/SFP+ DDM (A0 + A2 EEPROM pages, 512 bytes total)
// - SFF-8636: QSFP/QSFP28 DOM (page 0, 256 bytes)

package collector

import (
"encoding/binary"
"fmt"
)

// SFP/QSFP module identifier values (EEPROM byte 0).
//
// parseModuleEeprom uses these to choose between the SFF-8472 parser
// (sfpIdentifierSFP, sfpIdentifierSFPAlt) and the SFF-8636 parser
// (sfpIdentifierQSFP, sfpIdentifierQSFPP, sfpIdentifierQSFP28).
//
// NOTE(review): the SFF-8024 identifier table appears to list 0x0B as
// "DWDM-SFP/SFP+ (not using SFF-8472)" — confirm that modules reporting this
// identifier are really safe to decode with the SFF-8472 layout.
const (
sfpIdentifierSFP = 0x03 // SFP/SFP+/SFP28 (SFF-8472)
sfpIdentifierSFPAlt = 0x0B // SFP+ alternative identifier
sfpIdentifierQSFP = 0x0C // QSFP (SFF-8436)
sfpIdentifierQSFPP = 0x0D // QSFP+ (SFF-8436)
sfpIdentifierQSFP28 = 0x11 // QSFP28 (SFF-8636)
)

// sfpLaneMetrics holds per-lane optical monitoring values.
//
// The units are the ones produced by parseSFPBias and parseSFPPower
// (milliamperes and milliwatts), not SI base units.
type sfpLaneMetrics struct {
txBias float64 // TX laser bias current in milliamperes
txPower float64 // TX optical power in milliwatts
rxPower float64 // RX optical power in milliwatts
}

// sfpMetrics holds parsed DOM/DDM values from a transceiver module.
//
// It is the result type of parseModuleEeprom; parseSFF8472 fills in a single
// lane and parseSFF8636 fills in four.
type sfpMetrics struct {
temperature float64 // Module temperature in degrees Celsius
voltage float64 // Module supply voltage in volts
lanes []sfpLaneMetrics // Per-lane metrics (1 lane for SFP, 4 for QSFP)
}

// parseModuleEeprom decodes a raw module EEPROM dump (as returned by ethtool
// GMODULEEEPROM) into DOM/DDM readings.
//
// The first byte is the module identifier and selects the layout: SFP-class
// identifiers are decoded by parseSFF8472, QSFP-class identifiers by
// parseSFF8636. An error is returned for empty input, for an identifier that
// is not recognised, and for anything the selected parser rejects.
func parseModuleEeprom(data []byte) (sfpMetrics, error) {
	if len(data) == 0 {
		return sfpMetrics{}, fmt.Errorf("module EEPROM data too short (%d bytes)", len(data))
	}

	identifier := data[0]

	if identifier == sfpIdentifierSFP || identifier == sfpIdentifierSFPAlt {
		return parseSFF8472(data)
	}
	if identifier == sfpIdentifierQSFP || identifier == sfpIdentifierQSFPP || identifier == sfpIdentifierQSFP28 {
		return parseSFF8636(data)
	}
	return sfpMetrics{}, fmt.Errorf("unsupported module identifier 0x%02x", identifier)
}

// parseSFF8472 parses SFP/SFP+ DDM data per SFF-8472.
//
// data must be the A0h page immediately followed by the A2h page (A2h starts
// at offset 256). The real-time diagnostic values are read from A2h bytes
// 96-105 and decoded with the internal-calibration scaling implemented by
// parseSFPTemperature, parseSFPVoltage, parseSFPBias and parseSFPPower.
//
// An error is returned when:
//   - data is too short to contain the diagnostic monitoring type byte
//     (A0h byte 92);
//   - that byte reports that DDM is not implemented (bit 6 clear);
//   - that byte reports external calibration (bit 4 set): the registers then
//     hold raw A/D readings that must be corrected with the module's
//     calibration constants, which this parser does not do, so publishing
//     them would yield wrong values;
//   - data is too short to contain the A2h real-time values.
//
// On success the result carries exactly one lane.
func parseSFF8472(data []byte) (sfpMetrics, error) {
	const (
		a0DiagnosticType = 92   // A0 page: diagnostic monitoring type byte
		ddmSupportBit    = 0x40 // bit 6: DDM implemented
		extCalibratedBit = 0x10 // bit 4: externally calibrated

		// Offsets within the full 512-byte dump (A2 page starts at 256).
		a2PageOffset = 256
		valuesOffset = a2PageOffset + 96

		tempOffset    = valuesOffset
		voltageOffset = tempOffset + 2
		txBiasOffset  = voltageOffset + 2
		txPowerOffset = txBiasOffset + 2
		rxPowerOffset = txPowerOffset + 2
		minLen        = rxPowerOffset + 2
	)

	if len(data) < a0DiagnosticType+1 {
		return sfpMetrics{}, fmt.Errorf("SFF-8472 EEPROM too short for diagnostic type byte (%d bytes)", len(data))
	}

	diagType := data[a0DiagnosticType]
	if diagType&ddmSupportBit == 0 {
		return sfpMetrics{}, fmt.Errorf("SFP module does not support DDM (diagnostic type byte: 0x%02x)", diagType)
	}
	// Externally calibrated modules expose uncalibrated A/D counts; decoding
	// them with the internal-calibration scaling would report bogus values.
	if diagType&extCalibratedBit != 0 {
		return sfpMetrics{}, fmt.Errorf("SFP module uses external DDM calibration, which is not supported (diagnostic type byte: 0x%02x)", diagType)
	}
	if len(data) < minLen {
		return sfpMetrics{}, fmt.Errorf("SFF-8472 EEPROM too short for DDM values (%d bytes, need %d)", len(data), minLen)
	}

	return sfpMetrics{
		temperature: parseSFPTemperature(data[tempOffset:]),
		voltage:     parseSFPVoltage(data[voltageOffset:]),
		lanes: []sfpLaneMetrics{
			{
				txBias:  parseSFPBias(data[txBiasOffset:]),
				txPower: parseSFPPower(data[txPowerOffset:]),
				rxPower: parseSFPPower(data[rxPowerOffset:]),
			},
		},
	}, nil
}

// parseSFF8636 decodes QSFP/QSFP28 DOM readings laid out per SFF-8636.
//
// Only the lower part of page 00h is consulted: the module temperature and
// supply voltage words, followed by three consecutive four-channel tables
// (RX power, TX bias, TX power) of big-endian 16-bit words. An error is
// returned when data does not reach the end of the TX power table. On success
// the result carries four lanes in channel order.
func parseSFF8636(data []byte) (sfpMetrics, error) {
	const (
		laneCount = 4
		wordSize  = 2

		// Free side monitoring values.
		temperatureAt   = 22
		supplyVoltageAt = 26

		// Channel monitoring values: three back-to-back per-channel tables.
		rxPowerBase = 34
		txBiasBase  = rxPowerBase + laneCount*wordSize
		txPowerBase = txBiasBase + laneCount*wordSize

		requiredLen = txPowerBase + laneCount*wordSize
	)

	if got := len(data); got < requiredLen {
		return sfpMetrics{}, fmt.Errorf("SFF-8636 EEPROM too short (%d bytes, need %d)", got, requiredLen)
	}

	result := sfpMetrics{
		temperature: parseSFPTemperature(data[temperatureAt:]),
		voltage:     parseSFPVoltage(data[supplyVoltageAt:]),
		lanes:       make([]sfpLaneMetrics, 0, laneCount),
	}

	for lane := 0; lane < laneCount; lane++ {
		shift := lane * wordSize
		result.lanes = append(result.lanes, sfpLaneMetrics{
			txBias:  parseSFPBias(data[txBiasBase+shift:]),
			txPower: parseSFPPower(data[txPowerBase+shift:]),
			rxPower: parseSFPPower(data[rxPowerBase+shift:]),
		})
	}

	return result, nil
}

// parseSFPTemperature decodes a module temperature register into degrees
// Celsius.
//
// The first two bytes of b are read as a big-endian, two's-complement 16-bit
// value whose least significant bit weighs 1/256 °C (SFF-8472 Table 9-1): the
// high byte is the signed whole-degree part and the low byte the fraction.
// b must hold at least two bytes.
func parseSFPTemperature(b []byte) float64 {
	const countsPerDegree = 256.0

	// Reinterpret the unsigned word as signed so sub-zero readings come out
	// negative.
	signedCounts := int16(binary.BigEndian.Uint16(b))
	return float64(signedCounts) / countsPerDegree
}

// parseSFPVoltage decodes a supply voltage register into volts.
//
// The first two bytes of b are read as a big-endian unsigned 16-bit value
// with an LSB of 100 microvolts (SFF-8472 section 9.2, item 2), giving a
// range of 0 V to +6.55 V. b must hold at least two bytes.
func parseSFPVoltage(b []byte) float64 {
	counts := binary.BigEndian.Uint16(b)

	// 100 µV per count: ten counts make one millivolt, then scale to volts.
	millivolts := float64(counts) / 10
	return millivolts / 1000
}

// parseSFPBias decodes a TX laser bias current register into milliamperes.
//
// The first two bytes of b are read as a big-endian unsigned 16-bit value
// with an LSB of 2 microamps (SFF-8472 section 9.2, item 3), giving a range
// of 0 to 131 mA. b must hold at least two bytes.
func parseSFPBias(b []byte) float64 {
	// 2 µA per count means 500 counts per milliampere.
	const countsPerMilliampere = 500

	counts := binary.BigEndian.Uint16(b)
	return float64(counts) / countsPerMilliampere
}

// parseSFPPower decodes a TX or RX optical power register into milliwatts.
//
// The first two bytes of b are read as a big-endian unsigned 16-bit value
// with an LSB of 0.1 microwatts (SFF-8472 section 9.2, items 4 and 5), giving
// a range of 0 to 6.5535 mW (-40 to +8.2 dBm). For RX, the spec lets the
// module report either average power or OMA, selected by bit 3 of A0h byte
// 92; this function does not distinguish the two. b must hold at least two
// bytes.
func parseSFPPower(b []byte) float64 {
	// 0.1 µW per count means 10000 counts per milliwatt.
	const countsPerMilliwatt = 10000

	counts := binary.BigEndian.Uint16(b)
	return float64(counts) / countsPerMilliwatt
}
Loading