Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

collector/diskstats: Add labels and metrics from udev #2404

Merged
merged 7 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
187 changes: 178 additions & 9 deletions collector/diskstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,14 @@
package collector

import (
"bufio"
"fmt"
"os"
"strconv"
"strings"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/blockdevice"
)
Expand All @@ -31,23 +37,55 @@ const (
unixSectorSize = 512.0

diskstatsDefaultIgnoredDevices = "^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"

// See udevadm(8).
udevDevicePropertyPrefix = "E:"

// Udev device properties.
udevDMLVLayer = "DM_LV_LAYER"
udevDMLVName = "DM_LV_NAME"
udevDMName = "DM_NAME"
udevDMUUID = "DM_UUID"
udevDMVGName = "DM_VG_NAME"
udevIDATA = "ID_ATA"
udevIDATARotationRateRPM = "ID_ATA_ROTATION_RATE_RPM"
udevIDATASATA = "ID_ATA_SATA"
udevIDATASATASignalRateGen1 = "ID_ATA_SATA_SIGNAL_RATE_GEN1"
udevIDATASATASignalRateGen2 = "ID_ATA_SATA_SIGNAL_RATE_GEN2"
udevIDATAWriteCache = "ID_ATA_WRITE_CACHE"
udevIDATAWriteCacheEnabled = "ID_ATA_WRITE_CACHE_ENABLED"
udevIDFSType = "ID_FS_TYPE"
udevIDFSUsage = "ID_FS_USAGE"
udevIDFSUUID = "ID_FS_UUID"
udevIDFSVersion = "ID_FS_VERSION"
udevIDModel = "ID_MODEL"
udevIDPath = "ID_PATH"
udevIDRevision = "ID_REVISION"
udevIDSerialShort = "ID_SERIAL_SHORT"
udevIDWWN = "ID_WWN"
)

// typedFactorDesc pairs a metric descriptor with the value type used when
// a constant metric is created from it (see mustNewConstMetric).
type typedFactorDesc struct {
// desc is the descriptor handed to prometheus.MustNewConstMetric.
desc *prometheus.Desc
// valueType is the metric value type (e.g. prometheus.GaugeValue or
// prometheus.CounterValue) emitted for this descriptor.
valueType prometheus.ValueType
}

// udevInfo holds udev device properties keyed by property name
// (for example "ID_MODEL"), with the raw property value as the map value.
type udevInfo map[string]string

// mustNewConstMetric creates a constant metric carrying value and the given
// label values, using the descriptor and value type stored in d. It is a thin
// wrapper around prometheus.MustNewConstMetric.
func (d *typedFactorDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
	metric := prometheus.MustNewConstMetric(
		d.desc,
		d.valueType,
		value,
		labels...,
	)
	return metric
}

// diskstatsCollector holds the state used to export disk metrics: the device
// filter, the blockdevice filesystem handle, the metric descriptors and a
// logger.
type diskstatsCollector struct {
deviceFilter deviceFilter
fs blockdevice.FS
infoDesc typedFactorDesc
descs []typedFactorDesc
logger log.Logger
deviceFilter deviceFilter
fs blockdevice.FS
infoDesc typedFactorDesc
descs []typedFactorDesc
// filesystemInfoDesc describes the "filesystem_info" metric.
filesystemInfoDesc typedFactorDesc
// deviceMapperInfoDesc describes the "device_mapper_info" metric.
deviceMapperInfoDesc typedFactorDesc
// ataDescs maps a udev property name (e.g. ID_ATA_WRITE_CACHE) to the
// descriptor of the ATA metric derived from that property.
ataDescs map[string]typedFactorDesc
logger log.Logger
// getUdevDeviceProperties looks up udev properties by (major, minor)
// device number. NewDiskstatsCollector leaves it nil when the udev data
// path cannot be stat'ed or is not a directory.
// NOTE(review): Update appears to call the package-level function directly
// rather than this field — confirm whether that is intended.
getUdevDeviceProperties func(uint32, uint32) (udevInfo, error)
}

func init() {
Expand All @@ -68,13 +106,13 @@ func NewDiskstatsCollector(logger log.Logger) (Collector, error) {
return nil, fmt.Errorf("failed to parse device filter flags: %w", err)
}

return &diskstatsCollector{
collector := diskstatsCollector{
deviceFilter: deviceFilter,
fs: fs,
infoDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "info"),
"Info of /sys/block/<block_device>.",
[]string{"device", "major", "minor"},
[]string{"device", "major", "minor", "path", "wwn", "model", "serial", "revision"},
nil,
), valueType: prometheus.GaugeValue,
},
Expand Down Expand Up @@ -181,8 +219,54 @@ func NewDiskstatsCollector(logger log.Logger) (Collector, error) {
), valueType: prometheus.CounterValue,
},
},
filesystemInfoDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "filesystem_info"),
"Info about disk filesystem.",
[]string{"device", "type", "usage", "uuid", "version"},
nil,
), valueType: prometheus.GaugeValue,
},
deviceMapperInfoDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "device_mapper_info"),
"Info about disk device mapper.",
[]string{"device", "name", "uuid", "vg_name", "lv_name", "lv_layer"},
nil,
), valueType: prometheus.GaugeValue,
},
ataDescs: map[string]typedFactorDesc{
udevIDATAWriteCache: {
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ata_write_cache"),
"ATA disk has a write cache.",
[]string{"device"},
nil,
), valueType: prometheus.GaugeValue,
},
udevIDATAWriteCacheEnabled: {
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ata_write_cache_enabled"),
"ATA disk has its write cache enabled.",
[]string{"device"},
nil,
), valueType: prometheus.GaugeValue,
},
udevIDATARotationRateRPM: {
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ata_rotation_rate_rpm"),
"ATA disk rotation rate in RPMs (0 for SSDs).",
[]string{"device"},
nil,
), valueType: prometheus.GaugeValue,
},
},
logger: logger,
}, nil
}

// Only enable getting device properties from udev if the directory is readable.
if stat, err := os.Stat(*udevDataPath); err != nil || !stat.IsDir() {
level.Error(logger).Log("msg", "Failed to open directory, disabling udev device properties", "path", *udevDataPath)
} else {
collector.getUdevDeviceProperties = getUdevDeviceProperties
}

return &collector, nil
}

func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
Expand All @@ -196,7 +280,21 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
if c.deviceFilter.ignored(dev) {
continue
}
ch <- c.infoDesc.mustNewConstMetric(1.0, dev, fmt.Sprint(stats.MajorNumber), fmt.Sprint(stats.MinorNumber))

info, err := getUdevDeviceProperties(stats.MajorNumber, stats.MinorNumber)
if err != nil {
level.Debug(c.logger).Log("msg", "Failed to parse udev info", "err", err)
}

ch <- c.infoDesc.mustNewConstMetric(1.0, dev,
fmt.Sprint(stats.MajorNumber),
fmt.Sprint(stats.MinorNumber),
info[udevIDPath],
info[udevIDWWN],
info[udevIDModel],
info[udevIDSerialShort],
info[udevIDRevision],
)

statCount := stats.IoStatsCount - 3 // Total diskstats record count, less MajorNumber, MinorNumber and DeviceName

Expand Down Expand Up @@ -224,6 +322,77 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
}
ch <- c.descs[i].mustNewConstMetric(val, dev)
}

if fsType := info[udevIDFSType]; fsType != "" {
ch <- c.filesystemInfoDesc.mustNewConstMetric(1.0, dev,
fsType,
info[udevIDFSUsage],
info[udevIDFSUUID],
info[udevIDFSVersion],
)
}

if name := info[udevDMName]; name != "" {
ch <- c.deviceMapperInfoDesc.mustNewConstMetric(1.0, dev,
name,
info[udevDMUUID],
info[udevDMVGName],
info[udevDMLVName],
info[udevDMLVLayer],
)
}

if ata := info[udevIDATA]; ata != "" {
for attr, desc := range c.ataDescs {
str, ok := info[attr]
if !ok {
level.Debug(c.logger).Log("msg", "Udev attribute does not exist", "attribute", attr)
continue
}

if value, err := strconv.ParseFloat(str, 64); err == nil {
ch <- desc.mustNewConstMetric(value, dev)
} else {
level.Error(c.logger).Log("msg", "Failed to parse ATA value", "err", err)
}
}
}
}
return nil
}

// getUdevDeviceProperties reads the udev data file of the block device
// identified by major:minor and returns its device properties.
//
// The file is located via udevDataFilePath using the name "b<major>:<minor>".
// Only lines carrying the device property prefix ("E:", see udevadm(8)) are
// considered; each is split at the first "=" into a property name and value.
// Property lines without "=" are ignored.
//
// An error is returned when the file cannot be opened or when reading it
// fails part-way; in both cases the returned map is nil.
func getUdevDeviceProperties(major, minor uint32) (udevInfo, error) {
	filename := udevDataFilePath(fmt.Sprintf("b%d:%d", major, minor))

	data, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer data.Close()

	info := make(udevInfo)

	scanner := bufio.NewScanner(data)
	for scanner.Scan() {
		line := scanner.Text()

		// We're only interested in device properties.
		if !strings.HasPrefix(line, udevDevicePropertyPrefix) {
			continue
		}

		line = strings.TrimPrefix(line, udevDevicePropertyPrefix)

		/* TODO: After we drop support for Go 1.17, the condition below can be simplified to:

		if name, value, found := strings.Cut(line, "="); found {
			info[name] = value
		}
		*/
		if fields := strings.SplitN(line, "=", 2); len(fields) == 2 {
			info[fields[0]] = fields[1]
		}
	}

	// Scan reports false for both EOF and failures (I/O error, over-long
	// line). Without this check a truncated property set would be returned
	// as if the whole file had been parsed.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("reading udev data file %q: %w", filename, err)
	}

	return info, nil
}
68 changes: 52 additions & 16 deletions collector/diskstats_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,32 @@ func NewTestDiskStatsCollector(logger log.Logger) (prometheus.Collector, error)
func TestDiskStats(t *testing.T) {
*sysPath = "fixtures/sys"
*procPath = "fixtures/proc"
*udevDataPath = "fixtures/udev/data"
*diskstatsDeviceExclude = "^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"
testcase := `# HELP node_disk_discard_time_seconds_total This is the total number of seconds spent by all discards.
testcase := `# HELP node_disk_ata_rotation_rate_rpm ATA disk rotation rate in RPMs (0 for SSDs).
# TYPE node_disk_ata_rotation_rate_rpm gauge
node_disk_ata_rotation_rate_rpm{device="sda"} 7200
node_disk_ata_rotation_rate_rpm{device="sdb"} 0
node_disk_ata_rotation_rate_rpm{device="sdc"} 0
# HELP node_disk_ata_write_cache ATA disk has a write cache.
# TYPE node_disk_ata_write_cache gauge
node_disk_ata_write_cache{device="sda"} 1
node_disk_ata_write_cache{device="sdb"} 1
node_disk_ata_write_cache{device="sdc"} 1
# HELP node_disk_ata_write_cache_enabled ATA disk has its write cache enabled.
# TYPE node_disk_ata_write_cache_enabled gauge
node_disk_ata_write_cache_enabled{device="sda"} 0
node_disk_ata_write_cache_enabled{device="sdb"} 1
node_disk_ata_write_cache_enabled{device="sdc"} 0
# HELP node_disk_device_mapper_info Info about disk device mapper.
# TYPE node_disk_device_mapper_info gauge
node_disk_device_mapper_info{device="dm-0",lv_layer="",lv_name="",name="nvme0n1_crypt",uuid="CRYPT-LUKS2-jolaulot80fy9zsiobkxyxo7y2dqeho2-nvme0n1_crypt",vg_name=""} 1
node_disk_device_mapper_info{device="dm-1",lv_layer="",lv_name="swap_1",name="system-swap_1",uuid="LVM-wbGqQEBL9SxrW2DLntJwgg8fAv946hw3Tvjqh0v31fWgxEtD4BoHO0lROWFUY65T",vg_name="system"} 1
node_disk_device_mapper_info{device="dm-2",lv_layer="",lv_name="root",name="system-root",uuid="LVM-NWEDo8q5ABDyJuC3F8veKNyWfYmeIBfFMS4MF3HakzUhkk7ekDm6fJTHkl2fYHe7",vg_name="system"} 1
node_disk_device_mapper_info{device="dm-3",lv_layer="",lv_name="var",name="system-var",uuid="LVM-hrxHo0rlZ6U95ku5841Lpd17bS1Z7V7lrtEE60DVgE6YEOCdS9gcDGyonWim4hGP",vg_name="system"} 1
node_disk_device_mapper_info{device="dm-4",lv_layer="",lv_name="tmp",name="system-tmp",uuid="LVM-XTNGOHjPWLHcxmJmVu5cWTXEtuzqDeBkdEHAZW5q9LxWQ2d4mb5CchUQzUPJpl8H",vg_name="system"} 1
node_disk_device_mapper_info{device="dm-5",lv_layer="",lv_name="home",name="system-home",uuid="LVM-MtoJaWTpjWRXlUnNFlpxZauTEuYlMvGFutigEzCCrfj8CNh6jCRi5LQJXZCpLjPf",vg_name="system"} 1
# HELP node_disk_discard_time_seconds_total This is the total number of seconds spent by all discards.
# TYPE node_disk_discard_time_seconds_total counter
node_disk_discard_time_seconds_total{device="sdb"} 11.13
node_disk_discard_time_seconds_total{device="sdc"} 11.13
Expand All @@ -66,6 +90,18 @@ node_disk_discards_completed_total{device="sdc"} 18851
# TYPE node_disk_discards_merged_total counter
node_disk_discards_merged_total{device="sdb"} 0
node_disk_discards_merged_total{device="sdc"} 0
# HELP node_disk_filesystem_info Info about disk filesystem.
# TYPE node_disk_filesystem_info gauge
node_disk_filesystem_info{device="dm-0",type="LVM2_member",usage="raid",uuid="c3C3uW-gD96-Yw69-c1CJ-5MwT-6ysM-mST0vB",version="LVM2 001"} 1
node_disk_filesystem_info{device="dm-1",type="swap",usage="other",uuid="5272bb60-04b5-49cd-b730-be57c7604450",version="1"} 1
node_disk_filesystem_info{device="dm-2",type="ext4",usage="filesystem",uuid="3deafd0d-faff-4695-8d15-51061ae1f51b",version="1.0"} 1
node_disk_filesystem_info{device="dm-3",type="ext4",usage="filesystem",uuid="5c772222-f7d4-4c8e-87e8-e97df6b7a45e",version="1.0"} 1
node_disk_filesystem_info{device="dm-4",type="ext4",usage="filesystem",uuid="a9479d44-60e1-4015-a1e5-bb065e6dd11b",version="1.0"} 1
node_disk_filesystem_info{device="dm-5",type="ext4",usage="filesystem",uuid="b05b726a-c718-4c4d-8641-7c73a7696d83",version="1.0"} 1
node_disk_filesystem_info{device="mmcblk0p1",type="vfat",usage="filesystem",uuid="6284-658D",version="FAT32"} 1
node_disk_filesystem_info{device="mmcblk0p2",type="ext4",usage="filesystem",uuid="83324ce8-a6f3-4e35-ad64-dbb3d6b87a32",version="1.0"} 1
node_disk_filesystem_info{device="sda",type="LVM2_member",usage="raid",uuid="cVVv6j-HSA2-IY33-1Jmj-dO2H-YL7w-b4Oxqw",version="LVM2 001"} 1
node_disk_filesystem_info{device="sdc",type="LVM2_member",usage="raid",uuid="QFy9W7-Brj3-hQ6v-AF8i-3Zqg-n3Vs-kGY4vb",version="LVM2 001"} 1
# HELP node_disk_flush_requests_time_seconds_total This is the total number of seconds spent by all flush requests.
# TYPE node_disk_flush_requests_time_seconds_total counter
node_disk_flush_requests_time_seconds_total{device="sdc"} 1.944
Expand All @@ -74,21 +110,21 @@ node_disk_flush_requests_time_seconds_total{device="sdc"} 1.944
node_disk_flush_requests_total{device="sdc"} 1555
# HELP node_disk_info Info of /sys/block/<block_device>.
# TYPE node_disk_info gauge
node_disk_info{device="dm-0",major="252",minor="0"} 1
node_disk_info{device="dm-1",major="252",minor="1"} 1
node_disk_info{device="dm-2",major="252",minor="2"} 1
node_disk_info{device="dm-3",major="252",minor="3"} 1
node_disk_info{device="dm-4",major="252",minor="4"} 1
node_disk_info{device="dm-5",major="252",minor="5"} 1
node_disk_info{device="mmcblk0",major="179",minor="0"} 1
node_disk_info{device="mmcblk0p1",major="179",minor="1"} 1
node_disk_info{device="mmcblk0p2",major="179",minor="2"} 1
node_disk_info{device="nvme0n1",major="259",minor="0"} 1
node_disk_info{device="sda",major="8",minor="0"} 1
node_disk_info{device="sdb",major="8",minor="0"} 1
node_disk_info{device="sdc",major="8",minor="0"} 1
node_disk_info{device="sr0",major="11",minor="0"} 1
node_disk_info{device="vda",major="254",minor="0"} 1
node_disk_info{device="dm-0",major="252",minor="0",model="",path="",revision="",serial="",wwn=""} 1
node_disk_info{device="dm-1",major="252",minor="1",model="",path="",revision="",serial="",wwn=""} 1
node_disk_info{device="dm-2",major="252",minor="2",model="",path="",revision="",serial="",wwn=""} 1
node_disk_info{device="dm-3",major="252",minor="3",model="",path="",revision="",serial="",wwn=""} 1
node_disk_info{device="dm-4",major="252",minor="4",model="",path="",revision="",serial="",wwn=""} 1
node_disk_info{device="dm-5",major="252",minor="5",model="",path="",revision="",serial="",wwn=""} 1
node_disk_info{device="mmcblk0",major="179",minor="0",model="",path="platform-df2969f3.mmc",revision="",serial="",wwn=""} 1
node_disk_info{device="mmcblk0p1",major="179",minor="1",model="",path="platform-df2969f3.mmc",revision="",serial="",wwn=""} 1
node_disk_info{device="mmcblk0p2",major="179",minor="2",model="",path="platform-df2969f3.mmc",revision="",serial="",wwn=""} 1
node_disk_info{device="nvme0n1",major="259",minor="0",model="SAMSUNG EHFTF55LURSY-000Y9",path="pci-0000:02:00.0-nvme-1",revision="4NBTUY95",serial="S252B6CU1HG3M1",wwn="eui.p3vbbiejx5aae2r3"} 1
node_disk_info{device="sda",major="8",minor="0",model="TOSHIBA_KSDB4U86",path="pci-0000:3b:00.0-sas-phy7-lun-0",revision="0102",serial="2160A0D5FVGG",wwn="0x7c72382b8de36a64"} 1
node_disk_info{device="sdb",major="8",minor="16",model="SuperMicro_SSD",path="pci-0000:00:1f.2-ata-1",revision="0R",serial="SMC0E1B87ABBB16BD84E",wwn="0xe1b87abbb16bd84e"} 1
node_disk_info{device="sdc",major="8",minor="32",model="INTEL_SSDS9X9SI0",path="pci-0000:00:1f.2-ata-4",revision="0100",serial="3EWB5Y25CWQWA7EH1U",wwn="0x58907ddc573a5de"} 1
node_disk_info{device="sr0",major="11",minor="0",model="Virtual_CDROM0",path="pci-0000:00:14.0-usb-0:1.1:1.0-scsi-0:0:0:0",revision="1.00",serial="AAAABBBBCCCC1",wwn=""} 1
node_disk_info{device="vda",major="254",minor="0",model="",path="pci-0000:00:06.0",revision="",serial="",wwn=""} 1
# HELP node_disk_io_now The number of I/Os currently in progress.
# TYPE node_disk_io_now gauge
node_disk_io_now{device="dm-0"} 0
Expand Down