Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(process): ProcessCollector use IntGauge to provide better perfor… #430

Merged
merged 2 commits into from Jan 3, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
84 changes: 37 additions & 47 deletions src/process_collector.rs
Expand Up @@ -4,13 +4,11 @@
//!
//! This module only supports **Linux** platform.

use std::sync::Mutex;

use lazy_static::lazy_static;

use crate::counter::Counter;
use crate::counter::IntCounter;
use crate::desc::Desc;
use crate::gauge::Gauge;
use crate::gauge::IntGauge;
use crate::metrics::{Collector, Opts};
use crate::proto;

Expand All @@ -27,13 +25,13 @@ const METRICS_NUMBER: usize = 7;
pub struct ProcessCollector {
pid: pid_t,
descs: Vec<Desc>,
cpu_total: Mutex<Counter>,
open_fds: Gauge,
max_fds: Gauge,
vsize: Gauge,
rss: Gauge,
start_time: Gauge,
threads: Gauge,
cpu_total: IntCounter,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I totally get the performance argument, but could we please keep the cpu_total metric as f64? All the other metrics (file descriptor counts, memory usage, threads) are actual integers, but processes that don't use a lot of CPU will now have 1 second CPU usage spikes every so often instead of a more or less flat usage with actual spikes occurring when they actually do.

E.g. a process using 1% CPU on average, with a 10% spike every minute will now look as if it is using 0% CPU most of the time and spike every 2 minutes out of 3.

I really don't think the marginal performance improvement is worth it. Please keep using Counter for cpu_total.

open_fds: IntGauge,
max_fds: IntGauge,
vsize: IntGauge,
rss: IntGauge,
start_time: IntGauge,
threads: IntGauge,
}

impl ProcessCollector {
Expand All @@ -42,7 +40,7 @@ impl ProcessCollector {
let namespace = namespace.into();
let mut descs = Vec::new();

let cpu_total = Counter::with_opts(
let cpu_total = IntCounter::with_opts(
Opts::new(
"process_cpu_seconds_total",
"Total user and system CPU time spent in \
Expand All @@ -53,14 +51,14 @@ impl ProcessCollector {
.unwrap();
descs.extend(cpu_total.desc().into_iter().cloned());

let open_fds = Gauge::with_opts(
let open_fds = IntGauge::with_opts(
Opts::new("process_open_fds", "Number of open file descriptors.")
.namespace(namespace.clone()),
)
.unwrap();
descs.extend(open_fds.desc().into_iter().cloned());

let max_fds = Gauge::with_opts(
let max_fds = IntGauge::with_opts(
Opts::new(
"process_max_fds",
"Maximum number of open file descriptors.",
Expand All @@ -70,7 +68,7 @@ impl ProcessCollector {
.unwrap();
descs.extend(max_fds.desc().into_iter().cloned());

let vsize = Gauge::with_opts(
let vsize = IntGauge::with_opts(
Opts::new(
"process_virtual_memory_bytes",
"Virtual memory size in bytes.",
Expand All @@ -80,7 +78,7 @@ impl ProcessCollector {
.unwrap();
descs.extend(vsize.desc().into_iter().cloned());

let rss = Gauge::with_opts(
let rss = IntGauge::with_opts(
Opts::new(
"process_resident_memory_bytes",
"Resident memory size in bytes.",
Expand All @@ -90,7 +88,7 @@ impl ProcessCollector {
.unwrap();
descs.extend(rss.desc().into_iter().cloned());

let start_time = Gauge::with_opts(
let start_time = IntGauge::with_opts(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, this is also not an integer. Although not as critical as CPU usage (because it is essentially a constant) the process start time has fractional seconds (and they may be useful).

Opts::new(
"process_start_time_seconds",
"Start time of the process since unix epoch \
Expand All @@ -99,19 +97,25 @@ impl ProcessCollector {
.namespace(namespace.clone()),
)
.unwrap();
// proc_start_time init once because it is immutable
if let Ok(boot_time) = procfs::boot_time_secs() {
if let Ok(p) = procfs::process::Process::myself() {
start_time.set(p.stat.starttime as i64 / *CLK_TCK + boot_time as i64);
}
}
descs.extend(start_time.desc().into_iter().cloned());

let threads = Gauge::with_opts(
let threads = IntGauge::with_opts(
Opts::new("process_threads", "Number of OS threads in the process.")
.namespace(namespace.clone()),
.namespace(namespace),
)
.unwrap();
descs.extend(threads.desc().into_iter().cloned());

ProcessCollector {
pid,
descs,
cpu_total: Mutex::new(cpu_total),
cpu_total,
open_fds,
max_fds,
vsize,
Expand Down Expand Up @@ -144,39 +148,29 @@ impl Collector for ProcessCollector {

// file descriptors
if let Ok(fd_count) = p.fd_count() {
self.open_fds.set(fd_count as f64);
self.open_fds.set(fd_count as i64);
}
if let Ok(limits) = p.limits() {
if let procfs::process::LimitValue::Value(max) = limits.max_open_files.soft_limit {
self.max_fds.set(max as f64)
self.max_fds.set(max as i64)
}
}

// memory
self.vsize.set(p.stat.vsize as f64);
self.rss.set(p.stat.rss as f64 * *PAGESIZE);

// proc_start_time
if let Some(boot_time) = *BOOT_TIME {
self.start_time
.set(p.stat.starttime as f64 / *CLK_TCK + boot_time);
}
self.vsize.set(p.stat.vsize as i64);
self.rss.set(p.stat.rss * *PAGESIZE);

// cpu
let cpu_total_mfs = {
let cpu_total = self.cpu_total.lock().unwrap();
let total = (p.stat.utime + p.stat.stime) as f64 / *CLK_TCK;
let past = cpu_total.get();
let delta = total - past;
if delta > 0.0 {
cpu_total.inc_by(delta);
}
let total = (p.stat.utime + p.stat.stime) / *CLK_TCK as u64;
let past = self.cpu_total.get();
self.cpu_total.inc_by(total - past);

cpu_total.collect()
self.cpu_total.collect()
};

// threads
self.threads.set(p.stat.num_threads as f64);
self.threads.set(p.stat.num_threads);

// collect MetricFamilys.
let mut mfs = Vec::with_capacity(METRICS_NUMBER);
Expand All @@ -193,24 +187,20 @@ impl Collector for ProcessCollector {

lazy_static! {
// getconf CLK_TCK
static ref CLK_TCK: f64 = {
static ref CLK_TCK: i64 = {
unsafe {
libc::sysconf(libc::_SC_CLK_TCK) as f64
libc::sysconf(libc::_SC_CLK_TCK)
}
};

// getconf PAGESIZE
static ref PAGESIZE: f64 = {
static ref PAGESIZE: i64 = {
unsafe {
libc::sysconf(libc::_SC_PAGESIZE) as f64
libc::sysconf(libc::_SC_PAGESIZE)
}
};
}

lazy_static! {
static ref BOOT_TIME: Option<f64> = procfs::boot_time_secs().ok().map(|i| i as f64);
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down