New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(process): ProcessCollector use IntGauge to provide better perfor… #430
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,13 +4,11 @@ | |
//! | ||
//! This module only supports **Linux** platform. | ||
|
||
use std::sync::Mutex; | ||
|
||
use lazy_static::lazy_static; | ||
|
||
use crate::counter::Counter; | ||
use crate::counter::IntCounter; | ||
use crate::desc::Desc; | ||
use crate::gauge::Gauge; | ||
use crate::gauge::IntGauge; | ||
use crate::metrics::{Collector, Opts}; | ||
use crate::proto; | ||
|
||
|
@@ -27,13 +25,13 @@ const METRICS_NUMBER: usize = 7; | |
pub struct ProcessCollector { | ||
pid: pid_t, | ||
descs: Vec<Desc>, | ||
cpu_total: Mutex<Counter>, | ||
open_fds: Gauge, | ||
max_fds: Gauge, | ||
vsize: Gauge, | ||
rss: Gauge, | ||
start_time: Gauge, | ||
threads: Gauge, | ||
cpu_total: IntCounter, | ||
open_fds: IntGauge, | ||
max_fds: IntGauge, | ||
vsize: IntGauge, | ||
rss: IntGauge, | ||
start_time: IntGauge, | ||
threads: IntGauge, | ||
} | ||
|
||
impl ProcessCollector { | ||
|
@@ -42,7 +40,7 @@ impl ProcessCollector { | |
let namespace = namespace.into(); | ||
let mut descs = Vec::new(); | ||
|
||
let cpu_total = Counter::with_opts( | ||
let cpu_total = IntCounter::with_opts( | ||
Opts::new( | ||
"process_cpu_seconds_total", | ||
"Total user and system CPU time spent in \ | ||
|
@@ -53,14 +51,14 @@ impl ProcessCollector { | |
.unwrap(); | ||
descs.extend(cpu_total.desc().into_iter().cloned()); | ||
|
||
let open_fds = Gauge::with_opts( | ||
let open_fds = IntGauge::with_opts( | ||
Opts::new("process_open_fds", "Number of open file descriptors.") | ||
.namespace(namespace.clone()), | ||
) | ||
.unwrap(); | ||
descs.extend(open_fds.desc().into_iter().cloned()); | ||
|
||
let max_fds = Gauge::with_opts( | ||
let max_fds = IntGauge::with_opts( | ||
Opts::new( | ||
"process_max_fds", | ||
"Maximum number of open file descriptors.", | ||
|
@@ -70,7 +68,7 @@ impl ProcessCollector { | |
.unwrap(); | ||
descs.extend(max_fds.desc().into_iter().cloned()); | ||
|
||
let vsize = Gauge::with_opts( | ||
let vsize = IntGauge::with_opts( | ||
Opts::new( | ||
"process_virtual_memory_bytes", | ||
"Virtual memory size in bytes.", | ||
|
@@ -80,7 +78,7 @@ impl ProcessCollector { | |
.unwrap(); | ||
descs.extend(vsize.desc().into_iter().cloned()); | ||
|
||
let rss = Gauge::with_opts( | ||
let rss = IntGauge::with_opts( | ||
Opts::new( | ||
"process_resident_memory_bytes", | ||
"Resident memory size in bytes.", | ||
|
@@ -90,7 +88,7 @@ impl ProcessCollector { | |
.unwrap(); | ||
descs.extend(rss.desc().into_iter().cloned()); | ||
|
||
let start_time = Gauge::with_opts( | ||
let start_time = IntGauge::with_opts( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, this is also not an integer. Although not as critical as CPU usage (because it is essentially a constant) the process start time has fractional seconds (and they may be useful). |
||
Opts::new( | ||
"process_start_time_seconds", | ||
"Start time of the process since unix epoch \ | ||
|
@@ -99,19 +97,25 @@ impl ProcessCollector { | |
.namespace(namespace.clone()), | ||
) | ||
.unwrap(); | ||
// proc_start_time init once because it is immutable | ||
if let Ok(boot_time) = procfs::boot_time_secs() { | ||
if let Ok(p) = procfs::process::Process::myself() { | ||
start_time.set(p.stat.starttime as i64 / *CLK_TCK + boot_time as i64); | ||
} | ||
} | ||
descs.extend(start_time.desc().into_iter().cloned()); | ||
|
||
let threads = Gauge::with_opts( | ||
let threads = IntGauge::with_opts( | ||
Opts::new("process_threads", "Number of OS threads in the process.") | ||
.namespace(namespace.clone()), | ||
.namespace(namespace), | ||
) | ||
.unwrap(); | ||
descs.extend(threads.desc().into_iter().cloned()); | ||
|
||
ProcessCollector { | ||
pid, | ||
descs, | ||
cpu_total: Mutex::new(cpu_total), | ||
cpu_total, | ||
open_fds, | ||
max_fds, | ||
vsize, | ||
|
@@ -144,39 +148,29 @@ impl Collector for ProcessCollector { | |
|
||
// file descriptors | ||
if let Ok(fd_count) = p.fd_count() { | ||
self.open_fds.set(fd_count as f64); | ||
self.open_fds.set(fd_count as i64); | ||
} | ||
if let Ok(limits) = p.limits() { | ||
if let procfs::process::LimitValue::Value(max) = limits.max_open_files.soft_limit { | ||
self.max_fds.set(max as f64) | ||
self.max_fds.set(max as i64) | ||
} | ||
} | ||
|
||
// memory | ||
self.vsize.set(p.stat.vsize as f64); | ||
self.rss.set(p.stat.rss as f64 * *PAGESIZE); | ||
|
||
// proc_start_time | ||
if let Some(boot_time) = *BOOT_TIME { | ||
self.start_time | ||
.set(p.stat.starttime as f64 / *CLK_TCK + boot_time); | ||
} | ||
self.vsize.set(p.stat.vsize as i64); | ||
self.rss.set(p.stat.rss * *PAGESIZE); | ||
|
||
// cpu | ||
let cpu_total_mfs = { | ||
let cpu_total = self.cpu_total.lock().unwrap(); | ||
let total = (p.stat.utime + p.stat.stime) as f64 / *CLK_TCK; | ||
let past = cpu_total.get(); | ||
let delta = total - past; | ||
if delta > 0.0 { | ||
cpu_total.inc_by(delta); | ||
} | ||
let total = (p.stat.utime + p.stat.stime) / *CLK_TCK as u64; | ||
let past = self.cpu_total.get(); | ||
self.cpu_total.inc_by(total - past); | ||
|
||
cpu_total.collect() | ||
self.cpu_total.collect() | ||
}; | ||
|
||
// threads | ||
self.threads.set(p.stat.num_threads as f64); | ||
self.threads.set(p.stat.num_threads); | ||
|
||
// collect MetricFamilys. | ||
let mut mfs = Vec::with_capacity(METRICS_NUMBER); | ||
|
@@ -193,24 +187,20 @@ impl Collector for ProcessCollector { | |
|
||
lazy_static! { | ||
// getconf CLK_TCK | ||
static ref CLK_TCK: f64 = { | ||
static ref CLK_TCK: i64 = { | ||
unsafe { | ||
libc::sysconf(libc::_SC_CLK_TCK) as f64 | ||
libc::sysconf(libc::_SC_CLK_TCK) | ||
} | ||
}; | ||
|
||
// getconf PAGESIZE | ||
static ref PAGESIZE: f64 = { | ||
static ref PAGESIZE: i64 = { | ||
unsafe { | ||
libc::sysconf(libc::_SC_PAGESIZE) as f64 | ||
libc::sysconf(libc::_SC_PAGESIZE) | ||
} | ||
}; | ||
} | ||
|
||
lazy_static! { | ||
static ref BOOT_TIME: Option<f64> = procfs::boot_time_secs().ok().map(|i| i as f64); | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I totally get the performance argument, but could we please keep the
cpu_total
metric asf64
? All the other metrics (file descriptor counts, memory usage, threads) are actual integers, but processes that don't use a lot of CPU will now have 1 second CPU usage spikes every so often instead of a more or less flat usage with actual spikes occurring when they actually do.E.g. a process using 1% CPU on average, with a 10% spike every minute will now look as if it is using 0% CPU most of the time and spike every 2 minutes out of 3.
I really don't think the marginal performance improvement is worth it. Please keep using
Counter
forcpu_total
.