Skip to content

Commit

Permalink
Merge pull request #115 from lars-t-hansen/w-113-nan-values
Browse files Browse the repository at this point in the history
Fix #113 - avoid NaN values in procfs output
  • Loading branch information
bast authored Oct 26, 2023
2 parents 4815fef + b8049ee commit d51dc5e
Showing 1 changed file with 51 additions and 20 deletions.
71 changes: 51 additions & 20 deletions src/procfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,21 +96,31 @@ pub fn get_process_information() -> Result<Vec<process::Process>, String> {

// Collect remaining system data from /proc/{pid}/stat for the enumerated pids.

// Values in "ticks" are represented as f64 here. A typical value for CLK_TCK in 2023 is 100
// (checked on several different systems). There are about 2^23 ticks per day. 2^52/2^23=29,
// ie 2^29 days, which is about 1.47 million years, without losing any precision. Since we're
// only ever running sonar on a single node, we will not exceed that range.

let kib_per_page = page_size::get() / 1024;
let mut result = vec![];
let mut user_table = UserTable::new();
let clock_ticks_per_sec: usize = unsafe { libc::sysconf(libc::_SC_CLK_TCK) as usize };
let clock_ticks_per_sec = unsafe { libc::sysconf(libc::_SC_CLK_TCK) as f64 };
if clock_ticks_per_sec == 0.0 {
return Err(format!("Could not get a sensible CLK_TCK"));
}

for (pid, uid) in pids {

// Basic system variables.
// Basic system variables. Intermediate time values are represented in ticks to prevent
// various roundoff artifacts resulting in NaN or Infinity.

let bsdtime;
let realtime;
let bsdtime_ticks;
let mut realtime_ticks;
let ppid;
let sess;
let comm;
let utime;
let stime;
let utime_ticks;
let stime_ticks;
if let Ok(line) = fs::read_to_string(path::Path::new(&format!("/proc/{pid}/stat"))) {
// The comm field is a little tricky, it must be extracted first as the contents between
// the first '(' and the last ')' in the line.
Expand All @@ -126,14 +136,28 @@ pub fn get_process_information() -> Result<Vec<process::Process>, String> {
// command, so ppid is 2, not 4, and then they are zero-based, not 1-based.
ppid = parse_usize_field(&fields, 1, &line, "stat", pid, "ppid")?;
sess = parse_usize_field(&fields, 3, &line, "stat", pid, "sess")?;
utime = parse_usize_field(&fields, 11, &line, "stat", pid, "utime")? / clock_ticks_per_sec;
stime = parse_usize_field(&fields, 12, &line, "stat", pid, "stime")? / clock_ticks_per_sec;
let cutime = parse_usize_field(&fields, 13, &line, "stat", pid, "cutime")? / clock_ticks_per_sec;
let cstime = parse_usize_field(&fields, 14, &line, "stat", pid, "cstime")? / clock_ticks_per_sec;
bsdtime = utime + stime + cutime + cstime;
let start_time = (parse_usize_field(&fields, 19, &line, "stat", pid, "starttime")? / clock_ticks_per_sec) as u64;
let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
realtime = now - (boot_time + start_time);
utime_ticks = parse_usize_field(&fields, 11, &line, "stat", pid, "utime")? as f64;
stime_ticks = parse_usize_field(&fields, 12, &line, "stat", pid, "stime")? as f64;
let cutime_ticks = parse_usize_field(&fields, 13, &line, "stat", pid, "cutime")? as f64;
let cstime_ticks = parse_usize_field(&fields, 14, &line, "stat", pid, "cstime")? as f64;
bsdtime_ticks = utime_ticks + stime_ticks + cutime_ticks + cstime_ticks;
let start_time_ticks = parse_usize_field(&fields, 19, &line, "stat", pid, "starttime")? as f64;

// boot_time and the current time are both time_t, ie, a 31-bit quantity in 2023 and a
// 32-bit quantity before 2038. clock_ticks_per_sec is on the order of 100. Ergo
// boot_ticks and now_ticks can be represented in about 32+7=39 bits, fine for an f64.
let now_ticks = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs() as f64 * clock_ticks_per_sec;
let boot_ticks = boot_time as f64 * clock_ticks_per_sec;

// start_time_ticks should be on the order of a few years, there is no risk of overflow
// here, and in any case boot_ticks + start_time_ticks <= now_ticks, and by the above
// reasoning now_ticks fits in an f64, ergo the sum does too.
//
// Take the max with 1 here to ensure realtime_ticks is not zero.
realtime_ticks = now_ticks - (boot_ticks + start_time_ticks);
if realtime_ticks < 1.0 {
realtime_ticks = 1.0;
}
} else {
// This is *usually* benign - the process may have gone away since we enumerated the
// /proc directory. It is *possibly* indicative of a permission problem, but that
Expand Down Expand Up @@ -165,22 +189,29 @@ pub fn get_process_information() -> Result<Vec<process::Process>, String> {

// pcpu and pmem are rounded to ##.#. We're going to get slightly different answers here
// than ps because we use float arithmetic; frequently this code will produce values that
// are one-tenth of a percent higher than ps.
//
// are one-tenth of a percent off from those of ps. One can argue about whether round(),
// floor() or ceil() is the most correct, but it's unlikely to matter much.

// realtime_ticks is nonzero, so this division will not produce NaN or Infinity
let pcpu_value = (utime_ticks + stime_ticks) / realtime_ticks;
let pcpu_formatted = (pcpu_value * 1000.0).round() / 10.0;

// clock_ticks_per_sec is nonzero, so these divisions will not produce NaN or Infinity
let cputime_sec = (bsdtime_ticks / clock_ticks_per_sec).round() as usize;

// Note ps uses rss not size here. Also, ps doesn't trust rss to be <= 100% of memory, so
// let's not trust it either.
let pmem = f64::min(((rss_kib as f64) * 1000.0 / (memtotal_kib as f64)).round() / 10.0, 99.9);

let pcpu = (((utime + stime) as f64 * 1000.0 / realtime as f64)).ceil() / 10.0;
let pmem = f64::min(((rss_kib as f64) * 1000.0 / (memtotal_kib as f64)).ceil() / 10.0, 99.9);
let user = user_table.lookup(uid);

result.push(process::Process {
pid,
uid: uid as usize,
user,
cpu_pct: pcpu,
cpu_pct: pcpu_formatted,
mem_pct: pmem,
cputime_sec: bsdtime,
cputime_sec,
mem_size_kib: size_kib,
ppid,
session: sess,
Expand Down

0 comments on commit d51dc5e

Please sign in to comment.