diff --git a/src/main.rs b/src/main.rs index 98e129b..e182db2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ mod jobs; mod nvidia; mod process; mod procfs; +mod procfsapi; mod ps; mod slurm; mod util; diff --git a/src/process.rs b/src/process.rs index 75d9f32..42fda57 100644 --- a/src/process.rs +++ b/src/process.rs @@ -3,7 +3,7 @@ use crate::command::{self, CmdError}; use crate::util; use crate::TIMEOUT_SECONDS; -#[derive(PartialEq)] +#[derive(PartialEq, Debug)] pub struct Process { pub pid: usize, pub uid: usize, diff --git a/src/procfs.rs b/src/procfs.rs index be83d7c..9248428 100644 --- a/src/procfs.rs +++ b/src/procfs.rs @@ -1,16 +1,9 @@ /// Collect CPU process information without GPU information, from files in /proc. -extern crate libc; -extern crate page_size; -extern crate users; use crate::process; +use crate::procfsapi; use std::collections::HashMap; -use std::fs; -use std::os::linux::fs::MetadataExt; -use std::path; -use std::time::{SystemTime, UNIX_EPOCH}; -use users::{get_user_by_uid, uid_t}; /// Obtain process information via /proc and return a vector of structures with all the information /// we need. In the returned vector, pids uniquely tag the records. @@ -20,50 +13,49 @@ use users::{get_user_by_uid, uid_t}; /// /// This function uniformly uses /proc, even though in some cases there are system calls that /// provide the same information. +/// +/// The underlying computing system -- /proc, system tables, and clock -- is virtualized through the +/// ProcfsAPI instance. -pub fn get_process_information() -> Result, String> { +pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI) -> Result, String> { // The boot time is the `btime` field of /proc/stat. It is measured in seconds since epoch. We // need this to compute the process's real time, which we need to compute ps-compatible cpu // utilization. let mut boot_time = 0; - if let Ok(s) = fs::read_to_string(path::Path::new("/proc/stat")) { - for l in s.split('\n') { - if l.starts_with("btime ") { - let fields = l.split_ascii_whitespace().collect::>(); - boot_time = parse_usize_field(&fields, 1, l, "stat", 0, "btime")? as u64; - break; - } - } - if boot_time == 0 { - return Err(format!("Could not find btime in /proc/stat: {s}")); + let stat_s = fs.read_to_string("stat")?; + for l in stat_s.split('\n') { + if l.starts_with("btime ") { + let fields = l.split_ascii_whitespace().collect::>(); + boot_time = parse_usize_field(&fields, 1, l, "stat", 0, "btime")? as u64; + break; } - } else { - return Err("Could not open or read /proc/stat".to_string()); - }; + } + if boot_time == 0 { + return Err(format!("Could not find btime in /proc/stat: {stat_s}")); + } // The total RAM installed is in the `MemTotal` field of /proc/meminfo. We need this to compute // ps-compatible relative memory use. let mut memtotal_kib = 0; - if let Ok(s) = fs::read_to_string(path::Path::new("/proc/meminfo")) { - for l in s.split('\n') { - if l.starts_with("MemTotal: ") { - // We expect "MemTotal:\s+(\d+)\s+kB", roughly - let fields = l.split_ascii_whitespace().collect::>(); - if fields.len() != 3 || fields[2] != "kB" { - return Err(format!("Unexpected MemTotal in /proc/meminfo: {l}")); - } - memtotal_kib = parse_usize_field(&fields, 1, l, "meminfo", 0, "MemTotal")?; - break; + let meminfo_s = fs.read_to_string("meminfo")?; + for l in meminfo_s.split('\n') { + if l.starts_with("MemTotal: ") { + // We expect "MemTotal:\s+(\d+)\s+kB", roughly + let fields = l.split_ascii_whitespace().collect::>(); + if fields.len() != 3 || fields[2] != "kB" { + return Err(format!("Unexpected MemTotal in /proc/meminfo: {l}")); } + memtotal_kib = parse_usize_field(&fields, 1, l, "meminfo", 0, "MemTotal")?; + break; } - if memtotal_kib == 0 { - return Err(format!("Could not find MemTotal in /proc/meminfo: {s}")); - } - } else { - return Err("Could not open or read /proc/meminfo".to_string()); - }; + } + if memtotal_kib == 0 { + return Err(format!( + "Could not find MemTotal in /proc/meminfo: {meminfo_s}" + )); + } // Enumerate all pids, and collect the uids while we're here. // @@ -74,35 +66,14 @@ pub fn get_process_information() -> Result, String> { // Note that a pid may disappear between the time we see it here and the time we get around to // reading it, later, and that new pids may appear meanwhile. We should ignore both issues. - let mut pids = vec![]; - if let Ok(dir) = fs::read_dir("/proc") { - for dirent in dir { - if let Ok(dirent) = dirent { - if let Ok(meta) = dirent.metadata() { - let uid = meta.st_uid(); - if let Some(name) = dirent.path().file_name() { - if let Ok(pid) = name.to_string_lossy().parse::() { - pids.push((pid, uid)); - } - } - } - } - } - } else { - return Err("Could not open /proc".to_string()); - }; + let pids = fs.read_proc_pids()?; // Collect remaining system data from /proc/{pid}/stat for the enumerated pids. - // Values in "ticks" are represented as f64 here. A typical value for CLK_TCK in 2023 is 100 - // (checked on several different systems). There are about 2^23 ticks per day. 2^52/2^23=29, - // ie 2^29 days, which is about 1.47 million years, without losing any precision. Since we're - // only ever running sonar on a single node, we will not exceed that range. - - let kib_per_page = page_size::get() / 1024; + let kib_per_page = fs.page_size_in_kib(); let mut result = vec![]; let mut user_table = UserTable::new(); - let clock_ticks_per_sec = unsafe { libc::sysconf(libc::_SC_CLK_TCK) as f64 }; + let clock_ticks_per_sec = fs.clock_ticks_per_sec() as f64; if clock_ticks_per_sec == 0.0 { return Err("Could not get a sensible CLK_TCK".to_string()); } @@ -118,7 +89,7 @@ pub fn get_process_information() -> Result, String> { let mut comm; let utime_ticks; let stime_ticks; - if let Ok(line) = fs::read_to_string(path::Path::new(&format!("/proc/{pid}/stat"))) { + if let Ok(line) = fs.read_to_string(&format!("{pid}/stat")) { // The comm field is a little tricky, it must be extracted first as the contents between // the first '(' and the last ')' in the line. let commstart = line.find('('); @@ -131,8 +102,10 @@ pub fn get_process_information() -> Result, String> { comm = line[commstart.unwrap() + 1..commend.unwrap()].to_string(); let s = line[commend.unwrap() + 1..].trim().to_string(); let fields = s.split_ascii_whitespace().collect::>(); - // NOTE relative to the `proc` documentation: All field offsets here are relative to the - // command, so ppid is 2, not 4, and then they are zero-based, not 1-based. + + // NOTE relative to the `proc` documentation: All field offsets in the following are + // relative to the command (so ppid is 2, not 4), and then they are zero-based, not + // 1-based (so ppid is actually 1). // Fields[0] is the state. These characters are relevant for modern kernels: // R running @@ -148,7 +121,8 @@ pub fn get_process_information() -> Result, String> { // as unsigned will fail. For Z it also looks like some of the fields could have // surprising zero values; one has to be careful when dividing. // - // In particular for Z, field 5 "tpgid" has been observed to be -1. + // In particular for Z, field 5 "tpgid" has been observed to be -1. For X, many of the + // fields are -1. // Zombie jobs cannot be ignored, because they are indicative of system health and the // information about their presence is used in consumers. @@ -179,11 +153,7 @@ pub fn get_process_information() -> Result, String> { // boot_time and the current time are both time_t, ie, a 31-bit quantity in 2023 and a // 32-bit quantity before 2038. clock_ticks_per_sec is on the order of 100. Ergo // boot_ticks and now_ticks can be represented in about 32+7=39 bits, fine for an f64. - let now_ticks = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as f64 - * clock_ticks_per_sec; + let now_ticks = fs.now_in_secs_since_epoch() as f64 * clock_ticks_per_sec; let boot_ticks = boot_time as f64 * clock_ticks_per_sec; // start_time_ticks should be on the order of a few years, there is no risk of overflow @@ -213,7 +183,7 @@ pub fn get_process_information() -> Result, String> { let size_kib; let rss_kib; - if let Ok(s) = fs::read_to_string(path::Path::new(&format!("/proc/{pid}/statm"))) { + if let Ok(s) = fs.read_to_string(&format!("{pid}/statm")) { let fields = s.split_ascii_whitespace().collect::>(); rss_kib = parse_usize_field(&fields, 1, &s, "statm", pid, "resident set size")? * kib_per_page; @@ -234,22 +204,21 @@ pub fn get_process_information() -> Result, String> { let pcpu_value = (utime_ticks + stime_ticks) / realtime_ticks; let pcpu_formatted = (pcpu_value * 1000.0).round() / 10.0; - // clock_ticks_per_sec is nonzero, so these divisions will not produce NaN or Infinity + // clock_ticks_per_sec is nonzero, so this division will not produce NaN or Infinity let cputime_sec = (bsdtime_ticks / clock_ticks_per_sec).round() as usize; // Note ps uses rss not size here. Also, ps doesn't trust rss to be <= 100% of memory, so - // let's not trust it either. + // let's not trust it either. memtotal_kib is nonzero, so this division will not produce + // NaN or Infinity. let pmem = f64::min( ((rss_kib as f64) * 1000.0 / (memtotal_kib as f64)).round() / 10.0, 99.9, ); - let user = user_table.lookup(uid); - result.push(process::Process { pid, uid: uid as usize, - user, + user: user_table.lookup(fs, uid), cpu_pct: pcpu_formatted, mem_pct: pmem, cputime_sec, @@ -297,7 +266,7 @@ fn parse_usize_field( // The UserTable optimizes uid -> name lookup. struct UserTable { - ht: HashMap, + ht: HashMap, } impl UserTable { @@ -305,11 +274,10 @@ impl UserTable { UserTable { ht: HashMap::new() } } - fn lookup(&mut self, uid: uid_t) -> String { + fn lookup(&mut self, fs: &dyn procfsapi::ProcfsAPI, uid: u32) -> String { if let Some(name) = self.ht.get(&uid) { name.clone() - } else if let Some(u) = get_user_by_uid(uid) { - let name = u.name().to_string_lossy().to_string(); + } else if let Some(name) = fs.user_by_uid(uid) { self.ht.insert(uid, name.clone()); name } else { @@ -317,3 +285,182 @@ impl UserTable { } } } + +// For the parse test we use the full text of stat and meminfo, but for stat we only want the +// 'btime' line and for meminfo we only want the 'MemTotal:' line. Other tests can economize on the +// input. + +#[test] +pub fn procfs_parse_test() { + let pids = vec![(4018, 1000)]; + + let mut users = HashMap::new(); + users.insert(1000, "zappa".to_string()); + + let mut files = HashMap::new(); + files.insert( + "stat".to_string(), + "cpu 241155 582 127006 12838870 12445 0 3816 0 0 0 +cpu0 32528 189 19573 1597325 1493 0 1149 0 0 0 +cpu1 32258 98 17128 1597900 1618 0 550 0 0 0 +cpu2 30018 18 13638 1607769 1565 0 340 0 0 0 +cpu3 31888 23 16103 1603771 1663 0 217 0 0 0 +cpu4 32830 54 27843 1581301 1506 0 295 0 0 0 +cpu5 27206 111 10254 1618633 1509 0 325 0 0 0 +cpu6 26842 26 9906 1619446 1514 0 511 0 0 0 +cpu7 27582 61 12558 1612723 1575 0 426 0 0 0 +intr 24686011 0 9 0 0 0 0 0 0 0 46121 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 660271 642 0 0 0 0 0 0 0 0 0 0 0 0 1016 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 120 122 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 155340 23 18728 14408 20861 13683 16444 17251 14218 17364 1 1 107 159457 6997 9903 12495 7135 5125 5225 7316 7414 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1414903 2183 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 51751779 +btime 1698303295 +processes 30162 +procs_running 1 +procs_blocked 0 +softirq 14448542 1561885 1201818 5 226550 10931 0 58705 8120272 39 3268337".to_string()); + files.insert( + "meminfo".to_string(), + "MemTotal: 16093776 kB +MemFree: 5247088 kB +MemAvailable: 8162068 kB +Buffers: 203244 kB +Cached: 3999448 kB +SwapCached: 0 kB +Active: 1405072 kB +Inactive: 7805220 kB +Active(anon): 6808 kB +Inactive(anon): 6112636 kB +Active(file): 1398264 kB +Inactive(file): 1692584 kB +Unevictable: 982716 kB +Mlocked: 16 kB +SwapTotal: 2097148 kB +SwapFree: 2097148 kB +Zswap: 0 kB +Zswapped: 0 kB +Dirty: 2872 kB +Writeback: 0 kB +AnonPages: 5990404 kB +Mapped: 672068 kB +Shmem: 1111828 kB +KReclaimable: 168520 kB +Slab: 385396 kB +SReclaimable: 168520 kB +SUnreclaim: 216876 kB +KernelStack: 29632 kB +PageTables: 66172 kB +SecPageTables: 0 kB +NFS_Unstable: 0 kB +Bounce: 0 kB +WritebackTmp: 0 kB +CommitLimit: 10144036 kB +Committed_AS: 16010888 kB +VmallocTotal: 34359738367 kB +VmallocUsed: 68332 kB +VmallocChunk: 0 kB +Percpu: 8160 kB +HardwareCorrupted: 0 kB +AnonHugePages: 0 kB +ShmemHugePages: 890880 kB +ShmemPmdMapped: 0 kB +FileHugePages: 0 kB +FilePmdMapped: 0 kB +HugePages_Total: 0 +HugePages_Free: 0 +HugePages_Rsvd: 0 +HugePages_Surp: 0 +Hugepagesize: 2048 kB +Hugetlb: 0 kB +DirectMap4k: 254828 kB +DirectMap2M: 4710400 kB +DirectMap1G: 11534336 kB +" + .to_string(), + ); + files.insert( + "4018/stat".to_string(), + "4018 (firefox) S 2190 2189 2189 0 -1 4194560 19293188 3117638 1823 557 51361 15728 5390 2925 20 0 187 0 16400 5144358912 184775 18446744073709551615 94466859782144 94466860597976 140720852341888 0 0 0 0 4096 17663 0 0 0 17 4 0 0 0 0 0 94466860605280 94466860610840 94466863497216 140720852350777 140720852350820 140720852350820 140720852357069 0".to_string()); + files.insert( + "4018/statm".to_string(), + "1255967 185959 54972 200 0 316078 0".to_string(), + ); + + let ticks_per_sec = 100.0; // We define this + let utime_ticks = 51361.0; // field(/proc/4018/stat, 14) + let stime_ticks = 15728.0; // field(/proc/4018/stat, 15) + let boot_time = 1698303295.0; // field(/proc/stat, "btime") + let start_ticks = 16400.0; // field(/proc/4018/stat, 22) + let rss: f64 = 185959.0 * 4.0; // pages_to_kib(field(/proc/4018/statm, 1)) + let memtotal = 16093776.0; // field(/proc/meminfo, "MemTotal:") + let size = 316078 * 4; // pages_to_kib(field(/proc/4018/statm, 5)) + + // now = boot_time + start_time + utime_ticks + stime_ticks + arbitrary idle time + let now = (boot_time + (start_ticks / ticks_per_sec) + (utime_ticks / ticks_per_sec) + (stime_ticks / ticks_per_sec) + 2000.0) as u64; + + let fs = procfsapi::MockFS::new(files, pids, users, now); + let info = get_process_information(&fs).unwrap(); + assert!(info.len() == 1); + let p = &info[0]; + assert!(p.pid == 4018); // from enumeration of /proc + assert!(p.uid == 1000); // ditto + assert!(p.user == "zappa"); // from getent + assert!(p.command == "firefox"); // field(/proc/4018/stat, 2) + assert!(p.ppid == 2190); // field(/proc/4018/stat, 4) + assert!(p.session == 2189); // field(/proc/4018/stat, 6) + + let now_time = now as f64; + let now_ticks = now_time * ticks_per_sec; + let boot_ticks = boot_time * ticks_per_sec; + let realtime_ticks = now_ticks - (boot_ticks + start_ticks); + let cpu_pct_value = (utime_ticks + stime_ticks) / realtime_ticks; + let cpu_pct = (cpu_pct_value * 1000.0).round() / 10.0; + assert!(p.cpu_pct == cpu_pct); + + let mem_pct = (rss * 1000.0 / memtotal).round() / 10.0; + assert!(p.mem_pct == mem_pct); + + assert!(p.mem_size_kib == size); +} + +#[test] +pub fn procfs_dead_and_undead_test() { + let pids = vec![(4018, 1000), (4019, 1000), (4020, 1000)]; + + let mut users = HashMap::new(); + users.insert(1000, "zappa".to_string()); + + let mut files = HashMap::new(); + files.insert("stat".to_string(), "btime 1698303295".to_string()); + files.insert("meminfo".to_string(), "MemTotal: 16093776 kB".to_string()); + files.insert( + "4018/stat".to_string(), + "4018 (firefox) S 2190 2189 2189 0 -1 4194560 19293188 3117638 1823 557 51361 15728 5390 2925 20 0 187 0 16400 5144358912 184775 18446744073709551615 94466859782144 94466860597976 140720852341888 0 0 0 0 4096 17663 0 0 0 17 4 0 0 0 0 0 94466860605280 94466860610840 94466863497216 140720852350777 140720852350820 140720852350820 140720852357069 0".to_string()); + files.insert( + "4019/stat".to_string(), + "4019 (firefox) Z 2190 2189 2189 0 -1 4194560 19293188 3117638 1823 557 51361 15728 5390 2925 20 0 187 0 16400 5144358912 184775 18446744073709551615 94466859782144 94466860597976 140720852341888 0 0 0 0 4096 17663 0 0 0 17 4 0 0 0 0 0 94466860605280 94466860610840 94466863497216 140720852350777 140720852350820 140720852350820 140720852357069 0".to_string()); + files.insert( + "4020/stat".to_string(), + "4020 (python3) X 0 -1 -1 0 -1 4243524 0 0 0 0 0 0 0 0 20 0 0 0 10643829 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 17 3 0 0 0 0 0 0 0 0 0 0 0 0 0".to_string()); + + files.insert( + "4018/statm".to_string(), + "1255967 185959 54972 200 0 316078 0".to_string(), + ); + files.insert( + "4019/statm".to_string(), + "1255967 185959 54972 200 0 316078 0".to_string(), + ); + files.insert( + "4020/statm".to_string(), + "1255967 185959 54972 200 0 316078 0".to_string(), + ); + + let fs = procfsapi::MockFS::new(files, pids, users, procfsapi::unix_now()); + let info = get_process_information(&fs).unwrap(); + + // 4020 should be dropped - it's dead + assert!(info.len() == 2); + + assert!(info[0].pid == 4018); + assert!(info[0].command == "firefox"); + assert!(info[1].pid == 4019); + assert!(info[1].command == "firefox "); +} diff --git a/src/procfsapi.rs b/src/procfsapi.rs new file mode 100644 index 0000000..f2dd1f3 --- /dev/null +++ b/src/procfsapi.rs @@ -0,0 +1,167 @@ +// This creates a API by which procfs can access the underlying computing system, allowing the +// system to be virtualized. In turn, that allows sensible test cases to be written. + +extern crate libc; +extern crate page_size; +extern crate users; + +use users::get_user_by_uid; + +use std::fs; +use std::os::linux::fs::MetadataExt; +use std::path; +use std::time::{SystemTime, UNIX_EPOCH}; + +#[cfg(test)] +use std::collections::HashMap; + +pub trait ProcfsAPI { + // Open /proc/ (which can have multiple path elements, eg, {PID}/filename), read it, and + // return its entire contents as a string. Return a sensible error message if the file can't + // be opened or read. + fn read_to_string(&self, path: &str) -> Result; + + // Return (pid,uid) for every file /proc/{PID}. Return a sensible error message in case + // something goes really, really wrong, but otherwise try to make the best of it. + fn read_proc_pids(&self) -> Result, String>; + + // Try to figure out the user's name from system tables, this may be an expensive operation. + fn user_by_uid(&self, uid: u32) -> Option; + + // Return the value of CLK_TCK, or 0 on error. + fn clock_ticks_per_sec(&self) -> usize; + + // Return the page size measured in KB + fn page_size_in_kib(&self) -> usize; + + // Return the current time in seconds since Unix epoch. + fn now_in_secs_since_epoch(&self) -> u64; +} + +// RealFS is used to actually access /proc, system tables, and system clock. + +pub struct RealFS {} + +impl RealFS { + pub fn new() -> RealFS { + RealFS {} + } +} + +impl ProcfsAPI for RealFS { + fn read_to_string(&self, path: &str) -> Result { + let filename = format!("/proc/{path}"); + match fs::read_to_string(path::Path::new(&filename)) { + Ok(s) => Ok(s), + Err(_) => Err(format!("Unable to read {filename}")), + } + } + + fn read_proc_pids(&self) -> Result, String> { + let mut pids = vec![]; + if let Ok(dir) = fs::read_dir("/proc") { + for dirent in dir { + if let Ok(dirent) = dirent { + if let Ok(meta) = dirent.metadata() { + let uid = meta.st_uid(); + if let Some(name) = dirent.path().file_name() { + if let Ok(pid) = name.to_string_lossy().parse::() { + pids.push((pid, uid)); + } + } + } + } + } + } else { + return Err("Could not open /proc".to_string()); + }; + Ok(pids) + } + + fn user_by_uid(&self, uid: u32) -> Option { + get_user_by_uid(uid).map(|u| u.name().to_string_lossy().to_string()) + } + + fn clock_ticks_per_sec(&self) -> usize { + unsafe { libc::sysconf(libc::_SC_CLK_TCK) as usize } + } + + fn page_size_in_kib(&self) -> usize { + page_size::get() / 1024 + } + + fn now_in_secs_since_epoch(&self) -> u64 { + unix_now() + } +} + +pub fn unix_now() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() +} + +// MockFS is used for testing, it is instantiated with the values we want it to return. + +#[cfg(test)] +pub struct MockFS { + files: HashMap, + pids: Vec<(usize, u32)>, + users: HashMap, + ticks_per_sec: usize, + pagesz: usize, + now: u64, +} + +#[cfg(test)] +impl MockFS { + pub fn new( + files: HashMap, + pids: Vec<(usize, u32)>, + users: HashMap, + now: u64, + ) -> MockFS { + MockFS { + files, + pids, + users, + ticks_per_sec: 100, + pagesz: 4, + now, + } + } +} + +#[cfg(test)] +impl ProcfsAPI for MockFS { + fn read_to_string(&self, path: &str) -> Result { + match self.files.get(path) { + Some(s) => Ok(s.clone()), + None => Err(format!("Unable to read /proc/{path}")), + } + } + + fn read_proc_pids(&self) -> Result, String> { + Ok(self.pids.clone()) + } + + fn user_by_uid(&self, uid: u32) -> Option { + match self.users.get(&uid) { + Some(s) => Some(s.clone()), + None => None, + } + } + + fn clock_ticks_per_sec(&self) -> usize { + self.ticks_per_sec + } + + fn page_size_in_kib(&self) -> usize { + self.pagesz + } + + fn now_in_secs_since_epoch(&self) -> u64 { + self.now + } +} diff --git a/src/ps.rs b/src/ps.rs index a02deae..7e63087 100644 --- a/src/ps.rs +++ b/src/ps.rs @@ -9,6 +9,7 @@ use crate::jobs; use crate::nvidia; use crate::process; use crate::procfs; +use crate::procfsapi; use crate::util::{three_places, time_iso8601}; use csv::{Writer, WriterBuilder}; @@ -189,11 +190,14 @@ pub fn create_snapshot(jobs: &mut dyn jobs::JobManager, opts: &PsOptions) { } */ - let procinfo_probe = match procfs::get_process_information() { - Ok(result) => Ok(result), - Err(msg) => { - eprintln!("INFO: procfs failed: {}", msg); - process::get_process_information() + let procinfo_probe = { + let fs = procfsapi::RealFS::new(); + match procfs::get_process_information(&fs) { + Ok(result) => Ok(result), + Err(msg) => { + eprintln!("INFO: procfs failed: {}", msg); + process::get_process_information() + } } }; if let Err(e) = procinfo_probe {