Skip to content

Commit

Permalink
Merge pull request #179 from lars-t-hansen/w-178-numa-info
Browse files Browse the repository at this point in the history
Fix #178 - extract numa info
  • Loading branch information
bast authored Jul 9, 2024
2 parents 2463297 + 9972eaf commit afb8ee4
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 28 deletions.
3 changes: 2 additions & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"name": "Bockwoldt, Mathias"
},
{
"affiliation": "University of Oslo",
"name": "Hansen, Lars T."
},
{
Expand All @@ -25,5 +26,5 @@
"id": "GPL-3.0"
},
"title": "sonar: Tool to profile usage of HPC resources by regularly probing processes using ps.",
"version": "0.11.0"
"version": "0.12.0"
}
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sonar"
version = "0.11.1"
version = "0.12.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
31 changes: 28 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ Usage: sonar <COMMAND>
Commands:
ps Take a snapshot of the currently running processes
sysinfo Extract system information
analyze Not yet implemented
help Print this message or the help of the given subcommand(s)

Options:
Expand All @@ -50,6 +49,11 @@ affect the output nevertheless, ie, most changes not covered by changes to the m

These rules are new with v0.8.0.

### Changes in v0.12.x

**System load data introduced**. Added the `load` field which is printed with one of the records
per sonar invocation. (v0.12.0)

### Changes in v0.11.x

**Better `ps` data**. More data points. (v0.11.0)
Expand Down Expand Up @@ -179,6 +183,27 @@ v=0.7.0,time=2023-08-10T11:09:41+02:00,host=somehost,cores=8,user=someone,job=0,
v=0.7.0,time=2023-08-10T11:09:41+02:00,host=somehost,cores=8,user=someone,job=0,cmd=slack,cpu%=3.9,cpukib=716924,gpus=none,gpu%=0,gpumem%=0,gpukib=0,cputime_sec=266
```

### Version 0.12.0 `ps` output format

Version 0.12.0 adds one field:

`load` (optional, default blank): This is an encoding of the per-cpu time usage in seconds on the
node since boot. It is the same for all records and is therefore printed only with one of them per
sonar invocation. The encoding is an array of N+1 u64 values for an N-cpu node. The first value is
the "base" value, it is to be added to all the subsequent values. The remaining are per-cpu values
in order from cpu0 through cpuN-1. Each value is encoded little-endian base-45, with the initial
character of each value chosen from a different set than the subsequent characters. The character
sets are:

```
INITIAL = "(){}[]<>+-abcdefghijklmnopqrstuvwxyz!@#$%^&*_"
SUBSEQUENT = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ~|';:.?/`"
```

The base-45 digits of the value `897` are (in little-endian order) 42 and 19, and the encoding of
this value is thus `&J`. As the initial character is from a different character set, no explicit
separator is needed in the array - the initial digit acts as a separator.

### Version 0.11.0 `ps` output format

Version 0.11.0 adds one field:
Expand Down Expand Up @@ -366,9 +391,9 @@ Numeric fields that are zero may or may not be omitted by the producer.

Note the v0.9.0 `sysinfo` output does not carry a version number.

## Collect results with `sonar analyze` :construction:
## Collect and analyze results

The `analyze` command is work in progress. Sonar data are used by two other tools:
Sonar data are used by two other tools:

* [JobGraph](https://github.com/NordicHPC/jobgraph) provides high-level plots of system activity. Mapping
files for JobGraph can be found in the [data](data) folder.
Expand Down
10 changes: 10 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ enum Commands {

/// Create a per-host lockfile in this directory and exit early if the file exists on startup [default: none]
lockdir: Option<String>,

/// One output record per Sonar invocation will contain a load= field with an encoding of
/// the per-cpu usage since boot.
load: bool,
},
/// Extract system information
Sysinfo {},
Expand All @@ -76,6 +80,7 @@ fn main() {
exclude_users,
exclude_commands,
lockdir,
load,
} => {
let opts = ps::PsOptions {
rollup: *rollup,
Expand All @@ -84,6 +89,7 @@ fn main() {
min_mem_percent: *min_mem_percent,
min_cpu_time: *min_cpu_time,
exclude_system_jobs: *exclude_system_jobs,
load: *load,
exclude_users: if let Some(s) = exclude_users {
s.split(',').collect::<Vec<&str>>()
} else {
Expand Down Expand Up @@ -131,13 +137,16 @@ fn command_line() -> Commands {
let mut exclude_users = None;
let mut exclude_commands = None;
let mut lockdir = None;
let mut load = false;
while next < args.len() {
let arg = args[next].as_ref();
next += 1;
if let Some(new_next) = bool_arg(arg, &args, next, "--batchless") {
(next, batchless) = (new_next, true);
} else if let Some(new_next) = bool_arg(arg, &args, next, "--rollup") {
(next, rollup) = (new_next, true);
} else if let Some(new_next) = bool_arg(arg, &args, next, "--load") {
(next, load) = (new_next, true);
} else if let Some(new_next) =
bool_arg(arg, &args, next, "--exclude-system-jobs")
{
Expand Down Expand Up @@ -192,6 +201,7 @@ fn command_line() -> Commands {
exclude_users,
exclude_commands,
lockdir,
load,
}
}
"sysinfo" => Commands::Sysinfo {},
Expand Down
68 changes: 55 additions & 13 deletions src/procfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,58 @@ fn i32_field(l: &str) -> Result<i32, String> {
pub fn get_process_information(
fs: &dyn procfsapi::ProcfsAPI,
memtotal_kib: usize,
) -> Result<HashMap<usize, Process>, String> {
// The boot time is the `btime` field of /proc/stat. It is measured in seconds since epoch. We
// need this to compute the process's real time, which we need to compute ps-compatible cpu
// utilization.
) -> Result<(HashMap<usize, Process>, u64, Vec<u64>), String> {
// We need this for a lot of things. On x86 and x64 this is always 100 but in principle it
// might be something else, so read the true value.

let ticks_per_sec = fs.clock_ticks_per_sec() as u64;
if ticks_per_sec == 0 {
return Err("Could not get a sensible CLK_TCK".to_string());
}

// Extract system data from /proc/stat. https://man7.org/linux/man-pages/man5/procfs.5.html.
//
// The per-CPU usage is the sum of some fields of the `cpuN` lines. These are in ticks since
// boot. In addition there is an across-the-system line called simply `cpu` with the same
// format. These data are useful for analyzing core bindings.
//
// The boot time is first field of the `btime` line of /proc/stat. It is measured in seconds
// since epoch. We need this to compute the process's real time, which we need to compute
// ps-compatible cpu utilization.

let mut boot_time = 0;
let mut cpu_total_secs = 0;
let mut per_cpu_secs = vec![];
let stat_s = fs.read_to_string("stat")?;
for l in stat_s.split('\n') {
if l.starts_with("btime ") {
if l.starts_with("cpu") {
// Based on sysstat sources, the "nice" time is not included in the "user" time. (But
// guest times, which we ignore here, are included in their overall times.) And
// irq/softirq numbers can be a substantial fraction of "system" time. So sum user,
// nice, sys, irq, and softirq as a sensible proxy for time spent on "work" on the CPU.
const STAT_FIELDS: [usize; 5] = [1, 2, 3, 6, 7];

let fields = l.split_ascii_whitespace().collect::<Vec<&str>>();
let mut sum = 0;
for i in STAT_FIELDS {
sum += parse_usize_field(&fields, i, l, "stat", 0, "cpu")? as u64;
}
if l.starts_with("cpu ") {
cpu_total_secs = sum / ticks_per_sec;
} else {
let cpu_no =
match fields[0][3..].parse::<usize>() {
Ok(x) => x,
Err(_) => { continue } // Too harsh to error out
};
if per_cpu_secs.len() < cpu_no + 1 {
per_cpu_secs.resize(cpu_no + 1, 0u64);
}
per_cpu_secs[cpu_no] = sum / ticks_per_sec;
}
} else if l.starts_with("btime ") {
let fields = l.split_ascii_whitespace().collect::<Vec<&str>>();
boot_time = parse_usize_field(&fields, 1, l, "stat", 0, "btime")? as u64;
break;
}
}
if boot_time == 0 {
Expand All @@ -138,10 +178,7 @@ pub fn get_process_information(
let mut result = HashMap::<usize, Process>::new();
let mut ppids = HashSet::<usize>::new();
let mut user_table = UserTable::new();
let clock_ticks_per_sec = fs.clock_ticks_per_sec() as f64;
if clock_ticks_per_sec == 0.0 {
return Err("Could not get a sensible CLK_TCK".to_string());
}
let clock_ticks_per_sec = ticks_per_sec as f64;

for (pid, uid) in pids {
// Basic system variables. Intermediate time values are represented in ticks to prevent
Expand Down Expand Up @@ -375,7 +412,7 @@ pub fn get_process_information(
p.has_children = ppids.contains(&p.pid);
}

Ok(result)
Ok((result, cpu_total_secs, per_cpu_secs))
}

// The UserTable optimizes uid -> name lookup.
Expand Down Expand Up @@ -518,7 +555,7 @@ DirectMap1G: 11534336 kB

let fs = procfsapi::MockFS::new(files, pids, users, now);
let memtotal_kib = get_memtotal_kib(&fs).unwrap();
let mut info = get_process_information(&fs, memtotal_kib).unwrap();
let (mut info, total_secs, per_cpu_secs) = get_process_information(&fs, memtotal_kib).unwrap();
assert!(info.len() == 1);
let mut xs = info.drain();
let p = xs.next().expect("Something").1;
Expand All @@ -542,6 +579,11 @@ DirectMap1G: 11534336 kB

assert!(p.mem_size_kib == size);
assert!(p.rssanon_kib == rssanon);

assert!(total_secs == (241155 + 582 + 127006 + 0 + 3816) / 100); // "cpu " line of "stat" data
assert!(per_cpu_secs.len() == 8);
assert!(per_cpu_secs[0] == (32528 + 189 + 19573 + 0 + 1149) / 100); // "cpu0 " line of "stat" data
assert!(per_cpu_secs[7] == (27582 + 61 + 12558 + 0 + 426) / 100); // "cpu7 " line of "stat" data
}

#[test]
Expand Down Expand Up @@ -585,7 +627,7 @@ pub fn procfs_dead_and_undead_test() {

let fs = procfsapi::MockFS::new(files, pids, users, procfsapi::unix_now());
let memtotal_kib = get_memtotal_kib(&fs).unwrap();
let mut info = get_process_information(&fs, memtotal_kib).unwrap();
let (mut info, _, _) = get_process_information(&fs, memtotal_kib).unwrap();

// 4020 should be dropped - it's dead
assert!(info.len() == 2);
Expand Down
Loading

0 comments on commit afb8ee4

Please sign in to comment.