diff --git a/Cargo.lock b/Cargo.lock index f22a846..bdaaba5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -133,17 +133,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -322,14 +311,14 @@ checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "console" -version = "0.15.5" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", - "windows-sys 0.42.0", + "windows-sys 0.52.0", ] [[package]] @@ -350,9 +339,9 @@ checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "cpufeatures" -version = "0.2.7" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] @@ -419,26 +408,21 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset 0.9.1", - "scopeguard", ] [[package]] @@ -520,9 +504,9 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -845,15 +829,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.2.6" @@ -1012,9 +987,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.29.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a28d25139df397cbca21408bb742cf6837e04cdbebf1b07b760caf971d6a972" +checksum = 
"6593a41c7a73841868772495db7dc1e8ecab43bb5c0b6da2059246c4b506ab60" dependencies = [ "console", "lazy_static", @@ -1023,7 +998,6 @@ dependencies = [ "pest_derive", "serde", "similar", - "yaml-rust", ] [[package]] @@ -1096,9 +1070,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" @@ -1201,6 +1175,7 @@ dependencies = [ "insta", "itertools", "kernlog", + "lazy_static", "libc", "log", "memfault-ssf", @@ -1248,15 +1223,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -1343,7 +1309,7 @@ dependencies = [ "bitflags 1.3.2", "cfg-if", "libc", - "memoffset 0.7.1", + "memoffset", "pin-utils", "static_assertions", ] @@ -1538,19 +1504,20 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.6.0" +version = "2.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e68e84bfb01f0507134eac1e9b410a12ba379d064eab48c50ba4ce329a527b70" +checksum = "9c73c26c01b8c87956cea613c907c9d6ecffd8d18a2a5908e5de0adfaa185cea" dependencies = [ + "memchr", "thiserror", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.6.0" +version = "2.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b79d4c71c865a25a4322296122e3924d30bc8ee0834c8bfc8b95f7f054afbfb" +checksum = "664d22978e2815783adbdd2c588b455b1bd625299ce36b2a99881ac9627e6d8d" dependencies = [ "pest", "pest_generator", @@ -1558,9 +1525,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.6.0" +version = "2.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c435bf1076437b851ebc8edc3a18442796b30f1728ffea6262d59bbe28b077e" +checksum = "a2d5487022d5d33f4c30d91c22afa240ce2a644e87fe08caad974d4eab6badbe" dependencies = [ "pest", "pest_meta", @@ -1571,9 +1538,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.6.0" +version = "2.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "745a452f8eb71e39ffd8ee32b3c5f51d03845f99786fa9b68db6ff509c505411" +checksum = "0091754bbd0ea592c4deb3a122ce8ecbb0753b738aa82bc055fcc2eccc8d8174" dependencies = [ "once_cell", "pest", @@ -1606,9 +1573,9 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" [[package]] name = "plotters" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", @@ -1619,24 +1586,27 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" -version = "0.3.6" +version = "0.3.7" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "prctl" @@ -1948,9 +1918,9 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] @@ -2144,9 +2114,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.17" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" @@ -2213,9 +2183,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.6" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", @@ -2252,9 +2222,9 @@ dependencies = [ [[package]] name = "similar" -version = "2.2.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" [[package]] name = "slab" @@ -2295,12 +2265,12 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "stderrlog" -version = "0.5.4" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69a26bbf6de627d389164afa9783739b56746c6c72c4ed16539f4ff54170327b" +checksum = "61c910772f992ab17d32d6760e167d2353f4130ed50e796752689556af07dc6b" dependencies = [ - "atty", "chrono", + "is-terminal", "log", "termcolor", "thread_local", @@ -2602,15 +2572,15 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "ucd-trie" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" [[package]] name = "unicode-bidi" @@ -2686,9 +2656,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wait-timeout" @@ -2852,21 +2822,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-sys" version = "0.45.0" @@ -3085,12 +3040,24 @@ dependencies = [ ] [[package]] -name = "yaml-rust" -version = "0.4.5" +name = "zerocopy" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "linked-hash-map", + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", ] [[package]] diff --git a/VERSION b/VERSION index d334818..4db06e8 100644 --- a/VERSION +++ b/VERSION @@ -1,3 +1,3 @@ -BUILD ID: 2370319 -GIT COMMIT: 41269b2621 -VERSION: 1.14.0 +BUILD ID: 2534584 +GIT COMMIT: e2c67955b9 +VERSION: 1.15.0 diff --git a/memfault-ssf/Cargo.toml b/memfault-ssf/Cargo.toml index b33e53e..6a9ffc4 100644 --- a/memfault-ssf/Cargo.toml +++ b/memfault-ssf/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "memfault-ssf" -version = "1.14.0" +version = "1.15.0" edition = "2021" description = "Supporting crate for the Memfault memfaultd embedded Linux agent" homepage = "https://github.com/memfault/memfaultd" diff --git a/memfault-ssf/VERSION b/memfault-ssf/VERSION new file mode 100644 index 0000000..e69de29 diff --git a/memfaultc-sys/Cargo.toml b/memfaultc-sys/Cargo.toml index 624dd32..abe1cb9 100644 --- a/memfaultc-sys/Cargo.toml +++ b/memfaultc-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "memfaultc-sys" -version = "1.14.0" +version = "1.15.0" edition = "2021" autobins = false description = "Supporting crate for the Memfault memfaultd embedded Linux agent" diff --git a/memfaultc-sys/VERSION b/memfaultc-sys/VERSION new file mode 100644 index 0000000..e69de29 diff --git a/memfaultd/Cargo.toml b/memfaultd/Cargo.toml index 96cee3d..78cf386 100644 --- a/memfaultd/Cargo.toml +++ b/memfaultd/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "memfaultd" -version = "1.14.0" +version = "1.15.0" edition = "2021" autobins = false rust-version = "1.72" @@ -28,8 +28,8 @@ name= "mfw" path= "src/bin/mfw.rs" [dependencies] -memfaultc-sys = { path= "../memfaultc-sys", version = "1.14.0" } -ssf = { package = "memfault-ssf", path= "../memfault-ssf", version = "1.14.0" } +memfaultc-sys = { path= "../memfaultc-sys", version = "1.15.0" } +ssf = { package = "memfault-ssf", path= "../memfault-ssf", version = "1.15.0" } argh = "0.1.10" cfg-if = "1.0.0" chrono = { version = "0.4.23", features = ["serde"]} @@ -50,7 +50,7 @@ serde_json = "1.0.89" serde_repr = "0.1" shuteye = "0.3.3" signal-hook = "0.3.14" -stderrlog = 
"0.5.4" +stderrlog = "0.6.0" strum = { version = "0.24", features = ["derive"] } strum_macros = "0.24" tempfile = "3.3.0" @@ -73,6 +73,7 @@ regex= { version = "1.10.2", optional = true} nom = "7.1.3" sealed_test = "1.1.0" zip = { version = "1.1.4", default-features = false, features = ["deflate"]} +lazy_static = "1.5.0" [target.'cfg(target_os = "linux")'.dependencies] procfs = { version = "0.15.1", optional = true } @@ -95,8 +96,12 @@ nom = "7.1.3" name = "logs_to_metrics" harness = false +[[bench]] +name = "log_level_mapper" +harness = false + [features] -default = ["coredump", "collectd", "logging", "log-to-metrics", "systemd", "rust-tls" ] +default = ["coredump", "logging", "regex", "systemd", "rust-tls"] coredump = [ "memfaultc-sys/coredump", "dep:prctl", @@ -107,13 +112,13 @@ coredump = [ "dep:goblin", "dep:scroll" ] -collectd = [] swupdate = ["memfaultc-sys/swupdate"] logging = ["dep:governor", "dep:rmp-serde", "dep:rmpv"] systemd = ["memfaultc-sys/systemd"] rust-tls = ["reqwest/rustls-tls"] openssl-tls = ["reqwest/native-tls"] openssl-vendored-tls = ["reqwest/native-tls-vendored"] -log-to-metrics = ["dep:regex"] +regex = ["dep:regex"] +log-to-metrics = ["regex"] experimental = ["mfw"] mfw = [] diff --git a/memfaultd/DEVELOPMENT.md b/memfaultd/DEVELOPMENT.md index 0019195..40b94df 100644 --- a/memfaultd/DEVELOPMENT.md +++ b/memfaultd/DEVELOPMENT.md @@ -16,7 +16,7 @@ apt install libsystemd-dev libconfig-dev #### macOS ```sh -brew install libconfig +brew install pkg-config libconfig ``` (note: `libsystemd` is not available on macOS and the build system will not try diff --git a/memfaultd/VERSION b/memfaultd/VERSION new file mode 100644 index 0000000..e69de29 diff --git a/memfaultd/benches/log_level_mapper.rs b/memfaultd/benches/log_level_mapper.rs new file mode 100644 index 0000000..9acb088 --- /dev/null +++ b/memfaultd/benches/log_level_mapper.rs @@ -0,0 +1,84 @@ +// +// Copyright (c) Memfault, Inc. 
+// See License.txt for details +use chrono::Utc; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use memfaultd::config::{LevelMappingConfig, LevelMappingRegex}; +use memfaultd::logs::log_entry::{LogData, LogEntry}; +use memfaultd::logs::log_level_mapper::LogLevelMapper; + +const BENCH_LEVELS: &[&str] = &[ + "EMERG", "ALERT", "CRIT", "ERROR", "WARN", "NOTICE", "INFO", "DEBUG", +]; + +fn build_log_line(level: &str, message: &str) -> LogEntry { + let data = LogData { + message: message.to_string(), + pid: None, + systemd_unit: None, + priority: Some(level.to_string()), + original_priority: None, + extra_fields: Default::default(), + }; + + LogEntry { + ts: Utc::now(), + data, + } +} + +fn build_regex_string(level: &str) -> String { + format!(r"\[.*\] \[{}\]:", level) +} + +fn build_log_mapper() -> LogLevelMapper { + let regex = BENCH_LEVELS + .iter() + .map(|level| build_regex_string(level)) + .collect::>(); + + let regex = LevelMappingRegex { + emergency: Some(regex[0].clone()), + alert: Some(regex[1].clone()), + critical: Some(regex[2].clone()), + error: Some(regex[3].clone()), + warning: Some(regex[4].clone()), + notice: Some(regex[5].clone()), + info: Some(regex[6].clone()), + debug: Some(regex[7].clone()), + }; + let level_config = LevelMappingConfig { + enable: true, + regex: Some(regex), + }; + + LogLevelMapper::try_from(&level_config).expect("Failed to build log level mapper") +} + +fn map_logs(num_log_lines: u64, mapper: &LogLevelMapper) { + for i in 0..num_log_lines { + let cur_level_idx = i % BENCH_LEVELS.len() as u64; + let cur_level = BENCH_LEVELS[cur_level_idx as usize]; + let mut log_line = build_log_line(cur_level, "This is a test log message"); + mapper.map_log(&mut log_line).expect("Error mapping log"); + } +} + +fn log_mapper_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("Log Level Mapper"); + let num_log_lines = [100, 1000]; + let level_mapper = build_log_mapper(); + + for num in num_log_lines { + group.throughput(Throughput::Elements(num)); + group.bench_with_input(BenchmarkId::new("Log Level Mapper", num), &num, |b, num| { + // Send metrics to preallocate the metrics hashmap + b.iter(|| { + map_logs(*num, &level_mapper); + }) + }); + } +} + +criterion_group!(benches, log_mapper_benchmark); +criterion_main!(benches); diff --git a/memfaultd/benches/logs_to_metrics.rs b/memfaultd/benches/logs_to_metrics.rs index 41a85ad..7a3aaae 100644 --- a/memfaultd/benches/logs_to_metrics.rs +++ b/memfaultd/benches/logs_to_metrics.rs @@ -5,17 +5,25 @@ use std::{collections::HashMap, iter::repeat}; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use memfaultd::config::LogToMetricRule; +use memfaultd::logs::log_entry::{LogData, LogEntry}; use memfaultd::logs::log_to_metrics::LogToMetrics; use memfaultd::metrics::MetricReportManager; -use serde_json::{json, Value}; use ssf::ServiceJig; -fn log_line_from_str(line: &str) -> Value { - json!({ - "data": { - "MESSAGE": line, - } - }) +fn log_line_from_str(line: &str) -> LogEntry { + let data = LogData { + message: line.to_string(), + pid: None, + systemd_unit: None, + priority: None, + original_priority: None, + extra_fields: HashMap::new(), + }; + + LogEntry { + ts: chrono::Utc::now(), + data, + } } fn send_logs(num_log_lines: u64) { @@ -31,7 +39,7 @@ fn send_logs(num_log_lines: u64) { let log_lines = repeat("eager evaluation is bad") .take(num_log_lines as usize) .map(log_line_from_str) - .collect::>(); + .collect::>(); let mut logs_to_metrics = 
LogToMetrics::new(rules, report_service.mailbox.into()); log_lines.iter().for_each(|log_line| { diff --git a/memfaultd/builtin.conf b/memfaultd/builtin.conf index 497e835..5522fde 100644 --- a/memfaultd/builtin.conf +++ b/memfaultd/builtin.conf @@ -48,7 +48,10 @@ parameters. "rotate_size_kib": 10240, "rotate_after_seconds": 3600, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -59,7 +62,10 @@ parameters. "enable_daily_heartbeats": false, "system_metric_collection": { "poll_interval_seconds": 10, - "enable": false + "enable": true + }, + "statsd_server": { + "bind_address": "127.0.0.1:8125" } } } diff --git a/memfaultd/src/cli/memfault_core_handler/mod.rs b/memfaultd/src/cli/memfault_core_handler/mod.rs index e1112f1..b896e32 100644 --- a/memfaultd/src/cli/memfault_core_handler/mod.rs +++ b/memfaultd/src/cli/memfault_core_handler/mod.rs @@ -70,6 +70,8 @@ pub type ElfPtrSize = u32; /// called by users directly. memfaultd is expected to set up the handler with the kernel by writing /// the appropriate configuration to /proc/sys/kernel/core_pattern. /// See https://mflt.io/linux-coredumps for more information. +/// `core` man page: +/// https://man7.org/linux/man-pages/man5/core.5.html struct MemfaultCoreHandlerArgs { /// use configuration file #[argh(option, short = 'c')] @@ -78,6 +80,12 @@ struct MemfaultCoreHandlerArgs { #[argh(positional)] pid: i32, + /// populated by the %e in the core_pattern + /// This is the value stored in /proc//comm + /// for the crashing process's PID + #[argh(positional)] + comm: String, + /// verbose output #[argh(switch, short = 'V')] verbose: bool, @@ -121,7 +129,7 @@ pub fn main() -> Result<()> { let client = MemfaultdClient::from_config(&config); match client { Ok(client) => { - if let Err(e) = client.notify_crash() { + if let Err(e) = client.notify_crash(args.comm) { debug!("Failed to notify memfaultd of crash: {:?}", e); } @@ -231,7 +239,7 @@ pub fn process_corefile( let network_config = NetworkConfig::from(config); let mar_entry = mar_builder .set_metadata(Metadata::new_coredump(output_file_name, compression.into())) - .add_attachment(output_file_path) + .add_attachment(output_file_path)? .save(&network_config)?; debug!("Coredump MAR entry generated: {}", mar_entry.path.display()); diff --git a/memfaultd/src/cli/memfault_watch/mod.rs b/memfaultd/src/cli/memfault_watch/mod.rs index 11980e7..054fe9c 100644 --- a/memfaultd/src/cli/memfault_watch/mod.rs +++ b/memfaultd/src/cli/memfault_watch/mod.rs @@ -182,7 +182,7 @@ fn run_from_args(args: MemfaultWatchArgs) -> Result { let mar_builder = MarEntryBuilder::new(&mar_staging_path)?; let mar_entry = mar_builder .set_metadata(metadata) - .add_attachment(stdio_log_file_path) + .add_attachment(stdio_log_file_path)? .save(&network_config)?; info!("MFW MAR entry generated: {}", mar_entry.path.display()); @@ -190,7 +190,8 @@ fn run_from_args(args: MemfaultWatchArgs) -> Result { let client = MemfaultdClient::from_config(&config) .map_err(|report| eyre!("Failed to create Memfaultd client from config! {report}"))?; - if client.notify_crash().is_err() { + // TODO: Add /proc/comm value for the crashing process here + if client.notify_crash("TODO".to_string()).is_err() { error!("Unable to contact memfaultd. 
Is it running?"); } else { debug!("Notified memfaultd about crash!"); diff --git a/memfaultd/src/cli/memfaultctl/mod.rs b/memfaultd/src/cli/memfaultctl/mod.rs index 3fe13e1..0e52df0 100644 --- a/memfaultd/src/cli/memfaultctl/mod.rs +++ b/memfaultd/src/cli/memfaultctl/mod.rs @@ -374,7 +374,7 @@ pub fn main() -> Result<()> { reason, file_name, )) - .add_copied_attachment(file_path) + .add_copied_attachment(file_path)? .save(&network_config) .map(|_entry| ()) } diff --git a/memfaultd/src/cli/memfaultd_client.rs b/memfaultd/src/cli/memfaultd_client.rs index 91415fc..f0d01ed 100644 --- a/memfaultd/src/cli/memfaultd_client.rs +++ b/memfaultd/src/cli/memfaultd_client.rs @@ -9,6 +9,7 @@ use reqwest::{ header::ACCEPT, StatusCode, }; +use serde::{Deserialize, Serialize}; use crate::{ config::Config, @@ -23,6 +24,11 @@ pub struct MemfaultdClient { client: Client, } +#[derive(Serialize, Deserialize)] +pub struct NotifyCrashRequest { + pub process_name: String, +} + pub struct DeleteToken(String); pub enum ExportGetResponse { @@ -111,9 +117,10 @@ impl MemfaultdClient { } } - pub fn notify_crash(&self) -> Result<()> { + pub fn notify_crash(&self, comm: String) -> Result<()> { self.client .post(format!("{}{}", self.base_url, "/v1/crash/report")) + .json(&NotifyCrashRequest { process_name: comm }) .send()?; Ok(()) } diff --git a/memfaultd/src/cli/show_settings.rs b/memfaultd/src/cli/show_settings.rs index 0919b0c..ca773cf 100644 --- a/memfaultd/src/cli/show_settings.rs +++ b/memfaultd/src/cli/show_settings.rs @@ -116,8 +116,8 @@ pub fn show_settings(config_path: Option<&Path>) -> Result<()> { "coredump", #[cfg(feature = "logging")] "logging", - #[cfg(feature = "log-to-metrics")] - "log-to-metrics", + #[cfg(feature = "regex")] + "regex", #[cfg(feature = "systemd")] "systemd", ]; diff --git a/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__drops_cpu_metrics_when_builtin_system_metrics_are_enabled.snap b/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__drops_cpu_metrics_when_builtin_system_metrics_are_enabled.snap index c90ceff..b84bd12 100644 --- a/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__drops_cpu_metrics_when_builtin_system_metrics_are_enabled.snap +++ b/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__drops_cpu_metrics_when_builtin_system_metrics_are_enabled.snap @@ -1,6 +1,6 @@ --- source: memfaultd/src/collectd/collectd_handler.rs -expression: metrics +expression: mock.take_metrics().unwrap() --- { "cpufreq/0/cpu/idle": 0.0, diff --git a/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__handle_push.snap b/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__handle_push.snap index 958fce1..63e9da2 100644 --- a/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__handle_push.snap +++ b/memfaultd/src/collectd/snapshots/memfaultd__collectd__collectd_handler__tests__handle_push.snap @@ -1,6 +1,6 @@ --- source: memfaultd/src/collectd/collectd_handler.rs -expression: "serde_json::to_string_pretty(&metrics).expect(\"metric_store should be serializable\")" +expression: fixture.mock.take_metrics().unwrap() --- { "cpu/0/cpu/idle": 0.0 diff --git a/memfaultd/src/config/config_file.rs b/memfaultd/src/config/config_file.rs index 49ee64a..c162a30 100644 --- a/memfaultd/src/config/config_file.rs +++ b/memfaultd/src/config/config_file.rs @@ -112,7 +112,6 @@ pub struct HttpServerConfig { pub enum LogSource { 
#[serde(rename = "fluent-bit")] FluentBit, - #[cfg(feature = "systemd")] #[serde(rename = "journald")] Journald, } @@ -135,6 +134,8 @@ pub struct LogsConfig { pub storage: StorageConfig, pub source: LogSource, + + pub level_mapping: LevelMappingConfig, } #[derive(Serialize, Deserialize, Debug, Clone, Copy)] @@ -222,7 +223,7 @@ fn default_connection_check_protocol() -> ConnectionCheckProtocol { #[derive(Serialize, Clone, Deserialize, Debug)] pub struct SessionConfig { pub name: SessionName, - pub captured_metrics: Vec, + pub captured_metrics: HashSet, } #[derive(Serialize, Deserialize, Debug)] @@ -240,6 +241,24 @@ pub struct SystemMetricConfig { pub network_interfaces: Option>, } +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct LevelMappingConfig { + pub enable: bool, + pub regex: Option, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct LevelMappingRegex { + pub emergency: Option, + pub alert: Option, + pub critical: Option, + pub error: Option, + pub warning: Option, + pub notice: Option, + pub info: Option, + pub debug: Option, +} + use flate2::Compression; use serde_json::Value; use std::fs; diff --git a/memfaultd/src/config/mod.rs b/memfaultd/src/config/mod.rs index c5c6e83..6dbb6aa 100644 --- a/memfaultd/src/config/mod.rs +++ b/memfaultd/src/config/mod.rs @@ -2,6 +2,7 @@ // Copyright (c) Memfault, Inc. // See License.txt for details use eyre::eyre; +use std::collections::HashSet; use std::net::SocketAddr; use std::time::Duration; use std::{ @@ -9,6 +10,7 @@ use std::{ sync::{Arc, RwLock}, }; +use crate::metrics::DiskSpaceMetricsConfig; use crate::{ network::{NetworkClient, NetworkConfig}, util::{DiskBacked, UnwrapOrDie, UpdateStatus}, @@ -34,11 +36,16 @@ pub use self::{ device_info::{DeviceInfo, DeviceInfoDefaultsImpl, DeviceInfoWarning}, }; -use crate::mar::MarEntryBuilder; -use crate::mar::Metadata; +use crate::{ + mar::{MarEntryBuilder, Metadata}, + metrics::ProcessMetricsConfig, +}; + use eyre::{Context, Result}; mod config_file; +pub use config_file::{LevelMappingConfig, LevelMappingRegex}; + mod device_config; mod device_info; mod utils; @@ -266,9 +273,51 @@ impl Config { .poll_interval_seconds } + pub fn system_metric_monitored_processes(&self) -> ProcessMetricsConfig { + match self + .config_file + .metrics + .system_metric_collection + .processes + .as_ref() + { + Some(processes) => { + let mut processes = processes.clone(); + processes.insert("memfaultd".to_string()); + ProcessMetricsConfig::Processes(processes) + } + None => ProcessMetricsConfig::Auto, + } + } + + pub fn system_metric_disk_space_config(&self) -> DiskSpaceMetricsConfig { + match self + .config_file + .metrics + .system_metric_collection + .disk_space + .as_ref() + { + Some(mounts) => DiskSpaceMetricsConfig::Disks(mounts.clone()), + None => DiskSpaceMetricsConfig::Auto, + } + } + + pub fn system_metric_network_interfaces_config(&self) -> Option<&HashSet> { + self.config_file + .metrics + .system_metric_collection + .network_interfaces + .as_ref() + } + pub fn system_metric_config(&self) -> SystemMetricConfig { self.config_file.metrics.system_metric_collection.clone() } + + pub fn log_extraction_config(&self) -> &LevelMappingConfig { + &self.config_file.logs.level_mapping + } } #[cfg(test)] diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__empty_object.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__empty_object.snap index 218d1d4..a8c746f 100644 --- 
a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__empty_object.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__empty_object.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -63,12 +67,14 @@ expression: content "metrics": { "enable_daily_heartbeats": false, "system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__metrics_config.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__metrics_config.snap index 695f069..4d1c30a 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__metrics_config.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__metrics_config.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -75,6 +79,8 @@ expression: content "eth1" ] }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_connectivity_monitor.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_connectivity_monitor.snap index 5b0f9df..343bd07 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_connectivity_monitor.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_connectivity_monitor.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -73,12 +77,14 @@ expression: content "metrics": { "enable_daily_heartbeats": false, "system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_coredump_capture_strategy_threads.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_coredump_capture_strategy_threads.snap index 218d1d4..a8c746f 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_coredump_capture_strategy_threads.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_coredump_capture_strategy_threads.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -63,12 +67,14 @@ expression: content "metrics": { "enable_daily_heartbeats": false, 
"system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_log_to_metrics_rules.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_log_to_metrics_rules.snap index 0d04ec3..782e4d8 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_log_to_metrics_rules.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_log_to_metrics_rules.snap @@ -65,7 +65,11 @@ expression: content ] }, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -80,12 +84,14 @@ expression: content "metrics": { "enable_daily_heartbeats": false, "system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_partial_logs.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_partial_logs.snap index 218d1d4..a8c746f 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_partial_logs.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_partial_logs.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -63,12 +67,14 @@ expression: content "metrics": { "enable_daily_heartbeats": false, "system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_sessions.snap b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_sessions.snap index fb69987..6915d60 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_sessions.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__with_sessions.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -63,20 +67,21 @@ expression: content { "name": "test-session", "captured_metrics": [ - "cpu.total", - "statsd.wefaultd.blendpower" + "cpu.total" ] } ], "metrics": { "enable_daily_heartbeats": false, "system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__without_coredump_compression.snap 
b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__without_coredump_compression.snap index 94a05df..7d4d4e7 100644 --- a/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__without_coredump_compression.snap +++ b/memfaultd/src/config/snapshots/memfaultd__config__config_file__test__without_coredump_compression.snap @@ -48,7 +48,11 @@ expression: content "max_lines_per_minute": 500, "log_to_metrics": null, "storage": "persist", - "source": "fluent-bit" + "source": "journald", + "level_mapping": { + "enable": false, + "regex": null + } }, "mar": { "mar_file_max_size_kib": 10240, @@ -63,12 +67,14 @@ expression: content "metrics": { "enable_daily_heartbeats": false, "system_metric_collection": { - "enable": false, + "enable": true, "poll_interval_seconds": 10, "processes": null, "disk_space": null, "network_interfaces": null }, - "statsd_server": null + "statsd_server": { + "bind_address": "127.0.0.1:8125" + } } } diff --git a/memfaultd/src/config/test-config/with_sessions.json b/memfaultd/src/config/test-config/with_sessions.json index 8599462..ee8f0a3 100644 --- a/memfaultd/src/config/test-config/with_sessions.json +++ b/memfaultd/src/config/test-config/with_sessions.json @@ -2,7 +2,7 @@ "sessions": [ { "name": "test-session", - "captured_metrics": ["cpu.total", "statsd.wefaultd.blendpower"] + "captured_metrics": ["cpu.total"] } ] } diff --git a/memfaultd/src/coredump/mod.rs b/memfaultd/src/coredump/mod.rs index e57d58d..357c780 100644 --- a/memfaultd/src/coredump/mod.rs +++ b/memfaultd/src/coredump/mod.rs @@ -14,7 +14,7 @@ pub fn coredump_configure_kernel(config_path: &Path) -> Result<()> { write( "/proc/sys/kernel/core_pattern", format!( - "|/usr/sbin/memfault-core-handler -c {} %P", + "|/usr/sbin/memfault-core-handler -c {} %P %e", config_path.display() ), ) diff --git a/memfaultd/src/fluent_bit/mod.rs b/memfaultd/src/fluent_bit/mod.rs index d636089..7f2865c 100644 --- a/memfaultd/src/fluent_bit/mod.rs +++ b/memfaultd/src/fluent_bit/mod.rs @@ -21,12 +21,15 @@ use std::sync::mpsc::{Receiver, SyncSender}; use std::{collections::HashMap, net::SocketAddr}; use chrono::{DateTime, Utc}; -use eyre::Result; +use eyre::{eyre, Error, Result}; use log::warn; use rmp_serde::Deserializer; use serde::{Deserialize, Serialize}; -use crate::{config::Config, logs::log_entry::LogEntry}; +use crate::{ + config::Config, + logs::log_entry::{LogData, LogEntry}, +}; use crate::{ logs::log_entry::LogValue, util::tcp_server::{TcpConnectionHandler, TcpNullConnectionHandler, ThreadedTcpServer}, }; @@ -41,6 +44,22 @@ pub enum FluentdValue { Float(f64), } +impl FluentdValue { + pub fn into_string(self) -> Option<String> { + match self { + FluentdValue::String(s) => Some(s), + FluentdValue::Float(_) => None, + } + } + + pub fn into_float(self) -> Option<f64> { + match self { + FluentdValue::Float(f) => Some(f), + FluentdValue::String(_) => None, + } + } +} + impl From<FluentdValue> for LogValue { fn from(value: FluentdValue) -> Self { match value { @@ -56,10 +75,33 @@ pub struct FluentdMessage( pub HashMap<String, FluentdValue>, ); -impl From<FluentdMessage> for LogEntry { - fn from(value: FluentdMessage) -> Self { - let data = value.1.into_iter().map(|(k, v)| (k, v.into())).collect(); - LogEntry { ts: value.0, data } +impl TryFrom<FluentdMessage> for LogEntry { + type Error = Error; + + fn try_from(mut value: FluentdMessage) -> Result<Self, Self::Error> { + let message = value + .1 + .remove("MESSAGE") + .and_then(|v| v.into_string()) + .ok_or_else(|| eyre!("No message in log entry"))?; + let pid = value.1.remove("_PID").and_then(|v| v.into_string()); + let systemd_unit = value + .1
+ .remove("_SYSTEMD_UNIT") + .and_then(|v| v.into_string()); + let priority = value.1.remove("PRIORITY").and_then(|v| v.into_string()); + + let extra_fields = value.1.into_iter().map(|(k, v)| (k, v.into())).collect(); + let data = LogData { + message, + pid, + systemd_unit, + priority, + original_priority: None, + extra_fields, + }; + + Ok(LogEntry { ts: value.0, data }) } } diff --git a/memfaultd/src/lib.rs b/memfaultd/src/lib.rs index 3b1ef17..556cb86 100644 --- a/memfaultd/src/lib.rs +++ b/memfaultd/src/lib.rs @@ -4,7 +4,6 @@ // See License.txt for details pub mod cli; -#[cfg(feature = "collectd")] mod collectd; pub mod config; #[cfg(feature = "coredump")] diff --git a/memfaultd/src/logs/fluent_bit_adapter.rs b/memfaultd/src/logs/fluent_bit_adapter.rs index c2249c1..b5bce0a 100644 --- a/memfaultd/src/logs/fluent_bit_adapter.rs +++ b/memfaultd/src/logs/fluent_bit_adapter.rs @@ -27,16 +27,13 @@ impl FluentBitAdapter { } /// Convert a FluentdMessage into a serde_json::Value that we can log. - /// Returns None when this message should be filtered out. + /// + /// Returns None when this message should be filtered out. This can happen + /// if the message does not contain a `MESSAGE` field. Indicating that it is + /// not a log message. fn convert_message(msg: FluentdMessage, extra_fields: &[String]) -> Option { - if !msg.1.contains_key("MESSAGE") { - // We are only interested in log messages. They will have a 'MESSAGE' key. - // Metrics do not have the MESSAGE key. - return None; - } - - let mut log_entry = LogEntry::from(msg); - log_entry.filter_fields(extra_fields); + let mut log_entry = LogEntry::try_from(msg).ok()?; + log_entry.filter_extra_fields(extra_fields); Some(log_entry) } diff --git a/memfaultd/src/logs/headroom.rs b/memfaultd/src/logs/headroom.rs index 6302849..cfd92c2 100644 --- a/memfaultd/src/logs/headroom.rs +++ b/memfaultd/src/logs/headroom.rs @@ -5,8 +5,8 @@ use crate::{ logs::log_file::{LogFile, LogFileControl}, util::disk_size::DiskSize, }; +use chrono::{DateTime, Utc}; use eyre::Result; -use serde_json::Value; #[derive(Debug)] enum Headroom { @@ -20,7 +20,7 @@ enum Headroom { pub trait HeadroomCheck { fn check( &mut self, - log_timestamp: Option<&Value>, + log_timestamp: &DateTime, log_file_control: &mut impl LogFileControl, ) -> Result; } @@ -55,7 +55,7 @@ impl HeadroomCheck for HeadroomLimiter { /// It only returns an error if there is an error writing the "Dropped N logs" message. fn check( &mut self, - log_timestamp: Option<&Value>, + log_timestamp: &DateTime, log_file_control: &mut impl LogFileControl, ) -> Result { let available = (self.get_available_space)()?; @@ -67,7 +67,8 @@ impl HeadroomCheck for HeadroomLimiter { // Best-effort warning log & flush. If this fails, just keep going. let current_log = log_file_control.current_log()?; let _ = current_log.write_log( - log_timestamp.cloned(), + *log_timestamp, + "WARN", match ( available.bytes >= self.min_headroom.bytes, available.inodes >= self.min_headroom.inodes, @@ -110,7 +111,8 @@ impl HeadroomCheck for HeadroomLimiter { ) => { let current_log = log_file_control.current_log()?; current_log.write_log( - log_timestamp.cloned(), + *log_timestamp, + "INFO", format!( "Recovered from low disk space. 
Dropped {} logs.", num_dropped_logs @@ -127,26 +129,27 @@ impl HeadroomCheck for HeadroomLimiter { #[cfg(test)] mod tests { - use crate::util::disk_size::DiskSize; + use crate::{logs::log_entry::LogEntry, util::disk_size::DiskSize}; use std::sync::{ atomic::{AtomicU64, Ordering}, Arc, }; use super::*; + use chrono::TimeZone; use eyre::eyre; use rstest::{fixture, rstest}; #[rstest] fn returns_true_if_headroom_ok_and_stays_ok(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl::default(); fixture.set_available_space(MIN_HEADROOM); // Enough headroom: check() returns true and no calls to log_file_control are made: assert!(fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); assert_eq!(0, log_file_control.logs_written.len()); assert_eq!(0, log_file_control.flush_count); @@ -155,14 +158,14 @@ mod tests { #[rstest] fn log_upon_enter_and_exit_headroom_space_shortage(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl::default(); // Enter headroom shortage: check() returns false: fixture.set_available_space(MIN_HEADROOM - 1); assert!(!fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); // Check that the warning log was written: @@ -175,14 +178,14 @@ mod tests { // Still not enough headroom: check() returns false: assert!(!fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); // Recover from headroom shortage: check() returns true again: fixture.set_available_space(MIN_HEADROOM); assert!(fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); // Check that the "recovered" log was written: @@ -193,14 +196,14 @@ mod tests { #[rstest] fn log_upon_enter_and_exit_headroom_node_shortage(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl::default(); // Enter headroom shortage: check() returns false: fixture.set_available_inodes(MIN_INODES - 1); assert!(!fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); // Check that the warning log was written: @@ -214,14 +217,14 @@ mod tests { // Still not enough headroom: check() returns false: assert!(!fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); // Recover from headroom shortage: check() returns true again: fixture.set_available_inodes(MIN_INODES); assert!(fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap()); // Check that the "recovered" log was written: @@ -232,7 +235,7 @@ mod tests { #[rstest] fn rotate_once_only_entering_headroom_shortage(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl { // Make log_file_control.rotate_if_needed() return Ok(true): rotate_return: Some(true), @@ -243,28 +246,28 @@ mod tests { fixture.set_available_space(MIN_HEADROOM - 1); fixture .limiter - .check(Some(&log_timestamp), &mut 
log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 1); // Check again. Rotation should not be attempted again: fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 1); } #[rstest] fn rotate_once_only_during_headroom_shortage(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl::default(); // Enter headroom shortage: fixture.set_available_space(MIN_HEADROOM - 1); fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 0); @@ -274,21 +277,21 @@ mod tests { // Check again. Rotation should be attempted again: fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 1); // Check again. Rotation should not be attempted again: fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 1); } #[rstest] fn retry_rotate_after_failure(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl { // Make log_file_control.rotate_if_needed() return Err(...): rotate_return: None, @@ -299,7 +302,7 @@ mod tests { fixture.set_available_space(MIN_HEADROOM - 1); fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 0); @@ -308,39 +311,39 @@ mod tests { log_file_control.rotate_return = Some(true); fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); assert_eq!(log_file_control.rotation_count, 1); } #[rstest] fn write_error_of_initial_warning_message_is_ignored(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl::default(); fixture.set_available_space(MIN_HEADROOM - 1); log_file_control.write_should_fail = true; assert!(fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .is_ok()); } #[rstest] fn write_error_of_recovery_log_message_is_bubbled_up(mut fixture: Fixture) { - let log_timestamp = Value::from(12345); + let log_timestamp = build_date_time(); let mut log_file_control = FakeLogFileControl::default(); fixture.set_available_space(MIN_HEADROOM - 1); fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .unwrap(); fixture.set_available_space(MIN_HEADROOM); log_file_control.write_should_fail = true; assert!(fixture .limiter - .check(Some(&log_timestamp), &mut log_file_control) + .check(&log_timestamp, &mut log_file_control) .is_err()); } @@ -370,7 +373,7 @@ mod tests { } impl LogFile for FakeLogFileControl { - fn write_json_line(&mut self, json: Value) -> Result<()> { + fn write_json_line(&mut self, json: LogEntry) -> Result<()> { if self.write_should_fail { Err(eyre!("Write failed")) } else { @@ -462,4 +465,8 @@ mod tests { available_space, } } + + fn 
build_date_time() -> DateTime { + Utc.with_ymd_and_hms(1990, 12, 16, 12, 0, 0).unwrap() + } } diff --git a/memfaultd/src/logs/journald_parser.rs b/memfaultd/src/logs/journald_parser.rs index 2332a58..3e8915f 100644 --- a/memfaultd/src/logs/journald_parser.rs +++ b/memfaultd/src/logs/journald_parser.rs @@ -2,7 +2,7 @@ // Copyright (c) Memfault, Inc. // See License.txt for details use chrono::{DateTime, NaiveDateTime, Utc}; -use eyre::{eyre, Result}; +use eyre::{eyre, Error, Result}; use libc::free; use nix::poll::{poll, PollFd}; use serde::Serialize; @@ -18,7 +18,7 @@ use memfaultc_sys::systemd::{ sd_journal_process, sd_journal_seek_cursor, }; -use super::log_entry::{LogEntry, LogValue}; +use super::log_entry::{LogData, LogEntry, LogValue}; use crate::util::system::read_system_boot_id; /// A trait for reading journal entries from the systemd journal. /// @@ -269,18 +269,38 @@ impl From for JournalEntry { } } -impl From for LogEntry { - fn from(entry: JournalEntry) -> Self { +impl TryFrom for LogEntry { + type Error = Error; + + fn try_from(mut entry: JournalEntry) -> Result { let ts = entry.ts; - let data = entry + + let fields = &mut entry.fields; + let message = fields + .remove("MESSAGE") + .ok_or_else(|| eyre!("Journal entry is missing MESSAGE field"))?; + + let pid = fields.remove("_PID"); + let systemd_unit = fields.remove("_SYSTEMD_UNIT"); + let priority = fields.remove("PRIORITY"); + + let extra_fields = entry .fields .into_iter() .fold(HashMap::new(), |mut acc, (k, v)| { acc.insert(k, LogValue::String(v)); acc }); + let data = LogData { + message, + pid, + systemd_unit, + priority, + original_priority: None, + extra_fields, + }; - LogEntry { ts, data } + Ok(LogEntry { ts, data }) } } diff --git a/memfaultd/src/logs/journald_provider.rs b/memfaultd/src/logs/journald_provider.rs index 55b8713..55ee421 100644 --- a/memfaultd/src/logs/journald_provider.rs +++ b/memfaultd/src/logs/journald_provider.rs @@ -31,10 +31,15 @@ impl JournaldLogProvider { } fn run_once(&mut self) -> Result<()> { - for entry in self.journal.iter() { - let mut log_entry = LogEntry::from(entry); + for entry in self + .journal + .iter() + .filter(|e| e.fields.contains_key("MESSAGE")) + { + // We would only fail here if 'MESSAGE' is not present. Which we verified above. 
+ let mut log_entry = LogEntry::try_from(entry)?; // TODO: Add support for filtering additional fields - log_entry.filter_fields(&[]); + log_entry.filter_extra_fields(&[]); if let Err(e) = self.entry_sender.send(log_entry) { return Err(eyre!("Journald channel dropped: {}", e)); diff --git a/memfaultd/src/logs/log_collector.rs b/memfaultd/src/logs/log_collector.rs index 9392b4b..7f6c449 100644 --- a/memfaultd/src/logs/log_collector.rs +++ b/memfaultd/src/logs/log_collector.rs @@ -10,11 +10,11 @@ use std::time::Duration; use std::{fs, thread}; use std::{path::PathBuf, sync::Mutex}; +use chrono::{DateTime, Utc}; use eyre::{eyre, Context, Result}; use flate2::Compression; use log::{error, trace, warn}; use serde::{Deserialize, Serialize}; -use serde_json::Value; use tiny_http::{Header, Method, Request, Response, ResponseBox, StatusCode}; use crate::config::Config; @@ -30,10 +30,17 @@ use crate::{metrics::MetricsMBox, util::rate_limiter::RateLimiter}; pub const CRASH_LOGS_URL: &str = "/api/v1/crash-logs"; -use super::log_entry::LogEntry; -#[cfg(feature = "log-to-metrics")] +#[cfg(feature = "regex")] use super::log_to_metrics::LogToMetrics; +#[cfg(feature = "regex")] +use super::log_level_mapper::LogLevelMapper; + +#[cfg(feature = "regex")] +use crate::config::LevelMappingConfig; + +use super::log_entry::LogEntry; + pub struct LogCollector { inner: Arc>>>, } @@ -53,8 +60,7 @@ impl LogCollector { log_config: LogCollectorConfig, mut on_log_completion: R, headroom_limiter: H, - #[cfg_attr(not(feature = "log-to-metrics"), allow(unused_variables))] - metrics_mbox: MetricsMBox, + #[cfg_attr(not(feature = "regex"), allow(unused_variables))] metrics_mbox: MetricsMBox, ) -> Result { fs::create_dir_all(&log_config.log_tmp_path).wrap_err_with(|| { format!( @@ -65,7 +71,12 @@ impl LogCollector { // Collect any leftover logfiles in the tmp folder let next_cid = recover_old_logs(&log_config.log_tmp_path, &mut on_log_completion)?; - + #[cfg(feature = "regex")] + let level_mapper = if log_config.level_mapping_config.enable { + Some(LogLevelMapper::try_from(&log_config.level_mapping_config)?) + } else { + None + }; let in_memory_lines = if log_config.in_memory_lines > Self::MAX_IN_MEMORY_LINES { warn!( "Too many lines captured in coredump ({}), clamping to {}", @@ -88,10 +99,12 @@ impl LogCollector { )?, rate_limiter: RateLimiter::new(log_config.max_lines_per_minute), headroom_limiter, - #[cfg(feature = "log-to-metrics")] + #[cfg(feature = "regex")] log_to_metrics: LogToMetrics::new(log_config.log_to_metrics_rules, metrics_mbox), log_queue: CircularQueue::new(in_memory_lines), storage_config: log_config.storage_config, + #[cfg(feature = "regex")] + level_mapper, }))), }) } @@ -107,14 +120,7 @@ impl LogCollector { Ok(mut inner_opt) => { match &mut *inner_opt { Some(inner) => { - let log_val = match serde_json::to_value(line) { - Ok(val) => val, - Err(e) => { - warn!("Error converting log to json: {}", e); - continue; - } - }; - if let Err(e) = inner.process_log_record(log_val) { + if let Err(e) = inner.process_log_record(line) { warn!("Error writing log: {:?}", e); } } @@ -194,15 +200,15 @@ impl Drop for LogCollector { /// The log collector keeps one Inner struct behind a Arc> so it can be /// shared by multiple threads. struct Inner { - // We use an Option here because we have no typed-guarantee that every - // log message will include a `ts` key. 
- rate_limiter: RateLimiter<Option<Value>>, + rate_limiter: RateLimiter<DateTime<Utc>>, log_file_control: LogFileControlImpl, headroom_limiter: H, - #[cfg(feature = "log-to-metrics")] + #[cfg(feature = "regex")] log_to_metrics: LogToMetrics, - log_queue: CircularQueue<Value>, + log_queue: CircularQueue<LogEntry>, storage_config: StorageConfig, + #[cfg(feature = "regex")] + level_mapper: Option<LogLevelMapper>, } impl Inner { @@ -210,17 +216,21 @@ impl Inner { // mutex on the Inner object. // Be careful to not try to acquire other mutexes here to avoid a // dead-lock. Everything we need should be in Inner. - fn process_log_record(&mut self, log: Value) -> Result<()> { - let log_timestamp = log.get("ts"); + #[cfg_attr(not(feature = "regex"), allow(unused_mut))] + fn process_log_record(&mut self, mut log: LogEntry) -> Result<()> { + #[cfg(feature = "regex")] + if let Some(level_mapper) = &self.level_mapper.as_mut() { + level_mapper.map_log(&mut log)?; + } - #[cfg(feature = "log-to-metrics")] + #[cfg(feature = "regex")] if let Err(e) = self.log_to_metrics.process(&log) { warn!("Error processing log to metrics: {:?}", e); } if !self .headroom_limiter - .check(log_timestamp, &mut self.log_file_control)? + .check(&log.ts, &mut self.log_file_control)? { return Ok(()); } @@ -236,11 +246,12 @@ impl Inner { let logfile = self.log_file_control.current_log()?; self.rate_limiter - .run_within_limits(log_timestamp.cloned(), |rate_limited_calls| { + .run_within_limits(log.ts, |rate_limited_calls| { // Print a message if some previous calls were rate limited. if let Some(limited) = rate_limited_calls { logfile.write_log( limited.latest_call, + "WARN", format!("Memfaultd rate limited {} messages.", limited.count), )?; } @@ -262,7 +273,11 @@ impl Inner { } pub fn get_log_queue(&mut self) -> Result<Vec<String>> { - let logs = self.log_queue.iter().map(|v| v.to_string()).collect(); + let logs = self + .log_queue + .iter() + .map(serde_json::to_string) + .collect::<Result<Vec<_>, _>>()?; Ok(logs) } @@ -285,7 +300,7 @@ pub struct LogCollectorConfig { max_lines_per_minute: NonZeroU32, /// Rules to convert logs to metrics - #[cfg_attr(not(feature = "log-to-metrics"), allow(dead_code))] + #[cfg_attr(not(feature = "regex"), allow(dead_code))] log_to_metrics_rules: Vec<LogToMetricRule>, /// Maximum number of lines to keep in memory @@ -293,6 +308,9 @@ pub struct LogCollectorConfig { /// Whether or not to persist log lines storage_config: StorageConfig, + + #[cfg(feature = "regex")] + level_mapping_config: LevelMappingConfig, } impl From<&Config> for LogCollectorConfig { @@ -312,6 +330,8 @@ impl From<&Config> for LogCollectorConfig { .unwrap_or_default(), in_memory_lines: config.config_file.coredump.log_lines, storage_config: config.config_file.logs.storage, + #[cfg(feature = "regex")] + level_mapping_config: config.config_file.logs.level_mapping.clone(), } } } @@ -383,16 +403,19 @@ mod tests { use std::{io::Write, path::PathBuf, time::Duration}; use std::{mem::replace, num::NonZeroU32}; - use crate::logs::{ - completed_log::CompletedLog, - log_file::{LogFile, LogFileControl}, - }; use crate::test_utils::setup_logger; + use crate::{ + config::LevelMappingConfig, + logs::{ + completed_log::CompletedLog, + log_file::{LogFile, LogFileControl}, + }, + }; use crate::{logs::headroom::HeadroomCheck, util::circular_queue::CircularQueue}; + use chrono::{DateTime, Duration as ChronoDuration, Utc}; use eyre::Context; use flate2::Compression; use rstest::{fixture, rstest}; - use serde_json::{json, Value}; use ssf::ServiceMock; use tempfile::{tempdir, TempDir}; use tiny_http::{Method, TestRequest}; @@ -404,7 +427,7 @@ mod tests {
#[rstest] fn write_logs_to_disk(mut fixture: LogFixture) { - fixture.write_log(json!({"ts": 0, "MESSAGE": "xxx"})); + fixture.write_log(test_line()); assert_eq!(fixture.count_log_files(), 1); assert_eq!(fixture.on_log_completion_calls(), 0); } @@ -414,16 +437,20 @@ mod tests { #[case(100)] #[case(150)] fn circular_log_queue(#[case] mut log_count: usize, mut fixture: LogFixture) { - for i in 0..log_count { - fixture.write_log(json!({"ts": i, "MESSAGE": "xxx"})); + let delta = ChronoDuration::seconds(1); + let starting_time_str = "2024-09-11T12:34:56Z"; + let mut time = starting_time_str.parse::>().unwrap(); + for _ in 0..log_count { + time = time.checked_add_signed(delta).unwrap(); + let log = LogEntry::new_with_message_and_ts("test", time); + fixture.write_log(log); } let log_queue = fixture.get_log_queue(); // Assert that the last value in the queue has the correct timestamp let last_val = log_queue.back().unwrap(); - let ts = last_val.get("ts").unwrap().as_u64().unwrap(); - assert_eq!(ts, log_count as u64 - 1); + assert_eq!(last_val.ts, time); // Clamp the log_count to the maximum size of the queue log_count = min(log_count, IN_MEMORY_LINES); @@ -441,6 +468,10 @@ mod tests { log_to_metrics_rules: vec![], in_memory_lines: 1000, storage_config: StorageConfig::Persist, + level_mapping_config: LevelMappingConfig { + enable: false, + regex: None, + }, }; let mut collector = LogCollector::open( @@ -469,7 +500,7 @@ mod tests { #[rstest] fn do_not_create_newfile_on_close(mut fixture: LogFixture) { - fixture.write_log(json!({"ts": 0, "MESSAGE": "xxx"})); + fixture.write_log(test_line()); fixture.collector.close_internal().expect("error closing"); // 0 because the fixture "on_log_completion" moves the file out assert_eq!(fixture.count_log_files(), 0); @@ -477,7 +508,7 @@ mod tests { } #[rstest] - #[case(StorageConfig::Persist, 25)] + #[case(StorageConfig::Persist, 60)] #[case(StorageConfig::Disabled, 0)] fn log_persistence( #[case] storage_config: StorageConfig, @@ -487,7 +518,7 @@ mod tests { ) { fixture.set_log_config(storage_config); - fixture.write_log(json!({"ts": 0, "MESSAGE": "xxx"})); + fixture.write_log(test_line()); fixture.flush_log_writes().unwrap(); assert_eq!(fixture.count_log_files(), 1); @@ -496,7 +527,7 @@ mod tests { #[rstest] fn forced_rotation_with_nonempty_log(mut fixture: LogFixture) { - fixture.write_log(json!({"ts": 0, "MESSAGE": "xxx"})); + fixture.write_log(test_line()); fixture.collector.flush_logs().unwrap(); @@ -561,12 +592,16 @@ mod tests { #[rstest] fn http_handler_log_get(mut fixture: LogFixture) { + let date_str = "2024-09-11T12:34:56Z"; let logs = vec![ - json!({"ts": 0, "MESSAGE": "xxx"}), - json!({"ts": 1, "MESSAGE": "yyy"}), - json!({"ts": 2, "MESSAGE": "zzz"}), + LogEntry::new_with_message_and_ts("xxx", date_str.parse::>().unwrap()), + LogEntry::new_with_message_and_ts("yyy", date_str.parse::>().unwrap()), + LogEntry::new_with_message_and_ts("zzz", date_str.parse::>().unwrap()), ]; - let log_strings = logs.iter().map(|l| l.to_string()).collect::>(); + let log_strings = logs + .iter() + .map(|l| serde_json::to_string(l).unwrap()) + .collect::>(); for log in &logs { fixture.write_log(log.clone()); @@ -641,7 +676,7 @@ mod tests { std::fs::read_dir(&self.logs_dir).unwrap().count() } - fn write_log(&mut self, line: Value) { + fn write_log(&mut self, line: LogEntry) { self.collector .with_mut_inner(|inner| inner.process_log_record(line)) .unwrap(); @@ -665,7 +700,7 @@ mod tests { self.on_log_completion_receiver.try_iter().count() } - fn get_log_queue(&mut self) -> 
CircularQueue { + fn get_log_queue(&mut self) -> CircularQueue { self.collector .with_mut_inner(|inner| Ok(replace(&mut inner.log_queue, CircularQueue::new(100)))) .unwrap() @@ -691,7 +726,7 @@ mod tests { impl HeadroomCheck for StubHeadroomLimiter { fn check( &mut self, - _log_timestamp: Option<&Value>, + _log_timestamp: &DateTime, _log_file_control: &mut impl LogFileControl, ) -> eyre::Result { Ok(true) @@ -708,6 +743,10 @@ mod tests { log_to_metrics_rules: vec![], in_memory_lines: IN_MEMORY_LINES, storage_config: StorageConfig::Persist, + level_mapping_config: LevelMappingConfig { + enable: false, + regex: None, + }, }; let (on_log_completion_sender, on_log_completion_receiver) = channel(); @@ -747,7 +786,8 @@ mod tests { } } - fn test_line() -> Value { - json!({"ts": 0, "MESSAGE": "xxx"}) + fn test_line() -> LogEntry { + let date_str = "2024-09-11T12:34:56Z"; + LogEntry::new_with_message_and_ts("xxx", date_str.parse::>().unwrap()) } } diff --git a/memfaultd/src/logs/log_entry.rs b/memfaultd/src/logs/log_entry.rs index 70f66c1..2994218 100644 --- a/memfaultd/src/logs/log_entry.rs +++ b/memfaultd/src/logs/log_entry.rs @@ -8,34 +8,107 @@ use serde::{Deserialize, Serialize}; use crate::util::serialization::datetime_to_rfc3339; -// journal fields that should always be captured by memfaultd: -// https://man7.org/linux/man-pages/man7/systemd.journal-fields.7.html -const ALWAYS_INCLUDE_KEYS: &[&str] = &["MESSAGE", "_PID", "_SYSTEMD_UNIT", "PRIORITY"]; +const MESSAGE_KEY: &str = "MESSAGE"; +const PID_KEY: &str = "_PID"; +const SYSTEMD_UNIT_KEY: &str = "_SYSTEMD_UNIT"; +const PRIORITY_KEY: &str = "PRIORITY"; +const ORIGINAL_PRIORITY_KEY: &str = "ORIGINAL_PRIORITY"; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] #[serde(untagged)] pub enum LogValue { String(String), Float(f64), } +#[derive(Debug, Serialize, Deserialize, Clone)] +/// Represents the structured format of a log entry +/// +/// Note that we will not serialize the fields that are `None` to save space. +pub struct LogData { + #[serde(rename = "MESSAGE")] + pub message: String, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "_PID")] + pub pid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "_SYSTEMD_UNIT")] + pub systemd_unit: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "PRIORITY")] + pub priority: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "ORIGINAL_PRIORITY")] + pub original_priority: Option, + #[serde(flatten)] + pub extra_fields: HashMap, +} + +impl LogData { + /// Returns the value of the field with the given key. + pub fn get_field(&self, key: &str) -> Option { + match key { + MESSAGE_KEY => Some(self.message.clone()), + PID_KEY => self.pid.clone(), + SYSTEMD_UNIT_KEY => self.systemd_unit.clone(), + PRIORITY_KEY => self.priority.clone(), + ORIGINAL_PRIORITY_KEY => self.original_priority.clone(), + _ => self.extra_fields.get(key).and_then(|v| match v { + LogValue::String(s) => Some(s.clone()), + LogValue::Float(_) => None, + }), + } + } +} + /// Represents a structured log that could come from a variety of sources. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct LogEntry { #[serde(with = "datetime_to_rfc3339")] pub ts: DateTime, - pub data: HashMap, + pub data: LogData, } impl LogEntry { /// Filter log fields to only include defaults and those specified in `extra_fields`. 
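// Editor's note: illustrative sketch, not part of the patch. It builds one of the
// structured `LogEntry` values defined in this diff and drops unwanted extra fields.
// Field values are made up; only items visible in this diff are used
// (`LogEntry`, `LogData`, `LogValue`, `filter_extra_fields`).
fn example_entry() -> LogEntry {
    let mut entry = LogEntry {
        ts: Utc::now(),
        data: LogData {
            message: "docker.service: Started Docker Application Container Engine.".to_string(),
            pid: Some("1234".to_string()),
            systemd_unit: Some("docker.service".to_string()),
            priority: Some("6".to_string()),
            original_priority: None,
            extra_fields: HashMap::from([(
                "SOME_EXTRA_KEY".to_string(),
                LogValue::String("XX".to_string()),
            )]),
        },
    };
    // Keep none of the extra fields; the typed journald fields stay as-is, and
    // `None` fields are omitted entirely when the entry is serialized
    // (see the `skip_serializing_if` attributes above).
    entry.filter_extra_fields(&[]);
    entry
}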
/// - /// This function modifies the log entry in place by removing fields that are not in the - /// `ALWAYS_INCLUDE_KEYS` list or in `extra_fields`. This is useful for reducing the size of - /// log entries sent to Memfault, as there are fields that are not useful or displayed. - pub fn filter_fields(&mut self, extra_fields: &[String]) { + /// This function modifies the log entry data to remove any extra fields that are not + /// specified by the user. + pub fn filter_extra_fields(&mut self, extra_fields: &[String]) { self.data - .retain(|k, _| ALWAYS_INCLUDE_KEYS.contains(&k.as_str()) || extra_fields.contains(k)); + .extra_fields + .retain(|k, _| extra_fields.contains(k)); + } +} + +#[cfg(test)] +impl LogEntry { + pub fn new_with_message(message: &str) -> Self { + LogEntry { + ts: Utc::now(), + data: LogData { + message: message.to_string(), + pid: None, + systemd_unit: None, + priority: None, + original_priority: None, + extra_fields: HashMap::new(), + }, + } + } + + pub fn new_with_message_and_ts(message: &str, ts: DateTime) -> Self { + LogEntry { + ts, + data: LogData { + message: message.to_string(), + pid: None, + systemd_unit: None, + priority: None, + original_priority: None, + extra_fields: HashMap::new(), + }, + } } } @@ -48,7 +121,6 @@ mod tests { use super::*; #[rstest] - #[case("empty", "{}", "")] #[case("only_message", r#"{"MESSAGE":"TEST" }"#, "")] #[case("extra_key", r#"{"MESSAGE":"TEST", "SOME_EXTRA_KEY":"XX" }"#, "")] #[case( @@ -73,7 +145,7 @@ mod tests { }; let extra_attributes = extras.split(',').map(String::from).collect::>(); - entry.filter_fields(&extra_attributes); + entry.filter_extra_fields(&extra_attributes); with_settings!({sort_maps => true}, { assert_json_snapshot!(test_name, entry); diff --git a/memfaultd/src/logs/log_file.rs b/memfaultd/src/logs/log_file.rs index 23ce6d9..ce2a393 100644 --- a/memfaultd/src/logs/log_file.rs +++ b/memfaultd/src/logs/log_file.rs @@ -5,24 +5,40 @@ //! 
use crate::logs::completed_log::CompletedLog; use crate::mar::CompressionAlgorithm; +use chrono::{DateTime, Utc}; use eyre::{eyre, Result, WrapErr}; use flate2::write::ZlibEncoder; use flate2::Compression; use log::{trace, warn}; -use serde_json::{json, Value}; use std::fs::{remove_file, File}; use std::io::{BufWriter, Write}; use std::path::{Path, PathBuf}; use std::time::{Duration, Instant}; use uuid::Uuid; +use std::collections::HashMap; + +use super::log_entry::{LogData, LogEntry}; + pub trait LogFile { - fn write_json_line(&mut self, json: Value) -> Result<()>; - fn write_log>(&mut self, ts: Option, msg: S) -> Result<()> { - self.write_json_line(json!({ - "ts": ts, - "data": { "MESSAGE": msg.as_ref() } - })) + fn write_json_line(&mut self, json: LogEntry) -> Result<()>; + fn write_log>( + &mut self, + ts: DateTime, + priority: &str, + msg: S, + ) -> Result<()> { + let data = LogData { + message: msg.as_ref().to_string(), + pid: None, + systemd_unit: None, + priority: Some(priority.to_string()), + original_priority: None, + extra_fields: HashMap::new(), + }; + + let log_entry = LogEntry { ts, data }; + self.write_json_line(log_entry) } fn flush(&mut self) -> Result<()>; } @@ -60,7 +76,7 @@ impl LogFileImpl { } impl LogFile for LogFileImpl { - fn write_json_line(&mut self, json: Value) -> Result<()> { + fn write_json_line(&mut self, json: LogEntry) -> Result<()> { let bytes = serde_json::to_vec(&json)?; let mut written = self.writer.write(&bytes)?; written += self.writer.write("\n".as_bytes())?; @@ -217,10 +233,13 @@ impl LogFileControl for LogFileControlImpl { mod tests { use std::io::Read; + use crate::logs::log_entry::LogValue; + use super::*; use flate2::bufread::ZlibDecoder; use rand::distributions::{Alphanumeric, DistString}; use rstest::rstest; + use serde_json::Value; use tempfile::tempdir; // We saw this bug when we tried switching to the rust-based backend for flate2 (miniz-oxide) @@ -239,7 +258,21 @@ mod tests { Alphanumeric.sample_string(&mut rand::thread_rng(), 16), Alphanumeric.sample_string(&mut rand::thread_rng(), 20), ); - log.write_json_line(json!({ "data": { "message": message, "prio": 42, "unit": "systemd"}, "ts": "2023-22-22T22:22:22Z"})).expect("error writing json line"); + let log_entry = LogEntry { + ts: "2024-09-11T12:34:56Z".parse().unwrap(), + data: LogData { + message, + pid: None, + systemd_unit: None, + priority: None, + original_priority: None, + extra_fields: [("unit".to_string(), LogValue::String("systemd".to_string()))] + .into_iter() + .collect(), + }, + }; + log.write_json_line(log_entry) + .expect("error writing json line"); count_lines += 1; } diff --git a/memfaultd/src/logs/log_level_mapper.rs b/memfaultd/src/logs/log_level_mapper.rs new file mode 100644 index 0000000..b522dd5 --- /dev/null +++ b/memfaultd/src/logs/log_level_mapper.rs @@ -0,0 +1,354 @@ +// +// Copyright (c) Memfault, Inc. +// See License.txt for details +//! Maps log levels from log messages based on a set of regex rules. +//! +//! The mapper will search for a log level in the message field of a log entry and set the level +//! field to the corresponding log level if a match is found. + +use std::borrow::Cow; + +use eyre::{eyre, Error, Result}; +use regex::{Error as RegexError, Regex}; + +use crate::config::LevelMappingConfig; + +use super::log_entry::LogEntry; + +const DEFAULT_EMERG_RULE: &str = + r"(?i)(\[EMERG(ENCY)?\]|EMERG(ENCY)?:||\{EMERG(ENCY)?\}|EMERG(ENCY)? 
)"; +const DEFAULT_ALERT_RULE: &str = r"(?i)(\[ALERT\]|ALERT:||\{ALERT\}|ALERT )"; +const DEFAULT_CRIT_RULE: &str = + r"(?i)(\[CRIT(ICAL)?\]|CRIT(ICAL)?:||\{CRIT(ICAL)?\}|CRIT(ICAL)? )"; +const DEFAULT_ERROR_RULE: &str = r"(?i)(\[ERR(OR)?\]|ERR(OR)?:||\{ERR(OR)?\}|ERR(OR)? )"; +const DEFAULT_WARN_RULE: &str = + r"(?i)(\[WARN(ING)?\]|WARN(ING)?:||\{WARN(ING)?\}|WARN(ING)? )"; +const DEFAULT_NOTICE_RULE: &str = r"(?i)(\[NOTICE\]|NOTICE:||\{NOTICE\}|NOTICE )"; +const DEFAULT_INFO_RULE: &str = r"(?i)(\[INFO\]|INFO:||\{INFO\}|INFO )"; +const DEFAULT_DEBUG_RULE: &str = + r"(?i)(\[DEBUG\]|DEBUG:||\{DEBUG\}|DEBUG |\[TRACE\]|TRACE:||\{TRACE\}|TRACE )"; + +const LEVEL_COUNT: usize = 8; + +const DEFAULT_REGEX_LIST: [&str; LEVEL_COUNT] = [ + DEFAULT_EMERG_RULE, + DEFAULT_ALERT_RULE, + DEFAULT_CRIT_RULE, + DEFAULT_ERROR_RULE, + DEFAULT_WARN_RULE, + DEFAULT_NOTICE_RULE, + DEFAULT_INFO_RULE, + DEFAULT_DEBUG_RULE, +]; + +/// Matches log levels in strings and updating metadata. +/// +/// This will be run on every log entry to determine the log level based on the message. +/// The regex matches will be matched in descending levels of severity, and the first match will +/// be used to set the log level. If no match is found we will default to the current log level, +/// or none if it is not set. +pub struct LogLevelMapper { + rules: [Regex; LEVEL_COUNT], +} + +impl LogLevelMapper { + pub fn map_log(&self, log: &mut LogEntry) -> Result<()> { + for (i, rule) in self.rules.iter().enumerate() { + if let Cow::Owned(mat) = rule.replace(&log.data.message, "") { + let data = &mut log.data; + data.original_priority = data.priority.take(); + data.priority = Some(i.to_string()); + data.message = mat; + break; + } + } + + Ok(()) + } +} + +impl TryFrom<&LevelMappingConfig> for LogLevelMapper { + type Error = Error; + + fn try_from(config: &LevelMappingConfig) -> Result { + let rules = config + .regex + .as_ref() + .map_or_else(build_default_rules, |level_regex| { + Ok([ + Regex::new( + level_regex + .emergency + .as_deref() + .unwrap_or(DEFAULT_EMERG_RULE), + )?, + Regex::new(level_regex.alert.as_deref().unwrap_or(DEFAULT_ALERT_RULE))?, + Regex::new(level_regex.critical.as_deref().unwrap_or(DEFAULT_CRIT_RULE))?, + Regex::new(level_regex.error.as_deref().unwrap_or(DEFAULT_ERROR_RULE))?, + Regex::new(level_regex.warning.as_deref().unwrap_or(DEFAULT_WARN_RULE))?, + Regex::new(level_regex.notice.as_deref().unwrap_or(DEFAULT_NOTICE_RULE))?, + Regex::new(level_regex.info.as_deref().unwrap_or(DEFAULT_INFO_RULE))?, + Regex::new(level_regex.debug.as_deref().unwrap_or(DEFAULT_DEBUG_RULE))?, + ]) + }); + + match rules { + Ok(rules) => Ok(Self { rules }), + Err(err) => Err(eyre!("Failed to build log level mapping rules: {}", err)), + } + } +} + +fn build_default_rules() -> Result<[Regex; LEVEL_COUNT]> { + build_rules(DEFAULT_REGEX_LIST) +} + +fn build_rules(rule_list: [&str; LEVEL_COUNT]) -> Result<[Regex; LEVEL_COUNT]> { + let regex = rule_list + .iter() + .map(|r| Regex::new(r)) + .collect::, RegexError>>()?; + + // Safe to unwrap since the number of regexes is fixed + Ok(regex.try_into().expect("Invalid number of regexes")) +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + + use crate::config::LevelMappingRegex; + use crate::logs::log_entry::LogData; + + use super::*; + + use chrono::Utc; + use itertools::Itertools; + use rstest::rstest; + + const EMERG_RULE: &str = r"EMERG"; + const ALERT_RULE: &str = r"ALERT"; + const CRIT_RULE: &str = r"CRIT"; + const ERR_RULE: &str = r"ERR"; + const WARN_RULE: &str = r"WARN"; 
+ const NOTICE_RULE: &str = r"NOTICE"; + const INFO_RULE: &str = r"INFO"; + const DEBUG_RULE: &str = r"DEBUG"; + const TEST_RULES: [&str; LEVEL_COUNT] = [ + EMERG_RULE, + ALERT_RULE, + CRIT_RULE, + ERR_RULE, + WARN_RULE, + NOTICE_RULE, + INFO_RULE, + DEBUG_RULE, + ]; + + #[rstest] + #[case(EMERG_RULE, "0")] + #[case(ALERT_RULE, "1")] + #[case(CRIT_RULE, "2")] + #[case(ERR_RULE, "3")] + #[case(WARN_RULE, "4")] + #[case(NOTICE_RULE, "5")] + #[case(INFO_RULE, "6")] + #[case(DEBUG_RULE, "7")] + fn test_level_match_happy_path(#[case] message: &str, #[case] expected_level: &str) { + let rules = build_rules(TEST_RULES).unwrap(); + let mapper = LogLevelMapper { rules }; + + let original_level = "8".to_string(); + let data = LogData { + message: message.to_string(), + pid: None, + systemd_unit: None, + priority: Some(original_level.clone()), + original_priority: None, + extra_fields: Default::default(), + }; + let mut entry = LogEntry { + ts: Utc::now(), + data, + }; + mapper.map_log(&mut entry).unwrap(); + + // Assert that the level is set + assert_eq!( + entry.data.priority.as_ref().unwrap().as_str(), + expected_level + ); + + // Assert that the original level is saved + assert_eq!( + entry.data.original_priority.as_ref().unwrap().as_str(), + original_level, + ); + } + + #[test] + fn test_no_match() { + let rules = build_rules(TEST_RULES).unwrap(); + let mapper = LogLevelMapper { rules }; + + let data = LogData { + message: "No match".to_string(), + pid: None, + systemd_unit: None, + priority: Some("8".to_string()), + original_priority: None, + extra_fields: Default::default(), + }; + let mut entry = LogEntry { + ts: Utc::now(), + data, + }; + mapper.map_log(&mut entry).unwrap(); + + // Assert that the level is not changed + assert_eq!(entry.data.priority.as_ref().unwrap().as_str(), "8"); + } + + #[test] + fn test_level_precedence() { + // Verify that the highest precedence rule is matched + let rule_strings = [ + "first", "other", "other", "other", "other", "other", "other", "other", + ]; + let rules = build_rules(rule_strings).unwrap(); + let mapper = LogLevelMapper { rules }; + + let mut entry = LogEntry::new_with_message("other"); + mapper.map_log(&mut entry).unwrap(); + + // Assert that the second rule is matched + assert_eq!(entry.data.priority.as_ref().unwrap().as_str(), "1"); + } + + #[rstest] + #[case( + r"\[.*\] \[ERROR\]", + "[2024-09-09 12:00:00] [ERROR] Something went wrong", + " Something went wrong" + )] + fn test_complex_regex( + #[case] regex: &str, + #[case] message: &str, + #[case] expected_message: &str, + ) { + let mut rule_strings = ["None"; LEVEL_COUNT]; + rule_strings[3] = regex; + + let rules = build_rules(rule_strings).unwrap(); + let mapper = LogLevelMapper { rules }; + + let mut entry = LogEntry::new_with_message(message); + mapper.map_log(&mut entry).unwrap(); + + // Assert that the level is set + assert_eq!(entry.data.priority.as_ref().unwrap().as_str(), "3"); + assert_eq!(entry.data.message.as_str(), expected_message); + } + + #[rstest] + #[case("INFO test message", " test message")] + #[case("something else WARN test message", "something else test message")] + #[case("NO MATCH FOUND", "NO MATCH FOUND")] + #[case("who would do this? EMERG", "who would do this? ")] + #[case("who would EMERG do this? EMERG", "who would do this? 
EMERG")] + fn test_match_extraction(#[case] message: &str, #[case] expected_message: &str) { + let rules = build_rules(TEST_RULES).unwrap(); + let mapper = LogLevelMapper { rules }; + + let mut entry = LogEntry::new_with_message(message); + mapper.map_log(&mut entry).unwrap(); + + // Assert that the level is set to INFO + assert_eq!(entry.data.message, expected_message); + } + + #[rstest] + fn test_default_rules() { + let level_map = [ + ("EMERG", 0), + ("EMERGENCY", 0), + ("ALERT", 1), + ("CRIT", 2), + ("ERROR", 3), + ("ERR", 3), + ("WARN", 4), + ("WARNING", 4), + ("NOTICE", 5), + ("INFO", 6), + ("DEBUG", 7), + ("TRACE", 7), + ]; + let prefix_patterns = ["LEVEL: ", "[LEVEL] ", " ", "{LEVEL} ", "LEVEL "]; + let message_map = level_map + .iter() + .flat_map(|(level, level_num)| { + // Make lower and uppercase versions of the level + [ + (level.to_lowercase(), *level_num), + (level.to_uppercase(), *level_num), + ] + }) + .cartesian_product(prefix_patterns.iter()) + .map(|((level, level_num), prefix_pattern)| { + let mut message = prefix_pattern.replace("LEVEL", &level); + message.push_str("test message"); + let expected_level = level_num.to_string(); + (message, expected_level) + }) + .collect::>(); + + let rules = build_default_rules().unwrap(); + let mapper = LogLevelMapper { rules }; + + for (message, expected_level) in message_map { + let mut entry = LogEntry::new_with_message(&message); + mapper + .map_log(&mut entry) + .unwrap_or_else(|_| panic!("Failed to map log string {:?}", message)); + + assert_eq!( + entry.data.priority.as_ref().unwrap().as_str(), + expected_level + ); + } + } + + #[rstest] + fn test_default_fallthrough(#[values(0, 1, 2, 3, 4, 5, 6, 7)] default_level: usize) { + let rule_strings = (0..LEVEL_COUNT) + .map(|idx| { + if idx == default_level { + None + } else { + Some("REGEX".to_string()) + } + }) + .collect::>(); + + let mapping_regex = LevelMappingRegex { + emergency: rule_strings[0].clone(), + alert: rule_strings[1].clone(), + critical: rule_strings[2].clone(), + error: rule_strings[3].clone(), + warning: rule_strings[4].clone(), + notice: rule_strings[5].clone(), + info: rule_strings[6].clone(), + debug: rule_strings[7].clone(), + }; + let level_config = LevelMappingConfig { + enable: true, + regex: Some(mapping_regex), + }; + let mapper = LogLevelMapper::try_from(&level_config).unwrap(); + + assert_eq!( + mapper.rules[default_level].as_str(), + DEFAULT_REGEX_LIST[default_level] + ); + } +} diff --git a/memfaultd/src/logs/log_to_metrics.rs b/memfaultd/src/logs/log_to_metrics.rs index 744a0ad..fac7352 100644 --- a/memfaultd/src/logs/log_to_metrics.rs +++ b/memfaultd/src/logs/log_to_metrics.rs @@ -7,14 +7,13 @@ use std::collections::HashMap; use eyre::Result; use log::{debug, warn}; use regex::Regex; -use serde_json::{Map, Value}; use crate::{ config::LogToMetricRule, metrics::{KeyedMetricReading, MetricsMBox}, }; -const SEARCH_FIELD: &str = "MESSAGE"; +use super::log_entry::{LogData, LogEntry}; pub struct LogToMetrics { rules: Vec, @@ -31,28 +30,27 @@ impl LogToMetrics { } } - pub fn process(&mut self, structured_log: &Value) -> Result<()> { - if let Some(data) = structured_log["data"].as_object() { - if !self.rules.is_empty() { - debug!("LogToMetrics: Processing log: {:?}", data); - for rule in &self.rules { - match rule { - LogToMetricRule::CountMatching { - pattern, - metric_name, - filter, - } => Self::apply_count_matching( - data, - pattern, - &mut self.regex_cache, - metric_name, - filter, - self.metrics_mbox.clone(), - )?, - } + pub fn process(&mut self, 
structured_log: &LogEntry) -> Result<()> { + if !self.rules.is_empty() { + debug!("LogToMetrics: Processing log: {:?}", structured_log); + for rule in &self.rules { + match rule { + LogToMetricRule::CountMatching { + pattern, + metric_name, + filter, + } => Self::apply_count_matching( + &structured_log.data, + pattern, + &mut self.regex_cache, + metric_name, + filter, + self.metrics_mbox.clone(), + )?, } } } + Ok(()) } @@ -68,7 +66,7 @@ impl LogToMetrics { } fn apply_count_matching( - data: &Map, + data: &LogData, pattern: &str, regex_cache: &mut HashMap, metric_name: &str, @@ -77,8 +75,8 @@ impl LogToMetrics { ) -> Result<()> { // Use filter to quickly disqualify a log entry for (key, value) in filter { - if let Some(log_value) = data.get(key) { - if log_value != value { + if let Some(log_value) = data.get_field(key) { + if log_value != *value { return Ok(()); } } else { @@ -90,30 +88,31 @@ impl LogToMetrics { Entry::Occupied(e) => e.into_mut(), Entry::Vacant(e) => e.insert(Regex::new(pattern)?), }; - if let Some(search_value) = data[SEARCH_FIELD].as_str() { - let captures = regex.captures(search_value); - debug!( - "LogToMetrics Pattern '{}'=> MATCH={} Captures={:?}", - &pattern, - captures.is_some(), - captures - ); - - if let Some(captures) = captures { - let metric_name_with_captures = - Self::get_metric_name_with_captures(metric_name, captures); - match metric_name_with_captures.parse() { - Ok(metric_key) => metrics_box - .send_and_forget(vec![KeyedMetricReading::increment_counter(metric_key)])?, - Err(e) => { - warn!( - "LogToMetrics suggested invalid metric name {}: {}", - metric_name_with_captures, e - ) - } + + let search_value = &data.message; + let captures = regex.captures(search_value); + debug!( + "LogToMetrics Pattern '{}'=> MATCH={} Captures={:?}", + &pattern, + captures.is_some(), + captures + ); + + if let Some(captures) = captures { + let metric_name_with_captures = + Self::get_metric_name_with_captures(metric_name, captures); + match metric_name_with_captures.parse() { + Ok(metric_key) => metrics_box + .send_and_forget(vec![KeyedMetricReading::increment_counter(metric_key)])?, + Err(e) => { + warn!( + "LogToMetrics suggested invalid metric name {}: {}", + metric_name_with_captures, e + ) } } } + Ok(()) } } @@ -121,13 +120,14 @@ impl LogToMetrics { #[cfg(test)] mod tests { use crate::{ + logs::log_entry::LogValue, metrics::{MetricValue, TakeMetrics}, test_utils::setup_logger, }; use super::*; + use chrono::Utc; use rstest::rstest; - use serde_json::json; use ssf::ServiceMock; #[rstest] @@ -135,47 +135,68 @@ mod tests { pattern: "foo".to_string(), metric_name: "foo".to_string(), filter: HashMap::default() - }], vec![json!({"MESSAGE": "foo"})], "foo", 1.0)] + }], vec![build_log_entry("foo", HashMap::default())], + "foo", + 1.0)] #[case(vec![LogToMetricRule::CountMatching { pattern: "session opened for user (\\w*)\\(uid=".to_string(), metric_name: "ssh_sessions_$1_count".to_string(), filter: HashMap::default() - }], vec![json!({"MESSAGE": "pam_unix(sshd:session): session opened for user thomas(uid=1000) by (uid=0)"})], "ssh_sessions_thomas_count", 1.0)] + }], vec![build_log_entry("session opened for user thomas(uid=1000)", HashMap::default())], + "ssh_sessions_thomas_count", + 1.0)] #[case(vec![LogToMetricRule::CountMatching { pattern: "(.*): Scheduled restart job, restart counter is at".to_string(), metric_name: "$1_restarts".to_string(), filter: HashMap::default() - }], vec![json!({"MESSAGE": /* systemd[1]: */"docker.service: Scheduled restart job, restart counter is at 
1."})], "docker.service_restarts", 1.0)] + }], vec![build_log_entry("docker.service: Scheduled restart job, restart counter is at 1.", HashMap::default())], + "docker.service_restarts", + 1.0)] #[case(vec![LogToMetricRule::CountMatching { pattern: "(.*): Scheduled restart job, restart counter is at".to_string(), metric_name: "$1_restarts".to_string(), filter: HashMap::default() }], vec![ - json!({"MESSAGE": /* systemd[1]: */"docker.service: Scheduled restart job, restart counter is at 1."}), - json!({"MESSAGE": /* systemd[1]: */"sshd.service: Scheduled restart job, restart counter is at 1."}), - json!({"MESSAGE": /* systemd[1]: */"docker.service: Scheduled restart job, restart counter is at 2."}), + build_log_entry("docker.service: Scheduled restart job, restart counter is at 1.", HashMap::default()), + build_log_entry("sshd.service: Scheduled restart job, restart counter is at 1.", HashMap::default()), + build_log_entry("docker.service: Scheduled restart job, restart counter is at 2.", HashMap::default()) ], "docker.service_restarts", 2.0) ] #[case(vec![LogToMetricRule::CountMatching { pattern: "(.*): Scheduled restart job, restart counter is at".to_string(), metric_name: "$1_restarts".to_string(), filter: HashMap::from([("UNIT".to_owned(), "systemd".to_owned())]) - }], vec![json!({"MESSAGE": /* systemd[1]: */"docker.service: Scheduled restart job, restart counter is at 1.", "UNIT": "systemd"})], "docker.service_restarts", 1.0)] + }], + vec![ + build_log_entry("docker.service: Scheduled restart job, restart counter is at 1.", HashMap::from([("UNIT", "systemd")])) + ], + "docker.service_restarts", + 1.0)] #[case(vec![LogToMetricRule::CountMatching { pattern: "(.*): Scheduled restart job, restart counter is at".to_string(), metric_name: "$1_restarts".to_string(), filter: HashMap::from([("_SYSTEMD_UNIT".to_owned(), "ssh.service".to_owned())]) - }], vec![json!({"MESSAGE": /* systemd[1]: */"docker.service: Scheduled restart job, restart counter is at 1.", "_SYSTEMD_UNIT": ""})], "docker.service_restarts", 0.0)] + }], + vec![ + build_log_entry("docker.service: Scheduled restart job, restart counter is at 1.", HashMap::from([("_SYSTEMD_UNIT", "")])), + ], + "docker.service_restarts", + 0.0)] #[case(vec![LogToMetricRule::CountMatching { pattern: "Out of memory: Killed process \\d+ \\((.*)\\)".to_string(), metric_name: "oomkill_$1".to_string(), filter: HashMap::default() - }], vec![json!({"MESSAGE": "Out of memory: Killed process 423 (wefaultd) total-vm:553448kB, anon-rss:284496kB, file-rss:0kB, shmem-rss:0kB, UID:0 pgtables:624kB oom_score_adj:0"})], "oomkill_wefaultd", 1.0)] + }], + vec![ + build_log_entry("Out of memory: Killed process 423 (wefaultd) total-vm:553448kB, anon-rss:284496kB, file-rss:0kB, shmem-rss:0kB, UID:0 pgtables:624kB oom_score_adj:0", HashMap::default()) + ], + "oomkill_wefaultd", + 1.0)] fn test_log_to_metrics( #[case] rules: Vec, - #[case] logs: Vec, + #[case] logs: Vec, #[case] metric_name: &str, #[case] expected_value: f64, _setup_logger: (), @@ -184,9 +205,7 @@ mod tests { let mut log_to_metrics = LogToMetrics::new(rules, mock.mbox.clone()); for log in logs { - log_to_metrics - .process(&json!({ "data": log })) - .expect("process error"); + log_to_metrics.process(&log).expect("process error"); } let metrics = mock.take_metrics().expect("invalid metrics"); @@ -205,4 +224,25 @@ mod tests { } } } + + fn build_log_entry(message: &str, extra_fields: HashMap<&str, &str>) -> LogEntry { + let extra_fields = extra_fields + .into_iter() + .map(|(k, v)| (k.to_string(), 
LogValue::String(v.to_string()))) + .collect(); + + let data = LogData { + message: message.to_string(), + pid: None, + systemd_unit: None, + priority: None, + original_priority: None, + extra_fields, + }; + + LogEntry { + ts: Utc::now(), + data, + } + } } diff --git a/memfaultd/src/logs/mod.rs b/memfaultd/src/logs/mod.rs index 51ac2f1..d8868b6 100644 --- a/memfaultd/src/logs/mod.rs +++ b/memfaultd/src/logs/mod.rs @@ -17,5 +17,8 @@ pub mod log_entry; mod log_file; mod recovery; -#[cfg(feature = "log-to-metrics")] +#[cfg(feature = "regex")] +pub mod log_level_mapper; + +#[cfg(feature = "regex")] pub mod log_to_metrics; diff --git a/memfaultd/src/mar/manifest.rs b/memfaultd/src/mar/manifest.rs index 395bc26..37ec369 100644 --- a/memfaultd/src/mar/manifest.rs +++ b/memfaultd/src/mar/manifest.rs @@ -16,8 +16,11 @@ use crate::{ network::DeviceConfigRevision, network::NetworkConfig, reboot::RebootReason, - util::serialization::{milliseconds_to_duration, optional_milliseconds_to_duration}, util::system::{get_system_clock, read_system_boot_id, Clock}, + util::{ + serialization::{milliseconds_to_duration, optional_milliseconds_to_duration}, + system::{get_osrelease, get_ostype}, + }, }; #[derive(Serialize, Deserialize)] @@ -56,6 +59,8 @@ struct Device { pub struct Producer { pub id: String, pub version: String, + pub os_version: Option, + pub os_name: Option, } impl Default for Producer { @@ -63,6 +68,8 @@ impl Default for Producer { Self { id: "memfaultd".into(), version: VERSION.to_owned(), + os_version: get_ostype(), + os_name: get_osrelease(), } } } @@ -194,7 +201,7 @@ impl, V: Into> TryFrom<(K, V)> for DeviceAttribute { fn try_from(value: (K, V)) -> std::result::Result { Ok(DeviceAttribute { - string_key: str::parse(value.0.as_ref())?, + string_key: str::parse(value.0.as_ref().trim())?, value: value.1.into(), }) } @@ -366,7 +373,7 @@ mod tests { use crate::network::NetworkConfig; use crate::reboot::RebootReason; - use super::{CollectionTime, Manifest}; + use super::*; #[rstest] #[case("coredump-gzip", CompressionAlgorithm::Gzip)] @@ -379,7 +386,7 @@ mod tests { CollectionTime::test_fixture(), super::Metadata::new_coredump("/tmp/core.elf".into(), compression), ); - insta::assert_json_snapshot!(name, manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!(name, manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -395,7 +402,7 @@ mod tests { CollectionTime::test_fixture(), super::Metadata::new_log("/var/log/syslog".into(), this_cid, next_cid, compression), ); - insta::assert_json_snapshot!(name, manifest, { ".producer.version" => "tests" }); + insta::assert_json_snapshot!(name, manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -411,7 +418,7 @@ mod tests { ("my_bool", true).try_into().unwrap(), ]), ); - insta::assert_json_snapshot!(manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!( manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -422,7 +429,7 @@ mod tests { CollectionTime::test_fixture(), super::Metadata::new_device_config(42), ); - insta::assert_json_snapshot!(manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!( manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -433,7 +440,7 @@ mod tests { 
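// Editor's note: illustrative call-site sketch, not part of the patch. With
// `add_attachment`/`add_copied_attachment` now returning `Result`, a missing or
// relative attachment path becomes a recoverable error instead of a panic. The
// path and metadata arguments below are placeholders.
fn stage_log_attachment(
    mar_staging_path: &std::path::Path,
    log_path: std::path::PathBuf,
    metadata: Metadata,
    network_config: &NetworkConfig,
) -> Result<()> {
    MarEntryBuilder::new(mar_staging_path)?
        .set_metadata(metadata)
        // Fails if the file does not exist or the path is not absolute.
        .add_attachment(log_path)?
        .save(network_config)?;
    Ok(())
}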
CollectionTime::test_fixture(), super::Metadata::new_reboot(RebootReason::from(RebootReasonCode::UserShutdown)), ); - insta::assert_json_snapshot!(manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!( manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -446,7 +453,7 @@ mod tests { RebootReason::from_str("CustomRebootReason").unwrap_or_else(|e| panic!("{}", e)), ), ); - insta::assert_json_snapshot!(manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!( manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -460,7 +467,7 @@ mod tests { .unwrap_or_else(|e| panic!("{}", e)), ), ); - insta::assert_json_snapshot!(manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!( manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -478,7 +485,7 @@ mod tests { report_type: MetricReportType::Heartbeat, }, ); - insta::assert_json_snapshot!(manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!( manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); } #[rstest] @@ -496,6 +503,17 @@ mod tests { .with_extension("json"); let manifest_json = std::fs::read_to_string(input_path).unwrap(); let manifest: Manifest = serde_json::from_str(manifest_json.as_str()).unwrap(); - insta::assert_json_snapshot!(name, manifest, { ".producer.version" => "tests"}); + insta::assert_json_snapshot!(name, manifest, { ".producer.version" => "tests", ".producer.os_version" => "tests", ".producer.os_name" => "tests"}); + } + + #[rstest] + #[case(" before", "before")] + #[case("after ", "after")] + #[case(" both ", "both")] + #[case("\ttab\t", "tab")] + #[case("\nnewline\n", "newline")] + fn whitespace_trimmed_attribute(#[case] input_string: &str, #[case] expected: &str) { + let attribute = DeviceAttribute::try_from((input_string, 42)).unwrap(); + assert_eq!(attribute.string_key.as_str(), expected); } } diff --git a/memfaultd/src/mar/mar_entry_builder.rs b/memfaultd/src/mar/mar_entry_builder.rs index 8b3e43c..f037e07 100644 --- a/memfaultd/src/mar/mar_entry_builder.rs +++ b/memfaultd/src/mar/mar_entry_builder.rs @@ -10,7 +10,7 @@ use crate::{ mar::{CollectionTime, Manifest, MarEntry, Metadata}, util::fs::copy_file, }; -use eyre::WrapErr; +use eyre::{eyre, Result, WrapErr}; use std::ffi::OsStr; use std::fs::{create_dir, remove_dir_all, rename, File, Metadata as FsMetadata}; use std::io; @@ -45,21 +45,25 @@ impl MarEntryBuilder { /// Add an attachment to this entry. /// /// The file will be moved to the entry directory - pub fn add_attachment(mut self, file: PathBuf) -> MarEntryBuilder { - assert!(file.is_file()); - assert!(file.is_absolute()); - self.attachments.push(MarAttachment::Move(file)); - self + pub fn add_attachment(mut self, file: PathBuf) -> Result> { + if file.is_file() && file.is_absolute() { + self.attachments.push(MarAttachment::Move(file)); + Ok(self) + } else { + Err(eyre!("Failed to add attachment!")) + } } /// Add an attachment to this entry. 
/// /// The file will be copied to the entry directory - pub fn add_copied_attachment(mut self, file: PathBuf) -> MarEntryBuilder { - assert!(file.is_file()); - assert!(file.is_absolute()); - self.attachments.push(MarAttachment::Copy(file)); - self + pub fn add_copied_attachment(mut self, file: PathBuf) -> Result> { + if file.is_file() && file.is_absolute() { + self.attachments.push(MarAttachment::Copy(file)); + Ok(self) + } else { + Err(eyre!("Failed to add copied attachment!")) + } } } @@ -96,7 +100,7 @@ impl MarEntryBuilder { impl MarEntryBuilder { /// Consume this builder, writes the manifest and moves the attachment to the /// MAR storage area and returns a MAR entry. - pub fn save(self, network_config: &NetworkConfig) -> eyre::Result { + pub fn save(self, network_config: &NetworkConfig) -> Result { // Move attachments for filepath in self.attachments { // We already check that attachments are file in the constructor so we ignore @@ -251,6 +255,7 @@ mod tests { builder .add_attachment(orig_attachment_path.clone()) + .unwrap() .set_metadata(Metadata::test_fixture()) .save(&NetworkConfig::test_fixture()) .unwrap(); @@ -270,6 +275,7 @@ mod tests { builder .add_attachment(orig_attachment_path.clone()) + .unwrap() .set_metadata(Metadata::test_fixture()) .save(&NetworkConfig::test_fixture()) .unwrap(); @@ -289,6 +295,7 @@ mod tests { let builder = builder .add_attachment(orig_attachment_path) + .unwrap() .set_metadata(Metadata::test_fixture()); assert_eq!( diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__attributes.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__attributes.snap index 63c65dc..65f8af5 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__attributes.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__attributes.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "device-attributes", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-gzip.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-gzip.snap index a716082..02db69f 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-gzip.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-gzip.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "elf-coredump", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-none.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-none.snap index 03cb837..4bc1d9a 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-none.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__coredump-none.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "elf-coredump", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__device_config.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__device_config.snap index 4f49a1c..bcdd32e 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__device_config.snap +++ 
b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__device_config.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "device-config", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__elf_coredump.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__elf_coredump.snap index 07b4b90..ba2d773 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__elf_coredump.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__elf_coredump.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "elf-coredump", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat.snap index e3b18fb..97bef8c 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-heartbeat", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat_with_duration.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat_with_duration.snap index c9e46b4..dbd615e 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat_with_duration.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__heartbeat_with_duration.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-heartbeat", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-none.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-none.snap index ffc4361..efd87c5 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-none.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-none.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-logs", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-zlib.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-zlib.snap index 34fe055..c4b26e9 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-zlib.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__log-zlib.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-logs", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__metric_report.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__metric_report.snap index 5176a6c..6996570 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__metric_report.snap +++ 
b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__metric_report.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-metric-report", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__reboot.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__reboot.snap index 7c92718..49b9619 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__reboot.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__reboot.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-reboot", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_reboot.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_reboot.snap index f911a44..f4bfa47 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_reboot.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_reboot.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-reboot", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_unexpected_reboot.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_unexpected_reboot.snap index 6319932..0667dd9 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_unexpected_reboot.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_custom_unexpected_reboot.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-reboot", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_attributes.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_attributes.snap index c7d93f9..1127442 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_attributes.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_attributes.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "device-attributes", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_configc.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_configc.snap index 4f49a1c..bcdd32e 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_configc.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_device_configc.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "device-config", "metadata": { diff --git 
a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_linux_heartbeat.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_linux_heartbeat.snap index 4bec52e..f0a3cbf 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_linux_heartbeat.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_linux_heartbeat.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-metric-report", "metadata": { diff --git a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_reboot.snap b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_reboot.snap index cea2c31..546aa14 100644 --- a/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_reboot.snap +++ b/memfaultd/src/mar/snapshots/memfaultd__mar__manifest__tests__serialization_of_reboot.snap @@ -20,7 +20,9 @@ expression: manifest }, "producer": { "id": "memfaultd", - "version": "tests" + "version": "tests", + "os_version": "tests", + "os_name": "tests" }, "type": "linux-reboot", "metadata": { diff --git a/memfaultd/src/mar/test_utils.rs b/memfaultd/src/mar/test_utils.rs index 25688d2..1ef3745 100644 --- a/memfaultd/src/mar/test_utils.rs +++ b/memfaultd/src/mar/test_utils.rs @@ -217,6 +217,31 @@ impl MarCollectorFixture { path } + + pub fn create_custom_data_recording_entry(&mut self, data: Vec) -> PathBuf { + let path = self.create_empty_entry(); + let manifest_path = path.join("manifest.json"); + let data_path = path.join("data"); + + let mut data_file = File::create(&data_path).unwrap(); + data_file.write_all(&data).unwrap(); + + let manifest_file = File::create(manifest_path).unwrap(); + let manifest = Manifest::new( + &self.config, + CollectionTime::test_fixture(), + Metadata::new_custom_data_recording( + None, + Duration::from_secs(0), + vec!["mime".to_string()], + "test".to_string(), + data_path.to_str().unwrap().to_string(), + ), + ); + serde_json::to_writer(BufWriter::new(manifest_file), &manifest).unwrap(); + + path + } } /// Check the content of a MAR zip encoder against a list of expected files. diff --git a/memfaultd/src/mar/upload.rs b/memfaultd/src/mar/upload.rs index 09d3c58..a04ec04 100644 --- a/memfaultd/src/mar/upload.rs +++ b/memfaultd/src/mar/upload.rs @@ -24,6 +24,8 @@ //! +-----------------+-----+-----+--------+------+ //! | logs | | | x | x | //! +-----------------+-----+-----+--------+------+ +//! | CDR | | | x | x | +//! 
+-----------------+-----+-----+--------+------+ use std::fs::{remove_dir_all, File}; use std::io::BufReader; @@ -419,6 +421,31 @@ mod tests { upload_and_verify(mar_fixture, client, sampling_config, expected_files); } + #[rstest] + // Verify that CDRs are uploaded based on the debugging resolution + #[case::off(Resolution::Off, false)] + #[case::low(Resolution::Low, false)] + #[case::normal(Resolution::Normal, true)] + #[case::high(Resolution::High, true)] + fn uploading_custom_data_recordings( + #[case] resolution: Resolution, + #[case] should_upload: bool, + _setup_logger: (), + client: MockNetworkClient, + mut mar_fixture: MarCollectorFixture, + ) { + let data = vec![1, 3, 3, 7]; + mar_fixture.create_custom_data_recording_entry(data); + + let sampling_config = Sampling { + debugging_resolution: resolution, + logging_resolution: Resolution::Off, + monitoring_resolution: Resolution::Off, + }; + let expected_files = should_upload.then(|| vec!["/data", "/manifest.json"]); + upload_and_verify(mar_fixture, client, sampling_config, expected_files); + } + #[rstest] // Heartbeat cases #[case::heartbeat_off(MetricReportType::Heartbeat, Resolution::Off, false)] diff --git a/memfaultd/src/memfaultd.rs b/memfaultd/src/memfaultd.rs index 2179359..593305f 100644 --- a/memfaultd/src/memfaultd.rs +++ b/memfaultd/src/memfaultd.rs @@ -46,7 +46,6 @@ use crate::{ use crate::{mar::MarStagingCleaner, service_manager::get_service_manager}; use crate::{reboot::RebootReasonTracker, util::disk_size::DiskSize}; -#[cfg(feature = "collectd")] use crate::collectd::CollectdHandler; #[cfg(feature = "logging")] @@ -147,7 +146,6 @@ pub fn memfaultd_loop Result<()>>( ); http_handlers.push(Box::new(session_event_handler)); - #[cfg(feature = "collectd")] { let collectd_handler = CollectdHandler::new( config.config_file.enable_data_collection, @@ -177,7 +175,7 @@ pub fn memfaultd_loop Result<()>>( } // Start statsd server - if config.statsd_server_enabled() { + if config.statsd_server_enabled() && config.config_file.enable_data_collection { if let Ok(bind_address) = config.statsd_server_address() { let metrics_mailbox = metrics_mbox.clone(); spawn(move || { @@ -190,12 +188,21 @@ pub fn memfaultd_loop Result<()>>( } // Start system metric collector thread - if config.builtin_system_metric_collection_enabled() { + if config.builtin_system_metric_collection_enabled() + && config.config_file.enable_data_collection + { let poll_interval = config.system_metric_poll_interval(); - let system_metric_config = config.system_metric_config(); let mbox = metrics_mbox.clone(); + let processes_config = config.system_metric_monitored_processes(); + let network_interfaces_config = config.system_metric_network_interfaces_config().cloned(); + let disk_space_config = config.system_metric_disk_space_config(); spawn(move || { - let mut sys_metric_collector = SystemMetricsCollector::new(system_metric_config, mbox); + let mut sys_metric_collector = SystemMetricsCollector::new( + processes_config, + network_interfaces_config, + disk_space_config, + mbox, + ); sys_metric_collector.run(poll_interval) }); } @@ -220,7 +227,8 @@ pub fn memfaultd_loop Result<()>>( // Start a thread to update battery metrics // periodically if enabled by configuration - if config.battery_monitor_periodic_update_enabled() { + if config.battery_monitor_periodic_update_enabled() && config.config_file.enable_data_collection + { let battery_monitor_interval = config.battery_monitor_interval(); let battery_info_command_str = 
config.battery_monitor_battery_info_command().to_string(); spawn(move || { @@ -243,23 +251,26 @@ pub fn memfaultd_loop Result<()>>( } // Connected time monitor is only enabled if config is defined if let Some(connectivity_monitor_config) = config.connectivity_monitor_config() { - let mut connectivity_monitor = ConnectivityMonitor::::new( - connectivity_monitor_config, - metric_report_manager.mbox().into(), - ); - spawn(move || { - let mut next_connectivity_reading_time = - Instant::now() + connectivity_monitor.interval_seconds(); - loop { - while Instant::now() < next_connectivity_reading_time { - sleep(next_connectivity_reading_time - Instant::now()); - } - next_connectivity_reading_time += connectivity_monitor.interval_seconds(); - if let Err(e) = connectivity_monitor.update_connected_time() { - warn!("Failed to update connected time metrics: {}", e); + if config.config_file.enable_data_collection { + let mut connectivity_monitor = + ConnectivityMonitor::::new( + connectivity_monitor_config, + metric_report_manager.mbox().into(), + ); + spawn(move || { + let mut next_connectivity_reading_time = + Instant::now() + connectivity_monitor.interval_seconds(); + loop { + while Instant::now() < next_connectivity_reading_time { + sleep(next_connectivity_reading_time - Instant::now()); + } + next_connectivity_reading_time += connectivity_monitor.interval_seconds(); + if let Err(e) = connectivity_monitor.update_connected_time() { + warn!("Failed to update connected time metrics: {}", e); + } } - } - }); + }); + } } // Schedule a task to dump the metrics when a sync is forced { @@ -315,7 +326,7 @@ pub fn memfaultd_loop Result<()>>( })); } // Schedule a task to compute operational and crashfree hours - { + if config.config_file.enable_data_collection { let mut crashfree_tracker = CrashFreeIntervalTracker::::new_hourly(metric_report_manager.mbox().into()); http_handlers.push(crashfree_tracker.http_handler()); @@ -351,6 +362,13 @@ pub fn memfaultd_loop Result<()>>( } #[cfg(feature = "systemd")] LogSource::Journald => Box::new(start_journald_provider(config.tmp_dir())), + #[cfg(not(feature = "systemd"))] + LogSource::Journald => { + warn!("logs.source configuration set to \"journald\", but memfaultd was not compiled with the systemd feature. Logs will not be collected."); + // This match arm still needs to evaluate to Box + Send> + // Empty iterator typechecks and is effectively a no-op. + Box::new([].into_iter()) + } }; let mar_cleaner = mar_cleaner.clone(); @@ -373,7 +391,7 @@ pub fn memfaultd_loop Result<()>>( .to_owned(); let mar_builder = MarEntryBuilder::new(&mar_staging_path)? .set_metadata(Metadata::new_log(file_name, cid, next_cid, compression)) - .add_attachment(path); + .add_attachment(path)?; mar_cleaner.clean(mar_builder.estimated_entry_size())?; diff --git a/memfaultd/src/metrics/core_metrics.rs b/memfaultd/src/metrics/core_metrics.rs index 1a8a1ba..cb31820 100644 --- a/memfaultd/src/metrics/core_metrics.rs +++ b/memfaultd/src/metrics/core_metrics.rs @@ -1,6 +1,11 @@ // // Copyright (c) Memfault, Inc. 
// See License.txt for details +use std::collections::HashSet; + +use super::MetricStringKey; +use crate::util::wildcard_pattern::WildcardPattern; + // Connectivity metrics pub const METRIC_MF_SYNC_SUCCESS: &str = "sync_memfault_successful"; pub const METRIC_MF_SYNC_FAILURE: &str = "sync_memfault_failure"; @@ -8,11 +13,18 @@ pub const METRIC_CONNECTED_TIME: &str = "connectivity_connected_time_ms"; pub const METRIC_EXPECTED_CONNECTED_TIME: &str = "connectivity_expected_time_ms"; pub const METRIC_SYNC_SUCCESS: &str = "sync_successful"; pub const METRIC_SYNC_FAILURE: &str = "sync_failure"; +pub const METRIC_CONNECTIVITY_RECV_BYTES: &str = "connectivity_recv_bytes"; +pub const METRIC_CONNECTIVITY_SENT_BYTES: &str = "connectivity_sent_bytes"; +pub const METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_PREFIX: &str = "connectivity_"; +pub const METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_SUFFIX: &str = "_recv_bytes"; +pub const METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_PREFIX: &str = "connectivity_"; +pub const METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_SUFFIX: &str = "_sent_bytes"; // Stability metrics pub const METRIC_OPERATIONAL_HOURS: &str = "operational_hours"; pub const METRIC_OPERATIONAL_CRASHFREE_HOURS: &str = "operational_crashfree_hours"; pub const METRIC_OPERATIONAL_CRASHES: &str = "operational_crashes"; +pub const METRIC_OPERATIONAL_CRASHES_PROCESS_PREFIX: &str = "operational_crashes_"; // Battery metrics pub const METRIC_BATTERY_DISCHARGE_DURATION_MS: &str = "battery_discharge_duration_ms"; @@ -21,3 +33,78 @@ pub const METRIC_BATTERY_SOC_PCT: &str = "battery_soc_pct"; // Memory Metrics pub const METRIC_MEMORY_PCT: &str = "memory_pct"; + +// CPU Metrics +pub const METRIC_CPU_USAGE_PCT: &str = "cpu_usage_pct"; + +// Disk Space Metrics +pub const METRIC_STORAGE_USED_DISK_PCT_PREFIX: &str = "storage_used_"; +pub const METRIC_STORAGE_USED_DISK_PCT_SUFFIX: &str = "_pct"; + +// Process Metrics +// Note: The metric keys for per-process core metrics follow the below +// format: +// "" +pub const METRIC_CPU_USAGE_PROCESS_PCT_PREFIX: &str = "cpu_usage_"; +pub const METRIC_CPU_USAGE_PROCESS_PCT_SUFFIX: &str = "_pct"; +pub const METRIC_MEMORY_PROCESS_PCT_PREFIX: &str = "memory_"; +pub const METRIC_MEMORY_PROCESS_PCT_SUFFIX: &str = "_pct"; + +const SESSION_CORE_METRICS: &[&str; 12] = &[ + METRIC_MF_SYNC_FAILURE, + METRIC_MF_SYNC_SUCCESS, + METRIC_BATTERY_DISCHARGE_DURATION_MS, + METRIC_BATTERY_SOC_PCT_DROP, + METRIC_CONNECTED_TIME, + METRIC_EXPECTED_CONNECTED_TIME, + METRIC_SYNC_FAILURE, + METRIC_SYNC_SUCCESS, + METRIC_OPERATIONAL_CRASHES, + METRIC_MEMORY_PCT, + METRIC_CONNECTIVITY_RECV_BYTES, + METRIC_CONNECTIVITY_SENT_BYTES, +]; + +const SESSION_CORE_WILDCARD_METIRCS: &[(&str, &str); 6] = &[ + ( + METRIC_CPU_USAGE_PROCESS_PCT_PREFIX, + METRIC_CPU_USAGE_PROCESS_PCT_SUFFIX, + ), + ( + METRIC_MEMORY_PROCESS_PCT_PREFIX, + METRIC_MEMORY_PROCESS_PCT_SUFFIX, + ), + ( + METRIC_STORAGE_USED_DISK_PCT_PREFIX, + METRIC_STORAGE_USED_DISK_PCT_SUFFIX, + ), + ( + METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_PREFIX, + METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_SUFFIX, + ), + ( + METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_PREFIX, + METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_SUFFIX, + ), + (METRIC_OPERATIONAL_CRASHES_PROCESS_PREFIX, ""), +]; + +#[derive(Clone)] +pub struct CoreMetricKeys { + pub string_keys: HashSet, + pub wildcard_pattern_keys: Vec, +} + +impl CoreMetricKeys { + pub fn get_session_core_metrics() -> Self { + let string_keys = HashSet::from_iter(SESSION_CORE_METRICS.map(MetricStringKey::from)); + let 
wildcard_pattern_keys = SESSION_CORE_WILDCARD_METIRCS + .map(|(prefix, suffix)| WildcardPattern::new(prefix, suffix)) + .to_vec(); + + Self { + string_keys, + wildcard_pattern_keys, + } + } +} diff --git a/memfaultd/src/metrics/crashfree_interval.rs b/memfaultd/src/metrics/crashfree_interval.rs index 85dc274..93556af 100644 --- a/memfaultd/src/metrics/crashfree_interval.rs +++ b/memfaultd/src/metrics/crashfree_interval.rs @@ -3,21 +3,26 @@ // See License.txt for details use std::{ cmp::max, + collections::HashMap, + io::Read, iter::once, + str::{from_utf8, FromStr}, sync::mpsc::{channel, Receiver, Sender}, time::Duration, }; -use eyre::Result; -use log::trace; +use eyre::{eyre, Result}; +use log::{trace, warn}; +use serde::{Deserialize, Serialize}; use tiny_http::{Method, Request, Response}; use crate::{ + cli::NotifyCrashRequest, http_server::{HttpHandler, HttpHandlerResult}, metrics::{ core_metrics::{ - METRIC_OPERATIONAL_CRASHES, METRIC_OPERATIONAL_CRASHFREE_HOURS, - METRIC_OPERATIONAL_HOURS, + METRIC_OPERATIONAL_CRASHES, METRIC_OPERATIONAL_CRASHES_PROCESS_PREFIX, + METRIC_OPERATIONAL_CRASHFREE_HOURS, METRIC_OPERATIONAL_HOURS, }, KeyedMetricReading, }, @@ -30,8 +35,9 @@ pub struct CrashFreeIntervalTracker { last_interval_mark: T, last_crashfree_interval_mark: T, crash_count: u32, - sender: Sender, - receiver: Receiver, + process_crash_count: HashMap, + sender: Sender>, + receiver: Receiver>, interval: Duration, elapsed_intervals_key: MetricStringKey, crashfree_intervals_key: MetricStringKey, @@ -63,6 +69,7 @@ where sender, receiver, crash_count: 0, + process_crash_count: HashMap::new(), interval, elapsed_intervals_key: MetricStringKey::from(elapsed_intervals_key), crashfree_intervals_key: MetricStringKey::from(crashfree_intervals_key), @@ -87,14 +94,19 @@ where /// This allows us to have instant updates on crashes and hourly updates on the metrics, but /// also allows us to periodically update the metrics so that we don't have to wait for a crash. 
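// Sketch of what a client notifying memfaultd of a crash could send to the
// `/v1/crash/report` handler shown later in this hunk. The path, the POST method
// and the `process_name` payload field come from that handler; the bind
// address/port passed as `server` is an assumption (whatever memfaultd's HTTP
// server is configured to listen on), and `NotifyCrashRequest`'s exact serde
// attributes are not shown here, so treat the JSON shape as illustrative.
use std::io::Write;
use std::net::TcpStream;

fn notify_crash(server: &str, process_name: &str) -> std::io::Result<()> {
    let body = format!("{{\"process_name\":\"{process_name}\"}}");
    let request = format!(
        "POST /v1/crash/report HTTP/1.1\r\nHost: {server}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
        body.len()
    );
    // Each accepted request bumps both `operational_crashes` and the
    // per-process `operational_crashes_<process_name>` counter tracked above.
    TcpStream::connect(server)?.write_all(request.as_bytes())
}

// Example call (address and port are assumptions, not taken from this diff):
// notify_crash("127.0.0.1:8787", "my_daemon")?;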
pub fn wait_and_update(&mut self, wait_duration: Duration) -> Result<()> { - if let Ok(crash_ts) = self.receiver.recv_timeout(wait_duration) { + if let Ok(crash_info) = self.receiver.recv_timeout(wait_duration) { // Drain the receiver to get all crashes that happened since the last update self.receiver .try_iter() - .chain(once(crash_ts)) - .for_each(|ts| { + .chain(once(crash_info)) + .for_each(|info| { self.crash_count += 1; - self.last_crashfree_interval_mark = max(self.last_crashfree_interval_mark, ts); + self.last_crashfree_interval_mark = + max(self.last_crashfree_interval_mark, info.timestamp); + *self + .process_crash_count + .entry(info.process_name) + .or_insert(0) += 1; }); } @@ -119,7 +131,7 @@ where let crashes = self.crash_count; self.crash_count = 0; - let metrics = vec![ + let mut metrics = vec![ KeyedMetricReading::add_to_counter( self.elapsed_intervals_key.clone(), count_op_interval as f64, @@ -130,6 +142,26 @@ where ), KeyedMetricReading::add_to_counter(self.crash_count_key.clone(), crashes as f64), ]; + + let process_crash_metrics = self.process_crash_count.drain().flat_map( + |(process_name, crash_count)| -> Result { + let operation_crashes_process_key = MetricStringKey::from_str( + format!( + "{}{}", + METRIC_OPERATIONAL_CRASHES_PROCESS_PREFIX, process_name + ) + .as_str(), + ) + .map_err(|e| eyre!("Couldn't construct MetricStringKey: {}", e))?; + Ok(KeyedMetricReading::add_to_counter( + operation_crashes_process_key, + crash_count as f64, + )) + }, + ); + + metrics.extend(process_crash_metrics); + trace!("Crashfree hours metrics: {:?}", metrics); self.metrics_mbox.send_and_forget(metrics)?; @@ -143,9 +175,12 @@ where }) } - pub fn capture_crash(&self) { + pub fn capture_crash(&self, process_name: String) { self.sender - .send(T::now()) + .send(CrashInfo { + process_name, + timestamp: T::now(), + }) .expect("Failed to send crash timestamp"); } @@ -174,7 +209,25 @@ where } struct CrashFreeIntervalHttpHandler { - channel: Sender, + channel: Sender>, +} + +#[derive(Serialize, Deserialize)] +struct CrashInfo { + process_name: String, + timestamp: T, +} + +impl CrashFreeIntervalHttpHandler +where + T: TimeMeasure + Copy + Ord + std::ops::Add + Send + Sync, +{ + fn parse_notify_crash_request(stream: &mut dyn Read) -> Result { + let mut buf = vec![]; + stream.read_to_end(&mut buf)?; + let reading = serde_json::from_str(from_utf8(&buf)?)?; + Ok(reading) + } } impl HttpHandler for CrashFreeIntervalHttpHandler @@ -183,9 +236,23 @@ where { fn handle_request(&self, request: &mut Request) -> HttpHandlerResult { if request.url() == "/v1/crash/report" && request.method() == &Method::Post { - self.channel - .send(T::now()) - .expect("Crashfree channel closed"); + match Self::parse_notify_crash_request(request.as_reader()) { + Ok(NotifyCrashRequest { process_name }) => { + self.channel + .send(CrashInfo { + process_name, + timestamp: T::now(), + }) + .expect("Crashfree channel closed"); + } + Err(e) => { + warn!("Failed to parse notify crash request: {}", e); + return HttpHandlerResult::Error(format!( + "Failed to notify crash request: {}", + e + )); + } + } HttpHandlerResult::Response(Response::from_string("OK").boxed()) } else { HttpHandlerResult::NotHandled @@ -197,6 +264,7 @@ where mod tests { use std::{collections::BTreeMap, time::Duration}; + use insta::assert_json_snapshot; use rstest::rstest; use ssf::ServiceMock; @@ -279,7 +347,7 @@ mod tests { ); TestInstant::sleep(Duration::from_secs(3600)); - crashfree_tracker.capture_crash(); + 
crashfree_tracker.capture_crash("memfaultd".to_string()); TestInstant::sleep(Duration::from_secs(3600)); crashfree_tracker @@ -320,7 +388,7 @@ mod tests { assert_operational_metrics(metrics_mock.take_metrics().unwrap(), 0, 0); // Crash at t0 + 30min - crashfree_tracker.capture_crash(); + crashfree_tracker.capture_crash("memfaultd".to_string()); // After 30' we should be ready to mark an operational hour TestInstant::sleep(Duration::from_secs(1800)); @@ -359,7 +427,7 @@ mod tests { // Crash at interval + 170' TestInstant::sleep(Duration::from_secs(170 * 60)); - crashfree_tracker.capture_crash(); + crashfree_tracker.capture_crash("memfaultd".to_string()); // Another 10' to the heartbeat mark // We will count 0 operational hour here. That is a consequence of the heartbeat being larger than the hour @@ -372,7 +440,7 @@ mod tests { // However, doing the crash at interval +10' then waiting for 170' will record 2 crashfree hours TestInstant::sleep(Duration::from_secs(10 * 60)); - crashfree_tracker.capture_crash(); + crashfree_tracker.capture_crash("memfaultd".to_string()); TestInstant::sleep(Duration::from_secs(170 * 60)); crashfree_tracker .wait_and_update(Duration::from_secs(0)) @@ -380,12 +448,29 @@ mod tests { assert_operational_metrics(metrics_mock.take_metrics().unwrap(), 3, 2); } + #[rstest] + fn test_process_crash_counter() { + let mut metrics_mock = ServiceMock::new(); + let mut crashfree_tracker = + CrashFreeIntervalTracker::::new_hourly(metrics_mock.mbox.clone()); + + crashfree_tracker.capture_crash("memfaultd".to_string()); + crashfree_tracker.capture_crash("memfaultd".to_string()); + crashfree_tracker.capture_crash("memfaultd".to_string()); + crashfree_tracker.capture_crash("collectd".to_string()); + + TestInstant::sleep(Duration::from_secs(3600)); + crashfree_tracker + .wait_and_update(Duration::from_secs(0)) + .unwrap(); + assert_json_snapshot!(metrics_mock.take_metrics().unwrap()); + } + fn assert_operational_metrics( metrics: BTreeMap, expected_op_hours: u32, expected_crashfree_hours: u32, ) { - assert_eq!(metrics.len(), 3); let op_hours = metrics .iter() .find(|(name, _)| name.as_str() == METRIC_OPERATIONAL_HOURS) diff --git a/memfaultd/src/metrics/metric_reading.rs b/memfaultd/src/metrics/metric_reading.rs index ecbef35..ef9948b 100644 --- a/memfaultd/src/metrics/metric_reading.rs +++ b/memfaultd/src/metrics/metric_reading.rs @@ -123,6 +123,16 @@ impl KeyedMetricReading { } } + pub fn new_counter(name: MetricStringKey, value: f64) -> Self { + Self { + name, + value: MetricReading::Counter { + value, + timestamp: Utc::now(), + }, + } + } + /// Construct a KeyedMetricReading from a string in the StatsD format /// | /// diff --git a/memfaultd/src/metrics/metric_report.rs b/memfaultd/src/metrics/metric_report.rs index cc4d171..0524f81 100644 --- a/memfaultd/src/metrics/metric_report.rs +++ b/memfaultd/src/metrics/metric_report.rs @@ -7,48 +7,92 @@ use serde::{Deserialize, Serialize}; use std::{ collections::{HashMap, HashSet}, path::Path, - str::FromStr, time::{Duration, Instant}, }; use crate::{ mar::{MarEntryBuilder, Metadata}, metrics::{ - core_metrics::{ - METRIC_BATTERY_DISCHARGE_DURATION_MS, METRIC_BATTERY_SOC_PCT_DROP, - METRIC_CONNECTED_TIME, METRIC_EXPECTED_CONNECTED_TIME, METRIC_MEMORY_PCT, - METRIC_MF_SYNC_FAILURE, METRIC_MF_SYNC_SUCCESS, METRIC_OPERATIONAL_CRASHES, - METRIC_SYNC_FAILURE, METRIC_SYNC_SUCCESS, - }, metric_reading::KeyedMetricReading, timeseries::{Counter, Gauge, Histogram, TimeSeries, TimeWeightedAverage}, MetricReading, MetricStringKey, MetricValue, 
SessionName, }, + util::wildcard_pattern::WildcardPattern, }; -use super::timeseries::ReportTag; +use super::{ + core_metrics::{ + METRIC_CPU_USAGE_PCT, METRIC_CPU_USAGE_PROCESS_PCT_PREFIX, + METRIC_CPU_USAGE_PROCESS_PCT_SUFFIX, METRIC_MEMORY_PCT, METRIC_MEMORY_PROCESS_PCT_PREFIX, + METRIC_MEMORY_PROCESS_PCT_SUFFIX, + }, + system_metrics::{ + METRIC_INTERFACE_BYTES_PER_SECOND_RX_SUFFIX, METRIC_INTERFACE_BYTES_PER_SECOND_TX_SUFFIX, + NETWORK_INTERFACE_METRIC_NAMESPACE, THERMAL_METRIC_NAMESPACE, + }, + timeseries::ReportTag, +}; pub enum CapturedMetrics { All, - Metrics(HashSet), + Metrics(MetricsSet), +} + +pub struct MetricsSet { + pub metric_keys: HashSet, + pub wildcard_metric_keys: Vec, +} + +/// Returns a MetricsSet that represents +/// all the keys that should report a +/// min and max when aggregated as a +/// Histogram +fn histo_min_max_keys() -> MetricsSet { + MetricsSet { + metric_keys: HashSet::from_iter([ + MetricStringKey::from(METRIC_CPU_USAGE_PCT), + MetricStringKey::from(METRIC_MEMORY_PCT), + ]), + wildcard_metric_keys: vec![ + // cpu_usage_*_pct + WildcardPattern::new( + METRIC_CPU_USAGE_PROCESS_PCT_PREFIX, + METRIC_CPU_USAGE_PROCESS_PCT_SUFFIX, + ), + // memory_*_pct + WildcardPattern::new( + METRIC_MEMORY_PROCESS_PCT_PREFIX, + METRIC_MEMORY_PROCESS_PCT_SUFFIX, + ), + // interface/*/bytes_per_second/rx + WildcardPattern::new( + NETWORK_INTERFACE_METRIC_NAMESPACE, + METRIC_INTERFACE_BYTES_PER_SECOND_RX_SUFFIX, + ), + // interface/*/bytes_per_second/tx + WildcardPattern::new( + NETWORK_INTERFACE_METRIC_NAMESPACE, + METRIC_INTERFACE_BYTES_PER_SECOND_TX_SUFFIX, + ), + // thermal/* + WildcardPattern::new(THERMAL_METRIC_NAMESPACE, ""), + ], + } +} + +impl MetricsSet { + pub fn contains(&self, metric_string_key: &MetricStringKey) -> bool { + self.metric_keys.contains(metric_string_key) + || self + .wildcard_metric_keys + .iter() + .any(|pattern| pattern.matches(metric_string_key.as_str())) + } } pub const HEARTBEAT_REPORT_TYPE: &str = "heartbeat"; pub const DAILY_HEARTBEAT_REPORT_TYPE: &str = "daily-heartbeat"; -const SESSION_CORE_METRICS: &[&str; 10] = &[ - METRIC_MF_SYNC_FAILURE, - METRIC_MF_SYNC_SUCCESS, - METRIC_BATTERY_DISCHARGE_DURATION_MS, - METRIC_BATTERY_SOC_PCT_DROP, - METRIC_CONNECTED_TIME, - METRIC_EXPECTED_CONNECTED_TIME, - METRIC_SYNC_FAILURE, - METRIC_SYNC_SUCCESS, - METRIC_OPERATIONAL_CRASHES, - METRIC_MEMORY_PCT, -]; - #[derive(Serialize, Deserialize, Clone, PartialEq, Eq)] pub enum MetricReportType { #[serde(rename = "heartbeat")] @@ -80,6 +124,9 @@ pub struct MetricReport { /// Indicates whether this is a heartbeat metric report or /// session metric report (with session name) report_type: MetricReportType, + /// Metric keys that should report a min and max value when + /// aggregated as Histograms + histo_min_max_metrics: MetricsSet, } struct MetricReportSnapshot { @@ -88,33 +135,13 @@ struct MetricReportSnapshot { } impl MetricReport { - pub fn new( - report_type: MetricReportType, - configured_captured_metrics: CapturedMetrics, - ) -> Self { - // Always include session core metrics regardless of configuration - let captured_metrics = match configured_captured_metrics { - CapturedMetrics::All => CapturedMetrics::All, - CapturedMetrics::Metrics(metrics) => { - let mut merged_set = SESSION_CORE_METRICS - .iter() - .map(|core_metric_key| { - // This expect should never be hit as SESSION_CORE_METRICS is - // an array of static strings - MetricStringKey::from_str(core_metric_key) - .expect("Invalid Metric Key in SESSION_CORE_METRICS") - }) - .collect::>(); - 
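// `MetricsSet::contains` above defers wildcard handling to
// `WildcardPattern::matches`. The pattern type lives in `util::wildcard_pattern`
// and is not part of this diff, so the helper below is only an illustration of
// the prefix/suffix semantics implied by the "cpu_usage_*_pct" style comments,
// not the real implementation.
fn matches_prefix_suffix(prefix: &str, suffix: &str, key: &str) -> bool {
    key.len() > prefix.len() + suffix.len() && key.starts_with(prefix) && key.ends_with(suffix)
}

fn main() {
    // Per-process keys such as memory_<process>_pct match the
    // (METRIC_MEMORY_PROCESS_PCT_PREFIX, METRIC_MEMORY_PROCESS_PCT_SUFFIX) pair...
    assert!(matches_prefix_suffix("memory_", "_pct", "memory_memfaultd_pct"));
    // ...while the system-wide memory_pct key is covered by the explicit
    // `metric_keys` set instead of a wildcard.
    assert!(!matches_prefix_suffix("memory_", "_pct", "memory_pct"));
    // An empty suffix, as used for operational_crashes_<process>, degenerates
    // into a plain prefix check.
    assert!(matches_prefix_suffix("operational_crashes_", "", "operational_crashes_systemd"));
}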
merged_set.extend(metrics); - CapturedMetrics::Metrics(merged_set) - } - }; - + pub fn new(report_type: MetricReportType, captured_metrics: CapturedMetrics) -> Self { Self { metrics: HashMap::new(), start: Instant::now(), captured_metrics, report_type, + histo_min_max_metrics: histo_min_max_keys(), } } @@ -188,7 +215,20 @@ impl MetricReport { let duration = std::mem::replace(&mut self.start, Instant::now()).elapsed(); let metrics = std::mem::take(&mut self.metrics) .into_iter() - .map(|(name, state)| (name, state.value())) + .flat_map(|(name, state)| match state.value() { + MetricValue::Histogram(histo) => { + if self.histo_min_max_metrics.contains(&name) { + vec![ + (name.with_suffix("_max"), histo.max()), + (name.with_suffix("_min"), histo.min()), + (name, histo.avg()), + ] + } else { + vec![(name, histo.avg())] + } + } + _ => vec![(name, state.value())], + }) .collect(); MetricReportSnapshot { duration, metrics } @@ -241,7 +281,10 @@ mod tests { use std::collections::BTreeMap; use super::*; - use crate::test_utils::in_histograms; + use crate::{ + metrics::core_metrics::CoreMetricKeys, + test_utils::{in_counters, in_histograms}, + }; use std::str::FromStr; use insta::assert_json_snapshot; @@ -249,10 +292,15 @@ mod tests { #[rstest] #[case(in_histograms(vec![("foo", 1.0), ("bar", 2.0), ("baz", 3.0)]), "heartbeat_report_1")] - #[case(in_histograms(vec![("foo", 1.0), ("foo", 2.0), ("foo", 3.0)]), "heartbeat_report_2")] + // cpu_usage_memfaultd_pct should report a min and max as it matches a wildcard pattern + // in the MetricsSet returned by MetricReport::histo_min_max_metrics + #[case(in_histograms(vec![("cpu_usage_memfaultd_pct", 1.0), ("cpu_usage_memfaultd_pct", 2.0), ("cpu_usage_memfaultd_pct", 3.0)]), "heartbeat_report_2")] #[case(in_histograms(vec![("foo", 1.0), ("foo", 1.0)]), "heartbeat_report_3")] - #[case(in_histograms(vec![("foo", 1.0), ("foo", 2.0)]), "heartbeat_report_4")] - #[case(in_histograms(vec![("foo", 1.0), ("foo", 2.0), ("foo", 2.0)]), "heartbeat_report_5")] + #[case(in_histograms(vec![("memory_pct", 1.0), ("memory_pct", 2.0)]), "heartbeat_report_4")] + #[case(in_histograms(vec![("memory_systemd_pct", 1.0), ("memory_systemd_pct", 2.0), ("memory_systemd_pct", 2.0)]), "heartbeat_report_5")] + // bytes_per_second/rx should report a min and max, packets_per_second/rx should *not* + #[case(in_histograms(vec![("interface/eth0/bytes_per_second/rx", 1.0), ("interface/eth0/bytes_per_second/rx", 2.0), ("interface/eth0/bytes_per_second/rx", 2.0)]), "heartbeat_report_6")] + #[case(in_histograms(vec![("interface/eth0/packets_per_second/rx", 1.0), ("interface/eth0/packets_per_second/rx", 2.0), ("interface/eth0/packets_per_second/rx", 2.0)]), "heartbeat_report_7")] fn test_aggregate_metrics( #[case] metrics: impl Iterator, #[case] test_name: &str, @@ -272,20 +320,23 @@ mod tests { #[case(in_histograms(vec![("foo", 1.0), ("foo", 1.0)]), "session_report_3")] #[case(in_histograms(vec![("foo", 1.0), ("foo", 2.0)]), "session_report_4")] #[case(in_histograms(vec![("foo", 1.0), ("foo", 2.0), ("baz", 2.0), ("bar", 3.0)]), "session_report_5")] + #[case(in_counters(vec![("operational_crashes", 2.0), ("operational_crashes_memfaultd", 3.0), ("operational_crashes_memfaultd", 2.0), ("crashes_systemd", 3.0)]), "session_report_6")] fn test_aggregate_metrics_session( #[case] metrics: impl Iterator, #[case] test_name: &str, ) { + let session_core_metrics = CoreMetricKeys::get_session_core_metrics(); + let mut metric_keys = vec![ + MetricStringKey::from_str("foo").unwrap(), + 
MetricStringKey::from_str("baz").unwrap(), + ]; + metric_keys.extend(session_core_metrics.string_keys); let mut metric_report = MetricReport::new( MetricReportType::Session(SessionName::from_str("foo_only").unwrap()), - CapturedMetrics::Metrics( - [ - MetricStringKey::from_str("foo").unwrap(), - MetricStringKey::from_str("baz").unwrap(), - ] - .into_iter() - .collect(), - ), + CapturedMetrics::Metrics(MetricsSet { + metric_keys: HashSet::from_iter(metric_keys), + wildcard_metric_keys: session_core_metrics.wildcard_pattern_keys, + }), ); for m in metrics { @@ -295,35 +346,6 @@ mod tests { assert_json_snapshot!(test_name, sorted_metrics); } - /// Core metrics should always be captured by sessions even if they are not - /// configured to do so - #[rstest] - fn test_aggregate_core_metrics_session() { - let mut metric_report = MetricReport::new( - MetricReportType::Session(SessionName::from_str("foo_only").unwrap()), - CapturedMetrics::Metrics( - [ - MetricStringKey::from_str("foo").unwrap(), - MetricStringKey::from_str("baz").unwrap(), - ] - .into_iter() - .collect(), - ), - ); - - let metrics = in_histograms( - SESSION_CORE_METRICS - .map(|metric_name: &'static str| (metric_name, 100.0)) - .to_vec(), - ); - - for m in metrics { - metric_report.add_metric(m).unwrap(); - } - let sorted_metrics: BTreeMap<_, _> = metric_report.take_metrics().into_iter().collect(); - assert_json_snapshot!(sorted_metrics); - } - #[rstest] fn test_empty_after_write() { let mut metric_report = MetricReport::new_heartbeat(); diff --git a/memfaultd/src/metrics/metric_report_manager.rs b/memfaultd/src/metrics/metric_report_manager.rs index 82fd2c1..a253b70 100644 --- a/memfaultd/src/metrics/metric_report_manager.rs +++ b/memfaultd/src/metrics/metric_report_manager.rs @@ -10,14 +10,16 @@ use std::{ sync::{Arc, Mutex}, }; -use super::{ - core_metrics::METRIC_OPERATIONAL_CRASHES, metric_reading::KeyedMetricReading, - metric_report::CapturedMetrics, SessionEventMessage, SessionName, -}; use crate::{ config::SessionConfig, mar::{MarEntryBuilder, Metadata}, - metrics::{MetricReport, MetricReportType, MetricStringKey, MetricValue}, + metrics::{ + core_metrics::{CoreMetricKeys, METRIC_OPERATIONAL_CRASHES}, + metric_reading::KeyedMetricReading, + metric_report::{CapturedMetrics, MetricsSet}, + MetricReport, MetricReportType, MetricStringKey, MetricValue, SessionEventMessage, + SessionName, + }, network::NetworkConfig, }; @@ -26,6 +28,7 @@ pub struct MetricReportManager { daily_heartbeat: MetricReport, sessions: HashMap, session_configs: Vec, + core_metrics: CoreMetricKeys, } impl MetricReportManager { @@ -37,6 +40,7 @@ impl MetricReportManager { daily_heartbeat: MetricReport::new_daily_heartbeat(), sessions: HashMap::new(), session_configs: vec![], + core_metrics: CoreMetricKeys::get_session_core_metrics(), } } @@ -46,6 +50,7 @@ impl MetricReportManager { daily_heartbeat: MetricReport::new_daily_heartbeat(), sessions: HashMap::new(), session_configs: session_configs.to_vec(), + core_metrics: CoreMetricKeys::get_session_core_metrics(), } } @@ -73,14 +78,23 @@ impl MetricReportManager { match report_type { MetricReportType::Heartbeat => Ok(CapturedMetrics::All), MetricReportType::DailyHeartbeat => Ok(CapturedMetrics::All), - MetricReportType::Session(session_name) => self - .session_configs - .iter() - .find(|&session_config| session_config.name == *session_name) - .map(|config| { - CapturedMetrics::Metrics(config.captured_metrics.clone().into_iter().collect()) - }) - .ok_or_else(|| eyre!("No configuration for session named {} 
found!", session_name)), + MetricReportType::Session(session_name) => { + let mut metrics = self + .session_configs + .iter() + .find(|&session_config| session_config.name == *session_name) + .map(|config| config.captured_metrics.clone()) + .ok_or_else(|| { + eyre!("No configuration for session named {} found!", session_name) + })?; + + metrics.extend(self.core_metrics.string_keys.clone()); + + Ok(CapturedMetrics::Metrics(MetricsSet { + metric_keys: metrics, + wildcard_metric_keys: self.core_metrics.wildcard_pattern_keys.clone(), + })) + } } } @@ -302,10 +316,10 @@ mod tests { use super::*; use crate::test_utils::in_histograms; - use insta::assert_json_snapshot; + use insta::{assert_json_snapshot, rounded_redaction, with_settings}; use rstest::rstest; use ssf::ServiceMock; - use std::str::FromStr; + use std::{collections::HashSet, str::FromStr}; impl TakeMetrics for ServiceMock> { fn take_metrics(&mut self) -> Result> { @@ -368,18 +382,18 @@ mod tests { let session_configs = vec![ SessionConfig { name: session_a_name.clone(), - captured_metrics: vec![ + captured_metrics: HashSet::from_iter([ MetricStringKey::from_str("foo").unwrap(), MetricStringKey::from_str("bar").unwrap(), - ], + ]), }, SessionConfig { name: session_b_name.clone(), - captured_metrics: vec![ + captured_metrics: HashSet::from_iter([ MetricStringKey::from_str("foo").unwrap(), MetricStringKey::from_str("bar").unwrap(), MetricStringKey::from_str("baz").unwrap(), - ], + ]), }, ]; @@ -418,15 +432,57 @@ mod tests { } } + #[rstest] + #[case(in_histograms(vec![("foo", 1.0), ("cpu_usage_memfaultd_pct", 2.0), ("memory_pct", 3.0)]), "system_and_process_metrics")] + #[case(in_histograms(vec![("memory_systemd_pct", 1.0), ("memory_memfaultd_pct", 2.0), ("memory_foo_pct", 2.0)]), "process_metrics")] + fn test_sessions_capture_core_metrics( + #[case] metrics: impl Iterator, + #[case] test_name: &str, + ) { + let session_name = SessionName::from_str("test-session").unwrap(); + let session_configs = vec![SessionConfig { + name: session_name.clone(), + captured_metrics: HashSet::from_iter([ + MetricStringKey::from_str("foo").unwrap(), + MetricStringKey::from_str("bar").unwrap(), + ]), + }]; + + let mut metric_report_manager = + MetricReportManager::new_with_session_configs(&session_configs); + + assert!(metric_report_manager + .start_session(session_name.clone()) + .is_ok()); + + for m in metrics { + metric_report_manager + .add_metric(m) + .expect("Failed to add metric reading"); + } + + let session_report = metric_report_manager + .sessions + .get_mut(&session_name) + .unwrap(); + let metrics = session_report.take_metrics(); + + with_settings!({sort_maps => true}, { + assert_json_snapshot!(format!("{}_{}", test_name, "metrics"), + metrics, + {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) + }); + } + #[rstest] fn test_start_session_twice() { let session_name = SessionName::from_str("test-session-start-twice").unwrap(); let session_configs = vec![SessionConfig { name: session_name.clone(), - captured_metrics: vec![ + captured_metrics: HashSet::from_iter([ MetricStringKey::from_str("foo").unwrap(), MetricStringKey::from_str("bar").unwrap(), - ], + ]), }]; let mut metric_report_manager = @@ -471,10 +527,10 @@ mod tests { let session_name = SessionName::from_str("test-session").unwrap(); let session_configs = vec![SessionConfig { name: session_name.clone(), - captured_metrics: vec![ + captured_metrics: HashSet::from_iter([ MetricStringKey::from_str("foo").unwrap(), 
MetricStringKey::from_str("bar").unwrap(), - ], + ]), }]; let mut metric_report_manager = diff --git a/memfaultd/src/metrics/metric_string_key.rs b/memfaultd/src/metrics/metric_string_key.rs index 179da7d..892de18 100644 --- a/memfaultd/src/metrics/metric_string_key.rs +++ b/memfaultd/src/metrics/metric_string_key.rs @@ -23,6 +23,12 @@ impl MetricStringKey { &self.inner } + pub fn with_suffix(&self, suffix: &str) -> Self { + Self { + inner: (self.inner.clone() + suffix), + } + } + pub fn metric_string_key_parser>(input: T) -> IResult where T: InputTakeAtPosition, diff --git a/memfaultd/src/metrics/metric_value.rs b/memfaultd/src/metrics/metric_value.rs index ceb4724..1de54cd 100644 --- a/memfaultd/src/metrics/metric_value.rs +++ b/memfaultd/src/metrics/metric_value.rs @@ -4,11 +4,37 @@ use eyre::Result; use serde::{Deserialize, Serialize, Serializer}; +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct Histogram { + pub min: f64, + pub mean: f64, + pub max: f64, +} + +impl Histogram { + pub fn min(&self) -> MetricValue { + MetricValue::Number(self.min) + } + + pub fn avg(&self) -> MetricValue { + MetricValue::Number(self.mean) + } + + pub fn max(&self) -> MetricValue { + MetricValue::Number(self.max) + } +} + +pub fn construct_histogram_value(min: f64, mean: f64, max: f64) -> Histogram { + Histogram { min, mean, max } +} + #[derive(Clone, Debug, PartialEq, Deserialize)] #[serde(untagged)] pub enum MetricValue { Number(f64), String(String), + Histogram(Histogram), } impl Serialize for MetricValue { @@ -19,6 +45,7 @@ impl Serialize for MetricValue { match self { MetricValue::Number(v) => serializer.serialize_f64(*v), MetricValue::String(v) => serializer.serialize_str(v.as_str()), + MetricValue::Histogram(histo) => histo.serialize(serializer), } } } diff --git a/memfaultd/src/metrics/mod.rs b/memfaultd/src/metrics/mod.rs index cdc6cf1..205dfe1 100644 --- a/memfaultd/src/metrics/mod.rs +++ b/memfaultd/src/metrics/mod.rs @@ -33,6 +33,8 @@ pub use metric_reading::MetricReading; mod timeseries; mod metric_value; +pub use metric_value::construct_histogram_value; +pub use metric_value::Histogram as HistogramValue; pub use metric_value::MetricValue; pub type MetricTimestamp = DateTime; @@ -55,5 +57,7 @@ pub mod statsd_server; pub use statsd_server::StatsDServer; mod system_metrics; +pub use system_metrics::DiskSpaceMetricsConfig; +pub use system_metrics::ProcessMetricsConfig; pub use system_metrics::SystemMetricsCollector; pub use system_metrics::BUILTIN_SYSTEM_METRIC_NAMESPACES; diff --git a/memfaultd/src/metrics/session_event_handler.rs b/memfaultd/src/metrics/session_event_handler.rs index 4c83639..54da5de 100644 --- a/memfaultd/src/metrics/session_event_handler.rs +++ b/memfaultd/src/metrics/session_event_handler.rs @@ -132,7 +132,11 @@ impl HttpHandler for SessionEventHandler { #[cfg(test)] mod tests { - use std::{collections::BTreeMap, path::Path, str::FromStr}; + use std::{ + collections::{BTreeMap, HashSet}, + path::Path, + str::FromStr, + }; use insta::assert_json_snapshot; use rstest::{fixture, rstest}; @@ -354,11 +358,11 @@ mod tests { fn fixture() -> Fixture { let session_config = SessionConfig { name: SessionName::from_str("test-session").unwrap(), - captured_metrics: vec![ + captured_metrics: HashSet::from_iter([ MetricStringKey::from_str("foo").unwrap(), MetricStringKey::from_str("bar").unwrap(), MetricStringKey::from_str("baz").unwrap(), - ], + ]), }; let jig = diff --git 
a/memfaultd/src/metrics/snapshots/memfaultd__metrics__crashfree_interval__tests__process_crash_counter.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__crashfree_interval__tests__process_crash_counter.snap new file mode 100644 index 0000000..448a254 --- /dev/null +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__crashfree_interval__tests__process_crash_counter.snap @@ -0,0 +1,11 @@ +--- +source: memfaultd/src/metrics/crashfree_interval.rs +expression: metrics_mock.take_metrics().unwrap() +--- +{ + "operational_crashes": 4.0, + "operational_crashes_collectd": 1.0, + "operational_crashes_memfaultd": 3.0, + "operational_crashfree_hours": 1.0, + "operational_hours": 1.0 +} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__aggregate_core_metrics_session.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__aggregate_core_metrics_session.snap deleted file mode 100644 index 1ea2ea9..0000000 --- a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__aggregate_core_metrics_session.snap +++ /dev/null @@ -1,16 +0,0 @@ ---- -source: memfaultd/src/metrics/metric_report.rs -expression: sorted_metrics ---- -{ - "battery_discharge_duration_ms": 100.0, - "battery_soc_pct_drop": 100.0, - "connectivity_connected_time_ms": 100.0, - "connectivity_expected_time_ms": 100.0, - "memory_pct": 100.0, - "operational_crashes": 100.0, - "sync_failure": 100.0, - "sync_memfault_failure": 100.0, - "sync_memfault_successful": 100.0, - "sync_successful": 100.0 -} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_2.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_2.snap index 4420475..ff6fd88 100644 --- a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_2.snap +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_2.snap @@ -3,5 +3,7 @@ source: memfaultd/src/metrics/metric_report.rs expression: sorted_metrics --- { - "foo": 2.0 + "cpu_usage_memfaultd_pct": 2.0, + "cpu_usage_memfaultd_pct_max": 3.0, + "cpu_usage_memfaultd_pct_min": 1.0 } diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_4.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_4.snap index de0e20b..0053267 100644 --- a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_4.snap +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_4.snap @@ -3,5 +3,7 @@ source: memfaultd/src/metrics/metric_report.rs expression: sorted_metrics --- { - "foo": 1.5 + "memory_pct": 1.5, + "memory_pct_max": 2.0, + "memory_pct_min": 1.0 } diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_5.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_5.snap index 74f3f37..54885ef 100644 --- a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_5.snap +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_5.snap @@ -3,5 +3,7 @@ source: memfaultd/src/metrics/metric_report.rs expression: sorted_metrics --- { - "foo": 1.6666666666666667 + "memory_systemd_pct": 1.6666666666666667, + "memory_systemd_pct_max": 2.0, + "memory_systemd_pct_min": 1.0 } diff --git 
a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_6.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_6.snap new file mode 100644 index 0000000..aec2988 --- /dev/null +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_6.snap @@ -0,0 +1,9 @@ +--- +source: memfaultd/src/metrics/metric_report.rs +expression: sorted_metrics +--- +{ + "interface/eth0/bytes_per_second/rx": 1.6666666666666667, + "interface/eth0/bytes_per_second/rx_max": 2.0, + "interface/eth0/bytes_per_second/rx_min": 1.0 +} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_7.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_7.snap new file mode 100644 index 0000000..f0f38e1 --- /dev/null +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__heartbeat_report_7.snap @@ -0,0 +1,7 @@ +--- +source: memfaultd/src/metrics/metric_report.rs +expression: sorted_metrics +--- +{ + "interface/eth0/packets_per_second/rx": 1.6666666666666667 +} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__session_report_6.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__session_report_6.snap new file mode 100644 index 0000000..0bbeaa7 --- /dev/null +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report__tests__session_report_6.snap @@ -0,0 +1,8 @@ +--- +source: memfaultd/src/metrics/metric_report.rs +expression: sorted_metrics +--- +{ + "operational_crashes": 2.0, + "operational_crashes_memfaultd": 5.0 +} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report_manager__tests__process_metrics_metrics.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report_manager__tests__process_metrics_metrics.snap new file mode 100644 index 0000000..ac819f8 --- /dev/null +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report_manager__tests__process_metrics_metrics.snap @@ -0,0 +1,16 @@ +--- +source: memfaultd/src/metrics/metric_report_manager.rs +expression: metrics +--- +{ + "memory_foo_pct": 2.0, + "memory_foo_pct_max": 2.0, + "memory_foo_pct_min": 2.0, + "memory_memfaultd_pct": 2.0, + "memory_memfaultd_pct_max": 2.0, + "memory_memfaultd_pct_min": 2.0, + "memory_systemd_pct": 1.0, + "memory_systemd_pct_max": 1.0, + "memory_systemd_pct_min": 1.0, + "operational_crashes": 0.0 +} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report_manager__tests__system_and_process_metrics_metrics.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report_manager__tests__system_and_process_metrics_metrics.snap new file mode 100644 index 0000000..5cff3ab --- /dev/null +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__metric_report_manager__tests__system_and_process_metrics_metrics.snap @@ -0,0 +1,14 @@ +--- +source: memfaultd/src/metrics/metric_report_manager.rs +expression: metrics +--- +{ + "cpu_usage_memfaultd_pct": 2.0, + "cpu_usage_memfaultd_pct_max": 2.0, + "cpu_usage_memfaultd_pct_min": 2.0, + "foo": 1.0, + "memory_pct": 3.0, + "memory_pct_max": 3.0, + "memory_pct_min": 3.0, + "operational_crashes": 0.0 +} diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_twice_without_stop_session.snap 
b/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_twice_without_stop_session.snap index bb3f755..e4c4da3 100644 --- a/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_twice_without_stop_session.snap +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_twice_without_stop_session.snap @@ -1,6 +1,6 @@ --- source: memfaultd/src/metrics/session_event_handler.rs -expression: metrics +expression: fixture.take_session_metrics() --- { "bar": 11.0, diff --git a/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_without_stop_session.snap b/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_without_stop_session.snap index 0536e61..4c2a9a2 100644 --- a/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_without_stop_session.snap +++ b/memfaultd/src/metrics/snapshots/memfaultd__metrics__session_event_handler__tests__start_without_stop_session.snap @@ -1,6 +1,6 @@ --- source: memfaultd/src/metrics/session_event_handler.rs -expression: metrics +expression: fixture.take_session_metrics() --- { "bar": 2.0, diff --git a/memfaultd/src/metrics/statsd_server/mod.rs b/memfaultd/src/metrics/statsd_server/mod.rs index 3900f49..42d9a8b 100644 --- a/memfaultd/src/metrics/statsd_server/mod.rs +++ b/memfaultd/src/metrics/statsd_server/mod.rs @@ -62,7 +62,7 @@ impl StatsDServer { #[cfg(test)] mod test { use crate::metrics::TakeMetrics; - use insta::assert_json_snapshot; + use insta::{assert_json_snapshot, with_settings}; use rstest::{fixture, rstest}; use ssf::ServiceMock; @@ -93,7 +93,9 @@ mod test { // Process second StatsD test message fixture.server.process_statsd_message(statsd_message_b); + with_settings!({sort_maps => true}, { assert_json_snapshot!(test_name, fixture.mock.take_metrics().unwrap()); + }); } struct Fixture { diff --git a/memfaultd/src/metrics/statsd_server/snapshots/memfaultd__metrics__statsd_server__test__test_histogram_aggregation.snap b/memfaultd/src/metrics/statsd_server/snapshots/memfaultd__metrics__statsd_server__test__test_histogram_aggregation.snap index 731e6e6..7c614e2 100644 --- a/memfaultd/src/metrics/statsd_server/snapshots/memfaultd__metrics__statsd_server__test__test_histogram_aggregation.snap +++ b/memfaultd/src/metrics/statsd_server/snapshots/memfaultd__metrics__statsd_server__test__test_histogram_aggregation.snap @@ -1,6 +1,6 @@ --- source: memfaultd/src/metrics/statsd_server/mod.rs -expression: metrics +expression: fixture.mock.take_metrics().unwrap() --- { "test_another_histo": 510.0, diff --git a/memfaultd/src/metrics/system_metrics/cpu.rs b/memfaultd/src/metrics/system_metrics/cpu.rs index 5c25f2e..684cf35 100644 --- a/memfaultd/src/metrics/system_metrics/cpu.rs +++ b/memfaultd/src/metrics/system_metrics/cpu.rs @@ -30,6 +30,7 @@ //! See additional Linux kernel documentation on /proc/stat here: //! 
https://docs.kernel.org/filesystems/proc.html#miscellaneous-kernel-statistics-in-proc-stat use std::{ + collections::HashMap, fs::File, io::{BufRead, BufReader}, iter::zip, @@ -38,13 +39,15 @@ use std::{ }; use chrono::Utc; +use log::debug; use nom::{ bytes::complete::tag, character::complete::space1, multi::count, number::complete::double, sequence::preceded, IResult, }; use crate::metrics::{ - system_metrics::SystemMetricFamilyCollector, KeyedMetricReading, MetricReading, MetricStringKey, + core_metrics::METRIC_CPU_USAGE_PCT, system_metrics::SystemMetricFamilyCollector, + KeyedMetricReading, MetricReading, MetricStringKey, }; use eyre::{eyre, ErrReport, Result}; @@ -140,12 +143,12 @@ impl CpuMetricCollector { ["user", "nice", "system", "idle", "iowait", "irq", "softirq"], delta, ) - .collect::>(); + .collect::>(); - let sum: f64 = cpu_states_with_ticks.iter().map(|(_k, v)| v).sum(); + let sum: f64 = cpu_states_with_ticks.values().sum(); let timestamp = Utc::now(); - let readings = cpu_states_with_ticks + let mut readings = cpu_states_with_ticks .iter() .map(|(key, value)| -> Result { Ok(KeyedMetricReading::new( @@ -161,11 +164,24 @@ impl CpuMetricCollector { }, )) }) - .collect::>>(); - match readings { - Ok(readings) => Ok(Some(readings)), - Err(e) => Err(e), + .collect::>>()?; + + if sum > 0.0 { + let cpu_usage_pct = ((sum - cpu_states_with_ticks["idle"]) / sum) * 100.0; + let cpu_usage_pct_key = + MetricStringKey::from_str(METRIC_CPU_USAGE_PCT).map_err(|e| { + eyre!("Failed to construct MetricStringKey for used memory: {}", e) + })?; + + readings.push(KeyedMetricReading::new_histogram( + cpu_usage_pct_key, + cpu_usage_pct, + )); + } else { + debug!("Sum of time spent in all CPU states is <= 0 - this is probably incorrect.") } + + Ok(Some(readings)) } else { Ok(None) } @@ -227,24 +243,28 @@ mod test { matches!(result_a, Ok(None)); let stats = CpuMetricCollector::parse_proc_stat_line_cpu(proc_stat_line_b).unwrap(); - let result_b = cpu_metric_collector.delta_since_last_reading(stats); - - assert!(result_b.is_ok()); + let mut result_b = cpu_metric_collector + .delta_since_last_reading(stats) + .unwrap() + .unwrap(); + result_b.sort_by(|a, b| a.name.cmp(&b.name)); with_settings!({sort_maps => true}, { assert_json_snapshot!(format!("{}_{}", test_name, "a_b_metrics"), - result_b.unwrap(), + result_b, {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) }); let stats = CpuMetricCollector::parse_proc_stat_line_cpu(proc_stat_line_c).unwrap(); - let result_c = cpu_metric_collector.delta_since_last_reading(stats); - - assert!(result_c.is_ok()); + let mut result_c = cpu_metric_collector + .delta_since_last_reading(stats) + .unwrap() + .unwrap(); + result_c.sort_by(|a, b| a.name.cmp(&b.name)); with_settings!({sort_maps => true}, { assert_json_snapshot!(format!("{}_{}", test_name, "b_c_metrics"), - result_c.unwrap(), + result_c, {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) }); } diff --git a/memfaultd/src/metrics/system_metrics/disk_space.rs b/memfaultd/src/metrics/system_metrics/disk_space.rs index 0110c50..061ac2e 100644 --- a/memfaultd/src/metrics/system_metrics/disk_space.rs +++ b/memfaultd/src/metrics/system_metrics/disk_space.rs @@ -27,7 +27,11 @@ use nom::{ use serde::Serialize; -use crate::metrics::{system_metrics::SystemMetricFamilyCollector, KeyedMetricReading}; +use crate::metrics::{ + core_metrics::{METRIC_STORAGE_USED_DISK_PCT_PREFIX, METRIC_STORAGE_USED_DISK_PCT_SUFFIX}, + 
system_metrics::SystemMetricFamilyCollector, + KeyedMetricReading, +}; pub const DISKSPACE_METRIC_NAMESPACE_LEGACY: &str = "df"; pub const DISKSPACE_METRIC_NAMESPACE: &str = "disk_space"; @@ -168,14 +172,17 @@ where /// For a given mounted device, construct metric readings /// for how many bytes are used and free on the device + /// Also takes 2 pointers used to track how much space + /// is used in total on the system fn get_metrics_for_mount(&self, mount: &Mount) -> Result> { let mount_stats = self .disk_space_impl .disk_space_info_for_path(mount.mount_point.as_path())?; let block_size = mount_stats.block_size; - let bytes_free = mount_stats.blocks_free * block_size; - let bytes_used = (mount_stats.blocks * block_size) - bytes_free; + let bytes_total = (mount_stats.blocks * block_size) as f64; + let bytes_free = (mount_stats.blocks_free * block_size) as f64; + let bytes_used = bytes_total - bytes_free; let disk_id = mount .device @@ -183,23 +190,48 @@ where .ok_or_else(|| eyre!("Couldn't extract basename"))? .to_string_lossy(); - let bytes_free_reading = KeyedMetricReading::new_histogram( - format!("disk_space/{}/free_bytes", disk_id) - .as_str() - .parse() - .map_err(|e| eyre!("Couldn't parse metric key for bytes free: {}", e))?, - bytes_free as f64, - ); - - let bytes_used_reading = KeyedMetricReading::new_histogram( - format!("disk_space/{}/used_bytes", disk_id) + if bytes_total > 0.0 { + let bytes_free_reading = KeyedMetricReading::new_histogram( + format!("disk_space/{}/free_bytes", disk_id) + .as_str() + .parse() + .map_err(|e| eyre!("Couldn't parse metric key for bytes free: {}", e))?, + bytes_free, + ); + + let bytes_used_reading = KeyedMetricReading::new_histogram( + format!("disk_space/{}/used_bytes", disk_id) + .as_str() + .parse() + .map_err(|e| eyre!("Couldn't parse metric key for bytes used: {}", e))?, + bytes_used, + ); + + let storage_disk_pct_reading = KeyedMetricReading::new_histogram( + format!( + "{}{}{}", + METRIC_STORAGE_USED_DISK_PCT_PREFIX, + disk_id, + METRIC_STORAGE_USED_DISK_PCT_SUFFIX + ) .as_str() .parse() .map_err(|e| eyre!("Couldn't parse metric key for bytes used: {}", e))?, - bytes_used as f64, - ); - - Ok(vec![bytes_free_reading, bytes_used_reading]) + (bytes_used / bytes_total) * 100.0, + ); + + Ok(vec![ + bytes_free_reading, + bytes_used_reading, + storage_disk_pct_reading, + ]) + } else { + Err(eyre!( + "Total bytes for {} is not a positive number ({}) - can't calculate metrics.", + disk_id, + bytes_total, + )) + } } pub fn get_disk_space_metrics(&mut self) -> Result> { diff --git a/memfaultd/src/metrics/system_metrics/memory.rs b/memfaultd/src/metrics/system_metrics/memory.rs index 17b511e..d3b9120 100644 --- a/memfaultd/src/metrics/system_metrics/memory.rs +++ b/memfaultd/src/metrics/system_metrics/memory.rs @@ -60,25 +60,22 @@ use crate::metrics::{ KeyedMetricReading, MetricStringKey, }; -const PROC_MEMINFO_PATH: &str = "/proc/meminfo"; +pub const PROC_MEMINFO_PATH: &str = "/proc/meminfo"; pub const MEMORY_METRIC_NAMESPACE: &str = "memory"; -pub struct MemoryMetricsCollector; +#[cfg_attr(test, mockall::automock)] +pub trait MemInfoParser { + fn get_meminfo_stats(&self) -> Result>; +} -impl MemoryMetricsCollector { - pub fn new() -> Self { - MemoryMetricsCollector {} - } +/// Isolates the functions for parsing the contents +/// of /proc/meminfo for use in other modules +pub struct MemInfoParserImpl; - pub fn get_memory_metrics(&self) -> Result> { - let path = Path::new(PROC_MEMINFO_PATH); - // Need to read all of /proc/meminfo at once - // as we 
derive used memory based on a calculation
-        // using multiple lines
-        let contents = read_to_string(path)?;
-        Self::parse_meminfo_memory_stats(&contents)
+impl MemInfoParserImpl {
+    pub fn new() -> Self {
+        Self {}
     }
-
     /// Parses the key in a /proc/meminfo line
     ///
     /// A key is the string terminated by the `:` character
@@ -99,21 +96,53 @@ impl MemoryMetricsCollector {
         delimited(multispace1, double, tag(" kB"))(meminfo_line_suffix)
     }
 
-    /// Parses a full /proc/meminfo contents and returns
-    /// a vector of KeyedMetricReadings
-    fn parse_meminfo_memory_stats(meminfo: &str) -> Result<Vec<KeyedMetricReading>> {
-        let mut stats = meminfo
+    /// Given the contents of /proc/meminfo, returns a HashMap that maps
+    /// the keys in the file to the size in bytes for that key
+    fn parse_meminfo_stats(meminfo: &str) -> HashMap<String, f64> {
+        meminfo
             .trim()
             .lines()
-            .map(|line| -> Result<(&str, f64), String> {
+            .map(|line| -> Result<(String, f64), String> {
                 let (suffix, key) = Self::parse_meminfo_key(line).map_err(|e| e.to_string())?;
                 let (_, kb) = Self::parse_meminfo_kb(suffix).map_err(|e| e.to_string())?;
 
                 // Use bytes as unit instead of KB
-                Ok((key, kb * 1024.0))
+                Ok((key.to_string(), kb * 1024.0))
             })
             .filter_map(|result| result.ok())
-            .collect::<HashMap<&str, f64>>();
+            .collect()
+    }
+}
+impl MemInfoParser for MemInfoParserImpl {
+    /// Reads /proc/meminfo and returns a map from each key in the
+    /// file to its reported size, converted to bytes
+    fn get_meminfo_stats(&self) -> Result<HashMap<String, f64>> {
+        let path = Path::new(PROC_MEMINFO_PATH);
+        // Need to read all of /proc/meminfo at once
+        // as we derive used memory based on a calculation
+        // using multiple lines
+        let contents = read_to_string(path)?;
+        Ok(MemInfoParserImpl::parse_meminfo_stats(&contents))
+    }
+}
+
+pub struct MemoryMetricsCollector<T> {
+    mem_info_parser: T,
+}
+
+impl<T> MemoryMetricsCollector<T>
+where
+    T: MemInfoParser,
+{
+    pub fn new(mem_info_parser: T) -> Self {
+        MemoryMetricsCollector { mem_info_parser }
+    }
+
+    /// Reads the /proc/meminfo stats via the configured MemInfoParser and
+    /// derives a vector of KeyedMetricReadings from them
+    fn get_memory_metrics(&self) -> Result<Vec<KeyedMetricReading>> {
+        let mut stats = self.mem_info_parser.get_meminfo_stats()?;
 
         // Use the same methodology as `free` to calculate used memory.
// // For kernels 3.14 and greater: @@ -158,7 +187,10 @@ impl MemoryMetricsCollector { } } -impl SystemMetricFamilyCollector for MemoryMetricsCollector { +impl SystemMetricFamilyCollector for MemoryMetricsCollector +where + T: MemInfoParser, +{ fn collect_metrics(&mut self) -> Result> { self.get_memory_metrics() } @@ -174,7 +206,7 @@ mod test { use insta::{assert_json_snapshot, rounded_redaction, with_settings}; use rstest::rstest; - use super::MemoryMetricsCollector; + use super::*; #[rstest] #[case("MemTotal: 365916 kB", "MemTotal", 365916.0)] @@ -187,8 +219,8 @@ mod test { #[case] expected_key: &str, #[case] expected_value: f64, ) { - let (suffix, key) = MemoryMetricsCollector::parse_meminfo_key(proc_meminfo_line).unwrap(); - let (_, kb) = MemoryMetricsCollector::parse_meminfo_kb(suffix).unwrap(); + let (suffix, key) = MemInfoParserImpl::parse_meminfo_key(proc_meminfo_line).unwrap(); + let (_, kb) = MemInfoParserImpl::parse_meminfo_kb(suffix).unwrap(); assert_eq!(key, expected_key); assert_eq!(kb, expected_value); @@ -222,7 +254,7 @@ KReclaimable: 14028 kB with_settings!({sort_maps => true}, { assert_json_snapshot!( - MemoryMetricsCollector::parse_meminfo_memory_stats(meminfo).unwrap(), + MemInfoParserImpl::parse_meminfo_stats(meminfo), {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) }); } @@ -251,18 +283,24 @@ Mapped: 29668 kB Shmem: 11264 kB KReclaimable: 14028 kB "; + let mut mock_meminfo_parser = MockMemInfoParser::new(); + mock_meminfo_parser + .expect_get_meminfo_stats() + .times(1) + .returning(|| Ok(MemInfoParserImpl::parse_meminfo_stats(meminfo))); + let memory_metrics_collector = MemoryMetricsCollector::new(mock_meminfo_parser); with_settings!({sort_maps => true}, { assert_json_snapshot!( - MemoryMetricsCollector::parse_meminfo_memory_stats(meminfo).unwrap(), + memory_metrics_collector.get_memory_metrics().unwrap(), {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) }); } #[rstest] fn test_fail_to_parse_bad_meminfo_line() { - assert!(MemoryMetricsCollector::parse_meminfo_key("MemFree=1080kB").is_err()); - assert!(MemoryMetricsCollector::parse_meminfo_kb("1080 mB").is_err()); + assert!(MemInfoParserImpl::parse_meminfo_key("MemFree=1080kB").is_err()); + assert!(MemInfoParserImpl::parse_meminfo_kb("1080 mB").is_err()); } #[rstest] @@ -290,43 +328,65 @@ Mapped: 29668 kB Shmem: 11264 kB KReclaimable: 14028 kB "; - assert!(MemoryMetricsCollector::parse_meminfo_memory_stats(meminfo).is_err()); + + let mut mock_meminfo_parser = MockMemInfoParser::new(); + + mock_meminfo_parser + .expect_get_meminfo_stats() + .times(1) + .returning(|| Ok(MemInfoParserImpl::parse_meminfo_stats(meminfo))); + + let memory_metrics_collector = MemoryMetricsCollector::new(mock_meminfo_parser); + assert!(memory_metrics_collector.get_memory_metrics().is_err()) } #[rstest] fn test_fail_get_metrics_with_bad_fmt() { // Not properly formatted with newlines between each key / kB pair let meminfo = "MemTotal: 365916 kB MemFree: 242276 kB -Buffers: 4544 kB Cached: 52128 kB Shmem: 11264 kB - "; - assert!(MemoryMetricsCollector::parse_meminfo_memory_stats(meminfo).is_err()); + Buffers: 4544 kB Cached: 52128 kB Shmem: 11264 kB + "; + let mut mock_meminfo_parser = MockMemInfoParser::new(); + + mock_meminfo_parser + .expect_get_meminfo_stats() + .times(1) + .returning(|| Ok(MemInfoParserImpl::parse_meminfo_stats(meminfo))); + let memory_metrics_collector = MemoryMetricsCollector::new(mock_meminfo_parser); + 
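// The CPU, disk-space, and memory changes in this diff all share the same
// guard: a percentage is only computed when the total is positive (`sum > 0.0`
// for CPU ticks, `bytes_total > 0.0` for disks, and a non-zero MemTotal as
// exercised by the `MemTotal: 0 kB` test fixture below). A minimal sketch of
// that pattern; the names here are illustrative, not memfaultd's API.
fn used_pct(used: f64, total: f64) -> Option<f64> {
    // Refuse to divide by a zero or negative total instead of emitting NaN/inf.
    (total > 0.0).then(|| (used / total) * 100.0)
}

fn main() {
    assert_eq!(used_pct(50.0, 200.0), Some(25.0));
    // A total of zero yields no reading, mirroring the error/skip paths above.
    assert_eq!(used_pct(50.0, 0.0), None);
}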
assert!(memory_metrics_collector.get_memory_metrics().is_err()) } #[rstest] fn test_fail_get_metrics_when_mem_total_is_zero() { - // MemFree is missing let meminfo = "MemTotal: 0 kB -MemAvailable: 292088 kB -Buffers: 4544 kB -Cached: 52128 kB -SwapCached: 0 kB -Active: 21668 kB -Inactive: 51404 kB -Active(anon): 2312 kB -Inactive(anon): 25364 kB -Active(file): 19356 kB -Inactive(file): 26040 kB -Unevictable: 3072 kB -Mlocked: 0 kB -SwapTotal: 0 kB -SwapFree: 0 kB -Dirty: 0 kB -Writeback: 0 kB -AnonPages: 19488 kB -Mapped: 29668 kB -Shmem: 11264 kB -KReclaimable: 14028 kB - "; - assert!(MemoryMetricsCollector::parse_meminfo_memory_stats(meminfo).is_err()); + MemAvailable: 292088 kB + Buffers: 4544 kB + Cached: 52128 kB + SwapCached: 0 kB + Active: 21668 kB + Inactive: 51404 kB + Active(anon): 2312 kB + Inactive(anon): 25364 kB + Active(file): 19356 kB + Inactive(file): 26040 kB + Unevictable: 3072 kB + Mlocked: 0 kB + SwapTotal: 0 kB + SwapFree: 0 kB + Dirty: 0 kB + Writeback: 0 kB + AnonPages: 19488 kB + Mapped: 29668 kB + Shmem: 11264 kB + KReclaimable: 14028 kB + "; + let mut mock_meminfo_parser = MockMemInfoParser::new(); + + mock_meminfo_parser + .expect_get_meminfo_stats() + .times(1) + .returning(|| Ok(MemInfoParserImpl::parse_meminfo_stats(meminfo))); + let memory_metrics_collector = MemoryMetricsCollector::new(mock_meminfo_parser); + assert!(memory_metrics_collector.get_memory_metrics().is_err()) } } diff --git a/memfaultd/src/metrics/system_metrics/mod.rs b/memfaultd/src/metrics/system_metrics/mod.rs index 8433010..73a6683 100644 --- a/memfaultd/src/metrics/system_metrics/mod.rs +++ b/memfaultd/src/metrics/system_metrics/mod.rs @@ -2,15 +2,15 @@ // Copyright (c) Memfault, Inc. // See License.txt for details use std::{ + collections::HashSet, thread::sleep, time::{Duration, Instant}, }; -use eyre::Result; -use log::{debug, warn}; +use eyre::{eyre, Result}; +use log::debug; use crate::{ - config::SystemMetricConfig, metrics::KeyedMetricReading, util::system::{bytes_per_page, clock_ticks_per_second}, }; @@ -19,27 +19,31 @@ mod cpu; use crate::metrics::system_metrics::cpu::{CpuMetricCollector, CPU_METRIC_NAMESPACE}; mod thermal; -use crate::metrics::system_metrics::thermal::{ThermalMetricsCollector, THERMAL_METRIC_NAMESPACE}; +use thermal::ThermalMetricsCollector; +pub use thermal::THERMAL_METRIC_NAMESPACE; mod memory; use crate::metrics::system_metrics::memory::{MemoryMetricsCollector, MEMORY_METRIC_NAMESPACE}; mod network_interfaces; -use crate::metrics::system_metrics::network_interfaces::{ - NetworkInterfaceMetricCollector, NetworkInterfaceMetricsConfig, +use network_interfaces::{NetworkInterfaceMetricCollector, NetworkInterfaceMetricsConfig}; +pub use network_interfaces::{ + METRIC_INTERFACE_BYTES_PER_SECOND_RX_SUFFIX, METRIC_INTERFACE_BYTES_PER_SECOND_TX_SUFFIX, NETWORK_INTERFACE_METRIC_NAMESPACE, }; mod processes; +pub use processes::ProcessMetricsConfig; use processes::{ProcessMetricsCollector, PROCESSES_METRIC_NAMESPACE}; mod disk_space; +pub use disk_space::DiskSpaceMetricsConfig; use disk_space::{ - DiskSpaceMetricCollector, DiskSpaceMetricsConfig, NixStatvfs, DISKSPACE_METRIC_NAMESPACE, + DiskSpaceMetricCollector, NixStatvfs, DISKSPACE_METRIC_NAMESPACE, DISKSPACE_METRIC_NAMESPACE_LEGACY, }; -use self::processes::ProcessMetricsConfig; +use self::memory::{MemInfoParser, MemInfoParserImpl}; use super::MetricsMBox; pub const BUILTIN_SYSTEM_METRIC_NAMESPACES: &[&str; 7] = &[ @@ -66,52 +70,51 @@ pub struct SystemMetricsCollector { } impl SystemMetricsCollector { - pub fn 
new(system_metric_config: SystemMetricConfig, metrics_mbox: MetricsMBox) -> Self { + pub fn new( + processes_config: ProcessMetricsConfig, + network_interfaces_config: Option>, + disk_space_config: DiskSpaceMetricsConfig, + metrics_mbox: MetricsMBox, + ) -> Self { // CPU, Memory, and Thermal metrics are captured by default let mut metric_family_collectors: Vec> = vec![ Box::new(CpuMetricCollector::new()), - Box::new(MemoryMetricsCollector::new()), + Box::new(MemoryMetricsCollector::new(MemInfoParserImpl::new())), Box::new(ThermalMetricsCollector::new()), ]; // Check if process metrics have been manually configured - match system_metric_config.processes { - Some(processes) if !processes.is_empty() => { - metric_family_collectors.push(Box::new(ProcessMetricsCollector::::new( - ProcessMetricsConfig::Processes(processes), - clock_ticks_per_second() as f64 / 1000.0, - bytes_per_page() as f64, - ))) - } + match processes_config { // Monitoring no processes means this collector is disabled - Some(_empty_set) => {} - None => { - metric_family_collectors.push(Box::new(ProcessMetricsCollector::::new( - ProcessMetricsConfig::Auto, - clock_ticks_per_second() as f64 / 1000.0, - bytes_per_page() as f64, - ))) + ProcessMetricsConfig::Processes(processes) if processes.is_empty() => {} + // In all other cases we can just directly pass the config to ProcessMetricsCollector + process_metrics_config => { + // We need the total memory for the system to calculate the + // percent used by each individual process + if let Ok(mem_total) = Self::get_total_memory() { + metric_family_collectors.push(Box::new( + ProcessMetricsCollector::::new( + process_metrics_config, + clock_ticks_per_second() as f64 / 1000.0, + bytes_per_page() as f64, + mem_total, + ), + )) + } } }; // Check if disk space metrics have been manually configured - match system_metric_config.disk_space { - Some(disks) if !disks.is_empty() => { - metric_family_collectors.push(Box::new(DiskSpaceMetricCollector::new( - NixStatvfs::new(), - DiskSpaceMetricsConfig::Disks(disks), - ))) - } + match disk_space_config { // Monitoring no disks means this collector is disabled - Some(_empty_set) => {} - None => metric_family_collectors.push(Box::new(DiskSpaceMetricCollector::new( - NixStatvfs::new(), - DiskSpaceMetricsConfig::Auto, - ))), + DiskSpaceMetricsConfig::Disks(disks) if disks.is_empty() => {} + disk_space_metrics_config => metric_family_collectors.push(Box::new( + DiskSpaceMetricCollector::new(NixStatvfs::new(), disk_space_metrics_config), + )), }; // Check if network interface metrics have been manually configured - match system_metric_config.network_interfaces { + match network_interfaces_config { Some(interfaces) if !interfaces.is_empty() => metric_family_collectors.push(Box::new( NetworkInterfaceMetricCollector::::new( NetworkInterfaceMetricsConfig::Interfaces(interfaces), @@ -132,6 +135,14 @@ impl SystemMetricsCollector { } } + fn get_total_memory() -> Result { + let mem_info_parser = MemInfoParserImpl::new(); + let mut stats = mem_info_parser.get_meminfo_stats()?; + stats + .remove("MemTotal") + .ok_or_else(|| eyre!("Couldn't get MemTotal")) + } + pub fn run(&mut self, metric_poll_duration: Duration) { loop { for collector in self.metric_family_collectors.iter_mut() { @@ -145,7 +156,7 @@ impl SystemMetricsCollector { ) } } - Err(e) => warn!( + Err(e) => debug!( "Failed to collect readings for family \"{}\": {}", collector.family_name(), e diff --git a/memfaultd/src/metrics/system_metrics/network_interfaces.rs 
b/memfaultd/src/metrics/system_metrics/network_interfaces.rs index 77ace6b..75501a7 100644 --- a/memfaultd/src/metrics/system_metrics/network_interfaces.rs +++ b/memfaultd/src/metrics/system_metrics/network_interfaces.rs @@ -30,6 +30,13 @@ use nom::{ IResult, }; +use crate::metrics::core_metrics::{ + METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_PREFIX, + METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_SUFFIX, + METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_PREFIX, + METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_SUFFIX, METRIC_CONNECTIVITY_RECV_BYTES, + METRIC_CONNECTIVITY_SENT_BYTES, +}; use crate::{ metrics::{ system_metrics::SystemMetricFamilyCollector, KeyedMetricReading, MetricReading, @@ -42,6 +49,8 @@ use eyre::{eyre, ErrReport, Result}; const PROC_NET_DEV_PATH: &str = "/proc/net/dev"; pub const NETWORK_INTERFACE_METRIC_NAMESPACE: &str = "interface"; +pub const METRIC_INTERFACE_BYTES_PER_SECOND_RX_SUFFIX: &str = "bytes_per_second/rx"; +pub const METRIC_INTERFACE_BYTES_PER_SECOND_TX_SUFFIX: &str = "bytes_per_second/tx"; // Metric keys that are currently captured and reported // by memfaultd. @@ -50,11 +59,11 @@ pub const NETWORK_INTERFACE_METRIC_NAMESPACE: &str = "interface"; // to filter out the values read from it so that // only the high-signal and widely-applicable metrics remain. const NETWORK_INTERFACE_METRIC_KEYS: &[&str; 8] = &[ - "bytes_per_second/rx", + METRIC_INTERFACE_BYTES_PER_SECOND_RX_SUFFIX, "packets_per_second/rx", "errors_per_second/rx", "dropped_per_second/rx", - "bytes_per_second/tx", + METRIC_INTERFACE_BYTES_PER_SECOND_TX_SUFFIX, "packets_per_second/tx", "errors_per_second/tx", "dropped_per_second/tx", @@ -89,8 +98,12 @@ where fn interface_is_monitored(&self, interface: &str) -> bool { match &self.config { - // Ignore loopback interfaces - NetworkInterfaceMetricsConfig::Auto => !interface.starts_with("lo"), + // Ignore loopback, tunnel, and dummy interfaces in Auto mode + NetworkInterfaceMetricsConfig::Auto => { + !(interface.starts_with("lo") + || interface.starts_with("tun") + || interface.starts_with("dummy")) + } NetworkInterfaceMetricsConfig::Interfaces(configured_interfaces) => { configured_interfaces.contains(interface) } @@ -108,6 +121,8 @@ where let reader = BufReader::new(file); let mut net_metric_readings = vec![]; + let mut total_bytes_rx = 0; + let mut total_bytes_tx = 0; for line in reader.lines() { // Discard errors - the assumption here is that we are only parsing @@ -123,6 +138,8 @@ where stats: net_stats, reading_time: T::now(), }, + &mut total_bytes_rx, + &mut total_bytes_tx, ) { net_metric_readings.append(&mut readings); } @@ -130,6 +147,17 @@ where } } + net_metric_readings.extend([ + KeyedMetricReading::new_counter( + MetricStringKey::from(METRIC_CONNECTIVITY_RECV_BYTES), + total_bytes_rx as f64, + ), + KeyedMetricReading::new_counter( + MetricStringKey::from(METRIC_CONNECTIVITY_SENT_BYTES), + total_bytes_tx as f64, + ), + ]); + // Check if we were able to parse at least one CPU metric reading if no_parseable_lines { Err(eyre!( @@ -200,6 +228,8 @@ where &mut self, interface: String, current_reading: ProcNetDevReading, + total_bytes_rx: &mut u64, + total_bytes_tx: &mut u64, ) -> Result>> { // Check to make sure there was a previous reading to calculate a delta with if let Some(ProcNetDevReading { @@ -209,6 +239,60 @@ where .previous_readings_by_interface .insert(interface.clone(), current_reading.clone()) { + // Bytes received is the first numeric value in a /proc/net/dev line + let curr_interface_bytes_rx = current_reading + .stats + .first() + 
.ok_or(eyre!("Current reading is missing bytes received value"))?; + let prev_interface_bytes_rx = previous_net_stats + .first() + .ok_or(eyre!("Previous reading is missing bytes received value"))?; + let interface_bytes_rx = Self::counter_delta_with_overflow( + *curr_interface_bytes_rx, + *prev_interface_bytes_rx, + ); + + // Bytes sent is the 9th numeric value in a /proc/net/dev line + let curr_interface_bytes_tx = current_reading + .stats + .get(8) + .ok_or(eyre!("Current reading is missing bytes sent value"))?; + let prev_interface_bytes_tx = previous_net_stats + .get(8) + .ok_or(eyre!("Previous reading is missing bytes sent value"))?; + let interface_bytes_tx = Self::counter_delta_with_overflow( + *curr_interface_bytes_tx, + *prev_interface_bytes_tx, + ); + + *total_bytes_rx += interface_bytes_rx; + *total_bytes_tx += interface_bytes_tx; + + let interface_rx_key = MetricStringKey::from_str( + format!( + "{}{}{}", + METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_PREFIX, + interface, + METRIC_CONNECTIVITY_INTERFACE_RECV_BYTES_SUFFIX + ) + .as_str(), + ) + .map_err(|e| eyre!("Couldn't construct metric key: {}", e))?; + let interface_tx_key = MetricStringKey::from_str( + format!( + "{}{}{}", + METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_PREFIX, + interface, + METRIC_CONNECTIVITY_INTERFACE_SENT_BYTES_SUFFIX + ) + .as_str(), + ) + .map_err(|e| eyre!("Couldn't construct metric key: {}", e))?; + let interface_core_metrics = [ + KeyedMetricReading::new_counter(interface_rx_key, interface_bytes_rx as f64), + KeyedMetricReading::new_counter(interface_tx_key, interface_bytes_tx as f64), + ]; + let current_period_rates = current_reading .stats @@ -265,7 +349,13 @@ where }) .collect::>>(); match readings { - Ok(readings) => Ok(Some(readings)), + Ok(mut readings) => { + // Add core metrics for interface + // calculated earlier to returned Vec of metric + // readings + readings.extend(interface_core_metrics); + Ok(Some(readings)) + } Err(e) => Err(e), } } else { @@ -350,7 +440,8 @@ mod test { stats, reading_time: TestInstant::now(), }; - let result_a = net_metric_collector.calculate_network_metrics(net_if, reading_a); + let result_a = + net_metric_collector.calculate_network_metrics(net_if, reading_a, &mut 0, &mut 0); matches!(result_a, Ok(None)); TestInstant::sleep(Duration::from_secs(10)); @@ -364,7 +455,8 @@ mod test { stats, reading_time: TestInstant::now(), }; - let result_b = net_metric_collector.calculate_network_metrics(net_if, reading_b); + let result_b = + net_metric_collector.calculate_network_metrics(net_if, reading_b, &mut 0, &mut 0); assert!(result_b.is_ok()); @@ -385,7 +477,8 @@ mod test { stats, reading_time: TestInstant::now(), }; - let result_c = net_metric_collector.calculate_network_metrics(net_if, reading_c); + let result_c = + net_metric_collector.calculate_network_metrics(net_if, reading_c, &mut 0, &mut 0); assert!(result_c.is_ok()); @@ -401,6 +494,7 @@ mod test { " eth0: 1000 25 0 0 0 0 0 0 2000 25 0 0 0 0 0 0", " eth1: 2500 80 10 10 0 0 0 0 3000 50 0 0 0 0 0 0", " eth0: 5000 100 15 15 0 0 0 0 5000 75 20 20 0 0 0 0", + " eth1: 3700 100 10 10 0 0 0 0 3200 50 0 0 0 0 0 0", true, "different_interfaces" )] @@ -408,9 +502,13 @@ mod test { #[case] proc_net_dev_line_a: &str, #[case] proc_net_dev_line_b: &str, #[case] proc_net_dev_line_c: &str, + #[case] proc_net_dev_line_d: &str, #[case] use_auto_config: bool, #[case] test_name: &str, ) { + let mut total_bytes_rx: u64 = 0; + let mut total_bytes_tx: u64 = 0; + let mut net_metric_collector = NetworkInterfaceMetricCollector::::new(if 
use_auto_config { NetworkInterfaceMetricsConfig::Auto @@ -427,7 +525,12 @@ mod test { stats, reading_time: TestInstant::now(), }; - let result_a = net_metric_collector.calculate_network_metrics(net_if, reading_a); + let result_a = net_metric_collector.calculate_network_metrics( + net_if, + reading_a, + &mut total_bytes_rx, + &mut total_bytes_tx, + ); matches!(result_a, Ok(None)); TestInstant::sleep(Duration::from_secs(10)); @@ -441,7 +544,12 @@ mod test { stats, reading_time: TestInstant::now(), }; - let result_b = net_metric_collector.calculate_network_metrics(net_if, reading_b); + let result_b = net_metric_collector.calculate_network_metrics( + net_if, + reading_b, + &mut total_bytes_rx, + &mut total_bytes_tx, + ); matches!(result_b, Ok(None)); TestInstant::sleep(Duration::from_secs(30)); @@ -455,10 +563,14 @@ mod test { stats, reading_time: TestInstant::now(), }; - let result_c = net_metric_collector.calculate_network_metrics(net_if, reading_c); + let result_c = net_metric_collector.calculate_network_metrics( + net_if, + reading_c, + &mut total_bytes_rx, + &mut total_bytes_tx, + ); assert!(result_c.is_ok()); - // 2 readings are required to calculate metrics (since they are rates), // so we should only get actual metrics after processing reading_c // (which is the second eth0 reading) @@ -467,6 +579,35 @@ mod test { result_c.unwrap(), {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) }); + + TestInstant::sleep(Duration::from_secs(30)); + + let (net_if, stats) = + NetworkInterfaceMetricCollector::::parse_proc_net_dev_line( + proc_net_dev_line_d, + ) + .unwrap(); + let reading_d = ProcNetDevReading { + stats, + reading_time: TestInstant::now(), + }; + let result_d = net_metric_collector.calculate_network_metrics( + net_if, + reading_d, + &mut total_bytes_rx, + &mut total_bytes_tx, + ); + + assert!(result_d.is_ok()); + + assert_eq!(total_bytes_tx, 3200); + assert_eq!(total_bytes_rx, 5200); + + with_settings!({sort_maps => true}, { + assert_json_snapshot!(format!("{}_{}", test_name, "b_d_metrics"), + result_d.unwrap(), + {"[].value.**.timestamp" => "[timestamp]", "[].value.**.value" => rounded_redaction(5)}) + }); } #[rstest] #[case(vec!["eth0".to_string(), "wlan1".to_string()], "eth1", false)] @@ -486,4 +627,24 @@ mod test { should_be_monitored ) } + #[rstest] + #[case("eth1", true)] + #[case("eth0", true)] + #[case("enp0s10", true)] + #[case("wlan1", true)] + #[case("tun0", false)] + #[case("dummy1", false)] + #[case("lo1", false)] + fn test_interface_is_monitored_auto( + #[case] interface: &str, + #[case] should_be_monitored: bool, + ) { + let net_metric_collector = NetworkInterfaceMetricCollector::::new( + NetworkInterfaceMetricsConfig::Auto, + ); + assert_eq!( + net_metric_collector.interface_is_monitored(interface), + should_be_monitored + ) + } } diff --git a/memfaultd/src/metrics/system_metrics/processes.rs b/memfaultd/src/metrics/system_metrics/processes.rs index 06a7eca..12586bc 100644 --- a/memfaultd/src/metrics/system_metrics/processes.rs +++ b/memfaultd/src/metrics/system_metrics/processes.rs @@ -33,12 +33,20 @@ use nom::{ IResult, }; -use crate::metrics::{system_metrics::SystemMetricFamilyCollector, KeyedMetricReading}; +use crate::metrics::{ + core_metrics::{ + METRIC_CPU_USAGE_PROCESS_PCT_PREFIX, METRIC_CPU_USAGE_PROCESS_PCT_SUFFIX, + METRIC_MEMORY_PROCESS_PCT_PREFIX, + }, + system_metrics::SystemMetricFamilyCollector, + KeyedMetricReading, +}; use crate::util::time_measure::TimeMeasure; const PROC_DIR: &str = "/proc/"; pub const 
PROCESSES_METRIC_NAMESPACE: &str = "processes"; +#[derive(Clone)] pub enum ProcessMetricsConfig { Auto, Processes(HashSet), @@ -63,18 +71,25 @@ pub struct ProcessMetricsCollector { processes: HashMap>, clock_ticks_per_ms: f64, bytes_per_page: f64, + mem_total: f64, } impl ProcessMetricsCollector where T: TimeMeasure + Copy + Send + Sync + 'static, { - pub fn new(config: ProcessMetricsConfig, clock_ticks_per_ms: f64, bytes_per_page: f64) -> Self { + pub fn new( + config: ProcessMetricsConfig, + clock_ticks_per_ms: f64, + bytes_per_page: f64, + mem_total: f64, + ) -> Self { Self { config, processes: HashMap::new(), clock_ticks_per_ms, bytes_per_page, + mem_total, } } @@ -289,6 +304,27 @@ where current.pagefaults_major - previous.pagefaults_major, ); + let cpu_usage_process_pct_reading = KeyedMetricReading::new_histogram( + format!( + "{}{}{}", + METRIC_CPU_USAGE_PROCESS_PCT_PREFIX, + current.name, + METRIC_CPU_USAGE_PROCESS_PCT_SUFFIX, + ) + .as_str() + .parse() + .map_err(|e| eyre!("Couldn't parse metric key: {}", e))?, + cputime_sys_pct + cputime_user_pct, + ); + + let memory_process_pct_reading = KeyedMetricReading::new_histogram( + format!("{}{}_pct", METRIC_MEMORY_PROCESS_PCT_PREFIX, current.name) + .as_str() + .parse() + .map_err(|e| eyre!("Couldn't parse metric key: {}", e))?, + current.rss / self.mem_total, + ); + Ok(vec![ rss_reading, vm_reading, @@ -297,6 +333,8 @@ where utime_reading, pagefaults_minor_reading, pagefaults_major_reading, + cpu_usage_process_pct_reading, + memory_process_pct_reading, ]) } @@ -384,6 +422,7 @@ mod tests { ProcessMetricsConfig::Processes(HashSet::from_iter(["memfaultd".to_string()])), 100.0, 4096.0, + 1000000000.0, ); let line = "55270 (memfaultd) S 1 55270 55270 0 -1 4194368 825 0 0 0 155 102 0 0 20 0 19 0 18548522 1411293184 4397 18446744073709551615 1 1 0 0 0 0 0 4096 17987 0 0 0 17 7 0 0 0 0 0 0 0 0 0 0 0 0 0"; @@ -403,6 +442,7 @@ mod tests { ProcessMetricsConfig::Processes(HashSet::from_iter(["memfaultd".to_string()])), 100.0, 4096.0, + 1000000000.0, ); let first_reading = @@ -449,7 +489,12 @@ mod tests { #[case] use_auto: bool, ) { let mut collector = if use_auto { - ProcessMetricsCollector::::new(ProcessMetricsConfig::Auto, 100.0, 4096.0) + ProcessMetricsCollector::::new( + ProcessMetricsConfig::Auto, + 100.0, + 4096.0, + 1000000000.0, + ) } else { // If auto is not used, the configuration should capture metrics from both processes ProcessMetricsCollector::::new( @@ -459,6 +504,7 @@ mod tests { ])), 100.0, 4096.0, + 1000000000.0, ) }; diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_a_b_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_a_b_metrics.snap index 0cda315..71a2c5e 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_a_b_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_a_b_metrics.snap @@ -1,46 +1,46 @@ --- source: memfaultd/src/metrics/system_metrics/cpu.rs -expression: result_b.unwrap() +expression: result_b --- [ { - "name": "cpu/cpu/percent/user", + "name": "cpu/cpu/percent/idle", "value": { "Histogram": { - "value": 96.15385, + "value": 0.19231, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/nice", + "name": "cpu/cpu/percent/iowait", "value": { "Histogram": { - "value": 2.88462, + "value": 0.0, "timestamp": "[timestamp]" } } }, { - 
"name": "cpu/cpu/percent/system", + "name": "cpu/cpu/percent/irq", "value": { "Histogram": { - "value": 0.76923, + "value": 0.0, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/idle", + "name": "cpu/cpu/percent/nice", "value": { "Histogram": { - "value": 0.19231, + "value": 2.88462, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/iowait", + "name": "cpu/cpu/percent/softirq", "value": { "Histogram": { "value": 0.0, @@ -49,19 +49,28 @@ expression: result_b.unwrap() } }, { - "name": "cpu/cpu/percent/irq", + "name": "cpu/cpu/percent/system", "value": { "Histogram": { - "value": 0.0, + "value": 0.76923, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/softirq", + "name": "cpu/cpu/percent/user", "value": { "Histogram": { - "value": 0.0, + "value": 96.15385, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "cpu_usage_pct", + "value": { + "Histogram": { + "value": 99.80769, "timestamp": "[timestamp]" } } diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_b_c_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_b_c_metrics.snap index 9e65a0a..a125e40 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_b_c_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__cpu__test__basic_delta_b_c_metrics.snap @@ -1,67 +1,76 @@ --- source: memfaultd/src/metrics/system_metrics/cpu.rs -expression: result_c.unwrap() +expression: result_c --- [ { - "name": "cpu/cpu/percent/user", + "name": "cpu/cpu/percent/idle", "value": { "Histogram": { - "value": 18.58736, + "value": 0.74349, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/nice", + "name": "cpu/cpu/percent/iowait", "value": { "Histogram": { - "value": 66.9145, + "value": 0.37175, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/system", + "name": "cpu/cpu/percent/irq", "value": { "Histogram": { - "value": 13.3829, + "value": 0.0, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/idle", + "name": "cpu/cpu/percent/nice", "value": { "Histogram": { - "value": 0.74349, + "value": 66.9145, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/iowait", + "name": "cpu/cpu/percent/softirq", "value": { "Histogram": { - "value": 0.37175, + "value": 0.0, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/irq", + "name": "cpu/cpu/percent/system", "value": { "Histogram": { - "value": 0.0, + "value": 13.3829, "timestamp": "[timestamp]" } } }, { - "name": "cpu/cpu/percent/softirq", + "name": "cpu/cpu/percent/user", "value": { "Histogram": { - "value": 0.0, + "value": 18.58736, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "cpu_usage_pct", + "value": { + "Histogram": { + "value": 99.25651, "timestamp": "[timestamp]" } } diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__disk_space__test__initialize_and_calc_disk_space_for_mounts-2.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__disk_space__test__initialize_and_calc_disk_space_for_mounts-2.snap index c3dbaf0..fb14197 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__disk_space__test__initialize_and_calc_disk_space_for_mounts-2.snap +++ 
b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__disk_space__test__initialize_and_calc_disk_space_for_mounts-2.snap @@ -21,6 +21,15 @@ expression: metrics } } }, + { + "name": "storage_used_sda1_pct", + "value": { + "Histogram": { + "value": 72.07031, + "timestamp": "[timestamp]" + } + } + }, { "name": "disk_space/sda2/free_bytes", "value": { @@ -38,5 +47,14 @@ expression: metrics "timestamp": "[timestamp]" } } + }, + { + "name": "storage_used_sda2_pct", + "value": { + "Histogram": { + "value": 72.07031, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__memory__test__get_memory_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__memory__test__get_memory_metrics.snap index c0659f7..db52b87 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__memory__test__get_memory_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__memory__test__get_memory_metrics.snap @@ -1,33 +1,24 @@ --- source: memfaultd/src/metrics/system_metrics/memory.rs -expression: "MemoryMetricsCollector::parse_meminfo_memory_stats(meminfo).unwrap()" +expression: "MemInfoParserImpl::parse_meminfo_stats(meminfo)" --- -[ - { - "name": "memory/memory/free", - "value": { - "Histogram": { - "value": 248090624.0, - "timestamp": "[timestamp]" - } - } - }, - { - "name": "memory/memory/used", - "value": { - "Histogram": { - "value": 75599872.0, - "timestamp": "[timestamp]" - } - } - }, - { - "name": "memory_pct", - "value": { - "Histogram": { - "value": 20.17622, - "timestamp": "[timestamp]" - } - } - } -] +{ + "Active": 22188032.0, + "AnonPages": 19955712.0, + "Buffers": 4653056.0, + "Cached": 53379072.0, + "Dirty": 0.0, + "Inactive": 52637696.0, + "KReclaimable": 14364672.0, + "Mapped": 30380032.0, + "MemAvailable": 299098112.0, + "MemFree": 248090624.0, + "MemTotal": 374697984.0, + "Mlocked": 0.0, + "Shmem": 11534336.0, + "SwapCached": 0.0, + "SwapFree": 0.0, + "SwapTotal": 0.0, + "Unevictable": 3145728.0, + "Writeback": 0.0 +} diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_a_b_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_a_b_metrics.snap index b075d2c..ad140d7 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_a_b_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_a_b_metrics.snap @@ -74,5 +74,23 @@ expression: result_b.unwrap() "timestamp": "[timestamp]" } } + }, + { + "name": "connectivity_eth0_recv_bytes", + "value": { + "Counter": { + "value": 1500.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "connectivity_eth0_sent_bytes", + "value": { + "Counter": { + "value": 1000.0, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_b_c_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_b_c_metrics.snap index 747c537..f155e77 100644 --- 
a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_b_c_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__basic_delta_b_c_metrics.snap @@ -74,5 +74,23 @@ expression: result_c.unwrap() "timestamp": "[timestamp]" } } + }, + { + "name": "connectivity_eth0_recv_bytes", + "value": { + "Counter": { + "value": 2500.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "connectivity_eth0_sent_bytes", + "value": { + "Counter": { + "value": 2000.0, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_a_c_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_a_c_metrics.snap index dcab1f9..95c239c 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_a_c_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_a_c_metrics.snap @@ -74,5 +74,23 @@ expression: result_c.unwrap() "timestamp": "[timestamp]" } } + }, + { + "name": "connectivity_eth0_recv_bytes", + "value": { + "Counter": { + "value": 4000.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "connectivity_eth0_sent_bytes", + "value": { + "Counter": { + "value": 3000.0, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_b_d_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_b_d_metrics.snap new file mode 100644 index 0000000..ebf98db --- /dev/null +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__different_interfaces_b_d_metrics.snap @@ -0,0 +1,96 @@ +--- +source: memfaultd/src/metrics/system_metrics/network_interfaces.rs +expression: result_d.unwrap() +--- +[ + { + "name": "interface/eth1/bytes_per_second/rx", + "value": { + "Histogram": { + "value": 20.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/packets_per_second/rx", + "value": { + "Histogram": { + "value": 0.33333, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/errors_per_second/rx", + "value": { + "Histogram": { + "value": 0.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/dropped_per_second/rx", + "value": { + "Histogram": { + "value": 0.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/bytes_per_second/tx", + "value": { + "Histogram": { + "value": 3.33333, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/packets_per_second/tx", + "value": { + "Histogram": { + "value": 0.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/errors_per_second/tx", + "value": { + "Histogram": { + "value": 0.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "interface/eth1/dropped_per_second/tx", + "value": { + "Histogram": { + "value": 0.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "connectivity_eth1_recv_bytes", + "value": { + "Counter": { + "value": 1200.0, + "timestamp": "[timestamp]" + } + } 
+ }, + { + "name": "connectivity_eth1_sent_bytes", + "value": { + "Counter": { + "value": 200.0, + "timestamp": "[timestamp]" + } + } + } +] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_a_b_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_a_b_metrics.snap index 6aaac07..e249231 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_a_b_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_a_b_metrics.snap @@ -74,5 +74,23 @@ expression: result_b.unwrap() "timestamp": "[timestamp]" } } + }, + { + "name": "connectivity_eth0_recv_bytes", + "value": { + "Counter": { + "value": 2500.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "connectivity_eth0_sent_bytes", + "value": { + "Counter": { + "value": 1000.0, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_b_c_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_b_c_metrics.snap index fb6382a..976f445 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_b_c_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__network_interfaces__test__with_overflow_b_c_metrics.snap @@ -74,5 +74,23 @@ expression: result_c.unwrap() "timestamp": "[timestamp]" } } + }, + { + "name": "connectivity_eth0_recv_bytes", + "value": { + "Counter": { + "value": 2502.0, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "connectivity_eth0_sent_bytes", + "value": { + "Counter": { + "value": 2000.0, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_false.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_false.snap index 3371196..f46b602 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_false.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_false.snap @@ -3,6 +3,42 @@ source: memfaultd/src/metrics/system_metrics/processes.rs expression: process_metric_readings_2 --- [ + { + "name": "cpu_usage_memfaultd_pct", + "value": { + "Histogram": { + "value": 0.15, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "cpu_usage_systemd_pct", + "value": { + "Histogram": { + "value": 0.0116, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "memory_memfaultd_pct", + "value": { + "Histogram": { + "value": 0.01801, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "memory_systemd_pct", + "value": { + "Histogram": { + "value": 0.0114, + "timestamp": "[timestamp]" + } + } + }, { "name": "processes/memfaultd/cpu/percent/system", "value": { diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_true.snap 
b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_true.snap index 8380217..6580786 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_true.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__process_metrics_auto_true.snap @@ -3,6 +3,24 @@ source: memfaultd/src/metrics/system_metrics/processes.rs expression: process_metric_readings_2 --- [ + { + "name": "cpu_usage_memfaultd_pct", + "value": { + "Histogram": { + "value": 0.15, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "memory_memfaultd_pct", + "value": { + "Histogram": { + "value": 0.01801, + "timestamp": "[timestamp]" + } + } + }, { "name": "processes/memfaultd/cpu/percent/system", "value": { diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__simple_cpu_delta_metrics.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__simple_cpu_delta_metrics.snap index 3c733cc..42980be 100644 --- a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__simple_cpu_delta_metrics.snap +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__processes__tests__simple_cpu_delta_metrics.snap @@ -65,5 +65,23 @@ expression: process_metric_readings.unwrap() "timestamp": "[timestamp]" } } + }, + { + "name": "cpu_usage_memfaultd_pct", + "value": { + "Histogram": { + "value": 0.15, + "timestamp": "[timestamp]" + } + } + }, + { + "name": "memory_memfaultd_pct", + "value": { + "Histogram": { + "value": 0.01801, + "timestamp": "[timestamp]" + } + } } ] diff --git a/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__thermal__tests__read_thermal_zone_temp.snap b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__thermal__tests__read_thermal_zone_temp.snap new file mode 100644 index 0000000..ca04668 --- /dev/null +++ b/memfaultd/src/metrics/system_metrics/snapshots/memfaultd__metrics__system_metrics__thermal__tests__read_thermal_zone_temp.snap @@ -0,0 +1,13 @@ +--- +source: memfaultd/src/metrics/system_metrics/thermal.rs +expression: result +--- +{ + "name": "thermal/cpu-temp/temp", + "value": { + "Histogram": { + "value": 50.0, + "timestamp": "[timestamp]" + } + } +} diff --git a/memfaultd/src/metrics/system_metrics/thermal.rs b/memfaultd/src/metrics/system_metrics/thermal.rs index d251f32..bab5c5a 100644 --- a/memfaultd/src/metrics/system_metrics/thermal.rs +++ b/memfaultd/src/metrics/system_metrics/thermal.rs @@ -23,7 +23,7 @@ //! See additional Linux kernel documentation on /sys/class/thermal here: //! https://www.kernel.org/doc/Documentation/thermal/sysfs-api.txt -use std::str::FromStr; +use std::{fs::read_to_string, str::FromStr}; use crate::metrics::{ system_metrics::SystemMetricFamilyCollector, KeyedMetricReading, MetricStringKey, @@ -41,13 +41,21 @@ impl ThermalMetricsCollector { } fn read_thermal_zone_temp(zone_name: &str, root_dir: &str) -> Result { - let temp_file = &format!("{}/{}/temp", root_dir, zone_name); + let temp_file = format!("{}/{}/temp", root_dir, zone_name); + let type_file = format!("{}/{}/type", root_dir, zone_name); + // The readings are in millidegrees Celsius, so we divide by 1000 to get // the temperature in degrees Celsius. 
- let temp_in_celsius = std::fs::read_to_string(temp_file)?.trim().parse::<f64>()? / 1000.0; + let temp_in_celsius = read_to_string(temp_file)?.trim().parse::<f64>()? / 1000.0; + + // Extract the string for the type of the thermal zone for use in the metric key + let thermal_zone_type = read_to_string(type_file)?.trim().to_string(); - Ok(KeyedMetricReading::new_gauge( - MetricStringKey::from_str(zone_name).map_err(|e| { + Ok(KeyedMetricReading::new_histogram( + MetricStringKey::from_str( + format!("{}/{}/temp", THERMAL_METRIC_NAMESPACE, thermal_zone_type).as_str(), + ) + .map_err(|e| { eyre!( "Failed to construct MetricStringKey for thermal zone: {}", e @@ -95,7 +103,7 @@ impl SystemMetricFamilyCollector for ThermalMetricsCollector { // the input and output values are known. mod tests { use super::*; - use crate::metrics::MetricReading; + use insta::{assert_json_snapshot, rounded_redaction}; use std::fs::File; use std::io::Write; use tempfile::tempdir; @@ -116,21 +124,21 @@ mod tests { // Write the temperature (in millidegrees Celsius) to the "temp" file. writeln!(temp_file, "50000").unwrap(); + // Create a "type" file inside the "thermal_zone0" directory. + let type_file_path = thermal_zone_dir.join("type"); + let mut type_file = File::create(type_file_path).unwrap(); + + // Write the thermal zone type string to the "type" file. + writeln!(type_file, "cpu-temp").unwrap(); + // Call the function and check the result. let result = ThermalMetricsCollector::read_thermal_zone_temp( "thermal_zone0", dir.path().to_str().unwrap(), ) .unwrap(); - // The temperature should be 50.0 degrees Celsius. - assert!(matches!( - result.value, - MetricReading::Gauge { - #[allow(illegal_floating_point_literal_pattern)] - value: 50.0, - .. - } - )); + + assert_json_snapshot!(result, {".value.**.timestamp" => "[timestamp]", ".value.**.value" => rounded_redaction(5)}); // Delete the temporary directory.
dir.close().unwrap(); diff --git a/memfaultd/src/metrics/timeseries/mod.rs b/memfaultd/src/metrics/timeseries/mod.rs index 836feae..9b35bab 100644 --- a/memfaultd/src/metrics/timeseries/mod.rs +++ b/memfaultd/src/metrics/timeseries/mod.rs @@ -5,7 +5,7 @@ use chrono::{DateTime, Utc}; use eyre::{eyre, Result}; use std::cmp; -use super::{MetricReading, MetricValue}; +use super::{construct_histogram_value, MetricReading, MetricValue}; const FINITENESS_ERROR: &str = "Metric values must be finite."; @@ -33,6 +33,7 @@ impl Histogram { if !value.is_finite() { return Err(eyre!(FINITENESS_ERROR)); } + Ok(Self { sum: value, count: 1, @@ -70,9 +71,13 @@ impl TimeSeries for Histogram { fn value(&self) -> MetricValue { if self.count > 0 { - MetricValue::Number(self.sum / self.count as f64) + MetricValue::Histogram(construct_histogram_value( + self.min, + self.sum / self.count as f64, + self.max, + )) } else { - MetricValue::Number(f64::NAN) + MetricValue::Histogram(construct_histogram_value(f64::NAN, f64::NAN, f64::NAN)) } } } @@ -282,23 +287,24 @@ mod tests { use chrono::Duration; use rstest::rstest; - use crate::metrics::{MetricReading, MetricTimestamp, MetricValue}; + use crate::metrics::{HistogramValue, MetricReading, MetricTimestamp, MetricValue}; use std::{f64::INFINITY, f64::NAN, f64::NEG_INFINITY, str::FromStr}; use super::TimeSeries; use super::{Counter, Gauge, Histogram, TimeWeightedAverage}; #[rstest] - #[case(1.0, 1000, 2.0, 1.5, 1000)] - #[case(10.0, 10_000, 10.0, 10.0, 10_000)] - #[case(1.0, 9_000, 0.0, 0.5, 9_000)] - #[case(1.0, 0, 2.0, 1.5, 0)] - #[case(1.0, 1000, 2.0, 1.5, 1000)] + #[case(1.0, 1000, 2.0, 1.5, 2.0, 1.0, 1000)] + #[case(10.0, 10_000, 10.0, 10.0, 10.0, 10.0, 10_000)] + #[case(1.0, 9_000, 0.0, 0.5, 1.0, 0.0, 9_000)] + #[case(1.0, 0, 2.0, 1.5, 2.0, 1.0, 0)] fn test_histogram_aggregation( #[case] a: f64, #[case] duration_between_ms: i64, #[case] b: f64, - #[case] expected: f64, + #[case] expected_mean: f64, + #[case] expected_max: f64, + #[case] expected_min: f64, #[case] expected_ms: i64, ) { let t0 = MetricTimestamp::from_str("2021-01-01T00:00:00Z").unwrap(); @@ -318,7 +324,14 @@ mod tests { assert_eq!(h.start, t0); assert_eq!(h.end, t0 + Duration::milliseconds(duration_between_ms)); assert_eq!((h.end - h.start).num_milliseconds(), expected_ms); - assert_eq!(h.value(), MetricValue::Number(expected)); + assert_eq!( + h.value(), + MetricValue::Histogram(HistogramValue { + min: expected_min, + mean: expected_mean, + max: expected_max, + }) + ); } #[rstest] diff --git a/memfaultd/src/reboot/mod.rs b/memfaultd/src/reboot/mod.rs index 7ff8f76..764eccf 100644 --- a/memfaultd/src/reboot/mod.rs +++ b/memfaultd/src/reboot/mod.rs @@ -266,7 +266,7 @@ fn capture_pstore_content(pstore_dir: &Path, config: &Config) -> Result<()> { zip::CompressionMethod::Deflated, )?; - let mar_entry = mar_builder.add_attachment(zip_path).save(&config.into())?; + let mar_entry = mar_builder.add_attachment(zip_path)?.save(&config.into())?; debug!("Saved pstore to {:?}", mar_entry.path); Ok(()) } diff --git a/memfaultd/src/test_utils.rs b/memfaultd/src/test_utils.rs index 13fbcf0..5bef3c2 100644 --- a/memfaultd/src/test_utils.rs +++ b/memfaultd/src/test_utils.rs @@ -128,4 +128,21 @@ pub fn in_histograms( }) } +#[cfg(test)] +/// Constructs an iterator of Counter metric readings for tests +/// to easily generate mock data +pub fn in_counters(metrics: Vec<(&'static str, f64)>) -> impl Iterator { + metrics + .into_iter() + .enumerate() + .map(|(i, (name, value))| KeyedMetricReading { + name: 
MetricStringKey::from_str(name).unwrap(), value: MetricReading::Counter { value, timestamp: MetricTimestamp::from_str("2021-01-01T00:00:00Z").unwrap() + chrono::Duration::seconds(i as i64), }, }) } + pub(crate) use set_snapshot_suffix; diff --git a/memfaultd/src/util/mod.rs b/memfaultd/src/util/mod.rs index 8aaf0a5..32efb6f 100644 --- a/memfaultd/src/util/mod.rs +++ b/memfaultd/src/util/mod.rs @@ -28,5 +28,6 @@ pub mod task; #[cfg(feature = "logging")] pub mod tcp_server; pub mod time_measure; +pub mod wildcard_pattern; pub mod zip; pub mod zip_dir; diff --git a/memfaultd/src/util/system.rs b/memfaultd/src/util/system.rs index b6b0553..9d5b6fc 100644 --- a/memfaultd/src/util/system.rs +++ b/memfaultd/src/util/system.rs @@ -2,17 +2,26 @@ // Copyright (c) Memfault, Inc. // See License.txt for details use eyre::{eyre, Result}; +use lazy_static::lazy_static; use libc::{clockid_t, timespec, CLOCK_MONOTONIC}; #[cfg(target_os = "linux")] use libc::{sysconf, _SC_CLK_TCK, _SC_PAGE_SIZE}; +#[cfg(target_os = "linux")] +use std::fs::read_to_string; + use uuid::Uuid; +lazy_static! { + static ref OS_RELEASE: Option<String> = read_osrelease().ok(); + static ref OS_TYPE: Option<String> = read_ostype().ok(); +} + #[cfg(target_os = "linux")] pub fn read_system_boot_id() -> Result<Uuid> { use eyre::Context; - use std::{fs::read_to_string, str::FromStr}; + use std::str::FromStr; const BOOT_ID_PATH: &str = "/proc/sys/kernel/random/boot_id"; let boot_id = read_to_string(BOOT_ID_PATH); @@ -23,6 +32,40 @@ pub fn read_system_boot_id() -> Result<Uuid> { } } +#[cfg(target_os = "linux")] +fn read_osrelease() -> Result<String> { + let mut os_release = read_to_string("/proc/sys/kernel/osrelease") + .map_err(|e| eyre!("Unable to read osrelease: {}", e))?; + os_release.truncate(os_release.trim_end().len()); + Ok(os_release) +} + +#[cfg(target_os = "linux")] +fn read_ostype() -> Result<String> { + let mut os_type = read_to_string("/proc/sys/kernel/ostype") + .map_err(|e| eyre!("Unable to read ostype: {}", e))?; + os_type.truncate(os_type.trim_end().len()); + Ok(os_type) +} + +/// Returns the value stored in /proc/sys/kernel/osrelease. +/// The first time it is called we read and cache +/// the value from procfs. On subsequent calls the cached value +/// is returned. This is safe to do since this is a value set +/// at kernel build-time and cannot be altered at runtime. +pub fn get_osrelease() -> Option<String> { + OS_RELEASE.clone() +} + +/// Returns the value stored in /proc/sys/kernel/ostype. +/// The first time it is called we read and cache +/// the value from procfs. On subsequent calls the cached value +/// is returned. This is safe to do since this is a value set +/// at kernel build-time and cannot be altered at runtime. +pub fn get_ostype() -> Option<String> { + OS_TYPE.clone() +} + #[cfg(target_os = "linux")] pub fn clock_ticks_per_second() -> u64 { unsafe { sysconf(_SC_CLK_TCK) as u64 } @@ -83,6 +126,16 @@ pub fn read_system_boot_id() -> Result<Uuid> { Ok(*MOCK_BOOT_ID) } +#[cfg(not(target_os = "linux"))] +pub fn read_osrelease() -> Result<String> { + Ok("non-Linux-system".to_string()) +} + +#[cfg(not(target_os = "linux"))] +pub fn read_ostype() -> Result<String> { + Ok("non-Linux-system".to_string()) +} + #[cfg(not(target_os = "linux"))] pub fn clock_ticks_per_second() -> u64 { 10_000 diff --git a/memfaultd/src/util/wildcard_pattern.rs b/memfaultd/src/util/wildcard_pattern.rs new file mode 100644 index 0000000..0b6ca8c --- /dev/null +++ b/memfaultd/src/util/wildcard_pattern.rs @@ -0,0 +1,61 @@ +// +// Copyright (c) Memfault, Inc. +// See License.txt for details +//!
Very simple wildcard pattern implementation. +//! Only a single wildcard position in the pattern is currently +//! supported; any run of zero or more characters in the +//! wildcard position will match. +use std::fmt::{Display, Formatter, Result as FmtResult}; + +#[derive(Clone)] +pub struct WildcardPattern { + prefix: String, + suffix: String, +} + +impl WildcardPattern { + /// Construct a new WildcardPattern with the following + /// format, where * represents the wildcard position: + /// "<prefix>*<suffix>" + pub fn new(prefix: &str, suffix: &str) -> Self { + Self { + prefix: prefix.to_string(), + suffix: suffix.to_string(), + } + } + + pub fn matches(&self, s: &str) -> bool { + s.starts_with(self.prefix.as_str()) && s.ends_with(self.suffix.as_str()) + } +} + +impl Display for WildcardPattern { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!(f, "{}*{}", self.prefix, self.suffix) + } +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + + use super::*; + + #[rstest] + #[case("cat", "dog", "catcatdog")] + #[case("Mem", "fault", "Memefault")] + #[case("Hello, ", "!", "Hello, Bob!")] + #[case("Zero Character", " Wildcard Match", "Zero Character Wildcard Match")] + fn test_matches(#[case] prefix: &str, #[case] suffix: &str, #[case] s: &str) { + assert!(WildcardPattern::new(prefix, suffix).matches(s)) + } + + #[rstest] + #[case("cat", "dog", "cacatdog")] + #[case("Mem", "fault", "MemFault")] + #[case("Hello, ", "!", "Hello! Bob!")] + fn test_nonmatches_do_not_match(#[case] prefix: &str, #[case] suffix: &str, #[case] s: &str) { + assert!(!WildcardPattern::new(prefix, suffix).matches(s)) + } +}
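Reviewer note: below is a minimal usage sketch of the new WildcardPattern type, showing how a prefix/suffix pattern behaves. It is illustrative only and not part of the diff; the `use` path and the choice of per-process metric keys (e.g. "cpu_usage_memfaultd_pct", as seen in the updated snapshots) as match targets are assumptions, since the diff does not show where WildcardPattern is actually wired in.

use crate::util::wildcard_pattern::WildcardPattern;

fn wildcard_pattern_example() {
    // Hypothetical example: match any key shaped like "cpu_usage_<process>_pct",
    // similar to the per-process CPU metrics added in processes.rs.
    let pattern = WildcardPattern::new("cpu_usage_", "_pct");

    // Any run of zero or more characters in the wildcard position matches.
    assert!(pattern.matches("cpu_usage_memfaultd_pct"));
    assert!(pattern.matches("cpu_usage_systemd_pct"));
    assert!(!pattern.matches("memory_memfaultd_pct"));

    // Display renders the wildcard position as '*'.
    assert_eq!(pattern.to_string(), "cpu_usage_*_pct");
}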