diff --git a/CHANGELOG.md b/CHANGELOG.md index f44b1ae..43dd3f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,13 @@ All notable changes to this project will be documented in this file. - Downgraded DNS errors to warnings ([#17]). - All output is now wrapped in a "containerdebug" span ([#18]). +### Fixes + +- Reduced memory usage dramatically by limiting and caching fetched information ([#20]). + [#17]: https://github.com/stackabletech/containerdebug/pull/17 [#18]: https://github.com/stackabletech/containerdebug/pull/18 +[#20]: https://github.com/stackabletech/containerdebug/pull/20 ## [0.1.0] - 2024-12-09 diff --git a/src/main.rs b/src/main.rs index c2fc08d..491a759 100644 --- a/src/main.rs +++ b/src/main.rs @@ -55,6 +55,8 @@ fn main() { built_info::RUSTC_VERSION, ); + let mut collect_ctx = SystemInformation::init(); + let mut next_run = Instant::now(); loop { let next_run_sleep = next_run.saturating_duration_since(Instant::now()); @@ -63,7 +65,7 @@ fn main() { } std::thread::sleep(next_run_sleep); - let system_information = SystemInformation::collect(); + let system_information = SystemInformation::collect(&mut collect_ctx); let serialized = serde_json::to_string_pretty(&system_information).unwrap(); if let Some(output_path) = &opts.output { diff --git a/src/system_information/mod.rs b/src/system_information/mod.rs index a37bd1d..1ebcd59 100644 --- a/src/system_information/mod.rs +++ b/src/system_information/mod.rs @@ -8,13 +8,14 @@ pub mod os; pub mod resources; pub mod user; -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Default)] pub struct SystemInformation { - pub resources: resources::Resources, - pub os: os::OperatingSystem, - pub current_user: ComponentResult, - pub disks: Vec, - pub network: network::SystemNetworkInfo, + // All fields are optional, to make it easy to disable modules one by one + pub resources: Option, + pub os: Option, + pub current_user: Option>, + pub disks: Option>, + pub network: Option, // TODO: // Current time // 
SElinux/AppArmor @@ -32,26 +33,47 @@ pub struct SystemInformation { // - Users/Groups } +/// Common data that is cached between [`SystemInformation::collect`] calls. +pub struct CollectContext { + system: sysinfo::System, +} + impl SystemInformation { - #[tracing::instrument(name = "SystemInformation::collect")] - pub fn collect() -> Self { - tracing::info!("Starting data collection"); + /// Collects static information that doesn't need to be refreshed. + #[tracing::instrument(name = "SystemInformation::init")] + pub fn init() -> CollectContext { + tracing::info!("initializing"); + let mut ctx = CollectContext { + // Each module is responsible for updating the information that it cares about. + system: sysinfo::System::new(), + }; + if let Err(err) = user::User::init(&mut ctx.system) { + tracing::error!( + error = &err as &dyn std::error::Error, + "failed to initialize user module, ignoring but this will likely cause collection errors..." + ); + } + tracing::info!("init finished"); + ctx + } - // Please note that we use "new_all" to ensure that all list of - // components, network interfaces, disks and users are already - // filled! 
- let sys = sysinfo::System::new_all(); + /// Collects a fresh snapshot of system information, reusing the state cached in `ctx`. + #[tracing::instrument(name = "SystemInformation::collect", skip(ctx))] + pub fn collect(ctx: &mut CollectContext) -> Self { + tracing::info!("Starting data collection"); let info = Self { - resources: resources::Resources::collect(&sys), - os: os::OperatingSystem::collect(), - current_user: ComponentResult::report_from_result( + resources: Some(resources::Resources::collect(&mut ctx.system)), + os: Some(os::OperatingSystem::collect()), + current_user: Some(ComponentResult::report_from_result( "User::collect_current", - user::User::collect_current(&sys), - ), - disks: disk::Disk::collect_all(), - network: network::SystemNetworkInfo::collect(), + user::User::collect_current(&ctx.system), + )), + disks: Some(disk::Disk::collect_all()), + network: Some(network::SystemNetworkInfo::collect()), + // ..Default::default() }; + tracing::info!("Data collection finished"); info } diff --git a/src/system_information/resources.rs b/src/system_information/resources.rs index ab7101c..82b8400 100644 --- a/src/system_information/resources.rs +++ b/src/system_information/resources.rs @@ -1,5 +1,5 @@ use serde::Serialize; -use sysinfo::System; +use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind, System}; #[derive(Debug, Serialize)] pub struct Resources { @@ -22,10 +22,16 @@ pub struct Resources { impl Resources { #[tracing::instrument(name = "Resources::collect", skip(sys))] - pub fn collect(sys: &System) -> Self { + pub fn collect(sys: &mut System) -> Self { // This style of "declare-then-log-then-merge becomes a bit verbose, // but should help keep each log statement local to where that info is collected. 
+ sys.refresh_specifics( + RefreshKind::new() + .with_cpu(CpuRefreshKind::new().with_cpu_usage()) + .with_memory(MemoryRefreshKind::everything()), + ); + let cpu_count = sys.cpus().len(); let physical_core_count = sys.physical_core_count(); tracing::info!( diff --git a/src/system_information/user.rs b/src/system_information/user.rs index 1719f38..7cba540 100644 --- a/src/system_information/user.rs +++ b/src/system_information/user.rs @@ -1,6 +1,6 @@ use serde::Serialize; use snafu::{OptionExt, ResultExt, Snafu}; -use sysinfo::{Gid, Pid, Uid}; +use sysinfo::{Gid, Pid, ProcessRefreshKind, Uid, UpdateKind}; use crate::error::SysinfoError; @@ -21,6 +21,20 @@ pub struct User { } impl User { + #[tracing::instrument(name = "User::init", skip(sys))] + pub fn init(sys: &mut sysinfo::System) -> Result<()> { + let pid = sysinfo::get_current_pid() + .map_err(|msg| SysinfoError { msg }) + .context(GetCurrentPidSnafu)?; + // The process user is static, and refreshing it on every run leaks memory, so cache it once and keep that. + sys.refresh_processes_specifics( + sysinfo::ProcessesToUpdate::Some(&[pid]), + false, + ProcessRefreshKind::new().with_user(UpdateKind::OnlyIfNotSet), + ); + Ok(()) + } + #[tracing::instrument(name = "User::collect_current", skip(sys))] pub fn collect_current(sys: &sysinfo::System) -> Result { let pid = sysinfo::get_current_pid()