From f50f7cd54168abede725643bf1dd0526ed813a30 Mon Sep 17 00:00:00 2001 From: Kunal Sareen Date: Mon, 23 Oct 2023 14:29:29 +0000 Subject: [PATCH 1/2] Add option to exclude kernel events from perf results This commit adds an option to exclude kernel events from perf results. We include kernel events by default. Note that previously kernel events were excluded by default. --- Cargo.toml | 4 ++-- src/scheduler/stat.rs | 7 ++++++- src/scheduler/work_counter.rs | 3 ++- src/util/options.rs | 5 ++++- src/util/statistics/counter/perf_event.rs | 3 ++- src/util/statistics/stats.rs | 2 +- 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b5d5dfc621..19b0500056 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,9 +40,9 @@ mimalloc-sys = { version = "0.1.6", optional = true } mmtk-macros = { version = "0.20.0", path = "macros/" } num_cpus = "1.8" num-traits = "0.2" -pfm = { version = "0.1.0-beta.3", optional = true } -probe = "0.5" +pfm = { git = "https://github.com/caizixian/pfm", rev = "822f89e53daf66f140445b47c1e7b349e1f6266a", optional = true } portable-atomic = "1.4.3" +probe = "0.5" regex = "1.7.0" spin = "0.9.5" static_assertions = "1.1.0" diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index f40857182c..027d3c490c 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -240,7 +240,12 @@ impl WorkerLocalStat { let mut counters: Vec<Box<dyn WorkCounter>> = vec![Box::new(WorkDuration::new())]; #[cfg(feature = "perf_counter")] for e in &mmtk.options.work_perf_events.events { - counters.push(Box::new(WorkPerfEvent::new(&e.0, e.1, e.2))); + counters.push(Box::new(WorkPerfEvent::new( + &e.0, + e.1, + e.2, + *mmtk.options.perf_exclude_kernel, + ))); } counters } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 1e1d8b5770..505105b042 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -162,9 +162,10 @@ mod perf_event { /// 0, -1 measures the calling thread on all CPUs /// -1, 0 
measures all threads on CPU 0 /// -1, -1 is invalid - pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { + pub fn new(name: &str, pid: pid_t, cpu: c_int, exclude_kernel: bool) -> WorkPerfEvent { let mut pe = PerfEvent::new(name, false) .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + pe.set_exclude_kernel(exclude_kernel as u64); pe.open(pid, cpu) .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); WorkPerfEvent { diff --git a/src/util/options.rs b/src/util/options.rs index 5e46307839..7fe0e31a30 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -726,6 +726,9 @@ options! { // Measuring perf events for GC and mutators // TODO: Ideally this option should only be included when the features 'perf_counter' are enabled. The current macro does not allow us to do this. phase_perf_events: PerfEventOptions [env_var: true, command_line: true] [|_| cfg!(feature = "perf_counter")] = PerfEventOptions {events: vec![]}, + // Should we exclude perf events occurring in kernel space. By default we include the kernel. + // Only set this option if you know the implications of excluding the kernel! + perf_exclude_kernel: bool [env_var: true, command_line: true] [|_| cfg!(feature = "perf_counter")] = false, // Set how to bind affinity to the GC Workers. Default thread affinity delegates to the OS // scheduler. If a list of cores are specified, cores are allocated to threads in a round-robin // fashion. The core ids should match the ones reported by /proc/cpuinfo. Core ids are @@ -742,7 +745,7 @@ options! { thread_affinity: AffinityKind [env_var: true, command_line: true] [|v: &AffinityKind| v.validate()] = AffinityKind::OsDefault, // Set the GC trigger. This defines the heap size and how MMTk triggers a GC. // Default to a fixed heap size of 0.5x physical memory. 
- gc_trigger : GCTriggerSelector [env_var: true, command_line: true] [|v: &GCTriggerSelector| v.validate()] = GCTriggerSelector::FixedHeapSize((crate::util::memory::get_system_total_memory() as f64 * 0.5f64) as usize), + gc_trigger: GCTriggerSelector [env_var: true, command_line: true] [|v: &GCTriggerSelector| v.validate()] = GCTriggerSelector::FixedHeapSize((crate::util::memory::get_system_total_memory() as f64 * 0.5f64) as usize), // Enable transparent hugepage support via madvise (only Linux is supported) transparent_hugepages: bool [env_var: true, command_line: true] [|v: &bool| !v || cfg!(target_os = "linux")] = false } diff --git a/src/util/statistics/counter/perf_event.rs b/src/util/statistics/counter/perf_event.rs index a203f41257..3b0d616d26 100644 --- a/src/util/statistics/counter/perf_event.rs +++ b/src/util/statistics/counter/perf_event.rs @@ -9,9 +9,10 @@ pub struct PerfEventDiffable { } impl PerfEventDiffable { - pub fn new(name: &str) -> Self { + pub fn new(name: &str, exclude_kernel: bool) -> Self { let mut pe = PerfEvent::new(name, true) .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + pe.set_exclude_kernel(exclude_kernel as u64); // measures the calling thread (and all child threads) on all CPUs pe.open(0, -1) .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 61a2a05172..6e754b1384 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -91,7 +91,7 @@ impl Stats { shared.clone(), true, false, - PerfEventDiffable::new(&e.0), + PerfEventDiffable::new(&e.0, *options.perf_exclude_kernel), )))); } Stats { From 15c19264925d37aea157c390a8ca71cd43641897 Mon Sep 17 00:00:00 2001 From: Kunal Sareen Date: Sun, 29 Oct 2023 22:02:28 +0000 Subject: [PATCH 2/2] Bump libpfm version to v0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 19b0500056..0793ca3b13 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ mimalloc-sys = { version = "0.1.6", optional = true } mmtk-macros = { version = "0.20.0", path = "macros/" } num_cpus = "1.8" num-traits = "0.2" -pfm = { git = "https://github.com/caizixian/pfm", rev = "822f89e53daf66f140445b47c1e7b349e1f6266a", optional = true } +pfm = { version = "0.1.1", optional = true } portable-atomic = "1.4.3" probe = "0.5" regex = "1.7.0"