Skip to content

Commit 31b2aea

Browse files
authored
metrics: Export process metrics using prometheus-client (#2552)
* Move process metrics from linkerd-app-core to linkerd-metrics (with a feature flag).
* Add a linkerd_metrics::prom::registry helper that automatically configures process metrics when the feature is enabled.
* Add a process_threads metric to help surface when the multi-core proxy runtime is in use.
* All of this uses prometheus-client to set up for future reusability.

Before

# HELP process_start_time_seconds Time that the process started (in seconds since the UNIX epoch)
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1701551542
# HELP process_uptime_seconds_total Total time since the process started (in seconds)
# TYPE process_uptime_seconds_total counter
process_uptime_seconds_total 1782.137
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0.72
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 111042560
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 33910784
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 28
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1048576
# HELP proxy_build_info Proxy build info
# TYPE proxy_build_info gauge
proxy_build_info{version="2.213.0",git_sha="9f7e7ac",profile="release",date="2023-11-16T23:24:26Z",vendor="linkerd"} 1

After

# HELP proxy_build_info Proxy build info.
# TYPE proxy_build_info gauge
proxy_build_info{date="2023-12-06T02:15:30Z",git_sha="9c29322d5",profile="release",vendor="code@ver-sea",version="0.0.0-dev.9c29322d5"} 1
# HELP process_start_time_seconds Time that the process started (in seconds since the UNIX epoch).
# TYPE process_start_time_seconds gauge
# UNIT process_start_time_seconds seconds
process_start_time_seconds 1701829321.4647413
# HELP process_uptime_seconds Total time since the process started (in seconds)
# TYPE process_uptime_seconds counter
# UNIT process_uptime_seconds seconds
process_uptime_seconds_total 51.986633717
# HELP process_cpu_seconds Total user and system CPU time spent in seconds
# TYPE process_cpu_seconds counter
# UNIT process_cpu_seconds seconds
process_cpu_seconds_total 0.04
# HELP process_virtual_memory_bytes Virtual memory size in bytes
# TYPE process_virtual_memory_bytes gauge
# UNIT process_virtual_memory_bytes bytes
process_virtual_memory_bytes 108208128
# HELP process_resident_memory_bytes Resident memory size in bytes
# TYPE process_resident_memory_bytes gauge
# UNIT process_resident_memory_bytes bytes
process_resident_memory_bytes 27471872
# HELP process_open_fds Number of open file descriptors
# TYPE process_open_fds gauge
process_open_fds 21
# HELP process_max_fds Maximum number of open file descriptors
# TYPE process_max_fds gauge
process_max_fds 1048576
# HELP process_threads Number of OS threads in the process.
# TYPE process_threads gauge
process_threads 2
# EOF
1 parent f72cc7f commit 31b2aea

File tree

16 files changed

+219
-304
lines changed

16 files changed

+219
-304
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,6 +1629,7 @@ dependencies = [
16291629
"http",
16301630
"hyper",
16311631
"linkerd-stack",
1632+
"linkerd-system",
16321633
"parking_lot",
16331634
"prometheus-client",
16341635
"quickcheck",

linkerd/app/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ linkerd-identity = { path = "../../identity" }
3434
linkerd-idle-cache = { path = "../../idle-cache" }
3535
linkerd-io = { path = "../../io" }
3636
linkerd-meshtls = { path = "../../meshtls", default-features = false }
37-
linkerd-metrics = { path = "../../metrics", features = ["linkerd-stack"] }
37+
linkerd-metrics = { path = "../../metrics", features = ["process", "stack"] }
3838
linkerd-opencensus = { path = "../../opencensus" }
3939
linkerd-proxy-core = { path = "../../proxy/core" }
4040
linkerd-proxy-api-resolve = { path = "../../proxy/api-resolve" }

linkerd/app/core/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ pub mod metrics;
2525
pub mod proxy;
2626
pub mod serve;
2727
pub mod svc;
28-
pub mod telemetry;
2928
pub mod transport;
3029

3130
pub use self::build_info::{BuildInfo, BUILD_INFO};

linkerd/app/core/src/metrics.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::{
1111
classify::Class,
1212
control, http_metrics, opencensus, profiles, stack_metrics,
1313
svc::Param,
14-
telemetry, tls,
14+
tls,
1515
transport::{self, labels::TlsConnect},
1616
};
1717
use linkerd_addr::Addr;
@@ -146,13 +146,8 @@ where
146146
// === impl Metrics ===
147147

148148
impl Metrics {
149-
pub fn new(
150-
retain_idle: Duration,
151-
start_time: telemetry::StartTime,
152-
) -> (Self, impl FmtMetrics + Clone + Send + 'static) {
153-
let registry = prom::Registry::default();
154-
155-
let process = telemetry::process::Report::new(start_time);
149+
pub fn new(retain_idle: Duration) -> (Self, impl FmtMetrics + Clone + Send + 'static) {
150+
let registry = prom::registry();
156151

157152
registry.write().register(
158153
"proxy_build_info",
@@ -223,7 +218,6 @@ impl Metrics {
223218
.and_report(transport_report)
224219
.and_report(opencensus_report)
225220
.and_report(stack)
226-
.and_report(process)
227221
// The prom registry reports an "# EOF" at the end of its export, so
228222
// it should be emitted last.
229223
.and_report(registry);

linkerd/app/core/src/telemetry.rs

Lines changed: 0 additions & 3 deletions
This file was deleted.

linkerd/app/core/src/telemetry/process.rs

Lines changed: 0 additions & 205 deletions
This file was deleted.

linkerd/app/inbound/src/test_util.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,7 @@ pub fn default_config() -> Config {
9292
pub fn runtime() -> (ProxyRuntime, drain::Signal) {
9393
let (drain_tx, drain) = drain::channel();
9494
let (tap, _) = tap::new();
95-
let (metrics, _) =
96-
metrics::Metrics::new(std::time::Duration::from_secs(10), Default::default());
95+
let (metrics, _) = metrics::Metrics::new(std::time::Duration::from_secs(10));
9796
let runtime = ProxyRuntime {
9897
identity: rustls::creds::default_for_test().1.into(),
9998
metrics: metrics.proxy,

linkerd/app/integration/src/proxy.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -460,14 +460,7 @@ async fn run(proxy: Proxy, mut env: TestEnv, random_ports: bool) -> Listening {
460460
let bind_adm = listen::BindTcp::default();
461461
let (shutdown_tx, mut shutdown_rx) = tokio::sync::mpsc::unbounded_channel();
462462
let main = config
463-
.build(
464-
bind_in,
465-
bind_out,
466-
bind_adm,
467-
shutdown_tx,
468-
trace_handle,
469-
Default::default(),
470-
)
463+
.build(bind_in, bind_out, bind_adm, shutdown_tx, trace_handle)
471464
.await
472465
.expect("config");
473466

linkerd/app/outbound/src/test_util.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ pub(crate) fn default_config() -> Config {
5656
pub(crate) fn runtime() -> (ProxyRuntime, drain::Signal) {
5757
let (drain_tx, drain) = drain::channel();
5858
let (tap, _) = tap::new();
59-
let (metrics, _) =
60-
metrics::Metrics::new(std::time::Duration::from_secs(10), Default::default());
59+
let (metrics, _) = metrics::Metrics::new(std::time::Duration::from_secs(10));
6160
let runtime = ProxyRuntime {
6261
identity: linkerd_meshtls_rustls::creds::default_for_test().1.into(),
6362
metrics: metrics.proxy,

linkerd/app/src/lib.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ use linkerd_app_core::{
2121
dns, drain,
2222
metrics::FmtMetrics,
2323
svc::Param,
24-
telemetry,
2524
transport::{addrs::*, listen::Bind},
2625
Error, ProxyRuntime,
2726
};
@@ -98,7 +97,6 @@ impl Config {
9897
bind_admin: BAdmin,
9998
shutdown_tx: mpsc::UnboundedSender<()>,
10099
log_level: trace::Handle,
101-
start_time: telemetry::StartTime,
102100
) -> Result<App, Error>
103101
where
104102
BIn: Bind<ServerConfig> + 'static,
@@ -128,7 +126,7 @@ impl Config {
128126
..
129127
} = self;
130128
debug!("building app");
131-
let (metrics, report) = Metrics::new(admin.metrics_retain_idle, start_time);
129+
let (metrics, report) = Metrics::new(admin.metrics_retain_idle);
132130

133131
let dns = dns.build();
134132

linkerd/metrics/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@ publish = false
88

99
[features]
1010
default = []
11+
process = ["linkerd-system"]
12+
stack = ["linkerd-stack"]
1113
test_util = []
1214

1315
[dependencies]
1416
deflate = { version = "1", features = ["gzip"] }
1517
http = "0.2"
1618
hyper = { version = "0.14", features = ["http1", "http2"] }
1719
linkerd-stack = { path = "../stack", optional = true }
20+
linkerd-system = { path = "../system", optional = true }
1821
parking_lot = "0.12"
1922
prometheus-client = "0.22"
2023
tokio = { version = "1", features = ["time"] }

linkerd/metrics/src/counter.rs

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ impl<F: Factor> FmtMetric for Counter<F> {
9595
#[allow(clippy::float_cmp)]
9696
mod tests {
9797
use super::*;
98-
use crate::{MicrosAsSeconds, MillisAsSeconds, MAX_PRECISE_UINT64};
98+
use crate::MAX_PRECISE_UINT64;
9999

100100
#[test]
101101
fn count_simple() {
@@ -123,42 +123,4 @@ mod tests {
123123
let max = Counter::<()>::from(MAX_PRECISE_UINT64);
124124
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64);
125125
}
126-
127-
#[test]
128-
fn millis_as_seconds() {
129-
let c = Counter::<MillisAsSeconds>::from(1);
130-
assert_eq!(c.value(), 0.001);
131-
132-
let c = Counter::<MillisAsSeconds>::from((MAX_PRECISE_UINT64 - 1) * 1000);
133-
assert_eq!(c.value(), (MAX_PRECISE_UINT64 - 1) as f64);
134-
c.add(1000);
135-
assert_eq!(c.value(), MAX_PRECISE_UINT64 as f64);
136-
c.add(1000);
137-
assert_eq!(c.value(), 0.0);
138-
c.add(1000);
139-
assert_eq!(c.value(), 1.0);
140-
141-
let max = Counter::<MillisAsSeconds>::from(MAX_PRECISE_UINT64 * 1000);
142-
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64);
143-
}
144-
145-
#[test]
146-
fn micros_as_seconds() {
147-
let c = Counter::<MicrosAsSeconds>::from(1);
148-
assert_eq!(c.value(), 0.000_001);
149-
c.add(110);
150-
assert_eq!(c.value(), 0.000_111);
151-
152-
let c = Counter::<MicrosAsSeconds>::from((MAX_PRECISE_UINT64 - 1) * 1000);
153-
assert_eq!(c.value(), (MAX_PRECISE_UINT64 - 1) as f64 * 0.001);
154-
c.add(1_000);
155-
assert_eq!(c.value(), MAX_PRECISE_UINT64 as f64 * 0.001);
156-
c.add(1_000);
157-
assert_eq!(c.value(), 0.0);
158-
c.add(1);
159-
assert_eq!(c.value(), 0.000_001);
160-
161-
let max = Counter::<MicrosAsSeconds>::from(MAX_PRECISE_UINT64 * 1000);
162-
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64 * 0.001);
163-
}
164126
}

0 commit comments

Comments
 (0)