Commit 22a6460

libs/utils: add force parameter for /profile/cpu (#10361)
## Problem

It's only possible to take one CPU profile at a time. With Grafana continuous profiling, a (low-frequency) CPU profile will always be running, making it hard to take an ad hoc CPU profile at the same time. Resolves #10072.

## Summary of changes

Add a `force` parameter for `/profile/cpu` which will end and return an already running CPU profile, starting a new one for the current caller.
1 parent: cd982a8
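For context, a hypothetical way to exercise the new parameter (not part of the commit): the sketch below assumes the endpoint is reachable on localhost:9898 and uses the `reqwest` crate; the host, port, and output filename are assumptions, `seconds` and `frequency` are the handler's pre-existing query parameters, and only `force` comes from this change.

```rust
// Hypothetical client sketch (not part of the commit): fetch an ad hoc CPU
// profile even while a continuous profiler occupies the endpoint. The host,
// port, and output filename are assumptions.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // force=true ends the profile already in progress (returning it to its
    // original caller) and starts a fresh one for this request.
    let url = "http://localhost:9898/profile/cpu?seconds=5&frequency=100&force=true";
    let profile = reqwest::get(url).await?.bytes().await?;
    std::fs::write("cpu.profile", &profile)?;
    Ok(())
}
```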

1 file changed: +30 −14 lines

libs/utils/src/http/endpoint.rs

@@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
 use regex::Regex;
 use routerify::ext::RequestExt;
 use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
-use tokio::sync::{mpsc, Mutex};
+use tokio::sync::{mpsc, Mutex, Notify};
 use tokio_stream::wrappers::ReceiverStream;
 use tokio_util::io::ReaderStream;
 use tracing::{debug, info, info_span, warn, Instrument};
@@ -358,25 +358,41 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
         Some(1001..) => return Err(ApiError::BadRequest(anyhow!("frequency must be <=1000 Hz"))),
         Some(frequency) => frequency,
     };
+    let force: bool = parse_query_param(&req, "force")?.unwrap_or_default();
 
-    // Only allow one profiler at a time.
+    // Take the profile.
     static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
-    let _lock = PROFILE_LOCK
-        .try_lock()
-        .map_err(|_| ApiError::Conflict("profiler already running".into()))?;
+    static PROFILE_CANCEL: Lazy<Notify> = Lazy::new(Notify::new);
+
+    let report = {
+        // Only allow one profiler at a time. If force is true, cancel a running profile (e.g. a
+        // Grafana continuous profile). We use a try_lock() loop when cancelling instead of waiting
+        // for a lock(), to avoid races where the notify isn't currently awaited.
+        let _lock = loop {
+            match PROFILE_LOCK.try_lock() {
+                Ok(lock) => break lock,
+                Err(_) if force => PROFILE_CANCEL.notify_waiters(),
+                Err(_) => return Err(ApiError::Conflict("profiler already running".into())),
+            }
+            tokio::time::sleep(Duration::from_millis(1)).await; // don't busy-wait
+        };
 
-    // Take the profile.
-    let report = tokio::task::spawn_blocking(move || {
         let guard = ProfilerGuardBuilder::default()
             .frequency(frequency_hz)
             .blocklist(&["libc", "libgcc", "pthread", "vdso"])
-            .build()?;
-        std::thread::sleep(Duration::from_secs(seconds));
-        guard.report().build()
-    })
-    .await
-    .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
-    .map_err(|pprof_err| ApiError::InternalServerError(pprof_err.into()))?;
+            .build()
+            .map_err(|err| ApiError::InternalServerError(err.into()))?;
+
+        tokio::select! {
+            _ = tokio::time::sleep(Duration::from_secs(seconds)) => {},
+            _ = PROFILE_CANCEL.notified() => {},
+        };
+
+        guard
+            .report()
+            .build()
+            .map_err(|err| ApiError::InternalServerError(err.into()))?
+    };
 
     // Return the report in the requested format.
     match format {
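The diff's comment about races is worth unpacking. `Notify::notify_waiters()` wakes only tasks currently parked in `notified()` and stores no permit, so if the forcing caller notified once and then awaited `lock()`, a holder that hadn't yet reached its `select!` would never see the wakeup and the caller would hang. Retrying `try_lock()` and re-notifying on every iteration closes that window. Below is a minimal standalone sketch of the same pattern, under assumed names (`SLOT`, `CANCEL`, `acquire`, `hold_slot`) and assuming `tokio` (full features) and `once_cell` as dependencies:

```rust
use std::time::Duration;

use once_cell::sync::Lazy;
use tokio::sync::{Mutex, MutexGuard, Notify};

// Assumed stand-ins for PROFILE_LOCK / PROFILE_CANCEL in the diff above.
static SLOT: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
static CANCEL: Lazy<Notify> = Lazy::new(Notify::new);

/// Take the exclusive slot. With `force`, nudge the current holder on every
/// iteration: notify_waiters() wakes only tasks already parked in notified()
/// and stores no permit, so a one-shot notification could be lost.
async fn acquire(force: bool) -> Result<MutexGuard<'static, ()>, &'static str> {
    loop {
        match SLOT.try_lock() {
            Ok(guard) => return Ok(guard),
            Err(_) if force => CANCEL.notify_waiters(),
            Err(_) => return Err("already running"),
        }
        tokio::time::sleep(Duration::from_millis(1)).await; // don't busy-wait
    }
}

/// A holder that runs to completion unless a forcing caller cancels it.
async fn hold_slot(work: Duration) {
    let _guard = SLOT.lock().await;
    tokio::select! {
        _ = tokio::time::sleep(work) => {}, // finished normally
        _ = CANCEL.notified() => {},        // preempted by acquire(true)
    }
    // _guard drops here, letting the forcing caller's try_lock() succeed.
}

#[tokio::main]
async fn main() {
    let holder = tokio::spawn(hold_slot(Duration::from_secs(60)));
    tokio::time::sleep(Duration::from_millis(10)).await; // let it take the slot
    let _guard = acquire(true).await.expect("force should win");
    holder.await.unwrap(); // the 60-second holder exited early
    println!("slot taken over");
}
```

The handler has the same shape: the profiling task parks in `select!` on either the sampling sleep or `PROFILE_CANCEL`, so a forced request both unblocks it early (returning the partial profile to its original caller) and then takes over the lock.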
