From 412a79f39f9b90fa02a10ab2db58f10812ab8e8d Mon Sep 17 00:00:00 2001 From: Tiago Castro Date: Fri, 2 Dec 2022 13:07:57 +0000 Subject: [PATCH] fix(csi-node): cherry-pick wait until filesystem shutdown before disconnecting Sometimes after/during disconnect we get some page write errors, triggered by a journal update by the JBD2 worker task. To get around these, wait until the filesystem&jbd2 is shutdown before disconnecting. Signed-off-by: Tiago Castro --- csi/src/filesystem_vol.rs | 9 ++----- csi/src/mount.rs | 56 +++++++++++++++++++++++++++++++++++++++ csi/src/node.rs | 2 ++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/csi/src/filesystem_vol.rs b/csi/src/filesystem_vol.rs index ceb117f5c..dd37657f9 100644 --- a/csi/src/filesystem_vol.rs +++ b/csi/src/filesystem_vol.rs @@ -162,13 +162,6 @@ pub async fn unstage_fs_volume( )); } - // Sometimes after/during disconnect we get some page write errors, - // triggered by a journal update by the JBD2 worker task. We - // don't really know how we can "flush" these tasks so at the - // moment the best solution we have is to remount the staging path - // as RO before we umount it for good, which seems to help. - mount::remount(fs_staging_path, true)?; - if let Err(error) = mount::filesystem_unmount(fs_staging_path) { return Err(failure!( Code::Internal, @@ -179,6 +172,8 @@ pub async fn unstage_fs_volume( error )); } + + mount::wait_fs_shutdown(&device, Some(mount.fstype)).await?; } Ok(()) diff --git a/csi/src/mount.rs b/csi/src/mount.rs index 62d14aa50..c7afbab1e 100644 --- a/csi/src/mount.rs +++ b/csi/src/mount.rs @@ -322,3 +322,59 @@ pub fn blockdevice_unmount(target: &str) -> Result<(), Error> { info!("block device at {} has been unmounted", target); Ok(()) } + +/// Waits until a device's filesystem is shutdown. +/// This is useful to know if it's safe to detach a device from a node or not as +/// it seems that even after a umount completes the filesystem and more +/// specifically the filesystem's journal might not be completely shutdown. +/// Specifically, this waits for the filesystem (eg: ext4) shutdown and the +/// filesystem's journal shutdown: jbd2. +pub(crate) async fn wait_fs_shutdown( + device: &str, + fstype: Option, +) -> Result<(), Error> { + let device_trim = device.replace("/dev/", ""); + + let start = std::time::Instant::now(); + let timeout = std::time::Duration::from_secs(2); + + if let Some(fstype) = fstype { + let proc_fs_str = format!("/proc/fs/{}/{}", fstype, device_trim); + let proc_fs = std::path::Path::new(&proc_fs_str); + wait_file_removal(proc_fs, start, timeout).await?; + } + + let jbd2_pattern = format!("/proc/fs/jbd2/{}-*", device_trim); + let proc_jbd2 = glob::glob(&jbd2_pattern) + .expect("valid pattern") + .next() + .and_then(|v| v.ok()); + if let Some(proc_jbd2) = proc_jbd2 { + wait_file_removal(&proc_jbd2, start, timeout).await?; + } + + Ok(()) +} + +/// Waits until a file is removed, up to a timeout. +async fn wait_file_removal( + proc: &std::path::Path, + start: std::time::Instant, + timeout: std::time::Duration, +) -> Result<(), Error> { + let check_interval = std::time::Duration::from_millis(200); + let proc_str = proc.to_string_lossy().to_string(); + let mut exists = proc.exists(); + while start.elapsed() < timeout && exists { + tracing::debug!(proc = %proc_str, "proc entry still exists"); + tokio::time::sleep(check_interval).await; + exists = proc.exists(); + } + match exists { + false => Ok(()), + true => Err(Error::new( + std::io::ErrorKind::TimedOut, + format!("Timed out waiting for '{}' to be removed", proc_str), + )), + } +} diff --git a/csi/src/node.rs b/csi/src/node.rs index 70949b4c8..b98b98ebd 100644 --- a/csi/src/node.rs +++ b/csi/src/node.rs @@ -126,6 +126,8 @@ async fn detach(uuid: &Uuid, errheader: String) -> Result<(), Status> { )); } + crate::mount::wait_fs_shutdown(&device_path, None).await?; + if let Err(error) = device.detach().await { return Err(failure!( Code::Internal,