diff --git a/csi/src/filesystem_vol.rs b/csi/src/filesystem_vol.rs index ceb117f5c..dd37657f9 100644 --- a/csi/src/filesystem_vol.rs +++ b/csi/src/filesystem_vol.rs @@ -162,13 +162,6 @@ pub async fn unstage_fs_volume( )); } - // Sometimes after/during disconnect we get some page write errors, - // triggered by a journal update by the JBD2 worker task. We - // don't really know how we can "flush" these tasks so at the - // moment the best solution we have is to remount the staging path - // as RO before we umount it for good, which seems to help. - mount::remount(fs_staging_path, true)?; - if let Err(error) = mount::filesystem_unmount(fs_staging_path) { return Err(failure!( Code::Internal, @@ -179,6 +172,8 @@ pub async fn unstage_fs_volume( error )); } + + mount::wait_fs_shutdown(&device, Some(mount.fstype)).await?; } Ok(()) diff --git a/csi/src/mount.rs b/csi/src/mount.rs index 62d14aa50..c7afbab1e 100644 --- a/csi/src/mount.rs +++ b/csi/src/mount.rs @@ -322,3 +322,59 @@ pub fn blockdevice_unmount(target: &str) -> Result<(), Error> { info!("block device at {} has been unmounted", target); Ok(()) } + +/// Waits until a device's filesystem is shutdown. +/// This is useful to know if it's safe to detach a device from a node or not as +/// it seems that even after a umount completes the filesystem and more +/// specifically the filesystem's journal might not be completely shutdown. +/// Specifically, this waits for the filesystem (eg: ext4) shutdown and the +/// filesystem's journal shutdown: jbd2. +pub(crate) async fn wait_fs_shutdown( + device: &str, + fstype: Option, +) -> Result<(), Error> { + let device_trim = device.replace("/dev/", ""); + + let start = std::time::Instant::now(); + let timeout = std::time::Duration::from_secs(2); + + if let Some(fstype) = fstype { + let proc_fs_str = format!("/proc/fs/{}/{}", fstype, device_trim); + let proc_fs = std::path::Path::new(&proc_fs_str); + wait_file_removal(proc_fs, start, timeout).await?; + } + + let jbd2_pattern = format!("/proc/fs/jbd2/{}-*", device_trim); + let proc_jbd2 = glob::glob(&jbd2_pattern) + .expect("valid pattern") + .next() + .and_then(|v| v.ok()); + if let Some(proc_jbd2) = proc_jbd2 { + wait_file_removal(&proc_jbd2, start, timeout).await?; + } + + Ok(()) +} + +/// Waits until a file is removed, up to a timeout. +async fn wait_file_removal( + proc: &std::path::Path, + start: std::time::Instant, + timeout: std::time::Duration, +) -> Result<(), Error> { + let check_interval = std::time::Duration::from_millis(200); + let proc_str = proc.to_string_lossy().to_string(); + let mut exists = proc.exists(); + while start.elapsed() < timeout && exists { + tracing::debug!(proc = %proc_str, "proc entry still exists"); + tokio::time::sleep(check_interval).await; + exists = proc.exists(); + } + match exists { + false => Ok(()), + true => Err(Error::new( + std::io::ErrorKind::TimedOut, + format!("Timed out waiting for '{}' to be removed", proc_str), + )), + } +} diff --git a/csi/src/node.rs b/csi/src/node.rs index 70949b4c8..b98b98ebd 100644 --- a/csi/src/node.rs +++ b/csi/src/node.rs @@ -126,6 +126,8 @@ async fn detach(uuid: &Uuid, errheader: String) -> Result<(), Status> { )); } + crate::mount::wait_fs_shutdown(&device_path, None).await?; + if let Err(error) = device.detach().await { return Err(failure!( Code::Internal, diff --git a/deploy/csi-daemonset.yaml b/deploy/csi-daemonset.yaml index ccad42890..93a380d1b 100644 --- a/deploy/csi-daemonset.yaml +++ b/deploy/csi-daemonset.yaml @@ -30,7 +30,7 @@ spec: # the same. containers: - name: mayastor-csi - image: mayadata/mayastor:v1.0.4 + image: mayadata/mayastor:v1.0.5 imagePullPolicy: IfNotPresent # we need privileged because we mount filesystems and use mknod securityContext: diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml index 29c94081f..7525053e8 100644 --- a/deploy/mayastor-daemonset.yaml +++ b/deploy/mayastor-daemonset.yaml @@ -33,7 +33,7 @@ spec: command: ['sh', '-c', 'until nc -vz nats 4222; do echo "Waiting for message bus..."; sleep 1; done;'] containers: - name: mayastor - image: mayadata/mayastor:v1.0.4 + image: mayadata/mayastor:v1.0.5 imagePullPolicy: IfNotPresent env: - name: RUST_LOG diff --git a/scripts/check-deploy-yamls.sh b/scripts/check-deploy-yamls.sh index ceabadbf4..e66c422be 100755 --- a/scripts/check-deploy-yamls.sh +++ b/scripts/check-deploy-yamls.sh @@ -8,7 +8,7 @@ DEPLOYDIR="$ROOTDIR"/deploy CORES=2 PROFILE=release -TAG=v1.0.4 +TAG=v1.0.5 "$SCRIPTDIR"/generate-deploy-yamls.sh -c "$CORES" -t "$TAG" "$PROFILE"