From 65b69392ea156ff04a3b4fc1609ba7b990ddbe27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 30 Oct 2024 19:37:09 +0100 Subject: [PATCH] Disallow offloaded children during timeline deletion (#9582) If we delete a timeline that has childen, those children will have their data corrupted. Therefore, extend the already existing safety check to offloaded timelines as well. Part of #8088 --- pageserver/src/tenant/timeline/delete.rs | 39 ++++++++++---------- pageserver/src/tenant/timeline/offload.rs | 4 +- test_runner/regress/test_timeline_archive.py | 7 ++++ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 2c6161da15b7..b0c4fa2bc995 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -214,7 +214,8 @@ impl DeleteTimelineFlow { ) -> Result<(), DeleteTimelineError> { super::debug_assert_current_span_has_tenant_and_timeline_id(); - let (timeline, mut guard) = Self::prepare(tenant, timeline_id)?; + let allow_offloaded_children = false; + let (timeline, mut guard) = Self::prepare(tenant, timeline_id, allow_offloaded_children)?; guard.mark_in_progress()?; @@ -340,6 +341,7 @@ impl DeleteTimelineFlow { pub(super) fn prepare( tenant: &Tenant, timeline_id: TimelineId, + allow_offloaded_children: bool, ) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> { // Note the interaction between this guard and deletion guard. // Here we attempt to lock deletion guard when we're holding a lock on timelines. @@ -352,30 +354,27 @@ impl DeleteTimelineFlow { // T1: acquire deletion lock, do another `DeleteTimelineFlow::run` // For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346` let timelines = tenant.timelines.lock().unwrap(); + let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap(); let timeline = match timelines.get(&timeline_id) { Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)), - None => { - let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap(); - match offloaded_timelines.get(&timeline_id) { - Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)), - None => return Err(DeleteTimelineError::NotFound), - } - } + None => match timelines_offloaded.get(&timeline_id) { + Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)), + None => return Err(DeleteTimelineError::NotFound), + }, }; - // Ensure that there are no child timelines **attached to that pageserver**, - // because detach removes files, which will break child branches - let children: Vec = timelines - .iter() - .filter_map(|(id, entry)| { - if entry.get_ancestor_timeline_id() == Some(timeline_id) { - Some(*id) - } else { - None - } - }) - .collect(); + // Ensure that there are no child timelines, because we are about to remove files, + // which will break child branches + let mut children = Vec::new(); + if !allow_offloaded_children { + children.extend(timelines_offloaded.iter().filter_map(|(id, entry)| { + (entry.ancestor_timeline_id == Some(timeline_id)).then_some(*id) + })); + } + children.extend(timelines.iter().filter_map(|(id, entry)| { + (entry.get_ancestor_timeline_id() == Some(timeline_id)).then_some(*id) + })); if !children.is_empty() { return Err(DeleteTimelineError::HasChildren(children)); diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 305c139b54e4..5b196cf8a79f 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -12,7 +12,9 @@ pub(crate) async fn offload_timeline( debug_assert_current_span_has_tenant_and_timeline_id(); tracing::info!("offloading archived timeline"); - let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?; + let allow_offloaded_children = true; + let (timeline, guard) = + DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children)?; let TimelineOrOffloaded::Timeline(timeline) = timeline else { tracing::error!("timeline already offloaded, but given timeline object"); diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 77efd7b74981..3e9812c38a27 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -213,6 +213,13 @@ def leaf_offloaded(): wait_until(30, 1, leaf_offloaded) wait_until(30, 1, parent_offloaded) + # Offloaded child timelines should still prevent deletion + with pytest.raises( + PageserverApiException, + match=f".* timeline which has child timelines: \\[{leaf_timeline_id}\\]", + ): + ps_http.timeline_delete(tenant_id, parent_timeline_id) + ps_http.timeline_archival_config( tenant_id, grandparent_timeline_id,