
Commit 5c76e2a

fix(storage-scrubber): ignore errors if index_part is not consistent (#10304)
## Problem

Consider a pageserver performing the following sequence of operations:

* upload X files
* update index_part to add X and remove Y
* delete Y files

If the storage scrubber obtains its initial timeline snapshot before "update index_part" (i.e. the old version that contains Y but not X), and then downloads the index_part file after it has been updated, it will report all Y files as missing.

## Summary of changes

Do not report a layer file as missing if the index_part that was listed and the one that was downloaded are not the same (i.e. they have different last_modified times).

Signed-off-by: Alex Chi Z <chi@neon.tech>
1 parent 237dae7 commit 5c76e2a
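The whole fix hinges on one timestamp comparison. Below is a minimal, self-contained sketch of that check; `ignore_missing_layer_errors` is a hypothetical wrapper for illustration only, but its body mirrors the `ignore_error` expression added in storage_scrubber/src/checks.rs:

```rust
use std::time::SystemTime;

/// Hypothetical helper for illustration; the body mirrors the `ignore_error`
/// expression this commit adds to checks.rs.
///
/// `snapshot_time` is the last_modified observed when the scrubber *listed*
/// the index_part object; `downloaded_time` is the last_modified reported
/// when it later *downloaded* it. If the download is newer, the listing
/// snapshot is stale and any "missing layer" finding may be a false positive.
fn ignore_missing_layer_errors(snapshot_time: SystemTime, downloaded_time: SystemTime) -> bool {
    // In debug builds the suppression is disabled, so the error is still
    // reported rather than silently downgraded.
    snapshot_time < downloaded_time && !cfg!(debug_assertions)
}
```

When the predicate holds, the scrubber downgrades the missing-layer report from an error to a warning rather than dropping it, so genuinely inconsistent timelines still leave a trace.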

File tree

5 files changed: 36 additions & 13 deletions

storage_scrubber/src/checks.rs

Lines changed: 17 additions & 7 deletions
@@ -1,4 +1,5 @@
 use std::collections::{HashMap, HashSet};
+use std::time::SystemTime;
 
 use itertools::Itertools;
 use pageserver::tenant::checks::check_valid_layermap;
@@ -88,9 +89,14 @@ pub(crate) async fn branch_cleanup_and_check_errors(
     match s3_data.blob_data {
         BlobDataParseResult::Parsed {
             index_part,
-            index_part_generation: _index_part_generation,
-            s3_layers: _s3_layers,
+            index_part_generation: _,
+            s3_layers: _,
+            index_part_last_modified_time,
+            index_part_snapshot_time,
         } => {
+            // Ignore missing file error if index_part downloaded is different from the one when listing the layer files.
+            let ignore_error = index_part_snapshot_time < index_part_last_modified_time
+                && !cfg!(debug_assertions);
             if !IndexPart::KNOWN_VERSIONS.contains(&index_part.version()) {
                 result
                     .errors
@@ -171,7 +177,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                         is_l0,
                     );
 
-                    if is_l0 {
+                    if is_l0 || ignore_error {
                         result.warnings.push(msg);
                     } else {
                         result.errors.push(msg);
@@ -308,6 +314,8 @@ pub(crate) enum BlobDataParseResult {
     Parsed {
         index_part: Box<IndexPart>,
         index_part_generation: Generation,
+        index_part_last_modified_time: SystemTime,
+        index_part_snapshot_time: SystemTime,
         s3_layers: HashSet<(LayerName, Generation)>,
     },
     /// The remains of an uncleanly deleted Timeline or aborted timeline creation (e.g. an initdb archive only, or some layer without an index)
@@ -484,9 +492,9 @@ async fn list_timeline_blobs_impl(
     }
 
     if let Some(index_part_object_key) = index_part_object.as_ref() {
-        let index_part_bytes =
+        let (index_part_bytes, index_part_last_modified_time) =
             match download_object_with_retries(remote_client, &index_part_object_key.key).await {
-                Ok(index_part_bytes) => index_part_bytes,
+                Ok(data) => data,
                 Err(e) => {
                     // It is possible that the branch gets deleted in-between we list the objects
                     // and we download the index part file.
@@ -500,14 +508,16 @@ async fn list_timeline_blobs_impl(
                     ));
                 }
             };
-
+        let index_part_snapshot_time = index_part_object_key.last_modified;
         match serde_json::from_slice(&index_part_bytes) {
            Ok(index_part) => {
                return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData {
                    blob_data: BlobDataParseResult::Parsed {
                        index_part: Box::new(index_part),
                        index_part_generation,
                        s3_layers,
+                       index_part_last_modified_time,
+                       index_part_snapshot_time,
                    },
                    unused_index_keys: index_part_keys,
                    unknown_keys,
@@ -625,7 +635,7 @@ pub(crate) async fn list_tenant_manifests(
 
     let manifest_bytes =
         match download_object_with_retries(remote_client, &latest_listing_object.key).await {
-            Ok(bytes) => bytes,
+            Ok((bytes, _)) => bytes,
             Err(e) => {
                 // It is possible that the tenant gets deleted in-between we list the objects
                 // and we download the manifest file.

storage_scrubber/src/lib.rs

Lines changed: 4 additions & 3 deletions
@@ -13,7 +13,7 @@ pub mod tenant_snapshot;
 use std::env;
 use std::fmt::Display;
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
 
 use anyhow::Context;
 use aws_config::retry::{RetryConfigBuilder, RetryMode};
@@ -509,10 +509,11 @@ async fn list_objects_with_retries(
     panic!("MAX_RETRIES is not allowed to be 0");
 }
 
+/// Returns content, last modified time
 async fn download_object_with_retries(
     remote_client: &GenericRemoteStorage,
     key: &RemotePath,
-) -> anyhow::Result<Vec<u8>> {
+) -> anyhow::Result<(Vec<u8>, SystemTime)> {
     let cancel = CancellationToken::new();
     for trial in 0..MAX_RETRIES {
         let mut buf = Vec::new();
@@ -535,7 +536,7 @@ async fn download_object_with_retries(
         {
             Ok(bytes_read) => {
                 tracing::debug!("Downloaded {bytes_read} bytes for object {key}");
-                return Ok(buf);
+                return Ok((buf, download.last_modified));
             }
             Err(e) => {
                 error!("Failed to stream object body for key {key}: {e}");
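For callers, the new signature means every successful download now yields a `(content, last_modified)` pair. A schematic call site (assuming `remote_client: &GenericRemoteStorage` and `key: &RemotePath` in scope, inside an async fn returning `anyhow::Result<()>`):

```rust
// Callers that only need the content destructure and discard the timestamp,
// as list_tenant_manifests does above; list_timeline_blobs_impl binds it as
// index_part_last_modified_time for the staleness check.
let (bytes, last_modified) = download_object_with_retries(remote_client, key).await?;
tracing::debug!("downloaded {} bytes for {key}, last modified {last_modified:?}", bytes.len());
```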

storage_scrubber/src/pageserver_physical_gc.rs

Lines changed: 5 additions & 1 deletion
@@ -450,6 +450,8 @@ async fn gc_ancestor(
             index_part: _,
             index_part_generation: _,
             s3_layers,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } => s3_layers,
         BlobDataParseResult::Relic => {
             // Post-deletion tenant location: don't try and GC it.
@@ -586,7 +588,9 @@ async fn gc_timeline(
         BlobDataParseResult::Parsed {
             index_part,
             index_part_generation,
-            s3_layers: _s3_layers,
+            s3_layers: _,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } => (index_part, *index_part_generation, data.unused_index_keys),
         BlobDataParseResult::Relic => {
             // Post-deletion tenant location: don't try and GC it.

storage_scrubber/src/scan_pageserver_metadata.rs

Lines changed: 8 additions & 2 deletions
@@ -47,6 +47,8 @@ impl MetadataSummary {
             index_part,
             index_part_generation: _,
             s3_layers: _,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } = &data.blob_data
         {
             *self
@@ -195,7 +197,9 @@ pub async fn scan_pageserver_metadata(
         if let BlobDataParseResult::Parsed {
             index_part,
             index_part_generation,
-            s3_layers: _s3_layers,
+            s3_layers: _,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } = &data.blob_data
         {
             if index_part.deleted_at.is_some() {
@@ -318,9 +322,11 @@ pub async fn scan_pageserver_metadata(
 
         match &data.blob_data {
             BlobDataParseResult::Parsed {
-                index_part: _index_part,
+                index_part: _,
                 index_part_generation: _index_part_generation,
                 s3_layers,
+                index_part_last_modified_time: _,
+                index_part_snapshot_time: _,
             } => {
                 tenant_objects.push(ttid, s3_layers.clone());
             }

storage_scrubber/src/tenant_snapshot.rs

Lines changed: 2 additions & 0 deletions
@@ -268,6 +268,8 @@ impl SnapshotDownloader {
             index_part,
             index_part_generation,
             s3_layers: _,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } => {
             self.download_timeline(
                 ttid,
