Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(pageserver): separate sparse and dense keyspace #7503

Merged
merged 10 commits into from
Apr 30, 2024
Merged
27 changes: 27 additions & 0 deletions libs/pageserver_api/src/keyspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ pub struct KeySpace {
pub ranges: Vec<Range<Key>>,
}

/// A wrapper type for sparse keyspaces.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct SparseKeySpace(pub KeySpace);

/// Represents a contiguous half-open range of the keyspace, masked according to a particular
/// ShardNumber's stripes: within this range of keys, only some "belong" to the current
/// shard.
Expand Down Expand Up @@ -435,10 +439,33 @@ pub struct KeyPartitioning {
pub parts: Vec<KeySpace>,
}

/// Represents a partitioning of the sparse key space.
#[derive(Clone, Debug, Default)]
pub struct SparseKeyPartitioning {
pub parts: Vec<SparseKeySpace>,
}

impl KeyPartitioning {
pub fn new() -> Self {
KeyPartitioning { parts: Vec::new() }
}

/// Convert a key partitioning to a sparse partition.
pub fn into_sparse(self) -> SparseKeyPartitioning {
SparseKeyPartitioning {
parts: self.parts.into_iter().map(SparseKeySpace).collect(),
}
}
}

impl SparseKeyPartitioning {
/// Note: use this function with caution. Attempt to handle a sparse keyspace in the same way as a dense keyspace will
/// cause long/dead loops.
pub fn into_dense(self) -> KeyPartitioning {
KeyPartitioning {
parts: self.parts.into_iter().map(|x| x.0).collect(),
}
}
}

///
Expand Down
14 changes: 13 additions & 1 deletion libs/pageserver_api/src/models/partitioning.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use utils::lsn::Lsn;

use crate::keyspace::SparseKeySpace;

#[derive(Debug, PartialEq, Eq)]
pub struct Partitioning {
pub keys: crate::keyspace::KeySpace,

pub sparse_keys: crate::keyspace::SparseKeySpace,
pub at_lsn: Lsn,
}

Expand Down Expand Up @@ -32,6 +34,8 @@ impl serde::Serialize for Partitioning {
let mut map = serializer.serialize_map(Some(2))?;
map.serialize_key("keys")?;
map.serialize_value(&KeySpace(&self.keys))?;
map.serialize_key("sparse_keys")?;
map.serialize_value(&KeySpace(&self.sparse_keys.0))?;
map.serialize_key("at_lsn")?;
map.serialize_value(&WithDisplay(&self.at_lsn))?;
map.end()
Expand Down Expand Up @@ -99,6 +103,7 @@ impl<'a> serde::Deserialize<'a> for Partitioning {
#[derive(serde::Deserialize)]
struct De {
keys: KeySpace,
sparse_keys: KeySpace,
#[serde_as(as = "serde_with::DisplayFromStr")]
at_lsn: Lsn,
}
Expand All @@ -107,6 +112,7 @@ impl<'a> serde::Deserialize<'a> for Partitioning {
Ok(Self {
at_lsn: de.at_lsn,
keys: de.keys.0,
sparse_keys: SparseKeySpace(de.sparse_keys.0),
})
}
}
Expand All @@ -133,6 +139,12 @@ mod tests {
"030000000000000000000000000000000003"
]
],
"sparse_keys": [
[
"620000000000000000000000000000000000",
"620000000000000000000000000000000003"
]
],
"at_lsn": "0/2240160"
}
"#;
Expand Down
6 changes: 4 additions & 2 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1918,12 +1918,14 @@ async fn timeline_collect_keyspace(
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
let keys = timeline
let (dense_ks, sparse_ks) = timeline
.collect_keyspace(at_lsn, &ctx)
.await
.map_err(|e| ApiError::InternalServerError(e.into()))?;

let res = pageserver_api::models::partitioning::Partitioning { keys, at_lsn };
// This API is currently used by pagebench. Pagebench will iterate all keys within the keyspace.
// Therefore, we split dense/sparse keys in this API.
let res = pageserver_api::models::partitioning::Partitioning { keys: dense_ks, sparse_keys: sparse_ks, at_lsn };

json_response(StatusCode::OK, res)
}
Expand Down
12 changes: 10 additions & 2 deletions pageserver/src/pgdatadir_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use pageserver_api::key::{
slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
};
use pageserver_api::keyspace::SparseKeySpace;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
Expand Down Expand Up @@ -730,11 +731,13 @@ impl Timeline {
/// Get a KeySpace that covers all the Keys that are in use at the given LSN.
/// Anything that's not listed maybe removed from the underlying storage (from
/// that LSN forwards).
///
/// The return value is (dense keyspace, sparse keyspace).
pub(crate) async fn collect_keyspace(
&self,
lsn: Lsn,
ctx: &RequestContext,
) -> Result<KeySpace, CollectKeySpaceError> {
) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {
// Iterate through key ranges, greedily packing them into partitions
let mut result = KeySpaceAccum::new();

Expand Down Expand Up @@ -806,7 +809,12 @@ impl Timeline {
if self.get(AUX_FILES_KEY, lsn, ctx).await.is_ok() {
result.add_key(AUX_FILES_KEY);
}
Ok(result.to_keyspace())

Ok((
result.to_keyspace(),
/* AUX sparse key space */
SparseKeySpace(KeySpace::single(Key::metadata_aux_key_range())),
))
}

/// Get cached size of relation if it not updated after specified LSN
Expand Down
1 change: 1 addition & 0 deletions pageserver/src/tenant/layer_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,7 @@ mod tests {
assert_eq!(lhs, rhs);
}

#[cfg(test)]
fn brute_force_range_search(
layer_map: &LayerMap,
key_range: Range<Key>,
Expand Down
24 changes: 21 additions & 3 deletions pageserver/src/tenant/storage_layer/inmemory_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -597,14 +597,17 @@ impl InMemoryLayer {
}
}

/// Write this frozen in-memory layer to disk.
/// Write this frozen in-memory layer to disk. If `key_range` is set, the delta
/// layer will only contain the key range the user specifies, and may return `None`
/// if there are no matching keys.
///
/// Returns a new delta layer with all the same data as this in-memory layer
pub(crate) async fn write_to_disk(
&self,
timeline: &Arc<Timeline>,
ctx: &RequestContext,
) -> Result<ResidentLayer> {
key_range: Option<Range<Key>>,
) -> Result<Option<ResidentLayer>> {
// Grab the lock in read-mode. We hold it over the I/O, but because this
// layer is not writeable anymore, no one should be trying to acquire the
// write lock on it, so we shouldn't block anyone. There's one exception
Expand All @@ -618,6 +621,21 @@ impl InMemoryLayer {

let end_lsn = *self.end_lsn.get().unwrap();

let keys: Vec<_> = if let Some(key_range) = key_range {
inner
.index
.iter()
.filter(|(k, _)| key_range.contains(k))
.map(|(k, m)| (k.to_i128(), m))
.collect()
} else {
inner.index.iter().map(|(k, m)| (k.to_i128(), m)).collect()
};

if keys.is_empty() {
return Ok(None);
}

let mut delta_layer_writer = DeltaLayerWriter::new(
self.conf,
self.timeline_id,
Expand Down Expand Up @@ -649,6 +667,6 @@ impl InMemoryLayer {

// MAX is used here because we identify L0 layers by full key range
let delta_layer = delta_layer_writer.finish(Key::MAX, timeline).await?;
Ok(delta_layer)
Ok(Some(delta_layer))
}
}
Loading
Loading