Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix poor performance of to_tracking_manifest #1047

Merged
merged 1 commit into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions crates/spfs/src/graph/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
// https://github.com/imageworks/spk

use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashMap};
use std::io::BufRead;

use spfs_proto::ManifestArgs;
Expand All @@ -19,6 +19,9 @@ mod manifest_test;
/// A manifest holds the state of a filesystem tree.
pub type Manifest = super::object::FlatObject<spfs_proto::Manifest<'static>>;

/// A mapping of digest to tree in a manifest.
pub type ManifestTreeCache<'a> = HashMap<encoding::Digest, Tree<'a>>;

impl Default for Manifest {
fn default() -> Self {
Self::builder().build(&crate::tracking::Entry::<()>::empty_dir_with_open_perms())
Expand Down Expand Up @@ -73,6 +76,29 @@ impl Manifest {
children.into_iter().collect()
}

/// Return the trees of this Manifest mapped by digest.
///
/// It is expensive to find a tree in a manifest by digest. If multiple
/// trees need to be accessed by digest, it is faster to use this method
/// instead of [`Manifest::get_tree`].
pub fn get_tree_cache(&self) -> ManifestTreeCache {
let mut tree_cache = HashMap::new();
for tree in self.iter_trees() {
let Ok(digest) = tree.digest() else {
tracing::warn!("Undigestible tree found in manifest");
continue;
};
tree_cache.insert(digest, tree);
}
tree_cache
}

/// Return the tree in this manifest with the given digest.
///
/// # Warning
///
/// This can be very slow to call repeatedly on the same manifest. See
/// [`Manifest::get_tree_cache`].
pub fn get_tree(&self, digest: &encoding::Digest) -> Option<Tree<'_>> {
self.iter_trees()
.find(|t| t.digest().ok().as_ref() == Some(digest))
Expand All @@ -85,9 +111,15 @@ impl Manifest {

/// Convert this manifest into a more workable form for editing.
pub fn to_tracking_manifest(&self) -> tracking::Manifest {
let tree_cache = self.get_tree_cache();

let mut root = tracking::Entry::empty_dir_with_open_perms();

fn iter_tree(source: &Manifest, tree: Tree<'_>, parent: &mut tracking::Entry) {
fn iter_tree(
tree_cache: &ManifestTreeCache,
tree: &Tree<'_>,
parent: &mut tracking::Entry,
) {
for entry in tree.entries() {
let mut new_entry = tracking::Entry {
kind: entry.kind(),
Expand All @@ -100,9 +132,9 @@ impl Manifest {
if entry.kind().is_tree() {
new_entry.object = encoding::NULL_DIGEST.into();
iter_tree(
source,
source
.get_tree(entry.object())
tree_cache,
tree_cache
.get(entry.object())
.expect("manifest is internally inconsistent (missing child tree)"),
&mut new_entry,
)
Expand All @@ -111,7 +143,7 @@ impl Manifest {
}
}

iter_tree(self, self.root(), &mut root);
iter_tree(&tree_cache, &self.root(), &mut root);
let mut manifest = tracking::Manifest::new(root);
// ensure that the manifest will round-trip in the case of it
// being converted back into this type
Expand Down
2 changes: 1 addition & 1 deletion crates/spfs/src/graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub use database::{
pub use entry::Entry;
pub use kind::{HasKind, Kind, ObjectKind};
pub use layer::{KeyAnnotationValuePair, Layer};
pub use manifest::Manifest;
pub use manifest::{Manifest, ManifestTreeCache};
pub use object::{FlatObject, Object, ObjectProto};
pub use platform::Platform;
pub use stack::Stack;
Expand Down
11 changes: 6 additions & 5 deletions crates/spfs/src/storage/fs/renderer_unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,9 @@ where
self.reporter.visit_entry(entry);
}

let manifest_tree_cache = manifest.get_tree_cache();
let mut res = self
.render_into_dir_fd(root_dir, root_node, manifest, render_type)
.render_into_dir_fd(root_dir, &root_node, &manifest_tree_cache, render_type)
.await;
if let Err(Error::StorageWriteError(_, p, _)) = &mut res {
*p = target_dir.join(p.as_path());
Expand All @@ -429,8 +430,8 @@ where
async fn render_into_dir_fd<Fd>(
&self,
root_dir_fd: Fd,
tree: graph::Tree<'async_recursion>,
manifest: &graph::Manifest,
tree: &graph::Tree<'async_recursion>,
manifest_tree_cache: &graph::ManifestTreeCache,
render_type: RenderType,
) -> Result<()>
where
Expand Down Expand Up @@ -458,7 +459,7 @@ where
let mut root_path = PathBuf::from(entry.name());
match entry.kind() {
tracking::EntryKind::Tree => {
let tree = manifest.get_tree(entry.object()).ok_or_else(|| {
let tree = manifest_tree_cache.get(entry.object()).ok_or_else(|| {
Error::String(format!("Failed to render: manifest is internally inconsistent (missing child tree {})", *entry.object()))
})?;

Expand All @@ -475,7 +476,7 @@ where
.render_into_dir_fd(
child_dir.as_raw_fd(),
tree,
manifest,
manifest_tree_cache,
render_type,
)
.await;
Expand Down
1 change: 1 addition & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,7 @@
"undeprecated",
"undeprecates",
"undeprecating",
"undigestible",
"uninstallation",
"unistd",
"unloadable",
Expand Down
Loading