Skip to content

Commit

Permalink
Fix poor performance of to_tracking_manifest
Browse files Browse the repository at this point in the history
This method had something like O(n^3) complexity because it would for
every tree in a manifest iterate over every tree in a manifest and
calculate the digest for every tree in the manifest (modulo short
circuiting).

`Manifest::get_tree` is culprit and an alternative technique of caching
the lookup of digest to tree was added. Both uses of this method have
been updated but this method was left in place with a new warning. It
would be cheaper to call `get_tree` in a contrived case where the cache
is only accessed once and not reused for multiple lookups.

Fixes #1043.

Signed-off-by: J Robert Ray <jrray@imageworks.com>
  • Loading branch information
J Robert Ray committed Jun 13, 2024
1 parent 9892a89 commit e6fb537
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 12 deletions.
44 changes: 38 additions & 6 deletions crates/spfs/src/graph/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
// https://github.com/imageworks/spk

use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashMap};
use std::io::BufRead;

use spfs_proto::ManifestArgs;
Expand All @@ -19,6 +19,9 @@ mod manifest_test;
/// A manifest holds the state of a filesystem tree.
pub type Manifest = super::object::FlatObject<spfs_proto::Manifest<'static>>;

/// A mapping of digest to tree in a manifest.
pub type ManifestTreeCache<'a> = HashMap<encoding::Digest, Tree<'a>>;

impl Default for Manifest {
fn default() -> Self {
Self::builder().build(&crate::tracking::Entry::<()>::empty_dir_with_open_perms())
Expand Down Expand Up @@ -73,6 +76,29 @@ impl Manifest {
children.into_iter().collect()
}

/// Return the trees of this Manifest mapped by digest.
///
/// It is expensive to find a tree in a manifest by digest. If multiple
/// trees need to be accessed by digest, it is faster to use this method
/// instead of [`Manifest::get_tree`].
pub fn get_tree_cache(&self) -> ManifestTreeCache {
let mut tree_cache = HashMap::new();
for tree in self.iter_trees() {
let Ok(digest) = tree.digest() else {
tracing::warn!("Undigestible tree found in manifest");
continue;
};
tree_cache.insert(digest, tree);
}
tree_cache
}

/// Return the tree in this manifest with the given digest.
///
/// # Warning
///
/// This can be very slow to call repeatedly on the same manifest. See
/// [`Manifest::get_tree_cache`].
pub fn get_tree(&self, digest: &encoding::Digest) -> Option<Tree<'_>> {
self.iter_trees()
.find(|t| t.digest().ok().as_ref() == Some(digest))
Expand All @@ -85,9 +111,15 @@ impl Manifest {

/// Convert this manifest into a more workable form for editing.
pub fn to_tracking_manifest(&self) -> tracking::Manifest {
let tree_cache = self.get_tree_cache();

let mut root = tracking::Entry::empty_dir_with_open_perms();

fn iter_tree(source: &Manifest, tree: Tree<'_>, parent: &mut tracking::Entry) {
fn iter_tree(
tree_cache: &HashMap<encoding::Digest, Tree<'_>>,
tree: &Tree<'_>,
parent: &mut tracking::Entry,
) {
for entry in tree.entries() {
let mut new_entry = tracking::Entry {
kind: entry.kind(),
Expand All @@ -100,9 +132,9 @@ impl Manifest {
if entry.kind().is_tree() {
new_entry.object = encoding::NULL_DIGEST.into();
iter_tree(
source,
source
.get_tree(entry.object())
tree_cache,
tree_cache
.get(entry.object())
.expect("manifest is internally inconsistent (missing child tree)"),
&mut new_entry,
)
Expand All @@ -111,7 +143,7 @@ impl Manifest {
}
}

iter_tree(self, self.root(), &mut root);
iter_tree(&tree_cache, &self.root(), &mut root);
let mut manifest = tracking::Manifest::new(root);
// ensure that the manifest will round-trip in the case of it
// being converted back into this type
Expand Down
2 changes: 1 addition & 1 deletion crates/spfs/src/graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub use database::{
pub use entry::Entry;
pub use kind::{HasKind, Kind, ObjectKind};
pub use layer::{KeyAnnotationValuePair, Layer};
pub use manifest::Manifest;
pub use manifest::{Manifest, ManifestTreeCache};
pub use object::{FlatObject, Object, ObjectProto};
pub use platform::Platform;
pub use stack::Stack;
Expand Down
11 changes: 6 additions & 5 deletions crates/spfs/src/storage/fs/renderer_unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,9 @@ where
self.reporter.visit_entry(entry);
}

let manifest_tree_cache = manifest.get_tree_cache();
let mut res = self
.render_into_dir_fd(root_dir, root_node, manifest, render_type)
.render_into_dir_fd(root_dir, &root_node, &manifest_tree_cache, render_type)
.await;
if let Err(Error::StorageWriteError(_, p, _)) = &mut res {
*p = target_dir.join(p.as_path());
Expand All @@ -429,8 +430,8 @@ where
async fn render_into_dir_fd<Fd>(
&self,
root_dir_fd: Fd,
tree: graph::Tree<'async_recursion>,
manifest: &graph::Manifest,
tree: &graph::Tree<'async_recursion>,
manifest_tree_cache: &graph::ManifestTreeCache,
render_type: RenderType,
) -> Result<()>
where
Expand Down Expand Up @@ -458,7 +459,7 @@ where
let mut root_path = PathBuf::from(entry.name());
match entry.kind() {
tracking::EntryKind::Tree => {
let tree = manifest.get_tree(entry.object()).ok_or_else(|| {
let tree = manifest_tree_cache.get(entry.object()).ok_or_else(|| {
Error::String(format!("Failed to render: manifest is internally inconsistent (missing child tree {})", *entry.object()))
})?;

Expand All @@ -475,7 +476,7 @@ where
.render_into_dir_fd(
child_dir.as_raw_fd(),
tree,
manifest,
manifest_tree_cache,
render_type,
)
.await;
Expand Down
1 change: 1 addition & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,7 @@
"undeprecated",
"undeprecates",
"undeprecating",
"undigestible",
"uninstallation",
"unistd",
"unloadable",
Expand Down

0 comments on commit e6fb537

Please sign in to comment.