From e6fb53735b1c1d1a2580a8686d82122173e79c13 Mon Sep 17 00:00:00 2001 From: J Robert Ray Date: Wed, 12 Jun 2024 13:35:35 -0700 Subject: [PATCH] Fix poor performance of to_tracking_manifest This method had something like O(n^3) complexity because it would for every tree in a manifest iterate over every tree in a manifest and calculate the digest for every tree in the manifest (modulo short circuiting). `Manifest::get_tree` is culprit and an alternative technique of caching the lookup of digest to tree was added. Both uses of this method have been updated but this method was left in place with a new warning. It would be cheaper to call `get_tree` in a contrived case where the cache is only accessed once and not reused for multiple lookups. Fixes #1043. Signed-off-by: J Robert Ray --- crates/spfs/src/graph/manifest.rs | 44 ++++++++++++++++++--- crates/spfs/src/graph/mod.rs | 2 +- crates/spfs/src/storage/fs/renderer_unix.rs | 11 +++--- cspell.json | 1 + 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/crates/spfs/src/graph/manifest.rs b/crates/spfs/src/graph/manifest.rs index b1d111370..fa8c750f0 100644 --- a/crates/spfs/src/graph/manifest.rs +++ b/crates/spfs/src/graph/manifest.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use std::collections::BTreeSet; +use std::collections::{BTreeSet, HashMap}; use std::io::BufRead; use spfs_proto::ManifestArgs; @@ -19,6 +19,9 @@ mod manifest_test; /// A manifest holds the state of a filesystem tree. pub type Manifest = super::object::FlatObject>; +/// A mapping of digest to tree in a manifest. +pub type ManifestTreeCache<'a> = HashMap>; + impl Default for Manifest { fn default() -> Self { Self::builder().build(&crate::tracking::Entry::<()>::empty_dir_with_open_perms()) @@ -73,6 +76,29 @@ impl Manifest { children.into_iter().collect() } + /// Return the trees of this Manifest mapped by digest. + /// + /// It is expensive to find a tree in a manifest by digest. If multiple + /// trees need to be accessed by digest, it is faster to use this method + /// instead of [`Manifest::get_tree`]. + pub fn get_tree_cache(&self) -> ManifestTreeCache { + let mut tree_cache = HashMap::new(); + for tree in self.iter_trees() { + let Ok(digest) = tree.digest() else { + tracing::warn!("Undigestible tree found in manifest"); + continue; + }; + tree_cache.insert(digest, tree); + } + tree_cache + } + + /// Return the tree in this manifest with the given digest. + /// + /// # Warning + /// + /// This can be very slow to call repeatedly on the same manifest. See + /// [`Manifest::get_tree_cache`]. pub fn get_tree(&self, digest: &encoding::Digest) -> Option> { self.iter_trees() .find(|t| t.digest().ok().as_ref() == Some(digest)) @@ -85,9 +111,15 @@ impl Manifest { /// Convert this manifest into a more workable form for editing. pub fn to_tracking_manifest(&self) -> tracking::Manifest { + let tree_cache = self.get_tree_cache(); + let mut root = tracking::Entry::empty_dir_with_open_perms(); - fn iter_tree(source: &Manifest, tree: Tree<'_>, parent: &mut tracking::Entry) { + fn iter_tree( + tree_cache: &HashMap>, + tree: &Tree<'_>, + parent: &mut tracking::Entry, + ) { for entry in tree.entries() { let mut new_entry = tracking::Entry { kind: entry.kind(), @@ -100,9 +132,9 @@ impl Manifest { if entry.kind().is_tree() { new_entry.object = encoding::NULL_DIGEST.into(); iter_tree( - source, - source - .get_tree(entry.object()) + tree_cache, + tree_cache + .get(entry.object()) .expect("manifest is internally inconsistent (missing child tree)"), &mut new_entry, ) @@ -111,7 +143,7 @@ impl Manifest { } } - iter_tree(self, self.root(), &mut root); + iter_tree(&tree_cache, &self.root(), &mut root); let mut manifest = tracking::Manifest::new(root); // ensure that the manifest will round-trip in the case of it // being converted back into this type diff --git a/crates/spfs/src/graph/mod.rs b/crates/spfs/src/graph/mod.rs index e5282400a..38269f62d 100644 --- a/crates/spfs/src/graph/mod.rs +++ b/crates/spfs/src/graph/mod.rs @@ -35,7 +35,7 @@ pub use database::{ pub use entry::Entry; pub use kind::{HasKind, Kind, ObjectKind}; pub use layer::{KeyAnnotationValuePair, Layer}; -pub use manifest::Manifest; +pub use manifest::{Manifest, ManifestTreeCache}; pub use object::{FlatObject, Object, ObjectProto}; pub use platform::Platform; pub use stack::Stack; diff --git a/crates/spfs/src/storage/fs/renderer_unix.rs b/crates/spfs/src/storage/fs/renderer_unix.rs index e57462d46..07234dab2 100644 --- a/crates/spfs/src/storage/fs/renderer_unix.rs +++ b/crates/spfs/src/storage/fs/renderer_unix.rs @@ -414,8 +414,9 @@ where self.reporter.visit_entry(entry); } + let manifest_tree_cache = manifest.get_tree_cache(); let mut res = self - .render_into_dir_fd(root_dir, root_node, manifest, render_type) + .render_into_dir_fd(root_dir, &root_node, &manifest_tree_cache, render_type) .await; if let Err(Error::StorageWriteError(_, p, _)) = &mut res { *p = target_dir.join(p.as_path()); @@ -429,8 +430,8 @@ where async fn render_into_dir_fd( &self, root_dir_fd: Fd, - tree: graph::Tree<'async_recursion>, - manifest: &graph::Manifest, + tree: &graph::Tree<'async_recursion>, + manifest_tree_cache: &graph::ManifestTreeCache, render_type: RenderType, ) -> Result<()> where @@ -458,7 +459,7 @@ where let mut root_path = PathBuf::from(entry.name()); match entry.kind() { tracking::EntryKind::Tree => { - let tree = manifest.get_tree(entry.object()).ok_or_else(|| { + let tree = manifest_tree_cache.get(entry.object()).ok_or_else(|| { Error::String(format!("Failed to render: manifest is internally inconsistent (missing child tree {})", *entry.object())) })?; @@ -475,7 +476,7 @@ where .render_into_dir_fd( child_dir.as_raw_fd(), tree, - manifest, + manifest_tree_cache, render_type, ) .await; diff --git a/cspell.json b/cspell.json index 8ab343f09..510a26b26 100644 --- a/cspell.json +++ b/cspell.json @@ -745,6 +745,7 @@ "undeprecated", "undeprecates", "undeprecating", + "undigestible", "uninstallation", "unistd", "unloadable",