From c1f01886256560858aad63faf5c6f13e61f047f6 Mon Sep 17 00:00:00 2001 From: J Robert Ray Date: Fri, 7 Jun 2024 17:08:18 -0700 Subject: [PATCH] Fix improper capturing of mask into package We have observed packages getting built containing a file removal mask, despite `test_package_with_circular_dep_does_not_collect_file_removals` attempting to test correct behavior in this area. It turns out that a specific setup is required to exhibit the incorrect behavior. When the result of a build is being diffed to figure out what changes are to be captured into the new package, the "environment_filesystem" Manifest was being incorrectly calculated. If multiple layers in the Solution contain entries for the same subdirectory, only the latest layer that does so would be used to represent what the "before" contents were for that directory. Consider one package that puts a file into "subdir/" like "subdir/foo.txt", and another package that simply creates an empty directory called "subdir/". The order that layers end up appearing in a Solution is arbitrary, so with this setup it is possible for the Manifest to end up with an empty "subdir/" directory, whereas the true build environment would have contained the "subdir/foo.txt" file entry. In our environment we have a package called "stdfs" which contains an empty "lib/" subdirectory. When this package's entries are walked, its empty "lib/" directory would overwrite any contents that had already been encounted under "lib/". In our problem case, that included a _deletion_ of a subdirectory under "lib/". Since the "before" contents ended up not containing the entry that was deleted by the build, when diffing the before and after, the after would include a diff event that "Added" a new mask entry, and its presence in the diff would allow it to be collected into the newly built package. One "fix" for this was to modify `diff_path` so it detects this (unexpected) situation and ignore mask entries when the mask is deleting something that didn't exist in the "before" Manifest. But this problem is a symptom of the "before" Manifest being incorrect. The proper fix was to change the "environment_filesystem" calculation to merge two Trees when Trees with the same path appear in multiple layers. In order to properly test the expected behavior, the "environment_filesystem" calculation was refactored out into its own method. It was moved to `ResolvedLayers` since practically none if its logic was related to `BinaryPackageBuilder`. The recursive package test was updated in such a way that it would actually fail without these changes, however the problem ended up not being specific to recursive packages. A new test was added to specifically test the Tree merge behavior. Fixes #1013. Signed-off-by: J Robert Ray --- Cargo.lock | 7 ++ crates/spfs/src/tracking/diff.rs | 11 ++ crates/spk-build/src/build/binary.rs | 80 ++----------- .../cmd-build/src/cmd_build_test/mod.rs | 40 +++---- crates/spk-exec/Cargo.toml | 13 ++- crates/spk-exec/src/error.rs | 3 + crates/spk-exec/src/exec.rs | 107 +++++++++++++++++- crates/spk-exec/src/exec_test.rs | 84 ++++++++++++++ crates/spk-exec/src/lib.rs | 2 + 9 files changed, 248 insertions(+), 99 deletions(-) create mode 100644 crates/spk-exec/src/exec_test.rs diff --git a/Cargo.lock b/Cargo.lock index cbe14a925e..e41d6c8010 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4172,15 +4172,22 @@ name = "spk-exec" version = "0.40.0" dependencies = [ "async-stream", + "clap 4.5.0", "futures", "miette", "relative-path", + "rstest 0.18.2", "serde_json", "spfs", + "spk-cli-common", + "spk-cmd-build", "spk-schema", "spk-solve", + "spk-solve-macros", "spk-storage", + "tempfile", "thiserror", + "tokio", "tracing", ] diff --git a/crates/spfs/src/tracking/diff.rs b/crates/spfs/src/tracking/diff.rs index 2e986d0c75..53e6559e7f 100644 --- a/crates/spfs/src/tracking/diff.rs +++ b/crates/spfs/src/tracking/diff.rs @@ -124,6 +124,8 @@ pub fn compute_diff(a: &Manifest, b: &Manifest) -> Vec changes.push(d), None => tracing::debug!( + // XXX this is not the only reason `diff_path` may return + // None. "path was missing from both manifests during diff, this should be impossible" ), } @@ -149,6 +151,15 @@ fn diff_path( path: path.clone(), }), + (None, Some(b_entry)) if b_entry.kind == EntryKind::Mask => { + debug_assert!( + false, + "detected a mask entry that deletes something that doesn't exist" + ); + // Can't return `a` as documented since the left side is None. + None + } + (None, Some(e)) => Some(Diff { mode: DiffMode::Added(e.clone()), path: path.clone(), diff --git a/crates/spk-build/src/build/binary.rs b/crates/spk-build/src/build/binary.rs index 4bf4b151e9..50d0a9eb06 100644 --- a/crates/spk-build/src/build/binary.rs +++ b/crates/spk-build/src/build/binary.rs @@ -11,11 +11,12 @@ use std::sync::Arc; use futures::StreamExt; use relative_path::RelativePathBuf; use spfs::prelude::*; -use spfs::tracking::{DiffMode, EntryKind}; +use spfs::tracking::DiffMode; use spk_exec::{ pull_resolved_runtime_layers, resolve_runtime_layers, solution_to_resolved_runtime_layers, + ConflictingPackagePair, }; use spk_schema::foundation::env::data_path; use spk_schema::foundation::format::FormatIdent; @@ -39,7 +40,6 @@ use spk_solve::graph::Graph; use spk_solve::solution::Solution; use spk_solve::{BoxedResolverCallback, Named, ResolverCallback, Solver}; use spk_storage as storage; -use tokio::pin; use crate::report::{BuildOutputReport, BuildReport, BuildSetupReport}; use crate::validation::{Report, Validator}; @@ -79,11 +79,6 @@ pub enum BuildSource { LocalPath(PathBuf), } -/// A pair of packages that are in conflict for some reason, -/// e.g. because they both provide one or more of the same files. -#[derive(Eq, Hash, PartialEq)] -struct ConflictingPackagePair(BuildIdent, BuildIdent); - /// A struct with the two variants needed to calculate a build digest for a /// package as well as build the package. struct VariantPair { @@ -349,73 +344,12 @@ where tokio::spawn(async move { Ok(resolved_layers_copy.layers()) }) }; - let mut environment_filesystem = spfs::tracking::Manifest::new( - // we expect this to be replaced, but the source build for this package - // seems like one of the most reasonable default owners for the root - // of this manifest until then - spfs::tracking::Entry::empty_dir_with_open_perms_with_data( + let environment_filesystem = resolved_layers + .get_environment_filesystem( self.recipe.ident().to_build(Build::Source), - ), - ); - - // Warn about possibly unexpected shadowed files in the layer stack. - let mut warning_found = false; - let entries = resolved_layers.iter_entries(); - pin!(entries); - - while let Some(entry) = entries.next().await { - let (path, entry, resolved_layer) = match entry { - Err(spk_exec::Error::NonSpfsLayerInResolvedLayers) => continue, - Err(err) => return Err(err.into()), - Ok(entry) => entry, - }; - - let entry = entry.and_user_data(resolved_layer.spec.ident().to_owned()); - let Some(previous) = environment_filesystem.get_path(&path).cloned() else { - environment_filesystem.mknod(&path, entry)?; - continue; - }; - let entry = environment_filesystem.mknod(&path, entry)?; - if !matches!(entry.kind, EntryKind::Blob(_)) { - continue; - } - - // Ignore when the shadowing is from different components - // of the same package. - if entry.user_data == previous.user_data { - continue; - } - - // The layer order isn't necessarily meaningful in terms - // of spk package dependency ordering (at the time of - // writing), so phrase this in a way that doesn't suggest - // one layer "owns" the file more than the other. - warning_found = true; - tracing::warn!( - "File {path} found in more than one package: {} and {}", - previous.user_data, - entry.user_data - ); - - // Track the packages involved for later use - let pkg_a = previous.user_data.clone(); - let pkg_b = entry.user_data.clone(); - let packages_key = if pkg_a < pkg_b { - ConflictingPackagePair(pkg_a, pkg_b) - } else { - ConflictingPackagePair(pkg_b, pkg_a) - }; - let counter = self.conflicting_packages.entry(packages_key).or_default(); - counter.insert(path.clone()); - } - if warning_found { - tracing::warn!("Conflicting files were detected"); - tracing::warn!(" > This can cause undefined runtime behavior"); - tracing::warn!(" > It should be addressed by:"); - tracing::warn!(" - not using these packages together"); - tracing::warn!(" - removing the file from one of them"); - tracing::warn!(" - using alternate versions or components"); - } + &mut self.conflicting_packages, + ) + .await?; runtime.status.stack.extend( pull_task diff --git a/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs b/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs index ac84163f60..8e803bbfc9 100644 --- a/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs +++ b/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs @@ -502,39 +502,36 @@ async fn test_package_with_circular_dep_does_not_collect_file_removals(tmpdir: t // build). let rt = spfs_runtime().await; + // This extra package "empty" helps create the situation that ends up with + // file removals getting collected in a real world example. build_package!( tmpdir, - "circ.spk.yaml", + "empty.spk.yaml", br#" api: v0/package -pkg: circ/1.0.0 +pkg: empty/1.0.0 build: script: - - echo "1.0.0" > $PREFIX/version.txt - - echo "hello world" > $PREFIX/hello.txt + - mkdir $PREFIX/subdir "# ); build_package!( tmpdir, - "middle.spk.yaml", + "circ.spk.yaml", br#" api: v0/package -pkg: middle/1.0.0 +pkg: circ/1.0.0 build: - options: - - pkg: circ script: - - "true" -install: - requirements: - - pkg: circ - fromBuildEnv: true -"#, + - echo "1.0.0" > $PREFIX/version.txt + - mkdir -p $PREFIX/subdir/v1 + - echo "hello world" > $PREFIX/subdir/v1/hello.txt +"# ); - // This build deletes a file that is owned by the previous build. It should - // not be collected as part of the new build. + // This build deletes a subdir that is owned by the previous build. It should + // not be collected (as a mask) as part of the new build. build_package!( tmpdir, "circ.spk.yaml", @@ -543,10 +540,13 @@ api: v0/package pkg: circ/2.0.0 build: options: - - pkg: middle + - pkg: circ + - pkg: empty script: - echo "2.0.0" > $PREFIX/version.txt - - rm $PREFIX/hello.txt + - rm -rf $PREFIX/subdir/v1 + - mkdir -p $PREFIX/subdir/v2 + - echo "hello world" > $PREFIX/subdir/v2/hello.txt validation: rules: - allow: RecursiveBuild @@ -586,10 +586,10 @@ build: .unwrap() .to_tracking_manifest(); - let entry = manifest.get_path("hello.txt"); + let entry = manifest.get_path("subdir/v1"); assert!( entry.is_none(), - "should not capture file deleted in new build" + "should not capture entry deleted in new build" ); } diff --git a/crates/spk-exec/Cargo.toml b/crates/spk-exec/Cargo.toml index 91338a15f9..3ca801684b 100644 --- a/crates/spk-exec/Cargo.toml +++ b/crates/spk-exec/Cargo.toml @@ -17,12 +17,21 @@ migration-to-components = [ [dependencies] async-stream = "0.3" futures = { workspace = true } +miette = { workspace = true } relative-path = { workspace = true } serde_json = { workspace = true } spfs = { workspace = true } spk-schema = { workspace = true } -spk-storage = { workspace = true } spk-solve = { workspace = true } +spk-storage = { workspace = true } thiserror = { workspace = true } -miette = { workspace = true } +tokio = { workspace = true, features = ["rt"] } tracing = { workspace = true } + +[dev-dependencies] +clap = { workspace = true } +rstest = { workspace = true } +spk-cli-common = { workspace = true } +spk-cmd-build = { workspace = true } +spk-solve-macros = { workspace = true } +tempfile = { workspace = true } diff --git a/crates/spk-exec/src/error.rs b/crates/spk-exec/src/error.rs index 699191ad8c..6a5cfbb93d 100644 --- a/crates/spk-exec/src/error.rs +++ b/crates/spk-exec/src/error.rs @@ -22,6 +22,9 @@ pub enum Error { Error(#[from] spfs::Error), #[error(transparent)] #[diagnostic(forward(0))] + BuildManifest(#[from] spfs::tracking::manifest::MkError), + #[error(transparent)] + #[diagnostic(forward(0))] SpkStorageError(#[from] spk_storage::Error), #[error("Error: {0}")] String(String), diff --git a/crates/spk-exec/src/exec.rs b/crates/spk-exec/src/exec.rs index 47ecf330c5..321ee8d05c 100644 --- a/crates/spk-exec/src/exec.rs +++ b/crates/spk-exec/src/exec.rs @@ -2,25 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use async_stream::try_stream; -use futures::Stream; +use futures::{Stream, StreamExt}; use relative_path::RelativePathBuf; use spfs::encoding::Digest; use spfs::prelude::*; -use spfs::tracking::Entry; +use spfs::tracking::{Entry, EntryKind}; use spk_schema::foundation::format::{FormatIdent, FormatOptionMap}; use spk_schema::foundation::ident_component::Component; use spk_schema::prelude::*; use spk_schema::Spec; use spk_solve::solution::{PackageSource, Solution, SPK_SOLVE_EXTRA_DATA_KEY}; -use spk_solve::RepositoryHandle; +use spk_solve::{BuildIdent, RepositoryHandle}; use spk_storage as storage; +use tokio::pin; use crate::{Error, Result}; +#[cfg(test)] +#[path = "./exec_test.rs"] +mod exec_test; + +/// A pair of packages that are in conflict for some reason, +/// e.g. because they both provide one or more of the same files. +#[derive(Eq, Hash, PartialEq)] +pub struct ConflictingPackagePair(BuildIdent, BuildIdent); + /// A single layer of a resolved solution. #[derive(Clone)] pub struct ResolvedLayer { @@ -76,6 +86,95 @@ impl ResolvedLayers { pub fn layers(&self) -> Vec { self.0.iter().map(|l| l.digest).collect() } + + /// Compute a [`spfs::tracking::Manifest`] from a [`ResolvedLayers`]. + /// + /// If any shadowed files are detected a warning will be logged. Because the + /// layers in a Solution are in an arbitrary order, the Manifest's contents + /// can be unpredictable if multiple layers contain overlapping entries. + pub async fn get_environment_filesystem( + &self, + ident: BuildIdent, + conflicting_packages: &mut HashMap>, + ) -> Result> { + let mut environment_filesystem = spfs::tracking::Manifest::new( + // we expect this to be replaced, but the source build for this package + // seems like one of the most reasonable default owners for the root + // of this manifest until then + spfs::tracking::Entry::empty_dir_with_open_perms_with_data(ident), + ); + + // Warn about possibly unexpected shadowed files in the layer stack. + let mut warning_found = false; + let entries = self.iter_entries(); + pin!(entries); + + while let Some(entry) = entries.next().await { + let (path, entry, resolved_layer) = match entry { + Err(Error::NonSpfsLayerInResolvedLayers) => continue, + Err(err) => return Err(err), + Ok(entry) => entry, + }; + + let mut entry = entry.and_user_data(resolved_layer.spec.ident().to_owned()); + let Some(previous) = environment_filesystem.get_path(&path).cloned() else { + environment_filesystem.mknod(&path, entry)?; + continue; + }; + // If old and new entries are both Trees, then merge them to + // properly mimic overlayfs behavior. + if previous.kind == EntryKind::Tree && entry.kind == EntryKind::Tree { + for (previous_entry_name, previous_entry) in previous.entries.into_iter() { + entry + .entries + .entry(previous_entry_name) + .or_insert(previous_entry); + } + } + let entry = environment_filesystem.mknod(&path, entry)?; + if !matches!(entry.kind, EntryKind::Blob(_)) { + continue; + } + + // Ignore when the shadowing is from different components + // of the same package. + if entry.user_data == previous.user_data { + continue; + } + + // The layer order isn't necessarily meaningful in terms + // of spk package dependency ordering (at the time of + // writing), so phrase this in a way that doesn't suggest + // one layer "owns" the file more than the other. + warning_found = true; + tracing::warn!( + "File {path} found in more than one package: {} and {}", + previous.user_data, + entry.user_data + ); + + // Track the packages involved for later use + let pkg_a = previous.user_data.clone(); + let pkg_b = entry.user_data.clone(); + let packages_key = if pkg_a < pkg_b { + ConflictingPackagePair(pkg_a, pkg_b) + } else { + ConflictingPackagePair(pkg_b, pkg_a) + }; + let counter = conflicting_packages.entry(packages_key).or_default(); + counter.insert(path.clone()); + } + if warning_found { + tracing::warn!("Conflicting files were detected"); + tracing::warn!(" > This can cause undefined runtime behavior"); + tracing::warn!(" > It should be addressed by:"); + tracing::warn!(" - not using these packages together"); + tracing::warn!(" - removing the file from one of them"); + tracing::warn!(" - using alternate versions or components"); + } + + Ok(environment_filesystem) + } } /// Return the necessary layers to have all solution packages. diff --git a/crates/spk-exec/src/exec_test.rs b/crates/spk-exec/src/exec_test.rs new file mode 100644 index 0000000000..7055e41c93 --- /dev/null +++ b/crates/spk-exec/src/exec_test.rs @@ -0,0 +1,84 @@ +// Copyright (c) Sony Pictures Imageworks, et al. +// SPDX-License-Identifier: Apache-2.0 +// https://github.com/imageworks/spk + +use std::collections::HashMap; +use std::sync::Arc; + +use rstest::{fixture, rstest}; +use spk_cmd_build::build_package; +use spk_schema::foundation::fixtures::*; +use spk_schema::ident::build_ident; +use spk_solve::{DecisionFormatterBuilder, Solver}; +use spk_solve_macros::request; +use spk_storage::fixtures::*; + +use crate::solution_to_resolved_runtime_layers; + +#[fixture] +fn solver() -> Solver { + Solver::default() +} + +/// If two layers contribute files to the same subdirectory, the Manifest is +/// expected to contain both files. +#[rstest] +#[tokio::test] +async fn get_environment_filesystem_merges_directories( + tmpdir: tempfile::TempDir, + mut solver: Solver, +) { + let rt = spfs_runtime().await; + + build_package!( + tmpdir, + "one.spk.yaml", + br#" +api: v0/package +pkg: one/1.0.0 + +build: + script: + - mkdir "$PREFIX"/subdir + - touch "$PREFIX"/subdir/one.txt +"#, + ); + + build_package!( + tmpdir, + "two.spk.yaml", + br#" +api: v0/package +pkg: two/1.0.0 + +build: + script: + - mkdir "$PREFIX"/subdir + - touch "$PREFIX"/subdir/two.txt +"#, + ); + + let formatter = DecisionFormatterBuilder::default() + .with_verbosity(0) + .build(); + + solver.add_repository(Arc::clone(&rt.tmprepo)); + solver.add_request(request!("one")); + solver.add_request(request!("two")); + + let (solution, _) = formatter.run_and_log_resolve(&solver).await.unwrap(); + + let resolved_layers = solution_to_resolved_runtime_layers(&solution).unwrap(); + + let mut conflicting_packages = HashMap::new(); + let environment = resolved_layers + .get_environment_filesystem( + build_ident!("does-not-matter/1.0.0/src"), + &mut conflicting_packages, + ) + .await + .unwrap(); + + assert!(environment.get_path("subdir/one.txt").is_some()); + assert!(environment.get_path("subdir/two.txt").is_some()); +} diff --git a/crates/spk-exec/src/lib.rs b/crates/spk-exec/src/lib.rs index 6750000212..5d35e5b0d0 100644 --- a/crates/spk-exec/src/lib.rs +++ b/crates/spk-exec/src/lib.rs @@ -12,5 +12,7 @@ pub use exec::{ setup_current_runtime, setup_runtime, solution_to_resolved_runtime_layers, + ConflictingPackagePair, ResolvedLayer, + ResolvedLayers, };