diff --git a/Cargo.lock b/Cargo.lock index cbe14a925e..e41d6c8010 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4172,15 +4172,22 @@ name = "spk-exec" version = "0.40.0" dependencies = [ "async-stream", + "clap 4.5.0", "futures", "miette", "relative-path", + "rstest 0.18.2", "serde_json", "spfs", + "spk-cli-common", + "spk-cmd-build", "spk-schema", "spk-solve", + "spk-solve-macros", "spk-storage", + "tempfile", "thiserror", + "tokio", "tracing", ] diff --git a/crates/spfs/src/tracking/diff.rs b/crates/spfs/src/tracking/diff.rs index 2e986d0c75..53e6559e7f 100644 --- a/crates/spfs/src/tracking/diff.rs +++ b/crates/spfs/src/tracking/diff.rs @@ -124,6 +124,8 @@ pub fn compute_diff(a: &Manifest, b: &Manifest) -> Vec changes.push(d), None => tracing::debug!( + // XXX this is not the only reason `diff_path` may return + // None. "path was missing from both manifests during diff, this should be impossible" ), } @@ -149,6 +151,15 @@ fn diff_path( path: path.clone(), }), + (None, Some(b_entry)) if b_entry.kind == EntryKind::Mask => { + debug_assert!( + false, + "detected a mask entry that deletes something that doesn't exist" + ); + // Can't return `a` as documented since the left side is None. + None + } + (None, Some(e)) => Some(Diff { mode: DiffMode::Added(e.clone()), path: path.clone(), diff --git a/crates/spk-build/src/build/binary.rs b/crates/spk-build/src/build/binary.rs index 4bf4b151e9..50d0a9eb06 100644 --- a/crates/spk-build/src/build/binary.rs +++ b/crates/spk-build/src/build/binary.rs @@ -11,11 +11,12 @@ use std::sync::Arc; use futures::StreamExt; use relative_path::RelativePathBuf; use spfs::prelude::*; -use spfs::tracking::{DiffMode, EntryKind}; +use spfs::tracking::DiffMode; use spk_exec::{ pull_resolved_runtime_layers, resolve_runtime_layers, solution_to_resolved_runtime_layers, + ConflictingPackagePair, }; use spk_schema::foundation::env::data_path; use spk_schema::foundation::format::FormatIdent; @@ -39,7 +40,6 @@ use spk_solve::graph::Graph; use spk_solve::solution::Solution; use spk_solve::{BoxedResolverCallback, Named, ResolverCallback, Solver}; use spk_storage as storage; -use tokio::pin; use crate::report::{BuildOutputReport, BuildReport, BuildSetupReport}; use crate::validation::{Report, Validator}; @@ -79,11 +79,6 @@ pub enum BuildSource { LocalPath(PathBuf), } -/// A pair of packages that are in conflict for some reason, -/// e.g. because they both provide one or more of the same files. -#[derive(Eq, Hash, PartialEq)] -struct ConflictingPackagePair(BuildIdent, BuildIdent); - /// A struct with the two variants needed to calculate a build digest for a /// package as well as build the package. struct VariantPair { @@ -349,73 +344,12 @@ where tokio::spawn(async move { Ok(resolved_layers_copy.layers()) }) }; - let mut environment_filesystem = spfs::tracking::Manifest::new( - // we expect this to be replaced, but the source build for this package - // seems like one of the most reasonable default owners for the root - // of this manifest until then - spfs::tracking::Entry::empty_dir_with_open_perms_with_data( + let environment_filesystem = resolved_layers + .get_environment_filesystem( self.recipe.ident().to_build(Build::Source), - ), - ); - - // Warn about possibly unexpected shadowed files in the layer stack. - let mut warning_found = false; - let entries = resolved_layers.iter_entries(); - pin!(entries); - - while let Some(entry) = entries.next().await { - let (path, entry, resolved_layer) = match entry { - Err(spk_exec::Error::NonSpfsLayerInResolvedLayers) => continue, - Err(err) => return Err(err.into()), - Ok(entry) => entry, - }; - - let entry = entry.and_user_data(resolved_layer.spec.ident().to_owned()); - let Some(previous) = environment_filesystem.get_path(&path).cloned() else { - environment_filesystem.mknod(&path, entry)?; - continue; - }; - let entry = environment_filesystem.mknod(&path, entry)?; - if !matches!(entry.kind, EntryKind::Blob(_)) { - continue; - } - - // Ignore when the shadowing is from different components - // of the same package. - if entry.user_data == previous.user_data { - continue; - } - - // The layer order isn't necessarily meaningful in terms - // of spk package dependency ordering (at the time of - // writing), so phrase this in a way that doesn't suggest - // one layer "owns" the file more than the other. - warning_found = true; - tracing::warn!( - "File {path} found in more than one package: {} and {}", - previous.user_data, - entry.user_data - ); - - // Track the packages involved for later use - let pkg_a = previous.user_data.clone(); - let pkg_b = entry.user_data.clone(); - let packages_key = if pkg_a < pkg_b { - ConflictingPackagePair(pkg_a, pkg_b) - } else { - ConflictingPackagePair(pkg_b, pkg_a) - }; - let counter = self.conflicting_packages.entry(packages_key).or_default(); - counter.insert(path.clone()); - } - if warning_found { - tracing::warn!("Conflicting files were detected"); - tracing::warn!(" > This can cause undefined runtime behavior"); - tracing::warn!(" > It should be addressed by:"); - tracing::warn!(" - not using these packages together"); - tracing::warn!(" - removing the file from one of them"); - tracing::warn!(" - using alternate versions or components"); - } + &mut self.conflicting_packages, + ) + .await?; runtime.status.stack.extend( pull_task diff --git a/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs b/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs index ac84163f60..8e803bbfc9 100644 --- a/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs +++ b/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs @@ -502,39 +502,36 @@ async fn test_package_with_circular_dep_does_not_collect_file_removals(tmpdir: t // build). let rt = spfs_runtime().await; + // This extra package "empty" helps create the situation that ends up with + // file removals getting collected in a real world example. build_package!( tmpdir, - "circ.spk.yaml", + "empty.spk.yaml", br#" api: v0/package -pkg: circ/1.0.0 +pkg: empty/1.0.0 build: script: - - echo "1.0.0" > $PREFIX/version.txt - - echo "hello world" > $PREFIX/hello.txt + - mkdir $PREFIX/subdir "# ); build_package!( tmpdir, - "middle.spk.yaml", + "circ.spk.yaml", br#" api: v0/package -pkg: middle/1.0.0 +pkg: circ/1.0.0 build: - options: - - pkg: circ script: - - "true" -install: - requirements: - - pkg: circ - fromBuildEnv: true -"#, + - echo "1.0.0" > $PREFIX/version.txt + - mkdir -p $PREFIX/subdir/v1 + - echo "hello world" > $PREFIX/subdir/v1/hello.txt +"# ); - // This build deletes a file that is owned by the previous build. It should - // not be collected as part of the new build. + // This build deletes a subdir that is owned by the previous build. It should + // not be collected (as a mask) as part of the new build. build_package!( tmpdir, "circ.spk.yaml", @@ -543,10 +540,13 @@ api: v0/package pkg: circ/2.0.0 build: options: - - pkg: middle + - pkg: circ + - pkg: empty script: - echo "2.0.0" > $PREFIX/version.txt - - rm $PREFIX/hello.txt + - rm -rf $PREFIX/subdir/v1 + - mkdir -p $PREFIX/subdir/v2 + - echo "hello world" > $PREFIX/subdir/v2/hello.txt validation: rules: - allow: RecursiveBuild @@ -586,10 +586,10 @@ build: .unwrap() .to_tracking_manifest(); - let entry = manifest.get_path("hello.txt"); + let entry = manifest.get_path("subdir/v1"); assert!( entry.is_none(), - "should not capture file deleted in new build" + "should not capture entry deleted in new build" ); } diff --git a/crates/spk-exec/Cargo.toml b/crates/spk-exec/Cargo.toml index 91338a15f9..3ca801684b 100644 --- a/crates/spk-exec/Cargo.toml +++ b/crates/spk-exec/Cargo.toml @@ -17,12 +17,21 @@ migration-to-components = [ [dependencies] async-stream = "0.3" futures = { workspace = true } +miette = { workspace = true } relative-path = { workspace = true } serde_json = { workspace = true } spfs = { workspace = true } spk-schema = { workspace = true } -spk-storage = { workspace = true } spk-solve = { workspace = true } +spk-storage = { workspace = true } thiserror = { workspace = true } -miette = { workspace = true } +tokio = { workspace = true, features = ["rt"] } tracing = { workspace = true } + +[dev-dependencies] +clap = { workspace = true } +rstest = { workspace = true } +spk-cli-common = { workspace = true } +spk-cmd-build = { workspace = true } +spk-solve-macros = { workspace = true } +tempfile = { workspace = true } diff --git a/crates/spk-exec/src/error.rs b/crates/spk-exec/src/error.rs index 699191ad8c..6a5cfbb93d 100644 --- a/crates/spk-exec/src/error.rs +++ b/crates/spk-exec/src/error.rs @@ -22,6 +22,9 @@ pub enum Error { Error(#[from] spfs::Error), #[error(transparent)] #[diagnostic(forward(0))] + BuildManifest(#[from] spfs::tracking::manifest::MkError), + #[error(transparent)] + #[diagnostic(forward(0))] SpkStorageError(#[from] spk_storage::Error), #[error("Error: {0}")] String(String), diff --git a/crates/spk-exec/src/exec.rs b/crates/spk-exec/src/exec.rs index 47ecf330c5..321ee8d05c 100644 --- a/crates/spk-exec/src/exec.rs +++ b/crates/spk-exec/src/exec.rs @@ -2,25 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use async_stream::try_stream; -use futures::Stream; +use futures::{Stream, StreamExt}; use relative_path::RelativePathBuf; use spfs::encoding::Digest; use spfs::prelude::*; -use spfs::tracking::Entry; +use spfs::tracking::{Entry, EntryKind}; use spk_schema::foundation::format::{FormatIdent, FormatOptionMap}; use spk_schema::foundation::ident_component::Component; use spk_schema::prelude::*; use spk_schema::Spec; use spk_solve::solution::{PackageSource, Solution, SPK_SOLVE_EXTRA_DATA_KEY}; -use spk_solve::RepositoryHandle; +use spk_solve::{BuildIdent, RepositoryHandle}; use spk_storage as storage; +use tokio::pin; use crate::{Error, Result}; +#[cfg(test)] +#[path = "./exec_test.rs"] +mod exec_test; + +/// A pair of packages that are in conflict for some reason, +/// e.g. because they both provide one or more of the same files. +#[derive(Eq, Hash, PartialEq)] +pub struct ConflictingPackagePair(BuildIdent, BuildIdent); + /// A single layer of a resolved solution. #[derive(Clone)] pub struct ResolvedLayer { @@ -76,6 +86,95 @@ impl ResolvedLayers { pub fn layers(&self) -> Vec { self.0.iter().map(|l| l.digest).collect() } + + /// Compute a [`spfs::tracking::Manifest`] from a [`ResolvedLayers`]. + /// + /// If any shadowed files are detected a warning will be logged. Because the + /// layers in a Solution are in an arbitrary order, the Manifest's contents + /// can be unpredictable if multiple layers contain overlapping entries. + pub async fn get_environment_filesystem( + &self, + ident: BuildIdent, + conflicting_packages: &mut HashMap>, + ) -> Result> { + let mut environment_filesystem = spfs::tracking::Manifest::new( + // we expect this to be replaced, but the source build for this package + // seems like one of the most reasonable default owners for the root + // of this manifest until then + spfs::tracking::Entry::empty_dir_with_open_perms_with_data(ident), + ); + + // Warn about possibly unexpected shadowed files in the layer stack. + let mut warning_found = false; + let entries = self.iter_entries(); + pin!(entries); + + while let Some(entry) = entries.next().await { + let (path, entry, resolved_layer) = match entry { + Err(Error::NonSpfsLayerInResolvedLayers) => continue, + Err(err) => return Err(err), + Ok(entry) => entry, + }; + + let mut entry = entry.and_user_data(resolved_layer.spec.ident().to_owned()); + let Some(previous) = environment_filesystem.get_path(&path).cloned() else { + environment_filesystem.mknod(&path, entry)?; + continue; + }; + // If old and new entries are both Trees, then merge them to + // properly mimic overlayfs behavior. + if previous.kind == EntryKind::Tree && entry.kind == EntryKind::Tree { + for (previous_entry_name, previous_entry) in previous.entries.into_iter() { + entry + .entries + .entry(previous_entry_name) + .or_insert(previous_entry); + } + } + let entry = environment_filesystem.mknod(&path, entry)?; + if !matches!(entry.kind, EntryKind::Blob(_)) { + continue; + } + + // Ignore when the shadowing is from different components + // of the same package. + if entry.user_data == previous.user_data { + continue; + } + + // The layer order isn't necessarily meaningful in terms + // of spk package dependency ordering (at the time of + // writing), so phrase this in a way that doesn't suggest + // one layer "owns" the file more than the other. + warning_found = true; + tracing::warn!( + "File {path} found in more than one package: {} and {}", + previous.user_data, + entry.user_data + ); + + // Track the packages involved for later use + let pkg_a = previous.user_data.clone(); + let pkg_b = entry.user_data.clone(); + let packages_key = if pkg_a < pkg_b { + ConflictingPackagePair(pkg_a, pkg_b) + } else { + ConflictingPackagePair(pkg_b, pkg_a) + }; + let counter = conflicting_packages.entry(packages_key).or_default(); + counter.insert(path.clone()); + } + if warning_found { + tracing::warn!("Conflicting files were detected"); + tracing::warn!(" > This can cause undefined runtime behavior"); + tracing::warn!(" > It should be addressed by:"); + tracing::warn!(" - not using these packages together"); + tracing::warn!(" - removing the file from one of them"); + tracing::warn!(" - using alternate versions or components"); + } + + Ok(environment_filesystem) + } } /// Return the necessary layers to have all solution packages. diff --git a/crates/spk-exec/src/exec_test.rs b/crates/spk-exec/src/exec_test.rs new file mode 100644 index 0000000000..7055e41c93 --- /dev/null +++ b/crates/spk-exec/src/exec_test.rs @@ -0,0 +1,84 @@ +// Copyright (c) Sony Pictures Imageworks, et al. +// SPDX-License-Identifier: Apache-2.0 +// https://github.com/imageworks/spk + +use std::collections::HashMap; +use std::sync::Arc; + +use rstest::{fixture, rstest}; +use spk_cmd_build::build_package; +use spk_schema::foundation::fixtures::*; +use spk_schema::ident::build_ident; +use spk_solve::{DecisionFormatterBuilder, Solver}; +use spk_solve_macros::request; +use spk_storage::fixtures::*; + +use crate::solution_to_resolved_runtime_layers; + +#[fixture] +fn solver() -> Solver { + Solver::default() +} + +/// If two layers contribute files to the same subdirectory, the Manifest is +/// expected to contain both files. +#[rstest] +#[tokio::test] +async fn get_environment_filesystem_merges_directories( + tmpdir: tempfile::TempDir, + mut solver: Solver, +) { + let rt = spfs_runtime().await; + + build_package!( + tmpdir, + "one.spk.yaml", + br#" +api: v0/package +pkg: one/1.0.0 + +build: + script: + - mkdir "$PREFIX"/subdir + - touch "$PREFIX"/subdir/one.txt +"#, + ); + + build_package!( + tmpdir, + "two.spk.yaml", + br#" +api: v0/package +pkg: two/1.0.0 + +build: + script: + - mkdir "$PREFIX"/subdir + - touch "$PREFIX"/subdir/two.txt +"#, + ); + + let formatter = DecisionFormatterBuilder::default() + .with_verbosity(0) + .build(); + + solver.add_repository(Arc::clone(&rt.tmprepo)); + solver.add_request(request!("one")); + solver.add_request(request!("two")); + + let (solution, _) = formatter.run_and_log_resolve(&solver).await.unwrap(); + + let resolved_layers = solution_to_resolved_runtime_layers(&solution).unwrap(); + + let mut conflicting_packages = HashMap::new(); + let environment = resolved_layers + .get_environment_filesystem( + build_ident!("does-not-matter/1.0.0/src"), + &mut conflicting_packages, + ) + .await + .unwrap(); + + assert!(environment.get_path("subdir/one.txt").is_some()); + assert!(environment.get_path("subdir/two.txt").is_some()); +} diff --git a/crates/spk-exec/src/lib.rs b/crates/spk-exec/src/lib.rs index 6750000212..5d35e5b0d0 100644 --- a/crates/spk-exec/src/lib.rs +++ b/crates/spk-exec/src/lib.rs @@ -12,5 +12,7 @@ pub use exec::{ setup_current_runtime, setup_runtime, solution_to_resolved_runtime_layers, + ConflictingPackagePair, ResolvedLayer, + ResolvedLayers, };