diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b4f904e3b4..28513322d8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -25,6 +25,11 @@ jobs: uses: crazy-max/ghaction-chocolatey@v3 with: args: install protoc llvm winfsp make + - name: Install FlatBuffers Compiler + run: | + $url = "https://github.com/google/flatbuffers/releases/download/v23.5.26/Windows.flatc.binary.zip" + Invoke-WebRequest $url -OutFile C:\flatc.zip + Expand-Archive C:\flatc.zip -DestinationPath C:\Windows - name: Install Debug SPFS run: | make install-debug-spfs @@ -55,6 +60,9 @@ jobs: apt-get install -y libcap2-bin sudo cmake tcsh rsync protobuf-compiler fuse libfuse-dev # spfs-fuse requires this option enabled echo user_allow_other >> /etc/fuse.conf + FB_REL=https://github.com/google/flatbuffers/releases/ + curl --proto '=https' --tlsv1.2 -sSfL ${FB_REL}/download/v23.5.26/Linux.flatc.binary.g++-10.zip | funzip > /usr/bin/flatc + chmod +x /usr/bin/flatc rustup install nightly rustup component add clippy rustup component add rustfmt --toolchain nightly diff --git a/Cargo.lock b/Cargo.lock index 7a610eb802..3a132c3a85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1186,6 +1186,25 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flatc-rust" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57e61227926ef5b237af48bee74394cc4a5a221ebd10c5147a98e612f207851d" +dependencies = [ + "log", +] + [[package]] name = "flate2" version = "1.0.28" @@ -3424,6 +3443,7 @@ dependencies = [ "async-recursion", "async-stream", "async-trait", + "bytes", "cached", "caps", "chrono", @@ -3437,6 +3457,7 @@ dependencies = [ "dirs", "dunce", "faccess", + "flatbuffers", "fuser", "futures", "futures-core", @@ -3451,6 +3472,7 @@ dependencies = [ "nonempty", "num_cpus", "once_cell", + "parsedbuf", "pin-project-lite", "procfs", "progress_bar_derive_macro", @@ -3468,6 +3490,7 @@ dependencies = [ "serde_yaml 0.9.27", "serial_test", "spfs-encoding", + "spfs-proto", "static_assertions", "strum", "tar", @@ -3663,11 +3686,26 @@ dependencies = [ "rand", "ring", "rstest 0.15.0", - "serde", + "spfs-proto", "thiserror", "tokio", ] +[[package]] +name = "spfs-proto" +version = "0.38.0" +dependencies = [ + "data-encoding", + "flatbuffers", + "flatc-rust", + "futures", + "miette", + "ring", + "rstest 0.15.0", + "serde", + "thiserror", +] + [[package]] name = "spfs-vfs" version = "0.38.0" @@ -3904,6 +3942,7 @@ dependencies = [ "clap 4.5.0", "miette", "rstest 0.18.2", + "spfs", "spk-cli-common", "spk-cmd-make-binary", "spk-cmd-make-source", diff --git a/Cargo.toml b/Cargo.toml index 5dbd68f5cb..9ee6722731 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,8 +5,9 @@ codegen-units = 1 [workspace] members = [ "crates/progress_bar_derive_macro", - "crates/spfs-encoding", "crates/spfs", + "crates/spfs-encoding", + "crates/spfs-proto", "crates/spk-build", "crates/spk-cli/*", "crates/spfs-cli/*", @@ -45,9 +46,11 @@ config = "0.14.0" console = "0.15" dashmap = "5.4.0" derive_builder = "0.20" +data-encoding = "2.3" dirs = "5.0" dunce = "1.0.4" dyn-clone = "1.0" +flatbuffers = "23.5.26" fuser = 
"0.14.0" futures = "0.3.28" futures-core = "0.3.28" diff --git a/Makefile b/Makefile index f6ac1f02da..7b664eb280 100644 --- a/Makefile +++ b/Makefile @@ -46,9 +46,18 @@ clean: packages.clean .PHONY: lint lint: FEATURES?=server,spfs/server -lint: +lint: lint-fmt lint-clippy lint-docs + +.PHONY: lint-fmt +lint-fmt: $(CARGO) +nightly fmt --check - $(CARGO) clippy --tests $(cargo_features_arg) $(cargo_packages_arg) -- -Dwarnings + +.PHONY: lint-clippy +lint-clippy: + $(CARGO) clippy --tests $(cargo_features_arg) $(cargo_packages_arg) $(CARGO_ARGS) -- -Dwarnings + +.PHONY: lint-docs +lint-docs: env RUSTDOCFLAGS="-Dwarnings" cargo doc --no-deps $(cargo_features_arg) $(cargo_packages_arg) .PHONY: format @@ -73,7 +82,7 @@ release-spfs: .PHONY: test test: FEATURES?=server,spfs/server test: - spfs run - -- cargo test $(cargo_features_arg) $(cargo_packages_arg) + spfs run - -- cargo test $(cargo_features_arg) $(cargo_packages_arg) -- $(TEST_ARGS) .PHONY: converters converters: @@ -92,7 +101,9 @@ spk-rpm: .PHONY: rpm-build rpm-build: rpm-buildenv + # ulimit for faster yum installs docker build . \ + --ulimit 'nofile=32768:32768' \ --target rpm_build \ --cache-from build_env \ -f rpmbuild.Dockerfile \ @@ -108,10 +119,10 @@ rpm-build: rpm-buildenv .PHONY: rpm-buildenv rpm-buildenv: + # ulimit for faster yum installs docker build . \ + --ulimit 'nofile=32768:32768' \ --target build_env \ --cache-from build_env \ -f rpmbuild.Dockerfile \ --tag build_env - - diff --git a/crates/spfs-cli/cmd-enter/src/cmd_enter.rs b/crates/spfs-cli/cmd-enter/src/cmd_enter.rs index 6144b76000..6d272ce6d6 100644 --- a/crates/spfs-cli/cmd-enter/src/cmd_enter.rs +++ b/crates/spfs-cli/cmd-enter/src/cmd_enter.rs @@ -142,6 +142,8 @@ impl CmdEnter { let mut runtime = self.load_runtime(config).await?; + tracing::debug!("entering runtime: {runtime:#?}"); + if self.make_durable.enabled { if runtime.is_durable() { return Err(spfs::Error::from("runtime is already durable").into()); @@ -241,6 +243,7 @@ impl CmdEnter { config: &spfs::Config, ) -> Result> { let runtime = self.load_runtime(config).await?; + tracing::debug!("entering runtime: {runtime:#?}"); if self.exit.enabled { todo!() } else if self.remount.enabled { diff --git a/crates/spfs-cli/cmd-render/src/cmd_render.rs b/crates/spfs-cli/cmd-render/src/cmd_render.rs index 7b0d32359d..51e97c69c5 100644 --- a/crates/spfs-cli/cmd-render/src/cmd_render.rs +++ b/crates/spfs-cli/cmd-render/src/cmd_render.rs @@ -156,7 +156,7 @@ impl CmdRender { &render_summary_reporter as &dyn spfs::storage::fs::RenderReporter, ]), ); - let stack = layers.into_iter().map(|l| l.manifest).collect(); + let stack = layers.into_iter().map(|l| *l.manifest()).collect(); renderer .render(&stack, self.strategy) .await diff --git a/crates/spfs-cli/cmd-winfsp/src/cmd_winfsp.rs b/crates/spfs-cli/cmd-winfsp/src/cmd_winfsp.rs index 561735213e..3100dfea77 100644 --- a/crates/spfs-cli/cmd-winfsp/src/cmd_winfsp.rs +++ b/crates/spfs-cli/cmd-winfsp/src/cmd_winfsp.rs @@ -75,7 +75,7 @@ impl cli::CommandName for CmdWinFsp { impl CmdWinFsp { fn run(&mut self, config: &spfs::Config) -> Result { - // the actual winfsp filesystem uses it's own threads, and + // the actual winfsp filesystem uses its own threads, and // the mount command only needs to send requests to the running // service, so a current thread runtime is appropriate let rt = tokio::runtime::Builder::new_current_thread() @@ -215,7 +215,7 @@ impl CmdService { #[derive(Debug, Args)] struct CmdMount { /// The process id for which the mount will be visible, along - /// 
with all of it's children. Defaults to the calling process. + /// with all of its children. Defaults to the calling process. #[clap(long)] root_process: Option, diff --git a/crates/spfs-cli/main/src/cmd_commit.rs b/crates/spfs-cli/main/src/cmd_commit.rs index 434d56588a..04146a04ba 100644 --- a/crates/spfs-cli/main/src/cmd_commit.rs +++ b/crates/spfs-cli/main/src/cmd_commit.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; use clap::Args; use miette::Result; -use spfs::encoding::Encodable; +use spfs::encoding::prelude::*; use spfs::prelude::*; /// Commit the current runtime state or a directory to storage @@ -72,6 +72,10 @@ pub struct CmdCommit { required_unless_present = "path", )] kind: Option, + + /// Allow committing an empty layer or platform + #[clap(long, hide = true)] + allow_empty: bool, } impl CmdCommit { @@ -82,7 +86,8 @@ impl CmdCommit { let committer = spfs::Committer::new(&repo) .with_reporter(spfs::commit::ConsoleCommitReporter::default()) .with_max_concurrent_branches(self.max_concurrent_branches) - .with_max_concurrent_blobs(self.max_concurrent_blobs); + .with_max_concurrent_blobs(self.max_concurrent_blobs) + .with_allow_empty(self.allow_empty); if self.hash_while_committing { let committer = committer .with_blob_hasher(spfs::commit::WriteToRepositoryBlobHasher { repo: &repo }); @@ -92,7 +97,7 @@ impl CmdCommit { } }; - tracing::info!(digest = ?result.digest()?, "created"); + tracing::info!(digest=%result.digest()?, "created"); for tag in self.tags.iter() { let tag_spec = match spfs::tracking::TagSpec::parse(tag) { Ok(tag_spec) => tag_spec, @@ -122,11 +127,11 @@ impl CmdCommit { { if let Some(path) = &self.path { let manifest = committer.commit_dir(path).await?; - if manifest.is_empty() { + if manifest.is_empty() && !self.allow_empty { return Err(spfs::Error::NothingToCommit); } return Ok(repo - .create_layer(&spfs::graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await? .into()); } diff --git a/crates/spfs-cli/main/src/cmd_info.rs b/crates/spfs-cli/main/src/cmd_info.rs index 549eac00f9..33b4a9a67d 100644 --- a/crates/spfs-cli/main/src/cmd_info.rs +++ b/crates/spfs-cli/main/src/cmd_info.rs @@ -9,7 +9,6 @@ use colored::*; use miette::Result; use spfs::env::SPFS_DIR; use spfs::find_path::ObjectPathEntry; -use spfs::graph::Object; use spfs::io::{self, DigestFormat, Pluralize}; use spfs::prelude::*; use spfs::{self}; @@ -104,8 +103,9 @@ impl CmdInfo { repo: &spfs::storage::RepositoryHandle, verbosity: usize, ) -> Result<()> { - match obj { - Object::Platform(obj) => { + use spfs::graph::object::Enum; + match obj.into_enum() { + Enum::Platform(obj) => { println!( "{}:\n{}", self.format_digest(obj.digest()?, repo).await?, @@ -117,7 +117,7 @@ impl CmdInfo { self.format_digest(obj.digest()?, repo).await? ); println!("{}:", "stack (top-down)".bright_blue()); - for reference in obj.stack.to_top_down() { + for reference in obj.to_stack().to_top_down() { println!(" - {}", self.format_digest(reference, repo).await?); if self.follow { self.to_process.push_back(reference.to_string()); @@ -125,7 +125,7 @@ impl CmdInfo { } } - Object::Layer(obj) => { + Enum::Layer(obj) => { println!( "{}:\n{}", self.format_digest(obj.digest()?, repo).await?, @@ -139,14 +139,14 @@ impl CmdInfo { println!( " {} {}", "manifest:".bright_blue(), - self.format_digest(obj.manifest, repo).await? + self.format_digest(*obj.manifest(), repo).await? 
); if self.follow { - self.to_process.push_back(obj.manifest.to_string()); + self.to_process.push_back(obj.manifest().to_string()); } } - Object::Manifest(obj) => { + Enum::Manifest(obj) => { println!( "{}:\n{}", self.format_digest(obj.digest()?, repo).await?, @@ -174,24 +174,23 @@ impl CmdInfo { } } - Object::Blob(obj) => { + Enum::Blob(obj) => { println!( "{}:\n {}:", - self.format_digest(obj.payload, repo).await?, + self.format_digest(*obj.payload(), repo).await?, "blob".green() ); println!( " {} {}", "digest:".bright_blue(), - self.format_digest(obj.payload, repo).await? + self.format_digest(*obj.payload(), repo).await? ); println!( " {} {}", "size:".bright_blue(), - spfs::io::format_size(obj.size) + spfs::io::format_size(obj.size()) ); } - Object::Tree(_) | Object::Mask => println!("{obj:?}"), } Ok(()) } diff --git a/crates/spfs-cli/main/src/cmd_read.rs b/crates/spfs-cli/main/src/cmd_read.rs index b07e1525eb..9bfb9ff74f 100644 --- a/crates/spfs-cli/main/src/cmd_read.rs +++ b/crates/spfs-cli/main/src/cmd_read.rs @@ -32,9 +32,9 @@ impl CmdRead { tracing::info!(target: "sentry", "using repo: {}", repo.address()); let item = repo.read_ref(&self.reference.to_string()).await?; - use spfs::graph::Object; - let blob = match item { - Object::Blob(blob) => blob, + use spfs::graph::object::Enum; + let blob = match item.to_enum() { + Enum::Blob(blob) => blob, _ => { let path = match &self.path { None => { @@ -58,7 +58,7 @@ impl CmdRead { } }; - let (mut payload, filename) = repo.open_payload(blob.digest()).await?; + let (mut payload, filename) = repo.open_payload(*blob.digest()).await?; tokio::io::copy(&mut payload, &mut tokio::io::stdout()) .await .map_err(|err| Error::StorageReadError("copy of payload to stdout", filename, err))?; diff --git a/crates/spfs-cli/main/src/cmd_reset.rs b/crates/spfs-cli/main/src/cmd_reset.rs index bf3997e352..f678ab964d 100644 --- a/crates/spfs-cli/main/src/cmd_reset.rs +++ b/crates/spfs-cli/main/src/cmd_reset.rs @@ -50,7 +50,7 @@ impl CmdReset { .sync_env(env_spec.clone()) .await?; for item in synced.env.iter() { - let digest = item.resolve_digest(&*repo).await?; + let digest = item.resolve_digest(&repo).await?; runtime.push_digest(digest); } } diff --git a/crates/spfs-cli/main/src/cmd_run.rs b/crates/spfs-cli/main/src/cmd_run.rs index a771cc9f42..b33e4adf6c 100644 --- a/crates/spfs-cli/main/src/cmd_run.rs +++ b/crates/spfs-cli/main/src/cmd_run.rs @@ -7,7 +7,7 @@ use std::time::Instant; use clap::{ArgGroup, Args}; use miette::{Context, Result}; -use spfs::storage::FromConfig; +use spfs::prelude::*; use spfs::tracking::EnvSpec; use spfs_cli_common as cli; @@ -158,7 +158,7 @@ impl CmdRun { // local repo. Tags synced to a local repo will prevent // future 'spfs clean's from removing many unused spfs // objects. 
-        let repos: Vec<_> = vec![&*origin, &*repo];
+        let repos: Vec<_> = vec![&origin, &repo];
         let references_to_sync = reference
             .with_tag_items_resolved_to_digest_items(&repos)
             .await?;
diff --git a/crates/spfs-cli/main/src/cmd_server.rs b/crates/spfs-cli/main/src/cmd_server.rs
index d665d28d29..dec438625c 100644
--- a/crates/spfs-cli/main/src/cmd_server.rs
+++ b/crates/spfs-cli/main/src/cmd_server.rs
@@ -9,7 +9,7 @@ use spfs_cli_common as cli;
 /// Start an spfs server
 ///
 /// The server can be used as a remote repository by
-/// it's clients, communicating over gRPC and http
+/// its clients, communicating over gRPC and http
 #[derive(Debug, Args)]
 pub struct CmdServer {
     #[clap(flatten)]
diff --git a/crates/spfs-cli/main/src/cmd_write.rs b/crates/spfs-cli/main/src/cmd_write.rs
index 10076589fe..f6bcc6b261 100644
--- a/crates/spfs-cli/main/src/cmd_write.rs
+++ b/crates/spfs-cli/main/src/cmd_write.rs
@@ -56,7 +56,7 @@ impl CmdWrite {
 
         let digest = repo.commit_blob(reader).await?;
 
-        tracing::info!(?digest, "created");
+        tracing::info!(%digest, "created");
         for tag in self.tags.iter() {
             let tag_spec = match spfs::tracking::TagSpec::parse(tag) {
                 Ok(tag_spec) => tag_spec,
diff --git a/crates/spfs-encoding/Cargo.toml b/crates/spfs-encoding/Cargo.toml
index 0762c2170e..3fa564b593 100644
--- a/crates/spfs-encoding/Cargo.toml
+++ b/crates/spfs-encoding/Cargo.toml
@@ -8,9 +8,9 @@ version = { workspace = true }
 workspace = true
 
 [dependencies]
-data-encoding = "2.3"
+data-encoding = { workspace = true }
 ring = { workspace = true }
-serde = { workspace = true, features = ["derive"] }
+spfs-proto = { path = "../spfs-proto" }
 tokio = { version = "1.20", features = ["io-util", "io-std"] }
 thiserror = { workspace = true }
 miette = { workspace = true }
diff --git a/crates/spfs-encoding/src/binary.rs b/crates/spfs-encoding/src/binary.rs
index cd55e168bf..33b43fa914 100644
--- a/crates/spfs-encoding/src/binary.rs
+++ b/crates/spfs-encoding/src/binary.rs
@@ -5,10 +5,10 @@
 use std::io::{BufRead, Read, Write};
 use std::iter::FromIterator;
 
-use super::hash::{Digest, DIGEST_SIZE, NULL_DIGEST};
-use crate::{Error, Result};
+use crate::{Digest, Error, Result, DIGEST_SIZE, NULL_DIGEST};
 
-const INT_SIZE: usize = std::mem::size_of::<i64>();
+const INT64_SIZE: usize = std::mem::size_of::<i64>();
+const INT8_SIZE: usize = std::mem::size_of::<u8>();
 
 #[cfg(test)]
 #[path = "./binary_test.rs"]
 mod binary_test;
@@ -47,13 +47,21 @@ pub fn write_int(mut writer: impl Write, value: i64) -> Result<()> {
 
 /// Read an integer from the given binary stream.
 pub fn read_int(mut reader: impl Read) -> Result<i64> {
-    let mut buf: [u8; INT_SIZE] = [0, 0, 0, 0, 0, 0, 0, 0];
+    let mut buf: [u8; INT64_SIZE] = [0, 0, 0, 0, 0, 0, 0, 0];
     reader.read_exact(&mut buf).map_err(Error::FailedRead)?;
     Ok(i64::from_be_bytes(buf))
 }
 
 /// Write an unsigned integer to the given binary stream.
-pub fn write_uint(mut writer: impl Write, value: u64) -> Result<()> {
+pub fn write_uint64(mut writer: impl Write, value: u64) -> Result<()> {
     writer
         .write_all(&value.to_be_bytes())
         .map_err(Error::FailedWrite)?;
     Ok(())
+}
+
+/// Write an 8-bit unsigned integer to the given binary stream.
+pub fn write_uint8(mut writer: impl Write, value: u8) -> Result<()> {
+    writer
+        .write_all(&value.to_be_bytes())
+        .map_err(Error::FailedWrite)?;
+    Ok(())
 }
 
 /// Read an unsigned integer from the given binary stream.
-pub fn read_uint(mut reader: impl Read) -> Result<u64> {
-    let mut buf: [u8; INT_SIZE] = [0, 0, 0, 0, 0, 0, 0, 0];
+pub fn read_uint64(mut reader: impl Read) -> Result<u64> {
+    let mut buf: [u8; INT64_SIZE] = [0, 0, 0, 0, 0, 0, 0, 0];
     reader.read_exact(&mut buf).map_err(Error::FailedRead)?;
     Ok(u64::from_be_bytes(buf))
 }
 
+/// Read an 8-bit unsigned integer from the given binary stream.
+pub fn read_uint8(mut reader: impl Read) -> Result<u8> {
+    let mut buf: [u8; INT8_SIZE] = [0];
+    reader.read_exact(&mut buf).map_err(Error::FailedRead)?;
+    Ok(u8::from_be_bytes(buf))
+}
+
 /// Write a digest to the given binary stream.
 pub fn write_digest(mut writer: impl Write, digest: &Digest) -> Result<()> {
     writer
@@ -79,7 +94,7 @@ pub fn write_digest(mut writer: impl Write, digest: &Digest) -> Result<()> {
 pub fn read_digest(mut reader: impl Read) -> Result<Digest> {
     let mut buf: [u8; DIGEST_SIZE] = NULL_DIGEST;
     reader.read_exact(buf.as_mut()).map_err(Error::FailedRead)?;
-    Digest::from_bytes(&buf)
+    Ok(Digest::from_bytes(&buf)?)
 }
 
 /// Write a string to the given binary stream.
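[Editor's note — not part of the patch: a minimal sketch of the new width-explicit helpers above, showing a write/read round-trip through an in-memory buffer. It assumes only the `spfs_encoding` items exported by this change.]

```rust
use spfs_encoding::{read_uint64, read_uint8, write_uint64, write_uint8, Result};

fn roundtrip() -> Result<()> {
    let mut buf = Vec::new();
    write_uint64(&mut buf, 42)?; // 8 bytes, big-endian
    write_uint8(&mut buf, 7)?; // exactly 1 byte
    let mut reader = std::io::Cursor::new(buf);
    assert_eq!(read_uint64(&mut reader)?, 42);
    assert_eq!(read_uint8(&mut reader)?, 7);
    Ok(())
}
```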
diff --git a/crates/spfs-encoding/src/error.rs b/crates/spfs-encoding/src/error.rs
index de5f59bd66..6b4fb14e45 100644
--- a/crates/spfs-encoding/src/error.rs
+++ b/crates/spfs-encoding/src/error.rs
@@ -42,15 +42,10 @@ pub enum Error {
         got: Vec<u8>,
     },
 
-    /// A digest could not be decoded from a string because the
-    /// contained invalid data or was otherwise malformed
-    #[error("Could not decode digest: {0}")]
-    InvalidDigestEncoding(#[source] data_encoding::DecodeError),
-
-    /// A digest could not be created because the wrong number
-    /// of bytes were provided
-    #[error("Invalid number of bytes for digest: {0} != {}", super::DIGEST_SIZE)]
-    InvalidDigestLength(usize),
+    /// An error occurred with the digest protocol format
+    #[error(transparent)]
+    #[diagnostic(forward(0))]
+    Digest(#[from] spfs_proto::digest::Error),
 
     /// A partial digest could not be parsed from a string because
     /// of some issue with the provided data
diff --git a/crates/spfs-encoding/src/hash.rs b/crates/spfs-encoding/src/hash.rs
index b5bf5a8f91..f3b66888c3 100644
--- a/crates/spfs-encoding/src/hash.rs
+++ b/crates/spfs-encoding/src/hash.rs
@@ -2,18 +2,17 @@
 // SPDX-License-Identifier: Apache-2.0
 // https://github.com/imageworks/spk
 
-use std::convert::{TryFrom, TryInto};
+use std::convert::TryInto;
 use std::fmt::Display;
 use std::io::{Read, Write};
 use std::pin::Pin;
 use std::task::Poll;
 
 use data_encoding::BASE32;
-use ring::digest::{Context, SHA256, SHA256_OUTPUT_LEN};
-use serde::{Deserialize, Serialize};
+use ring::digest::{Context, SHA256};
 use tokio::io::{AsyncRead, AsyncWrite};
 
-use super::binary;
+use super::{binary, Digest};
 use crate::{Error, Result};
 
 #[cfg(test)]
@@ -149,26 +148,75 @@ where
     }
 }
 
-/// Encodable is a type that can be binary-encoded to a byte stream
-pub trait Encodable
-where
-    Self: Sized,
-{
-    /// The flavor of error returned by encoding methods
+impl Hasher<()> {
+    /// Reads the given async reader to completion, returning
+    /// the digest of its contents.
+    pub async fn hash_async_reader(mut reader: impl AsyncRead + Unpin) -> Result<Digest> {
+        let mut hasher = Hasher::new_async();
+        tokio::io::copy(&mut reader, &mut hasher)
+            .await
+            .map_err(Error::FailedRead)?;
+        Ok(hasher.digest())
+    }
+
+    /// Reads the given reader to completion, returning
+    /// the digest of its contents.
+    pub fn hash_reader(mut reader: impl Read) -> Result<Digest> {
+        let mut hasher = Hasher::new_sync();
+        std::io::copy(&mut reader, &mut hasher).map_err(Error::FailedRead)?;
+        Ok(hasher.digest())
+    }
+}
+
+/// Digestible is a type that can return an `encoding::Digest` for itself.
+pub trait Digestible {
+    /// The flavor of error returned by digesting methods
     type Error;
 
     /// Compute the digest for this instance, by
    /// encoding it into binary form and hashing the result
+    fn digest(&self) -> std::result::Result<Digest, Self::Error>;
+}
+
+impl<T> Digestible for &T
+where
+    T: Digestible,
+{
+    type Error = T::Error;
+
+    /// Meant to represent a hash of the content of this
+    /// item - all unique instances should have a unique digest
+    /// and all instances that are equal should share a digest
+    fn digest(&self) -> std::result::Result<Digest, Self::Error> {
+        (**self).digest()
+    }
+}
+
+impl Digestible for &[u8] {
+    type Error = Error;
+
+    /// Meant to represent a hash of the content of this
+    /// item - all unique instances should have a unique digest
+    /// and all instances that are equal should share a digest
     fn digest(&self) -> std::result::Result<Digest, Self::Error> {
         let mut hasher = Hasher::new_sync();
-        self.encode(&mut hasher)?;
+        hasher.write_all(self).map_err(Error::FailedWrite)?;
         Ok(hasher.digest())
     }
+}
+
+/// Encodable is a type that can be binary-encoded to a byte stream
+pub trait Encodable
+where
+    Self: Sized,
+{
+    /// The flavor of error returned by encoding methods
+    type Error;
 
     /// Write this object in binary format.
     fn encode(&self, writer: &mut impl Write) -> std::result::Result<(), Self::Error>;
 
-    /// Encode this object into it's binary form in memory.
+    /// Encode this object into its binary form in memory.
     fn encode_to_bytes(&self) -> std::result::Result<Vec<u8>, Self::Error> {
         let mut buf = Vec::new();
         self.encode(&mut buf)?;
@@ -179,7 +227,7 @@ where
 /// Decodable is a type that can be rebuilt from a previously encoded binary stream
 pub trait Decodable
 where
-    Self: Encodable,
+    Self: Encodable + Sized,
 {
     /// Read a previously encoded object from the given binary stream.
     fn decode(reader: &mut impl std::io::BufRead) -> std::result::Result<Self, Self::Error>;
@@ -276,7 +324,7 @@ impl PartialDigest {
 
     /// Return true if this partial digest is actually a full digest
     pub fn is_full(&self) -> bool {
-        self.len() == DIGEST_SIZE
+        self.len() == super::DIGEST_SIZE
     }
 
     /// If this partial digest is actually a full digest, convert it
@@ -344,147 +392,9 @@ impl AsRef for PartialDigest {
     }
 }
 
-/// Digest is the result of a hashing operation over binary data.
-#[derive(PartialEq, Eq, Hash, Copy, Clone, Ord, PartialOrd)]
-pub struct Digest([u8; DIGEST_SIZE]);
-
-impl std::ops::Deref for Digest {
-    type Target = [u8];
-
-    fn deref(&self) -> &Self::Target {
-        &self.0[..]
- } -} - -impl Default for Digest { - fn default() -> Self { - NULL_DIGEST.into() - } -} - -impl std::fmt::Debug for Digest { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(self.to_string().as_ref()) - } -} - -impl std::str::FromStr for Digest { - type Err = crate::Error; - - fn from_str(s: &str) -> Result { - Digest::parse(s) - } -} - -impl AsRef<[u8]> for Digest { - fn as_ref(&self) -> &[u8] { - self.0.as_ref() - } -} - -impl AsRef for Digest { - fn as_ref(&self) -> &Self { - self - } -} - -impl<'a> Digest { - /// Yields a view of the underlying bytes for this digest - pub fn as_bytes(&'a self) -> &'a [u8] { - self.0.as_ref() - } - - /// Extract the raw bytes of this digest - pub fn into_bytes(self) -> [u8; DIGEST_SIZE] { - self.0 - } - - /// Create a digest from the provided bytes. - /// - /// The exact [`DIGEST_SIZE`] number of bytes must - /// be given. - pub fn from_bytes(digest_bytes: &[u8]) -> Result { - match digest_bytes.try_into() { - Err(_err) => Err(Error::InvalidDigestLength(digest_bytes.len())), - Ok(bytes) => Ok(Self(bytes)), - } - } - - /// Parse the given string as an encoded digest - pub fn parse(digest_str: &str) -> Result { - digest_str.try_into() - } - - /// Reads the given async reader to completion, returning - /// the digest of it's contents. - pub async fn from_async_reader(mut reader: impl AsyncRead + Unpin) -> Result { - let mut hasher = Hasher::new_async(); - tokio::io::copy(&mut reader, &mut hasher) - .await - .map_err(Error::FailedRead)?; - Ok(hasher.digest()) - } - - /// Reads the given reader to completion, returning - /// the digest of it's contents. - pub fn from_reader(mut reader: impl Read) -> Result { - let mut hasher = Hasher::new_sync(); - std::io::copy(&mut reader, &mut hasher).map_err(Error::FailedRead)?; - Ok(hasher.digest()) - } -} - -impl Serialize for Digest { - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - serializer.serialize_str(self.to_string().as_ref()) - } -} -impl<'de> Deserialize<'de> for Digest { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - /// Visits a serialized string, decoding it as a digest - struct StringVisitor; - - impl<'de> serde::de::Visitor<'de> for StringVisitor { - type Value = Digest; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("base32 encoded digest") - } - - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - Digest::try_from(value).map_err(serde::de::Error::custom) - } - } - deserializer.deserialize_str(StringVisitor) - } -} - -impl From<[u8; DIGEST_SIZE]> for Digest { - fn from(bytes: [u8; DIGEST_SIZE]) -> Self { - Digest(bytes) - } -} - -impl TryFrom<&str> for Digest { - type Error = Error; - - fn try_from(digest_str: &str) -> Result { - parse_digest(digest_str) - } -} - -impl Display for Digest { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(BASE32.encode(self.as_bytes()).as_ref()) +impl Decodable for Digest { + fn decode(reader: &mut impl std::io::BufRead) -> Result { + binary::read_digest(reader) } } @@ -494,46 +404,12 @@ impl Encodable for Digest { fn encode(&self, writer: &mut impl Write) -> Result<()> { binary::write_digest(writer, self) } +} + +impl Digestible for Digest { + type Error = Error; fn digest(&self) -> Result { Ok(*self) } } - -impl Decodable for Digest { - fn decode(reader: &mut impl std::io::BufRead) -> Result { - 
binary::read_digest(reader) - } -} - -/// The number of bytes that make up an spfs digest -pub const DIGEST_SIZE: usize = SHA256_OUTPUT_LEN; - -/// The bytes of an empty digest. This represents the result of hashing no bytes - the initial state. -/// -/// ``` -/// use std::convert::TryInto; -/// use ring::digest; -/// use spfs_encoding::{EMPTY_DIGEST, DIGEST_SIZE}; -/// -/// let empty_digest: [u8; DIGEST_SIZE] = digest::digest(&digest::SHA256, b"").as_ref().try_into().unwrap(); -/// assert_eq!(empty_digest, EMPTY_DIGEST); -/// ``` -pub const EMPTY_DIGEST: [u8; DIGEST_SIZE] = [ - 227, 176, 196, 66, 152, 252, 28, 20, 154, 251, 244, 200, 153, 111, 185, 36, 39, 174, 65, 228, - 100, 155, 147, 76, 164, 149, 153, 27, 120, 82, 184, 85, -]; - -/// The bytes of an entirely null digest. This does not represent the result of hashing no bytes, because -/// sha256 has a defined initial state. This is an explicitly unique result of entirely null bytes. -pub const NULL_DIGEST: [u8; DIGEST_SIZE] = [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -]; - -/// Parse a string-digest. -pub fn parse_digest(digest_str: impl AsRef) -> Result { - let digest_bytes = BASE32 - .decode(digest_str.as_ref().as_bytes()) - .map_err(Error::InvalidDigestEncoding)?; - Digest::from_bytes(digest_bytes.as_slice()) -} diff --git a/crates/spfs-encoding/src/lib.rs b/crates/spfs-encoding/src/lib.rs index 0bc57d92e6..53f85bb340 100644 --- a/crates/spfs-encoding/src/lib.rs +++ b/crates/spfs-encoding/src/lib.rs @@ -21,29 +21,22 @@ pub use binary::{ read_digest, read_int, read_string, - read_uint, + read_uint64, + read_uint8, write_digest, write_header, write_int, write_string, - write_uint, + write_uint64, + write_uint8, }; pub use error::{Error, Result}; -pub use hash::{ - parse_digest, - Decodable, - Digest, - Encodable, - Hasher, - PartialDigest, - DIGEST_SIZE, - EMPTY_DIGEST, - NULL_DIGEST, -}; +pub use hash::{Decodable, Digestible, Encodable, Hasher, PartialDigest}; +pub use spfs_proto::{parse_digest, Digest, DIGEST_SIZE, EMPTY_DIGEST, NULL_DIGEST}; /// # Encoding Prelude /// /// A collection of traits commonly used from this crate. pub mod prelude { - pub use super::{Decodable, Encodable}; + pub use super::{Decodable, Digestible, Encodable}; } diff --git a/crates/spfs-proto/Cargo.toml b/crates/spfs-proto/Cargo.toml new file mode 100644 index 0000000000..1253375d45 --- /dev/null +++ b/crates/spfs-proto/Cargo.toml @@ -0,0 +1,26 @@ +[package] +authors = { workspace = true } +edition = { workspace = true } +name = "spfs-proto" +version = { workspace = true } + +[lints] +workspace = true + +[features] +serde = ["dep:serde"] + +[dependencies] +data-encoding = { workspace = true } +flatbuffers = { workspace = true } +futures = { workspace = true } +miette = { workspace = true } +serde = { workspace = true, optional = true } +thiserror = { workspace = true } + +[build-dependencies] +flatc-rust = "0.2" + +[dev-dependencies] +ring = { workspace = true } +rstest = { version = "0.15.0", default_features = false } diff --git a/crates/spfs-proto/build.rs b/crates/spfs-proto/build.rs new file mode 100644 index 0000000000..1c8a31ff02 --- /dev/null +++ b/crates/spfs-proto/build.rs @@ -0,0 +1,22 @@ +// Copyright (c) Sony Pictures Imageworks, et al. 
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/imageworks/spk
+
+use std::path::Path;
+
+fn main() {
+    println!("cargo:rerun-if-changed=schema/spfs.fbs");
+
+    let cmd = match std::env::var_os("FLATC") {
+        Some(exe) => flatc_rust::Flatc::from_path(exe),
+        None => flatc_rust::Flatc::from_env_path(),
+    };
+
+    cmd.run(flatc_rust::Args {
+        lang: "rust",
+        inputs: &[Path::new("schema/spfs.fbs")],
+        out_dir: Path::new("src/"),
+        ..Default::default()
+    })
+    .expect("schema compiler command");
+}
diff --git a/crates/spfs-proto/schema/spfs.fbs b/crates/spfs-proto/schema/spfs.fbs
new file mode 100644
index 0000000000..071aa1de1c
--- /dev/null
+++ b/crates/spfs-proto/schema/spfs.fbs
@@ -0,0 +1,65 @@
+// Copyright (c) Sony Pictures Imageworks, et al.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/imageworks/spk
+
+// when adding or modifying this file, take care to read
+// the compatibility and evolution notes here:
+// https://flatbuffers.dev/flatbuffers_guide_writing_schema.html
+// In general, only add fields to the end of tables. Never add
+// new fields to struct types.
+
+union Object {
+    Platform,
+    Layer,
+    Manifest,
+    Blob,
+}
+
+table Platform {
+    layers:[Digest] (required);
+}
+
+table Layer {
+    manifest:Digest (required);
+}
+
+table Manifest {
+    /// Must be non-empty
+    trees:[Tree] (required);
+}
+
+/// Blobs represent an arbitrary chunk of binary data, usually a file.
+table Blob {
+    size:uint64;
+    payload:Digest (required);
+}
+
+/// Digest is the result of a hashing operation over binary data.
+struct Digest {
+    bytes:[uint8:32]; // SHA-256 output len (256 / 8)
+}
+
+table Tree {
+    entries:[Entry] (required);
+}
+
+enum EntryKind:uint8 {
+    Blob = 0,
+    /* Aligned to the rust ObjectKind type */
+    Tree = 4,
+    Mask = 5,
+}
+
+table Entry {
+    kind:EntryKind;
+    object:Digest (required);
+    mode:uint32;
+    size:uint64;
+    name:string (required);
+}
+
+
+table AnyObject {
+    object:Object (required);
+}
+root_type AnyObject;
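[Editor's note — not part of the patch: a hedged sketch of reading a serialized `AnyObject` with the generated Rust API. `flatbuffers::root::<T>` is the real verifier entry point in the `flatbuffers` crate, but the `object_type()` union accessor name follows standard `flatc --rust` codegen conventions and is an assumption here.]

```rust
use spfs_proto::{flatbuffers, AnyObject, Object};

// Returns which union variant (Platform, Layer, Manifest, or Blob) the
// buffer's root object carries, verifying the buffer in the process.
fn object_kind(bytes: &[u8]) -> Result<Object, flatbuffers::InvalidFlatbuffer> {
    let any = flatbuffers::root::<AnyObject>(bytes)?;
    Ok(any.object_type()) // assumed accessor name for the `object` union field
}
```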
diff --git a/crates/spfs-proto/src/.gitignore b/crates/spfs-proto/src/.gitignore
new file mode 100644
index 0000000000..c8f6818d78
--- /dev/null
+++ b/crates/spfs-proto/src/.gitignore
@@ -0,0 +1 @@
+spfs_generated.rs
diff --git a/crates/spfs-proto/src/digest.rs b/crates/spfs-proto/src/digest.rs
new file mode 100644
index 0000000000..63dc09250d
--- /dev/null
+++ b/crates/spfs-proto/src/digest.rs
@@ -0,0 +1,194 @@
+// Copyright (c) Sony Pictures Imageworks, et al.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/imageworks/spk
+
+use data_encoding::BASE32;
+use miette::Diagnostic;
+
+use super::Digest;
+
+#[cfg(test)]
+#[path = "./digest_test.rs"]
+mod digest_test;
+
+/// The number of bytes that make up an spfs digest
+pub const DIGEST_SIZE: usize = std::mem::size_of::<Digest>();
+
+/// The bytes of an empty digest. This represents the result of hashing no bytes - the initial state.
+pub const EMPTY_DIGEST: [u8; DIGEST_SIZE] = [
+    227, 176, 196, 66, 152, 252, 28, 20, 154, 251, 244, 200, 153, 111, 185, 36, 39, 174, 65, 228,
+    100, 155, 147, 76, 164, 149, 153, 27, 120, 82, 184, 85,
+];
+
+/// The bytes of an entirely null digest. This does not represent the result of hashing no bytes, because
+/// sha256 has a defined initial state. This is an explicitly unique result of entirely null bytes.
+pub const NULL_DIGEST: [u8; DIGEST_SIZE] = [
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+impl std::ops::Deref for Digest {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        &self.0[..]
+    }
+}
+
+impl std::cmp::Eq for Digest {}
+
+impl std::hash::Hash for Digest {
+    fn hash<H>(&self, hasher: &mut H)
+    where
+        H: std::hash::Hasher,
+    {
+        self.0.hash(hasher)
+    }
+}
+
+impl std::cmp::Ord for Digest {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
+impl std::cmp::PartialOrd for Digest {
+    fn partial_cmp(&self, other: &Self) -> std::option::Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl std::str::FromStr for Digest {
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        Digest::parse(s)
+    }
+}
+
+impl AsRef<[u8]> for Digest {
+    fn as_ref(&self) -> &[u8] {
+        self.0.as_ref()
+    }
+}
+
+impl AsRef<Digest> for Digest {
+    fn as_ref(&self) -> &Self {
+        self
+    }
+}
+
+impl Digest {
+    /// Yields a view of the underlying bytes for this digest
+    pub fn as_bytes(&self) -> &[u8] {
+        self.0.as_ref()
+    }
+
+    /// Extract the raw bytes of this digest
+    pub fn into_bytes(self) -> [u8; DIGEST_SIZE] {
+        self.0
+    }
+
+    /// Create a digest from the provided bytes.
+    ///
+    /// The exact [`DIGEST_SIZE`] number of bytes must
+    /// be given.
+    pub fn from_bytes(digest_bytes: &[u8]) -> Result<Self> {
+        match digest_bytes.try_into() {
+            Err(_err) => Err(Error::InvalidDigestLength(digest_bytes.len())),
+            Ok(bytes) => Ok(Self(bytes)),
+        }
+    }
+
+    /// Parse the given string as an encoded digest
+    pub fn parse(digest_str: &str) -> Result<Digest> {
+        digest_str.try_into()
+    }
+}
+
+#[cfg(feature = "serde")]
+impl serde::Serialize for Digest {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_str(self.to_string().as_ref())
+    }
+}
+
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for Digest {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        /// Visits a serialized string, decoding it as a digest
+        struct StringVisitor;
+
+        impl<'de> serde::de::Visitor<'de> for StringVisitor {
+            type Value = Digest;
+
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("base32 encoded digest")
+            }
+
+            fn visit_str<E>(self, value: &str) -> std::result::Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                Digest::try_from(value).map_err(serde::de::Error::custom)
+            }
+        }
+        deserializer.deserialize_str(StringVisitor)
+    }
+}
+
+impl From<[u8; DIGEST_SIZE]> for Digest {
+    fn from(bytes: [u8; DIGEST_SIZE]) -> Self {
+        Digest(bytes)
+    }
+}
+
+impl TryFrom<&str> for Digest {
+    type Error = Error;
+
+    fn try_from(digest_str: &str) -> Result<Self> {
+        parse_digest(digest_str)
+    }
+}
+
+impl std::fmt::Display for Digest {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(BASE32.encode(self.as_bytes()).as_ref())
+    }
+}
+
+/// Parse a string-encoded digest.
+pub fn parse_digest(digest_str: impl AsRef<str>) -> Result<Digest> {
+    let digest_bytes = BASE32
+        .decode(digest_str.as_ref().as_bytes())
+        .map_err(Error::InvalidDigestEncoding)?;
+    Digest::from_bytes(digest_bytes.as_slice())
+}
+
+/// A specialized result for digest-related operations
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// The error type that is returned by digest operations
+#[derive(thiserror::Error, Diagnostic, Debug)]
+#[diagnostic(
+    url(
+        "https://getspk.io/error_codes#{}",
+        self.code().unwrap_or_else(|| Box::new("spfs::generic"))
+    )
+)]
+pub enum Error {
+    /// A digest could not be decoded from a string because it
+    /// contained invalid data or was otherwise malformed
+    #[error("Could not decode digest: {0}")]
+    InvalidDigestEncoding(#[source] data_encoding::DecodeError),
+
+    /// A digest could not be created because the wrong number
+    /// of bytes were provided
+    #[error("Invalid number of bytes for digest: {0} != {}", super::DIGEST_SIZE)]
+    InvalidDigestLength(usize),
+}
diff --git a/crates/spfs-proto/src/digest_test.rs b/crates/spfs-proto/src/digest_test.rs
new file mode 100644
index 0000000000..c6d3f42c9f
--- /dev/null
+++ b/crates/spfs-proto/src/digest_test.rs
@@ -0,0 +1,18 @@
+// Copyright (c) Sony Pictures Imageworks, et al.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/imageworks/spk
+
+use std::convert::TryInto;
+
+use ring::digest;
+use rstest::rstest;
+
+#[rstest]
+fn test_empty_digest_bytes() {
+    use crate::{DIGEST_SIZE, EMPTY_DIGEST};
+    let empty_digest: [u8; DIGEST_SIZE] = digest::digest(&digest::SHA256, b"")
+        .as_ref()
+        .try_into()
+        .unwrap();
+    assert_eq!(empty_digest, EMPTY_DIGEST);
+}
diff --git a/crates/spfs-proto/src/lib.rs b/crates/spfs-proto/src/lib.rs
new file mode 100644
index 0000000000..0f872ef487
--- /dev/null
+++ b/crates/spfs-proto/src/lib.rs
@@ -0,0 +1,14 @@
+// Copyright (c) Sony Pictures Imageworks, et al.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/imageworks/spk
+
+#[allow(unused_imports)]
+#[allow(unsafe_op_in_unsafe_fn)]
+#[allow(clippy::all)]
+#[rustfmt::skip]
+pub mod spfs_generated;
+pub mod digest;
+
+pub use digest::{parse_digest, DIGEST_SIZE, EMPTY_DIGEST, NULL_DIGEST};
+pub use flatbuffers;
+pub use spfs_generated::*;
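[Editor's note — not part of the patch: the digest string round-trip now lives in spfs-proto and is re-exported through spfs-encoding. A minimal sketch under that assumption, using only items shown above:]

```rust
use std::str::FromStr;

use spfs_proto::{parse_digest, Digest, EMPTY_DIGEST};

fn roundtrip() -> spfs_proto::digest::Result<()> {
    let digest = Digest::from(EMPTY_DIGEST);
    let text = digest.to_string(); // BASE32, via the Display impl above
    assert_eq!(parse_digest(&text)?, digest);
    assert_eq!(Digest::from_str(&text)?, digest);
    Ok(())
}
```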
diff --git a/crates/spfs-vfs/src/fuse.rs b/crates/spfs-vfs/src/fuse.rs
index 4e1156c368..f76d11e57d 100644
--- a/crates/spfs-vfs/src/fuse.rs
+++ b/crates/spfs-vfs/src/fuse.rs
@@ -27,7 +27,7 @@ use fuser::{
     ReplyOpen,
     Request,
 };
-use spfs::storage::FromConfig;
+use spfs::prelude::*;
 #[cfg(feature = "fuse-backend-abi-7-31")]
 use spfs::tracking::BlobRead;
 use spfs::tracking::{Entry, EntryKind, EnvSpec, Manifest};
diff --git a/crates/spfs-vfs/src/winfsp/mount.rs b/crates/spfs-vfs/src/winfsp/mount.rs
index 156834897a..f99f5d1f18 100644
--- a/crates/spfs-vfs/src/winfsp/mount.rs
+++ b/crates/spfs-vfs/src/winfsp/mount.rs
@@ -8,6 +8,7 @@ use std::sync::Arc;
 
 use dashmap::DashMap;
 use libc::c_void;
+use spfs::prelude::*;
 use spfs::tracking::{Entry, EntryKind};
 use spfs::OsError;
 use tokio::io::AsyncReadExt;
@@ -65,12 +66,12 @@ impl Mount {
         repos: Vec>,
         manifest: spfs::tracking::Manifest,
     ) -> spfs::Result<Self> {
-        /// This syntax describes the default security descriptor settings
-        /// that are used for files and directories in the mounted file system.
-        /// It essentially provides a sane default ownership as well as
-        /// read/write access to all users.
-        /// More information about the SDD language and syntax can be found here:
-        /// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/4f4251cc-23b6-44b6-93ba-69688422cb06
+        // This syntax describes the default security descriptor settings
+        // that are used for files and directories in the mounted file system.
+        // It essentially provides a sane default ownership as well as
+        // read/write access to all users.
+        // More information about the SDD language and syntax can be found here:
+        // https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/4f4251cc-23b6-44b6-93ba-69688422cb06
         let sddl = windows::core::w!("O:BAG:BAD:P(A;;FA;;;SY)(A;;FA;;;BA)(A;;FA;;;WD)");
         let mut psecurity_descriptor = PSECURITY_DESCRIPTOR(std::ptr::null_mut());
         let mut security_descriptor_size: u32 = 0;
diff --git a/crates/spfs/Cargo.toml b/crates/spfs/Cargo.toml
index d185fa3d8a..86d6bfe887 100644
--- a/crates/spfs/Cargo.toml
+++ b/crates/spfs/Cargo.toml
@@ -25,6 +25,7 @@ async-compression = { version = "0.3.15", features = ["tokio", "bzip2"] }
 async-trait = "0.1.52"
 async-recursion = "1.0"
 async-stream = "0.3"
+bytes = { workspace = true }
 cached = { workspace = true }
 chrono = { workspace = true }
 close-err = "1.0"
@@ -36,6 +37,7 @@ derive_builder = { workspace = true }
 dirs = { workspace = true }
 dunce = { workspace = true }
 faccess = "0.2.3"
+flatbuffers = { workspace = true }
 futures = { workspace = true }
 futures-core = { workspace = true }
 gitignore = "1.0"
@@ -49,6 +51,7 @@ nix = { workspace = true, features = ["fs"] }
 nonempty = "0.8.1"
 num_cpus = "1.13.1"
 once_cell = { workspace = true }
+parsedbuf = { path = "../parsedbuf" }
 pin-project-lite = { workspace = true }
 progress_bar_derive_macro = { workspace = true }
 prost = { workspace = true }
@@ -62,6 +65,7 @@ serde_json = { workspace = true }
 serde_yaml = { workspace = true }
 serde_qs = "0.10.1"
 spfs-encoding = { workspace = true }
+spfs-proto = { path = "../spfs-proto", features = ["serde"] }
 strum = { workspace = true, features = ["derive"] }
 tar = "0.4.30"
 tempfile = { workspace = true }
diff --git a/crates/spfs/src/check.rs b/crates/spfs/src/check.rs
index a75ad7a463..fc380bfdc0 100644
--- a/crates/spfs/src/check.rs
+++ b/crates/spfs/src/check.rs
@@ -239,7 +239,7 @@ where
         }
 
         let _permit = self.object_semaphore.acquire().await;
-        tracing::trace!(?digest, "Checking digest");
+        tracing::trace!(%digest, "Checking digest");
         self.reporter.visit_digest(&digest);
         match self.read_object_with_fallback(digest).await {
             Err(Error::UnknownObject(_)) => Ok(CheckObjectResult::Missing(digest)),
@@ -290,7 +290,7 @@ where
         obj: graph::Object,
         perms: Option<u32>,
     ) -> Result<CheckObjectResult> {
-        use graph::Object;
+        use graph::object::Enum;
         if let Some(CheckProgress::CheckStarted) = self
             .processed_digests
             .insert(obj.digest()?, CheckProgress::CheckStarted)
@@ -298,17 +298,15 @@ where
             return Ok(CheckObjectResult::Duplicate);
         }
         self.reporter.visit_object(&obj);
-        let res = match obj {
-            Object::Layer(obj) => CheckObjectResult::Layer(self.check_layer(obj).await?.into()),
-            Object::Platform(obj) => CheckObjectResult::Platform(self.check_platform(obj).await?),
-            Object::Blob(obj) => CheckObjectResult::Blob(unsafe {
+        let res = match obj.into_enum() {
+            Enum::Layer(obj) => CheckObjectResult::Layer(self.check_layer(obj).await?.into()),
+            Enum::Platform(obj) => CheckObjectResult::Platform(self.check_platform(obj).await?),
+            Enum::Blob(obj) => CheckObjectResult::Blob(unsafe {
                 // Safety: it is unsafe to call this function unless the blob
                // is known to exist, which is the same rule we pass up to the caller
-                self.must_check_blob_with_perms_opt(obj, perms).await?
+                self.must_check_blob_with_perms_opt(&obj, perms).await?
             }),
-            Object::Manifest(obj) => CheckObjectResult::Manifest(self.check_manifest(obj).await?),
-            Object::Tree(obj) => CheckObjectResult::Tree(obj),
-            Object::Mask => CheckObjectResult::Mask,
+            Enum::Manifest(obj) => CheckObjectResult::Manifest(self.check_manifest(obj).await?),
         };
         self.reporter.checked_object(&res);
         Ok(res)
@@ -319,9 +317,8 @@
     /// To also check if the platform object exists, use [`Self::check_digest`]
     pub async fn check_platform(&self, platform: graph::Platform) -> Result<CheckPlatformResult> {
         let futures: FuturesUnordered<_> = platform
-            .stack
             .iter_bottom_up()
-            .map(|d| self.check_digest(d))
+            .map(|d| self.check_digest(*d))
             .collect();
         let results = futures.try_collect().await?;
         let res = CheckPlatformResult {
@@ -336,7 +333,7 @@
     ///
     /// To also check if the layer object exists, use [`Self::check_digest`]
     pub async fn check_layer(&self, layer: graph::Layer) -> Result<CheckLayerResult> {
-        let result = self.check_digest(layer.manifest).await?;
+        let result = self.check_digest(*layer.manifest()).await?;
         let res = CheckLayerResult {
             layer,
             result,
@@ -351,15 +348,14 @@
     pub async fn check_manifest(&self, manifest: graph::Manifest) -> Result<CheckManifestResult> {
         let futures: FuturesUnordered<_> = manifest
             .iter_entries()
-            .filter(|e| e.kind.is_blob())
-            .cloned()
+            .filter(|e| e.kind().is_blob())
             // run through check_digest to ensure that blobs can be loaded
             // from the db and allow for possible repairs
-            .map(|e| self.check_digest_with_perms_opt(e.object, Some(e.mode)))
+            .map(|e| self.check_digest_with_perms_opt(*e.object(), Some(e.mode())))
             .collect();
         let results = futures.try_collect().await?;
         let res = CheckManifestResult {
-            manifest,
+            manifest: manifest.to_owned(),
             results,
             repaired: false,
         };
@@ -374,11 +370,11 @@
     /// This function may sync a payload without
     /// syncing the blob, which is unsafe unless the blob
     /// is known to exist in the repository being checked
-    pub async unsafe fn check_blob(&self, blob: graph::Blob) -> Result<CheckBlobResult> {
+    pub async unsafe fn check_blob(&self, blob: &graph::Blob) -> Result<CheckBlobResult> {
         let digest = blob.digest();
         if let Some(CheckProgress::CheckStarted) = self
             .processed_digests
-            .insert(digest, CheckProgress::CheckStarted)
+            .insert(*digest, CheckProgress::CheckStarted)
         {
             return Ok(CheckBlobResult::Duplicate);
         }
@@ -401,19 +397,19 @@
     /// is known to exist in the repository being checked
     async unsafe fn must_check_blob_with_perms_opt(
         &self,
-        blob: graph::Blob,
+        blob: &graph::Blob,
         perms: Option<u32>,
     ) -> Result<CheckBlobResult> {
-        self.reporter.visit_blob(&blob);
+        self.reporter.visit_blob(blob);
         let result = unsafe {
             // Safety: this function may sync a payload and so
             // is unsafe to call unless we know the blob exists,
             // which is why this is an unsafe function
-            self.check_payload_with_perms_opt(blob.payload, perms)
+            self.check_payload_with_perms_opt(*blob.payload(), perms)
                 .await?
}; let res = CheckBlobResult::Checked { - blob, + blob: blob.to_owned(), result, repaired: result == CheckPayloadResult::Repaired, }; @@ -598,7 +594,7 @@ impl CheckReporter for ConsoleCheckReporter { fn visit_blob(&self, blob: &graph::Blob) { let bars = self.get_bars(); - bars.bytes.inc_length(blob.size); + bars.bytes.inc_length(blob.size()); } fn visit_payload(&self, _digest: encoding::Digest) { @@ -805,8 +801,6 @@ pub enum CheckObjectResult { Layer(Box), Blob(CheckBlobResult), Manifest(CheckManifestResult), - Tree(graph::Tree), - Mask, } impl CheckObjectResult { @@ -821,8 +815,6 @@ impl CheckObjectResult { CheckObjectResult::Layer(r) => r.set_repaired(), CheckObjectResult::Blob(r) => r.set_repaired(), CheckObjectResult::Manifest(r) => r.set_repaired(), - CheckObjectResult::Tree(_) => (), - CheckObjectResult::Mask => (), } } @@ -839,7 +831,6 @@ impl CheckObjectResult { Layer(res) => res.summary(), Blob(res) => res.summary(), Manifest(res) => res.summary(), - Mask | Tree(_) => CheckSummary::default(), } } } @@ -918,10 +909,7 @@ pub enum CheckEntryResult { /// The entry was not one that needed checking Skipped, /// The entry was checked - Checked { - entry: graph::Entry, - result: CheckBlobResult, - }, + Checked { result: CheckBlobResult }, } impl CheckEntryResult { @@ -970,7 +958,7 @@ impl CheckBlobResult { let mut summary = result.summary(); summary += CheckSummary { checked_objects: 1, - checked_payload_bytes: blob.size, + checked_payload_bytes: blob.size(), repaired_objects: *repaired as usize, ..Default::default() }; diff --git a/crates/spfs/src/check_test.rs b/crates/spfs/src/check_test.rs index f309da5ab2..0fcf2a6c2c 100644 --- a/crates/spfs/src/check_test.rs +++ b/crates/spfs/src/check_test.rs @@ -3,10 +3,12 @@ // https://github.com/imageworks/spk use rstest::rstest; -use spfs_encoding::Encodable; +use spfs_encoding::prelude::*; use super::{CheckSummary, Checker}; use crate::fixtures::*; +use crate::graph::Database; +use crate::storage::PayloadStorage; #[rstest] #[tokio::test] @@ -20,9 +22,9 @@ async fn test_check_missing_payload(#[future] tmprepo: TempRepo) { .find(|entry| entry.is_regular_file()) .expect("at least one regular file"); - tracing::info!(digest=?file.object, "remove payload"); + tracing::info!(digest=%file.object(), "remove payload"); tmprepo - .remove_payload(file.object) + .remove_payload(*file.object()) .await .expect("failed to remove payload"); @@ -49,7 +51,7 @@ async fn test_check_missing_payload(#[future] tmprepo: TempRepo) { "expected all payloads to be visited except missing one" ); assert!( - summary.missing_payloads.contains(&file.object), + summary.missing_payloads.contains(file.object()), "should find one missing payload" ); assert_eq!( @@ -71,9 +73,9 @@ async fn test_check_missing_object(#[future] tmprepo: TempRepo) { .find(|entry| entry.is_regular_file()) .expect("at least one regular file"); - tracing::info!(digest=?file.object, "remove object"); + tracing::info!(digest=%file.object(), "remove object"); tmprepo - .remove_object(file.object) + .remove_object(*file.object()) .await .expect("failed to remove object"); @@ -81,7 +83,7 @@ async fn test_check_missing_object(#[future] tmprepo: TempRepo) { .iter_entries() .filter(|e| e.is_regular_file()) .count(); - let total_objects = total_blobs + 1; //the manifest + let total_objects = total_blobs + 1; // the manifest let results = Checker::new(&tmprepo.repo()) .check_all_objects() @@ -101,7 +103,7 @@ async fn test_check_missing_object(#[future] tmprepo: TempRepo) { "one payload should not be seen because of 
missing object" ); assert!( - summary.missing_objects.contains(&file.object), + summary.missing_objects.contains(file.object()), "should find one missing object" ); assert!( @@ -129,9 +131,9 @@ async fn test_check_missing_payload_recover(#[future] tmprepo: TempRepo) { .find(|entry| entry.is_regular_file()) .expect("at least one regular file"); - tracing::info!(digest=?file.object, "remove payload"); + tracing::info!(digest=%file.object(), "remove payload"); tmprepo - .remove_payload(file.object) + .remove_payload(*file.object()) .await .expect("failed to remove payload"); @@ -190,9 +192,9 @@ async fn test_check_missing_object_recover(#[future] tmprepo: TempRepo) { .find(|entry| entry.is_regular_file()) .expect("at least one regular file"); - tracing::info!(digest=?file.object, "remove object"); + tracing::info!(digest=%file.object(), "remove object"); tmprepo - .remove_object(file.object) + .remove_object(*file.object()) .await .expect("failed to remove object"); diff --git a/crates/spfs/src/clean.rs b/crates/spfs/src/clean.rs index 7cb17f1d58..6ab1d086bf 100644 --- a/crates/spfs/src/clean.rs +++ b/crates/spfs/src/clean.rs @@ -428,9 +428,9 @@ where }; self.reporter.visit_object(&obj); result.visited_objects += 1; - if let graph::Object::Blob(b) = &obj { + if let graph::object::Enum::Blob(b) = obj.to_enum() { result.visited_payloads += 1; - self.reporter.visit_payload(b); + self.reporter.visit_payload(&b); } let mut walk_stream = futures::stream::iter(obj.child_objects()) .then(|child| ready(self.discover_attached_objects(child).boxed())) @@ -491,8 +491,8 @@ where }) // also try to remove the corresponding payload // each removed blob - .try_filter_map(|obj| match obj { - graph::Object::Blob(blob) => ready(Ok(Some(blob))), + .try_filter_map(|obj| match obj.into_enum() { + graph::object::Enum::Blob(blob) => ready(Ok(Some(blob))), _ => ready(Ok(None)), }) .and_then(|blob| { @@ -503,7 +503,7 @@ where result.visited_payloads += 1; let future = self .repo - .remove_payload(blob.payload) + .remove_payload(*blob.payload()) .map(|res| { if let Err(Error::UnknownObject(_)) = res { return Ok(()); @@ -517,7 +517,7 @@ where .boxed(); let mut result = CleanResult::default(); while let Some(blob) = stream.try_next().await? { - result.removed_payloads.insert(blob.payload); + result.removed_payloads.insert(*blob.payload()); self.reporter.payload_removed(&blob) } drop(stream); @@ -532,7 +532,7 @@ where // TODO: this should be able to get the size of the payload, but // currently there is no way to do this unless you start with // the blob - let blob = graph::Blob { payload, size: 0 }; + let blob = graph::Blob::new(payload, 0); ready(Ok(Some(blob))) }) .and_then(|blob| { @@ -543,7 +543,7 @@ where } let future = self .repo - .remove_payload(blob.payload) + .remove_payload(*blob.payload()) .map(|res| { if let Err(Error::UnknownObject(_)) = res { return Ok(()); @@ -556,7 +556,7 @@ where .try_buffer_unordered(self.removal_concurrency) .boxed(); while let Some(blob) = stream.try_next().await? 
{ - result.removed_payloads.insert(blob.payload); + result.removed_payloads.insert(*blob.payload()); self.reporter.payload_removed(&blob) } drop(stream); @@ -861,19 +861,19 @@ pub struct TracingCleanReporter; impl CleanReporter for TracingCleanReporter { fn visit_tag(&self, tag: &tracking::Tag) { - tracing::info!(?tag, "visit tag"); + tracing::info!(%tag, "visit tag"); } fn tag_removed(&self, tag: &tracking::Tag) { - tracing::info!(?tag, "tag removed"); + tracing::info!(%tag, "tag removed"); } fn visit_object(&self, object: &graph::Object) { - tracing::info!(?object, "visit object"); + tracing::info!(%object, "visit object"); } fn object_removed(&self, object: &graph::Object) { - tracing::info!(?object, "object removed"); + tracing::info!(%object, "object removed"); } fn visit_payload(&self, payload: &graph::Blob) { @@ -885,19 +885,19 @@ impl CleanReporter for TracingCleanReporter { } fn visit_proxy(&self, proxy: &encoding::Digest) { - tracing::info!(?proxy, "visit proxy"); + tracing::info!(%proxy, "visit proxy"); } fn proxy_removed(&self, proxy: &encoding::Digest) { - tracing::info!(?proxy, "proxy removed"); + tracing::info!(%proxy, "proxy removed"); } fn visit_render(&self, render: &encoding::Digest) { - tracing::info!(?render, "visit render"); + tracing::info!(%render, "visit render"); } fn render_removed(&self, render: &encoding::Digest) { - tracing::info!(?render, "render removed"); + tracing::info!(%render, "render removed"); } fn error_encountered(&self, err: &Error) { diff --git a/crates/spfs/src/clean_test.rs b/crates/spfs/src/clean_test.rs index 8ed5dce74a..473843fcf6 100644 --- a/crates/spfs/src/clean_test.rs +++ b/crates/spfs/src/clean_test.rs @@ -11,9 +11,9 @@ use storage::prelude::*; use tokio::time::sleep; use super::{Cleaner, TracingCleanReporter}; -use crate::encoding::Encodable; +use crate::encoding::prelude::*; use crate::fixtures::*; -use crate::{graph, storage, tracking, Error}; +use crate::{storage, tracking, Error}; #[rstest] #[tokio::test] @@ -54,7 +54,7 @@ async fn test_get_attached_unattached_objects_blob( .await .unwrap(); let layer = tmprepo - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let tag = tracking::TagSpec::parse("my_tag").unwrap(); @@ -108,7 +108,7 @@ async fn test_clean_untagged_objects(#[future] tmprepo: TempRepo, tmpdir: tempfi .await .unwrap(); let layer = tmprepo - .create_layer(&graph::Manifest::from(&manifest2)) + .create_layer(&manifest2.to_graph_manifest()) .await .unwrap(); let tag = tracking::TagSpec::parse("tagged_manifest").unwrap(); @@ -192,7 +192,7 @@ async fn test_clean_untagged_objects_layers_platforms(#[future] tmprepo: TempRep let tmprepo = tmprepo.await; let manifest = tracking::Manifest::<()>::default(); let layer = tmprepo - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let platform = tmprepo @@ -240,7 +240,7 @@ async fn test_clean_manifest_renders(tmpdir: tempfile::TempDir) { .await .unwrap(); let layer = tmprepo - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let _platform = tmprepo @@ -255,7 +255,7 @@ async fn test_clean_manifest_renders(tmpdir: tempfile::TempDir) { let fs_repo = fs_repo.opened().await.unwrap(); storage::fs::Renderer::new(&*fs_repo) - .render_manifest(&graph::Manifest::from(&manifest), None) + .render_manifest(&manifest.to_graph_manifest(), None) .await .unwrap(); diff --git a/crates/spfs/src/commit.rs 
b/crates/spfs/src/commit.rs
index 89187feef9..0860a198e5 100644
--- a/crates/spfs/src/commit.rs
+++ b/crates/spfs/src/commit.rs
@@ -10,7 +10,7 @@ use std::sync::Arc;
 use futures::{FutureExt, StreamExt, TryStreamExt};
 use once_cell::sync::OnceCell;
 use progress_bar_derive_macro::ProgressBar;
-use spfs_encoding::Encodable;
+use spfs_encoding::prelude::*;
 
 use super::status::remount_runtime;
 use crate::prelude::*;
@@ -35,7 +35,7 @@ pub struct InMemoryBlobHasher;
 #[tonic::async_trait]
 impl BlobHasher for InMemoryBlobHasher {
     async fn hash_blob(&self, reader: Pin<Box<dyn BlobRead>>) -> Result<encoding::Digest> {
-        Ok(encoding::Digest::from_async_reader(reader).await?)
+        Ok(encoding::Hasher::hash_async_reader(reader).await?)
     }
 }
@@ -72,6 +72,7 @@ pub struct Committer<
     reporter: Arc<R>,
     builder: ManifestBuilder<H, F, R>,
     max_concurrent_blobs: usize,
+    allow_empty: bool,
 }
 
 impl<'repo> Committer<'repo, InMemoryBlobHasher, (), SilentCommitReporter> {
@@ -86,6 +87,7 @@ impl<'repo> Committer<'repo, InMemoryBlobHasher, (), SilentCommitReporter> {
             reporter,
             builder,
             max_concurrent_blobs: tracking::DEFAULT_MAX_CONCURRENT_BLOBS,
+            allow_empty: false,
         }
     }
 }
@@ -96,6 +98,14 @@ where
     F: PathFilter + Send + Sync,
     R: CommitReporter,
 {
+    /// Set if an empty commit is allowed.
+    ///
+    /// Defaults to false.
+    pub fn with_allow_empty(mut self, allow_empty: bool) -> Self {
+        self.allow_empty = allow_empty;
+        self
+    }
+
     /// Set how many blobs should be processed at once.
     ///
     /// Defaults to [`tracking::DEFAULT_MAX_CONCURRENT_BLOBS`].
@@ -134,6 +144,7 @@
             builder: self.builder.with_blob_hasher(hasher),
             reporter: self.reporter,
             max_concurrent_blobs: self.max_concurrent_blobs,
+            allow_empty: self.allow_empty,
         }
     }
 
@@ -148,6 +159,7 @@
             builder: self.builder.with_reporter(Arc::clone(&reporter)),
             reporter,
             max_concurrent_blobs: self.max_concurrent_blobs,
+            allow_empty: self.allow_empty,
         }
     }
 
@@ -167,6 +179,7 @@
             builder: self.builder.with_path_filter(filter),
             reporter: self.reporter,
             max_concurrent_blobs: self.max_concurrent_blobs,
+            allow_empty: self.allow_empty,
         }
     }
 
@@ -184,19 +197,23 @@
         manifest: tracking::Manifest,
         runtime: &mut runtime::Runtime,
     ) -> Result<graph::Layer> {
-        if manifest.is_empty() {
+        if manifest.is_empty() && !self.allow_empty {
             return Err(Error::NothingToCommit);
         }
         let layer = self
             .repo
-            .create_layer(&graph::Manifest::from(&manifest))
+            .create_layer(&manifest.to_graph_manifest())
             .await?;
-        if !runtime.push_digest(layer.digest()?) {
-            return Err(Error::NothingToCommit);
+        if !manifest.is_empty() {
+            // Don't bother putting the empty layer on the stack, the goal
+            // with allow_empty is to create an empty manifest.
+            if !runtime.push_digest(layer.digest()?) {
+                return Err(Error::NothingToCommit);
+            }
+            runtime.status.editable = false;
+            runtime.save_state_to_storage().await?;
+            remount_runtime(runtime).await?;
         }
-        runtime.status.editable = false;
-        runtime.save_state_to_storage().await?;
-        remount_runtime(runtime).await?;
         Ok(layer)
     }
@@ -208,7 +225,7 @@
         }
 
         runtime.reload_state_from_storage().await?;
-        if runtime.status.stack.is_empty() {
+        if runtime.status.stack.is_empty() && !self.allow_empty {
             Err(Error::NothingToCommit)
         } else {
             self.repo
@@ -309,10 +326,8 @@
         }
         drop(stream);
 
-        let storable = graph::Manifest::from(&manifest);
-        self.repo
-            .write_object(&graph::Object::Manifest(storable))
-            .await?;
+        let storable = manifest.to_graph_manifest();
+        self.repo.write_object(&storable).await?;
 
         Ok(manifest)
     }
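[Editor's note — not part of the patch: a sketch of opting into empty commits through the new builder flag, mirroring the cmd_commit.rs wiring earlier in this diff. Parameter and return types here are illustrative assumptions.]

```rust
use spfs::prelude::*;

async fn commit_path(
    repo: &spfs::storage::RepositoryHandle,
    path: &std::path::Path,
) -> spfs::Result<spfs::graph::Layer> {
    let committer = spfs::Committer::new(repo)
        .with_reporter(spfs::commit::ConsoleCommitReporter::default())
        .with_allow_empty(true); // empty manifests no longer abort with NothingToCommit
    let manifest = committer.commit_dir(path).await?;
    repo.create_layer(&manifest.to_graph_manifest()).await
}
```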
{ + return Err(Error::NothingToCommit); + } + runtime.status.editable = false; + runtime.save_state_to_storage().await?; + remount_runtime(runtime).await?; } - runtime.status.editable = false; - runtime.save_state_to_storage().await?; - remount_runtime(runtime).await?; Ok(layer) } @@ -208,7 +225,7 @@ where } runtime.reload_state_from_storage().await?; - if runtime.status.stack.is_empty() { + if runtime.status.stack.is_empty() && !self.allow_empty { Err(Error::NothingToCommit) } else { self.repo @@ -309,10 +326,8 @@ where } drop(stream); - let storable = graph::Manifest::from(&manifest); - self.repo - .write_object(&graph::Object::Manifest(storable)) - .await?; + let storable = manifest.to_graph_manifest(); + self.repo.write_object(&storable).await?; Ok(manifest) } diff --git a/crates/spfs/src/config.rs b/crates/spfs/src/config.rs index 551c1c582e..7947e14a5f 100644 --- a/crates/spfs/src/config.rs +++ b/crates/spfs/src/config.rs @@ -14,7 +14,7 @@ use storage::{FromConfig, FromUrl}; use tokio_stream::StreamExt; use crate::storage::{TagNamespaceBuf, TagStorageMut}; -use crate::{runtime, storage, tracking, Error, Result}; +use crate::{graph, runtime, storage, tracking, Error, Result}; #[cfg(test)] #[path = "./config_test.rs"] @@ -83,6 +83,16 @@ pub struct Storage { /// payloads readable by "other". pub allow_payload_sharing_between_users: bool, pub tag_namespace: Option, + /// The strategy to use when generating new objects. + /// + /// All available strategies are still supported for reading. + #[serde(default)] + pub digest_strategy: graph::object::DigestStrategy, + /// The format to use when generating new objects. + /// + /// All available formats are still supported for reading. + #[serde(default)] + pub encoding_format: graph::object::EncodingFormat, } impl Storage { @@ -103,6 +113,8 @@ impl Default for Storage { .unwrap_or_else(|| PathBuf::from(FALLBACK_STORAGE_ROOT)), allow_payload_sharing_between_users: false, tag_namespace: None, + digest_strategy: graph::object::DigestStrategy::default(), + encoding_format: graph::object::EncodingFormat::default(), } } } diff --git a/crates/spfs/src/config_test.rs b/crates/spfs/src/config_test.rs index 5600306583..a3c5d80024 100644 --- a/crates/spfs/src/config_test.rs +++ b/crates/spfs/src/config_test.rs @@ -5,6 +5,7 @@ use rstest::rstest; use super::{Config, RemoteConfig}; +use crate::storage::prelude::*; use crate::storage::RepositoryHandle; use crate::{get_config, load_config}; diff --git a/crates/spfs/src/env.rs b/crates/spfs/src/env.rs index 91f9cd50b0..1c2a1a9408 100644 --- a/crates/spfs/src/env.rs +++ b/crates/spfs/src/env.rs @@ -540,7 +540,7 @@ where where P: AsRef, { - use spfs_encoding::Encodable; + use spfs_encoding::prelude::*; let path = path.as_ref().to_owned(); let platform = rt.to_platform().digest()?.to_string(); diff --git a/crates/spfs/src/error.rs b/crates/spfs/src/error.rs index 6fc6b05e98..87addc446e 100644 --- a/crates/spfs/src/error.rs +++ b/crates/spfs/src/error.rs @@ -8,7 +8,7 @@ use std::str::Utf8Error; use miette::Diagnostic; use thiserror::Error; -use crate::{encoding, storage}; +use crate::{encoding, graph, storage}; #[derive(Diagnostic, Debug, Error)] #[diagnostic( @@ -42,6 +42,9 @@ pub enum Error { #[error(transparent)] #[diagnostic(forward(0))] Encoding(#[from] super::encoding::Error), + #[error(transparent)] + #[diagnostic(forward(0))] + GraphObject(#[from] super::graph::error::ObjectError), #[error("Invalid repository url: {0:?}")] InvalidRemoteUrl(#[from] url::ParseError), @@ -80,8 +83,11 @@ pub enum Error { 
InvalidReference(String), #[error("Repository does not support manifest rendering: {0:?}")] NoRenderStorage(url::Url), - #[error("Object is not a blob: {1}")] - ObjectNotABlob(crate::graph::Object, encoding::Digest), + #[error("Object is not a {desired:?}: {digest}")] + NotCorrectKind { + desired: graph::ObjectKind, + digest: encoding::Digest, + }, #[error("Cannot write to a repository which has been pinned in time")] RepositoryIsPinned, @@ -255,37 +261,24 @@ impl From for Error { Self::String(err) } } + impl From<&str> for Error { fn from(err: &str) -> Self { Self::String(err.to_string()) } } + impl From for Error { fn from(err: std::path::StripPrefixError) -> Self { Error::String(err.to_string()) } } -// impl IntoError for storage::OpenRepositoryError { -// type Context = url::Url; -// type Error = Error; - -// fn into_error(self, context: Self::Context) -> Self::Error { -// Error::FailedToOpenRepository { -// repository: context.into(), -// source: self, -// } -// } -// } - -// /// A type that can be converted into an error if -// /// some additional context is provided -// pub trait IntoError { -// type Context; -// type Error; - -// fn into_error(self, context: Self::Context) -> Self::Error; -// } +impl From for Error { + fn from(err: spfs_proto::digest::Error) -> Self { + Error::Encoding(err.into()) + } +} /// An OS error represents an error that may have an associated /// error code from the operating system diff --git a/crates/spfs/src/find_path.rs b/crates/spfs/src/find_path.rs index dd0253a7ac..00316f5b82 100644 --- a/crates/spfs/src/find_path.rs +++ b/crates/spfs/src/find_path.rs @@ -4,9 +4,10 @@ use async_recursion::async_recursion; use relative_path::RelativePath; -use spfs_encoding::{Digest, Encodable}; +use spfs_encoding::prelude::*; +use spfs_encoding::Digest; -use crate::graph::{self, Object}; +use crate::graph::{self, DatabaseView, Object}; use crate::{env, status, storage, tracking, Error, Result}; /// Used for items in a list of spfs objects that contain a filepath. 
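A minimal sketch (not part of the diff) of the new `allow_empty` option in use. The crate-root re-export of `Committer`, the `Committer::new` constructor, and the `commit_manifest` method name are assumptions inferred from the surrounding hunks; error handling is abbreviated:

async fn commit_even_if_empty(
    repo: &spfs::storage::RepositoryHandle,
    manifest: spfs::tracking::Manifest<()>,
    runtime: &mut spfs::runtime::Runtime,
) -> spfs::Result<spfs::graph::Layer> {
    spfs::Committer::new(repo)
        // an empty manifest now becomes a real (empty) layer
        // instead of failing with Error::NothingToCommit
        .with_allow_empty(true)
        .commit_manifest(manifest, runtime)
        .await
}

Note that, per the hunk above, the empty layer is intentionally not pushed onto the runtime stack, so no remount is triggered for it.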
@@ -27,14 +28,7 @@ pub enum ObjectPathEntry { impl ObjectPathEntry { pub fn digest(&self) -> Result { match self { - ObjectPathEntry::Parent(obj) => match obj { - Object::Platform(obj) => obj.digest(), - Object::Layer(obj) => obj.digest(), - Object::Manifest(obj) => obj.digest(), - Object::Blob(obj) => Ok(obj.digest()), - Object::Tree(obj) => obj.digest(), - Object::Mask => Err(Error::String("spfs Mask object has no digest".to_string())), - }, + ObjectPathEntry::Parent(obj) => Ok(obj.digest()?), ObjectPathEntry::FilePath(entry) => Ok(entry.object), } } @@ -80,38 +74,38 @@ async fn find_path_in_spfs_item( ) -> Result> { let mut paths: Vec = Vec::new(); - match obj { - Object::Platform(obj) => { - for reference in obj.stack.iter_bottom_up() { - let item = repo.read_object(reference).await?; + match obj.to_enum() { + graph::object::Enum::Platform(obj) => { + for reference in obj.iter_bottom_up() { + let item = repo.read_object(*reference).await?; let paths_to_file = find_path_in_spfs_item(filepath, &item, repo).await?; for path in paths_to_file { let mut new_path: ObjectPath = Vec::new(); - new_path.push(ObjectPathEntry::Parent(Object::Platform(obj.clone()))); + new_path.push(ObjectPathEntry::Parent(obj.to_object())); new_path.extend(path); paths.push(new_path); } } } - Object::Layer(obj) => { - let item = repo.read_object(obj.manifest).await?; + graph::object::Enum::Layer(obj) => { + let item = repo.read_object(*obj.manifest()).await?; let paths_to_file = find_path_in_spfs_item(filepath, &item, repo).await?; for path in paths_to_file { let mut new_path: ObjectPath = Vec::new(); - new_path.push(ObjectPathEntry::Parent(Object::Layer(obj.clone()))); + new_path.push(ObjectPathEntry::Parent(obj.to_object())); new_path.extend(path); paths.push(new_path); } } - Object::Manifest(obj) => { + graph::object::Enum::Manifest(obj) => { let path = RelativePath::new(filepath); for node in obj.to_tracking_manifest().walk_abs(env::SPFS_DIR) { if node.path == path { let new_path = vec![ - ObjectPathEntry::Parent(Object::Manifest(obj.clone())), + ObjectPathEntry::Parent(obj.into_object()), ObjectPathEntry::FilePath(node.entry.clone()), ]; paths.push(new_path); @@ -120,7 +114,7 @@ async fn find_path_in_spfs_item( } } - Object::Blob(_) | Object::Tree(_) | Object::Mask => { + graph::object::Enum::Blob(_) => { // These are not examined here when searching for the // filepath because the filepath will be found by walking // Manifest objects. diff --git a/crates/spfs/src/graph/blob.rs b/crates/spfs/src/graph/blob.rs index 031996b21c..265cdde2b3 100644 --- a/crates/spfs/src/graph/blob.rs +++ b/crates/spfs/src/graph/blob.rs @@ -2,47 +2,139 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use crate::{encoding, Error, Result}; +use super::object::HeaderBuilder; +use super::ObjectKind; +use crate::encoding::Digest; +use crate::{encoding, Result}; /// Blobs represent an arbitrary chunk of binary data, usually a file. 
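The traversal below switches from matching on the old `Object` enum to the new `to_enum()` disambiguation. A minimal sketch of that dispatch pattern, using only accessors introduced elsewhere in this diff:

fn child_digests(obj: &spfs::graph::Object) -> Vec<spfs::encoding::Digest> {
    use spfs::graph::object::Enum;
    match obj.to_enum() {
        // platforms stack other objects, walked bottom-up
        Enum::Platform(platform) => platform.iter_bottom_up().copied().collect(),
        // a layer references exactly one manifest
        Enum::Layer(layer) => vec![*layer.manifest()],
        // manifests reference the blobs of their entries
        Enum::Manifest(manifest) => manifest.child_objects(),
        // blobs are leaves in the object graph
        Enum::Blob(_) => Vec::new(),
    }
}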
-#[derive(Debug, Eq, PartialEq, Clone)] -pub struct Blob { - pub payload: encoding::Digest, - pub size: u64, +pub type Blob = super::FlatObject<spfs_proto::Blob<'static>>; + +impl std::fmt::Debug for Blob { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Blob") + .field("payload", &self.payload().to_string()) + .field("size", &self.size()) + .finish() + } } impl Blob { - pub fn new(payload: encoding::Digest, size: u64) -> Self { - Self { payload, size } + /// Construct a new blob with default header values, + /// for more configuration use [`Self::builder`] + pub fn new(payload: Digest, size: u64) -> Self { + Self::builder() + .with_payload(payload) + .with_size(size) + .build() } - pub fn digest(&self) -> encoding::Digest { - self.payload + #[inline] + pub fn builder() -> BlobBuilder { + BlobBuilder::default() } - /// Return the child object of this one in the object DG. - pub fn child_objects(&self) -> Vec { - Vec::new() + #[inline] + pub fn digest(&self) -> &Digest { + self.proto().payload() } -} -impl encoding::Encodable for Blob { - type Error = Error; + #[inline] + pub fn payload(&self) -> &Digest { + self.digest() + } - fn digest(&self) -> Result { - Ok(self.digest()) + #[inline] + pub fn size(&self) -> u64 { + self.proto().size_() } - fn encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { - encoding::write_digest(&mut writer, &self.payload)?; - encoding::write_uint(writer, self.size)?; + + pub(super) fn legacy_encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { + encoding::write_digest(&mut writer, self.payload())?; + encoding::write_uint64(writer, self.size())?; Ok(()) } } -impl encoding::Decodable for Blob { - fn decode(mut reader: &mut impl std::io::Read) -> Result { - Ok(Blob { - payload: encoding::read_digest(&mut reader)?, - size: encoding::read_uint(reader)?, + +#[derive(Debug)] +pub struct BlobBuilder { + header: super::object::HeaderBuilder, + payload: encoding::Digest, + size: u64, +} + +impl Default for BlobBuilder { + fn default() -> Self { + Self { + header: super::object::HeaderBuilder::new(ObjectKind::Blob), + payload: Default::default(), + size: Default::default(), + } + } +} + +impl BlobBuilder { + pub fn with_header<F>(mut self, mut header: F) -> Self + where + F: FnMut(HeaderBuilder) -> HeaderBuilder, + { + self.header = header(self.header).with_object_kind(ObjectKind::Blob); + self + } + + pub fn with_payload(mut self, payload: Digest) -> Self { + self.payload = payload; + self + } + + pub fn with_size(mut self, size: u64) -> Self { + self.size = size; + self + } + + pub fn build(&self) -> Blob { + super::BUILDER.with_borrow_mut(|builder| { + let blob = spfs_proto::Blob::create( + builder, + &spfs_proto::BlobArgs { + payload: Some(&self.payload), + size_: self.size, + }, + ); + let any = spfs_proto::AnyObject::create( + builder, + &spfs_proto::AnyObjectArgs { + object_type: spfs_proto::Object::Blob, + object: Some(blob.as_union_value()), + }, + ); + builder.finish_minimal(any); + let offset = unsafe { + // Safety: we have just created this buffer + // so already know the root type with certainty + flatbuffers::root_unchecked::<spfs_proto::AnyObject>(builder.finished_data()) + .object_as_blob() + .unwrap() + ._tab + .loc() + }; + let obj = unsafe { + // Safety: the provided buf and offset must contain + // a valid object and point to the contained blob + // which is what we've done + Blob::new_with_header(self.header.build(), builder.finished_data(), offset) + }; + builder.reset(); // to be used again + obj }) } + + /// Read data encoded
using the legacy format, and + /// use the data to fill and complete this builder + pub fn legacy_decode(self, mut reader: &mut impl std::io::Read) -> Result { + Ok(self + .with_payload(encoding::read_digest(&mut reader)?) + .with_size(encoding::read_uint64(reader)?) + .build()) + } } diff --git a/crates/spfs/src/graph/database.rs b/crates/spfs/src/graph/database.rs index e815fdb758..bf6b01bf62 100644 --- a/crates/spfs/src/graph/database.rs +++ b/crates/spfs/src/graph/database.rs @@ -9,7 +9,7 @@ use std::task::Poll; use chrono::{DateTime, Utc}; use futures::{Future, Stream, StreamExt, TryStreamExt}; -use super::Object; +use super::{FlatObject, Object, ObjectProto}; use crate::{encoding, Error, Result}; /// Walks an object tree depth-first starting at some root digest @@ -251,7 +251,7 @@ impl DatabaseView for &T { #[async_trait::async_trait] pub trait Database: DatabaseView { /// Write an object to the database, for later retrieval. - async fn write_object(&self, obj: &Object) -> Result<()>; + async fn write_object(&self, obj: &FlatObject) -> Result<()>; /// Remove an object from the database. async fn remove_object(&self, digest: encoding::Digest) -> Result<()>; @@ -269,7 +269,7 @@ pub trait Database: DatabaseView { #[async_trait::async_trait] impl Database for &T { - async fn write_object(&self, obj: &Object) -> Result<()> { + async fn write_object(&self, obj: &FlatObject) -> Result<()> { Database::write_object(&**self, obj).await } diff --git a/crates/spfs/src/graph/entry.rs b/crates/spfs/src/graph/entry.rs index beefbe7041..15e45a6251 100644 --- a/crates/spfs/src/graph/entry.rs +++ b/crates/spfs/src/graph/entry.rs @@ -4,89 +4,231 @@ use std::io::BufRead; -use crate::{encoding, tracking, Error, Result}; +use encoding::prelude::*; +use spfs_proto::EntryArgs; + +use crate::{encoding, tracking, Result}; #[cfg(test)] #[path = "./entry_test.rs"] mod entry_test; -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct Entry { - pub object: encoding::Digest, - pub kind: tracking::EntryKind, - pub mode: u32, - pub size: u64, - pub name: String, +/// Entry represents one item in the file system, such as +/// a file or directory. 
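Stepping back to the blob changes above: construction now goes through a builder that writes the flatbuffer once into the shared, thread-local buffer. A small usage sketch of that API:

// Blobs share their digest with the payload they describe.
let payload: spfs::encoding::Digest = spfs::encoding::EMPTY_DIGEST.into();
let blob = spfs::graph::Blob::new(payload, 0);
assert_eq!(*blob.digest(), payload);
assert_eq!(blob.size(), 0);
// The builder form allows header configuration as well:
let same = spfs::graph::Blob::builder()
    .with_payload(payload)
    .with_size(0)
    .build();
assert_eq!(same.digest(), blob.digest());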
+#[derive(Copy, Clone)] +pub struct Entry<'buf>(pub(super) spfs_proto::Entry<'buf>); + +impl<'buf> std::fmt::Debug for Entry<'buf> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Entry") + .field("name", &self.name()) + .field("kind", &self.kind()) + .field("mode", &self.mode()) + .field("size", &self.size()) + .field("object", self.object()) + .finish() + } } -impl Entry { - pub fn from(name: String, entry: &tracking::Entry) -> Self { - Self { - object: entry.object, - kind: entry.kind, - mode: entry.mode, - size: entry.size, +impl<'buf> From> for Entry<'buf> { + fn from(value: spfs_proto::Entry<'buf>) -> Self { + Self(value) + } +} + +impl<'buf> Entry<'buf> { + /// Construct a valid entry from its component parts + #[allow(clippy::too_many_arguments)] + pub fn build<'fbb>( + builder: &mut flatbuffers::FlatBufferBuilder<'fbb>, + name: &str, + kind: tracking::EntryKind, + mode: u32, + size: u64, + object: &encoding::Digest, + ) -> flatbuffers::WIPOffset> { + let name = builder.create_string(name); + spfs_proto::Entry::create( + builder, + &EntryArgs { + name: Some(name), + kind: kind.into(), + mode, + size_: size, + object: Some(object), + }, + ) + } + + pub fn from<'fbb, T>( + builder: &mut flatbuffers::FlatBufferBuilder<'fbb>, + name: &str, + entry: &tracking::Entry, + ) -> flatbuffers::WIPOffset> { + Self::build( + builder, name, + entry.kind, + entry.mode, + entry.size, + &entry.object, + ) + } + + #[inline] + pub fn name(&self) -> &'buf str { + self.0.name() + } + + pub fn kind(&self) -> tracking::EntryKind { + match self.0.kind() { + spfs_proto::EntryKind::Blob => tracking::EntryKind::Blob, + spfs_proto::EntryKind::Tree => tracking::EntryKind::Tree, + spfs_proto::EntryKind::Mask => tracking::EntryKind::Mask, + _ => unreachable!("internally valid entry buffer"), } } + #[inline] + pub fn mode(&self) -> u32 { + self.0.mode() + } + + #[inline] + pub fn size(&self) -> u64 { + self.0.size_() + } + + #[inline] + pub fn object(&self) -> &'buf encoding::Digest { + self.0.object() + } + + #[inline] pub fn is_symlink(&self) -> bool { - unix_mode::is_symlink(self.mode) + unix_mode::is_symlink(self.mode()) } + #[inline] pub fn is_dir(&self) -> bool { - unix_mode::is_dir(self.mode) + unix_mode::is_dir(self.mode()) } + #[inline] pub fn is_regular_file(&self) -> bool { - unix_mode::is_file(self.mode) + unix_mode::is_file(self.mode()) } } -impl std::fmt::Display for Entry { +impl<'buf> std::fmt::Display for Entry<'buf> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!( "{:06o} {:?} {} {}", - self.mode, self.kind, self.name, self.object + self.mode(), + self.kind(), + self.name(), + self.object() )) } } -impl PartialOrd for Entry { - fn partial_cmp(&self, other: &Self) -> Option { +impl<'buf1, 'buf2> PartialEq> for Entry<'buf1> { + fn eq(&self, other: &Entry<'buf2>) -> bool { + self.0 == other.0 + } +} + +impl<'buf1> Eq for Entry<'buf1> {} + +impl<'buf1, 'buf2> PartialOrd> for Entry<'buf1> { + fn partial_cmp(&self, other: &Entry<'buf2>) -> Option { Some(self.cmp(other)) } } -impl Ord for Entry { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - if self.kind == other.kind { - self.name.cmp(&other.name) + +impl<'buf> Ord for Entry<'buf> { + fn cmp(&self, other: &Entry<'buf>) -> std::cmp::Ordering { + if self.kind() == other.kind() { + self.name().cmp(other.name()) } else { - self.kind.cmp(&other.kind) + self.kind().cmp(&other.kind()) } } } -impl encoding::Encodable for Entry { - type Error = Error; +impl<'buf> 
encoding::Digestible for Entry<'buf> { + type Error = crate::Error; + + fn digest(&self) -> std::result::Result { + let mut hasher = encoding::Hasher::new_sync(); + self.legacy_encode(&mut hasher)?; + Ok(hasher.digest()) + } +} - fn encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { - encoding::write_digest(&mut writer, &self.object)?; - self.kind.encode(&mut writer)?; - encoding::write_uint(&mut writer, self.mode as u64)?; - encoding::write_uint(&mut writer, self.size)?; - encoding::write_string(writer, self.name.as_str())?; +impl<'buf> Entry<'buf> { + pub(super) fn legacy_encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { + encoding::write_digest(&mut writer, self.object())?; + self.kind().encode(&mut writer)?; + encoding::write_uint64(&mut writer, self.mode() as u64)?; + encoding::write_uint64(&mut writer, self.size())?; + encoding::write_string(writer, self.name())?; Ok(()) } + + pub(super) fn legacy_decode<'builder>( + builder: &mut flatbuffers::FlatBufferBuilder<'builder>, + mut reader: &mut impl BufRead, + ) -> Result>> { + // fields in the same order as above + let object = encoding::read_digest(&mut reader)?; + let kind = tracking::EntryKind::decode(&mut reader)?; + let mode = encoding::read_uint64(&mut reader)? as u32; + let size = encoding::read_uint64(&mut reader)?; + let name = encoding::read_string(reader)?; + Ok(Self::build(builder, &name, kind, mode, size, &object)) + } } -impl encoding::Decodable for Entry { - fn decode(mut reader: &mut impl BufRead) -> Result { - Ok(Entry { - object: encoding::read_digest(&mut reader)?, - kind: tracking::EntryKind::decode(&mut reader)?, - mode: encoding::read_uint(&mut reader)? as u32, - size: encoding::read_uint(&mut reader)?, - name: encoding::read_string(reader)?, + +/// A wrapper type that holds an owned buffer to an [`Entry`]. +/// +/// Entries are usually only constructed as part of a larger +/// type, such as a [`super::Manifest`], but for testing it helps +/// to be able to create one on its own. 
+#[cfg(test)] +pub struct EntryBuf(Box<[u8]>); + +#[cfg(test)] +impl EntryBuf { + pub fn build( + name: &str, + kind: tracking::EntryKind, + mode: u32, + size: u64, + object: &encoding::Digest, + ) -> Self { + crate::graph::BUILDER.with_borrow_mut(|builder| { + let name = builder.create_string(name); + let e = spfs_proto::Entry::create( + builder, + &EntryArgs { + kind: kind.into(), + object: Some(object), + mode, + size_: size, + name: Some(name), + }, + ); + builder.finish_minimal(e); + let bytes = builder.finished_data().into(); + builder.reset(); + Self(bytes) }) } + + pub fn as_entry(&self) -> Entry<'_> { + let e = + flatbuffers::root::>(&self.0[..]).expect("valid internal buffer"); + Entry(e) + } } diff --git a/crates/spfs/src/graph/entry_test.rs b/crates/spfs/src/graph/entry_test.rs index e8326b7214..979a536d74 100644 --- a/crates/spfs/src/graph/entry_test.rs +++ b/crates/spfs/src/graph/entry_test.rs @@ -2,35 +2,40 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk +use encoding::Digestible; use rstest::rstest; -use super::Entry; -use crate::encoding::{self, Encodable}; +use super::EntryBuf; +use crate::encoding::{self}; use crate::fixtures::*; use crate::tracking::EntryKind; #[rstest(entry, digest, - case(Entry{ - name: "testcase".into(), - mode: 0o40755, - size: 36, - kind: EntryKind::Tree, - object: "K53HFSBQEYR4SVFIDUGT63IE233MAMVKBQFXLA7M6HCR2AEMKIJQ====".parse().unwrap(), - }, - "VTTVI5AZULVVVIWRQMWKJ67TUAGWIECAS2GVTA7Q2QINS4XK4HQQ====".parse().unwrap()), - case(Entry{ - name: "swig_full_names.xsl".into(), - mode: 0o100644, - size: 3293, - kind: EntryKind::Blob, - object: "ZD25L3AN5E3LTZ6MDQOIZUV6KRV5Y4SSXRE4YMYZJJ3PXCQ3FMQA====".parse().unwrap(), - }, - "GP7DYE22DYLH3I5MB33PW5Z3AZXZIBGOND7MX65KECBMHVMXBUHQ====".parse().unwrap()), + case( + EntryBuf::build( + "testcase", + EntryKind::Tree, + 0o40755, + 36, + &"K53HFSBQEYR4SVFIDUGT63IE233MAMVKBQFXLA7M6HCR2AEMKIJQ====".parse().unwrap(), + ), + "VTTVI5AZULVVVIWRQMWKJ67TUAGWIECAS2GVTA7Q2QINS4XK4HQQ====".parse().unwrap(), + ), + case( + EntryBuf::build( + "swig_full_names.xsl", + EntryKind::Blob, + 0o100644, + 3293, + &"ZD25L3AN5E3LTZ6MDQOIZUV6KRV5Y4SSXRE4YMYZJJ3PXCQ3FMQA====".parse().unwrap(), + ), + "GP7DYE22DYLH3I5MB33PW5Z3AZXZIBGOND7MX65KECBMHVMXBUHQ====".parse().unwrap(), + ), )] -fn test_entry_encoding_compat(entry: Entry, digest: encoding::Digest) { +fn test_entry_encoding_compat(entry: EntryBuf, digest: encoding::Digest) { init_logging(); - let actual_digest = entry.digest().unwrap(); + let actual_digest = entry.as_entry().digest().unwrap(); assert_eq!( actual_digest, digest, "expected encoding to match existing result" diff --git a/crates/spfs/src/graph/error.rs b/crates/spfs/src/graph/error.rs new file mode 100644 index 0000000000..23b3ace3b3 --- /dev/null +++ b/crates/spfs/src/graph/error.rs @@ -0,0 +1,37 @@ +// Copyright (c) Sony Pictures Imageworks, et al. 
+// SPDX-License-Identifier: Apache-2.0 +// https://github.com/imageworks/spk + +use miette::Diagnostic; +use thiserror::Error; + +#[derive(Diagnostic, Debug, Error)] +#[diagnostic( + url( + "https://getspk.io/error_codes#{}", + self.code().unwrap_or_else(|| Box::new("spfs::generic")) + ) +)] +pub enum ObjectError { + #[error("Invalid object header, not enough data")] + HeaderTooShort, + + #[error("Invalid object header, prefix was incorrect")] + HeaderMissingPrefix, + + #[error("Invalid object data")] + InvalidFlatbuffer(#[from] flatbuffers::InvalidFlatbuffer), + + #[error("Unexpected or unknown object kind {0:?}")] + UnexpectedKind(u8), + + #[error("Unrecognized object encoding: {0}")] + #[diagnostic(help("Your version of spfs may be too old to read this data"))] + UnknownEncoding(u8), + + #[error("Unrecognized object digest strategy: {0}")] + #[diagnostic(help("Your version of spfs may be too old to read this data"))] + UnknownDigestStrategy(u8), +} + +pub type ObjectResult = std::result::Result; diff --git a/crates/spfs/src/graph/kind.rs b/crates/spfs/src/graph/kind.rs new file mode 100644 index 0000000000..437203152d --- /dev/null +++ b/crates/spfs/src/graph/kind.rs @@ -0,0 +1,101 @@ +// Copyright (c) Sony Pictures Imageworks, et al. +// SPDX-License-Identifier: Apache-2.0 +// https://github.com/imageworks/spk + +use strum::IntoEnumIterator; + +/// Identifies the kind of object this is for the purposes of encoding +#[derive(Debug, Clone, Copy, Eq, PartialEq, strum::EnumIter)] +pub enum ObjectKind { + Blob = 0, + Manifest = 1, + Layer = 2, + Platform = 3, + Tree = 4, + Mask = 5, +} + +impl ObjectKind { + #[inline] + pub fn from_u8(kind: u8) -> Option { + Self::iter().find(|v| *v as u8 == kind) + } + + pub fn from(kind: spfs_proto::Object) -> Option { + match kind { + x if x == spfs_proto::Object::Blob => Some(Self::Blob), + x if x == spfs_proto::Object::Manifest => Some(Self::Manifest), + x if x == spfs_proto::Object::Layer => Some(Self::Layer), + x if x == spfs_proto::Object::Platform => Some(Self::Platform), + _ => None, + } + } +} + +/// A trait for spfs object types that have an inherent [`ObjectKind`]. +pub trait Kind { + /// The kind of this object + fn kind() -> ObjectKind; +} + +/// An object instance with an associated [`ObjectKind`]. 
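Because `ObjectKind` discriminants are written into object headers on disk, `from_u8` must stay in sync with the enum; the `strum::EnumIter` derive above makes that automatic. For example:

use spfs::graph::ObjectKind;

// from_u8 scans the iterated variants, so any unknown discriminant
// is rejected rather than silently misread.
assert_eq!(ObjectKind::from_u8(0), Some(ObjectKind::Blob));
assert_eq!(ObjectKind::from_u8(3), Some(ObjectKind::Platform));
assert_eq!(ObjectKind::from_u8(42), None);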
+pub trait HasKind { + /// Identifies the kind of object this is for the purposes of encoding + fn kind(&self) -> ObjectKind; +} + +impl<'buf> Kind for spfs_proto::Platform<'buf> { + #[inline] + fn kind() -> ObjectKind { + ObjectKind::Platform + } +} + +impl<'buf> HasKind for spfs_proto::Platform<'buf> { + #[inline] + fn kind(&self) -> ObjectKind { + ::kind() + } +} + +impl<'buf> Kind for spfs_proto::Layer<'buf> { + #[inline] + fn kind() -> ObjectKind { + ObjectKind::Layer + } +} + +impl<'buf> HasKind for spfs_proto::Layer<'buf> { + #[inline] + fn kind(&self) -> ObjectKind { + ::kind() + } +} + +impl<'buf> Kind for spfs_proto::Manifest<'buf> { + #[inline] + fn kind() -> ObjectKind { + ObjectKind::Manifest + } +} + +impl<'buf> HasKind for spfs_proto::Manifest<'buf> { + #[inline] + fn kind(&self) -> ObjectKind { + ::kind() + } +} + +impl<'buf> Kind for spfs_proto::Blob<'buf> { + #[inline] + fn kind() -> ObjectKind { + ObjectKind::Blob + } +} + +impl<'buf> HasKind for spfs_proto::Blob<'buf> { + #[inline] + fn kind(&self) -> ObjectKind { + ::kind() + } +} diff --git a/crates/spfs/src/graph/layer.rs b/crates/spfs/src/graph/layer.rs index 3b1a353c65..902bd5381f 100644 --- a/crates/spfs/src/graph/layer.rs +++ b/crates/spfs/src/graph/layer.rs @@ -2,6 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk +use spfs_proto::LayerArgs; + +use super::object::HeaderBuilder; +use super::ObjectKind; use crate::{encoding, Error, Result}; #[cfg(test)] @@ -13,34 +17,127 @@ mod layer_test; /// Layers are considered completely immutable, and are /// uniquely identifiable by the computed hash of all /// relevant file and metadata. -#[derive(Debug, Eq, PartialEq, Clone, Hash)] -pub struct Layer { - pub manifest: encoding::Digest, +pub type Layer = super::object::FlatObject>; + +impl std::fmt::Debug for Layer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Layer") + .field("manifest", &self.manifest().to_string()) + .finish() + } } impl Layer { + /// Build a layer with the default header that points + /// at the provided manifest digest, + /// for more configuration use [`Self::builder`] + #[inline] pub fn new(manifest: encoding::Digest) -> Self { - Layer { manifest } + Self::builder().with_manifest(manifest).build() + } + + #[inline] + pub fn builder() -> LayerBuilder { + LayerBuilder::default() + } + + #[inline] + pub fn manifest(&self) -> &encoding::Digest { + self.proto().manifest() } /// Return the child object of this one in the object DG. 
+ #[inline] pub fn child_objects(&self) -> Vec { - vec![self.manifest] + vec![*self.manifest()] + } + + pub(super) fn legacy_encode(&self, writer: &mut impl std::io::Write) -> Result<()> { + encoding::write_digest(writer, self.manifest()).map_err(Error::Encoding) } } -impl encoding::Encodable for Layer { - type Error = Error; +impl std::hash::Hash for Layer { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.proto().manifest().hash(state) + } +} - fn encode(&self, writer: &mut impl std::io::Write) -> Result<()> { - encoding::write_digest(writer, &self.manifest).map_err(Error::Encoding) +impl std::cmp::PartialEq for Layer { + fn eq(&self, other: &Self) -> bool { + self.proto().manifest() == other.proto().manifest() } } -impl encoding::Decodable for Layer { - fn decode(reader: &mut impl std::io::Read) -> Result { - Ok(Layer { - manifest: encoding::read_digest(reader)?, +impl std::cmp::Eq for Layer {} + +pub struct LayerBuilder { + header: super::object::HeaderBuilder, + manifest: encoding::Digest, +} + +impl Default for LayerBuilder { + fn default() -> Self { + Self { + header: super::object::HeaderBuilder::new(ObjectKind::Layer), + manifest: encoding::NULL_DIGEST.into(), + } + } +} + +impl LayerBuilder { + pub fn with_header<F>(mut self, mut header: F) -> Self + where + F: FnMut(HeaderBuilder) -> HeaderBuilder, + { + self.header = header(self.header).with_object_kind(ObjectKind::Layer); + self + } + + pub fn with_manifest(mut self, manifest: encoding::Digest) -> Self { + self.manifest = manifest; + self + } + + pub fn build(&self) -> Layer { + super::BUILDER.with_borrow_mut(|builder| { + let layer = spfs_proto::Layer::create( + builder, + &LayerArgs { + manifest: Some(&self.manifest), + }, + ); + let any = spfs_proto::AnyObject::create( + builder, + &spfs_proto::AnyObjectArgs { + object_type: spfs_proto::Object::Layer, + object: Some(layer.as_union_value()), + }, + ); + builder.finish_minimal(any); + let offset = unsafe { + // Safety: we have just created this buffer + // so already know the root type with certainty + flatbuffers::root_unchecked::<spfs_proto::AnyObject>(builder.finished_data()) + .object_as_layer() + .unwrap() + ._tab + .loc() + }; + let obj = unsafe { + // Safety: the provided buf and offset must contain + // a valid object and point to the contained layer + // which is what we've done + Layer::new_with_header(self.header.build(), builder.finished_data(), offset) + }; + builder.reset(); // to be used again + obj }) } + + /// Read data encoded using the legacy format, and + /// use the data to fill and complete this builder + pub fn legacy_decode(self, reader: &mut impl std::io::Read) -> Result<Layer> { + Ok(self.with_manifest(encoding::read_digest(reader)?).build()) + } } diff --git a/crates/spfs/src/graph/layer_test.rs b/crates/spfs/src/graph/layer_test.rs index 239f0344c6..1bbfdf5be4 100644 --- a/crates/spfs/src/graph/layer_test.rs +++ b/crates/spfs/src/graph/layer_test.rs @@ -7,12 +7,16 @@ use rstest::rstest; use super::Layer; use crate::encoding; use crate::encoding::prelude::*; +use crate::graph::Object; #[rstest] fn test_layer_encoding() { let expected = Layer::new(encoding::EMPTY_DIGEST.into()); let mut stream = Vec::new(); expected.encode(&mut stream).unwrap(); - let actual = Layer::decode(&mut stream.as_slice()).unwrap(); + let actual = Object::decode(&mut stream.as_slice()) + .unwrap() + .into_layer() + .unwrap(); assert_eq!(actual.digest().unwrap(), expected.digest().unwrap()) } diff --git a/crates/spfs/src/graph/manifest.rs b/crates/spfs/src/graph/manifest.rs index 68652e278f..8f26083554 100644 ---
a/crates/spfs/src/graph/manifest.rs +++ b/crates/spfs/src/graph/manifest.rs @@ -2,208 +2,309 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeSet; use std::io::BufRead; -use encoding::Decodable; +use spfs_proto::ManifestArgs; -use super::{Entry, Tree}; -use crate::encoding::Encodable; -use crate::{encoding, tracking, Error, Result}; +use super::object::HeaderBuilder; +use super::{Entry, ObjectKind, Tree}; +use crate::prelude::*; +use crate::{encoding, tracking, Result}; #[cfg(test)] #[path = "./manifest_test.rs"] mod manifest_test; -#[derive(Debug, Eq, PartialEq, Clone, Default)] -pub struct Manifest { - root: Tree, - // because manifests are encoded - the ordering of trees are important - // to maintain in order to create consistent hashing - tree_order: Vec, - trees: BTreeMap, -} +/// A manifest holds the state of a filesystem tree. +pub type Manifest = super::object::FlatObject>; -impl From<&tracking::Manifest> for Manifest -where - T: std::cmp::Eq + std::cmp::PartialEq, -{ - fn from(source: &tracking::Manifest) -> Self { - Self::from(source.root()) +impl Default for Manifest { + fn default() -> Self { + Self::builder().build(&crate::tracking::Entry::<()>::empty_dir_with_open_perms()) } } -impl From<&tracking::Entry> for Manifest -where - T: std::cmp::Eq + std::cmp::PartialEq, -{ - fn from(source: &tracking::Entry) -> Self { - let mut manifest = Self::default(); - let mut root = Tree::default(); - - let mut entries: Vec<_> = source.iter_entries().collect(); - entries.sort_unstable(); - for node in entries { - let converted = match node.entry.kind { - tracking::EntryKind::Tree => { - let sub = Self::from(node.entry); - for tree in sub.iter_trees() { - manifest - .insert_tree(tree.clone()) - .expect("should not fail to insert tree entry"); - } - Entry { - object: sub.root.digest().unwrap(), - kind: node.entry.kind, - mode: node.entry.mode, - size: node.entry.size, - name: node.path.to_string(), - } - } - _ => Entry::from(node.path.to_string(), node.entry), - }; - root.entries.insert(converted); - } - manifest.root = root; - manifest +impl std::fmt::Debug for Manifest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Manifest") + .field("root", &self.root()) + .field("trees", &self.trees().collect::>()) + .finish() } } impl Manifest { - /// Create a new manifest with the given tree as the root. - /// - /// It's very possible to create an internally inconsistent manifest - /// this way, so ensure that any additional tree entries in the given - /// root tree are subsequently inserted into the created manifest - pub(crate) fn new(root: Tree) -> Self { - Self { - root, - ..Default::default() - } + #[inline] + pub fn builder() -> ManifestBuilder { + ManifestBuilder::default() } /// Return the root tree object of this manifest. - pub fn root(&self) -> &Tree { - &self.root + pub fn root(&self) -> Tree<'_> { + self.proto() + .trees() + .iter() + .next() + .map(Tree::from) + .expect("should always have at least one tree") + } + + /// Iterate all of the trees in this manifest (excluding the root). + pub fn trees(&self) -> impl Iterator> { + self.proto().trees().iter().skip(1).map(Tree::from) + } + + /// Iterate all of the trees in this manifest starting with the root. + pub fn iter_trees(&self) -> impl Iterator> { + std::iter::once(self.root()).chain(self.trees()) } /// Return the digests of objects that this manifest refers to. 
pub fn child_objects(&self) -> Vec { let mut children = BTreeSet::new(); for tree in self.iter_trees() { - for entry in tree.entries.iter() { - if let tracking::EntryKind::Blob = entry.kind { - children.insert(entry.object); + for entry in tree.entries() { + if entry.kind().is_blob() { + children.insert(*entry.object()); } } } children.into_iter().collect() } - /// Add a tree to be tracked in this manifest, returning - /// it if the same tree already exists. - pub(crate) fn insert_tree(&mut self, tree: Tree) -> Result> { - let digest = tree.digest()?; - if let Some(tree) = self.trees.insert(digest, tree) { - Ok(Some(tree)) - } else { - self.tree_order.push(digest); - Ok(None) - } - } - - pub fn get_tree<'a>(&'a self, digest: &encoding::Digest) -> Option<&'a Tree> { - match self.trees.get(digest) { - None => { - if digest == &self.root.digest().unwrap() { - Some(&self.root) - } else { - None - } - } - some => some, - } - } - - /// Iterate all of the trees in this manifest. - /// - /// Will panic if this manifest is internally inconsistent, though this - /// would point to a programming error or bug. - pub fn iter_trees(&self) -> impl Iterator { - std::iter::once(&self.root).chain(self.tree_order.iter().map(|digest| { - self.trees - .get(digest) - .expect("manifest is internally inconsistent (missing indexed tree)") - })) + pub fn get_tree(&self, digest: &encoding::Digest) -> Option> { + self.iter_trees() + .find(|t| t.digest().ok().as_ref() == Some(digest)) } /// Iterate all of the entries in this manifest. - pub fn iter_entries(&self) -> impl Iterator { - self.iter_trees().flat_map(|t| t.entries.iter()) + pub fn iter_entries(&self) -> impl Iterator> { + self.iter_trees().flat_map(Tree::into_entries) } /// Convert this manifest into a more workable form for editing. 
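A short sketch of walking the flatbuffer-backed manifest with the accessors above (`root`, `trees`, `iter_trees`, `iter_entries`), where `manifest` is any `graph::Manifest`:

fn blob_entries(manifest: &spfs::graph::Manifest) -> Vec<String> {
    let mut found = Vec::new();
    // iter_trees yields the root tree first, then the remaining trees
    for tree in manifest.iter_trees() {
        for entry in tree.entries() {
            if entry.kind().is_blob() {
                found.push(format!("{} -> {}", entry.name(), entry.object()));
            }
        }
    }
    found
}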
pub fn to_tracking_manifest(&self) -> tracking::Manifest { let mut root = tracking::Entry::empty_dir_with_open_perms(); - fn iter_tree(source: &Manifest, tree: &Tree, parent: &mut tracking::Entry) { - for entry in tree.entries.iter() { + fn iter_tree(source: &Manifest, tree: Tree<'_>, parent: &mut tracking::Entry) { + for entry in tree.entries() { let mut new_entry = tracking::Entry { - kind: entry.kind, - mode: entry.mode, - size: entry.size, + kind: entry.kind(), + mode: entry.mode(), + size: entry.size(), entries: Default::default(), - object: entry.object, + object: *entry.object(), user_data: (), }; - if let tracking::EntryKind::Tree = entry.kind { + if entry.kind().is_tree() { new_entry.object = encoding::NULL_DIGEST.into(); iter_tree( source, source - .get_tree(&entry.object) + .get_tree(entry.object()) .expect("manifest is internally inconsistent (missing child tree)"), &mut new_entry, ) } - parent.entries.insert(entry.name.clone(), new_entry); + parent.entries.insert(entry.name().to_owned(), new_entry); } } - iter_tree(self, &self.root, &mut root); - tracking::Manifest::new(root) + iter_tree(self, self.root(), &mut root); + let mut manifest = tracking::Manifest::new(root); + // ensure that the manifest will round-trip in the case of it + // being converted back into this type + manifest.set_header(self.header().to_owned()); + manifest } -} -impl Encodable for Manifest { - type Error = Error; - - fn encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { - self.root().encode(&mut writer)?; - encoding::write_uint(&mut writer, self.tree_order.len() as u64)?; - for digest in &self.tree_order { - match self.trees.get(digest) { - Some(tree) => tree.encode(writer)?, - None => { - return Err("manifest is internally inconsistent (missing indexed tree)".into()) - } - } + pub(super) fn legacy_encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { + self.root().legacy_encode(&mut writer)?; + // this method encodes the root tree first, and does not + // include it in the count of remaining trees since at least + // one root is always required + encoding::write_uint64(&mut writer, self.proto().trees().len() as u64 - 1)?; + // skip the root tree when saving the rest + for tree in self.iter_trees().skip(1) { + tree.legacy_encode(writer)?; } Ok(()) } } -impl Decodable for Manifest { - fn decode(mut reader: &mut impl BufRead) -> Result { - let mut manifest = Manifest { - root: Tree::decode(&mut reader)?, - ..Default::default() - }; - let num_trees = encoding::read_uint(&mut reader)?; - for _ in 0..num_trees { - let tree = Tree::decode(reader)?; - manifest.insert_tree(tree)?; +pub struct ManifestBuilder { + header: super::object::HeaderBuilder, +} + +impl Default for ManifestBuilder { + fn default() -> Self { + Self { + header: super::object::HeaderBuilder::new(ObjectKind::Manifest), } - Ok(manifest) + } +} + +impl ManifestBuilder { + pub fn with_header(mut self, mut header: F) -> Self + where + F: FnMut(HeaderBuilder) -> HeaderBuilder, + { + self.header = header(self.header).with_object_kind(ObjectKind::Manifest); + self + } + + /// Build a manifest that contains `source` as the root + /// entry. If `source` is not a tree, an empty manifest is + /// returned. 
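Before the implementation, a sketch of this entry point in use, mirroring the `Default` impl earlier in this file (the `()` user-data type matches that impl):

// Building from an empty directory yields a manifest whose only tree is
// the root; trees(), which skips the root, is therefore empty.
let entry = spfs::tracking::Entry::<()>::empty_dir_with_open_perms();
let manifest = spfs::graph::Manifest::builder().build(&entry);
assert!(manifest.trees().next().is_none());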
+ pub fn build<T>(&self, source: &tracking::Entry<T>) -> Manifest + where + T: std::cmp::Eq + std::cmp::PartialEq, + { + super::BUILDER.with_borrow_mut(|builder| { + let trees = Self::build_from_entry(builder, source); + let trees = builder.create_vector(&trees); + let manifest = + spfs_proto::Manifest::create(builder, &ManifestArgs { trees: Some(trees) }); + let any = spfs_proto::AnyObject::create( + builder, + &spfs_proto::AnyObjectArgs { + object_type: spfs_proto::Object::Manifest, + object: Some(manifest.as_union_value()), + }, + ); + builder.finish_minimal(any); + let offset = unsafe { + // Safety: we have just created this buffer + // so already know the root type with certainty + flatbuffers::root_unchecked::<spfs_proto::AnyObject>(builder.finished_data()) + .object_as_manifest() + .unwrap() + ._tab + .loc() + }; + let obj = unsafe { + // Safety: the provided buf and offset must contain + // a valid object and point to the contained manifest + // which is what we've done + Manifest::new_with_header(self.header.build(), builder.finished_data(), offset) + }; + builder.reset(); // to be used again + obj + }) + } + + /// Read data encoded using the legacy format, and + /// use the data to fill and complete this builder + pub fn legacy_decode(self, mut reader: &mut impl BufRead) -> Result<Manifest> { + super::BUILDER.with_borrow_mut(|builder| { + // historically, the root tree was stored first and not included in the count + // since it is an error to not have at least one root tree + let root = Tree::legacy_decode(builder, &mut reader)?; + let num_trees = encoding::read_uint64(&mut reader)?; + let mut trees = Vec::with_capacity(num_trees as usize + 1); + trees.push(root); + for _ in 0..num_trees { + let tree = Tree::legacy_decode(builder, reader)?; + trees.push(tree); + } + let trees = builder.create_vector(&trees); + let manifest = + spfs_proto::Manifest::create(builder, &ManifestArgs { trees: Some(trees) }); + let any = spfs_proto::AnyObject::create( + builder, + &spfs_proto::AnyObjectArgs { + object_type: spfs_proto::Object::Manifest, + object: Some(manifest.as_union_value()), + }, + ); + builder.finish_minimal(any); + let offset = unsafe { + // Safety: we have just created this buffer + // so already know the root type with certainty + flatbuffers::root_unchecked::<spfs_proto::AnyObject>(builder.finished_data()) + .object_as_manifest() + .unwrap() + ._tab + .loc() + }; + let obj = unsafe { + // Safety: the provided buf and offset must contain + // a valid object and point to the contained manifest + // which is what we've done + Manifest::new_with_header(self.header.build(), builder.finished_data(), offset) + }; + builder.reset(); // to be used again + Ok(obj) + }) + } + + fn build_from_entry<'buf, T>( + builder: &mut flatbuffers::FlatBufferBuilder<'buf>, + source: &tracking::Entry<T>, + ) -> Vec<flatbuffers::WIPOffset<spfs_proto::Tree<'buf>>> + where + T: std::cmp::Eq + std::cmp::PartialEq, + { + use flatbuffers::Follow; + + let mut entries: Vec<_> = source.iter_entries().collect(); + let mut roots = Vec::with_capacity(entries.len()); + let mut sub_manifests = Vec::new(); + entries.sort_unstable(); + + for node in entries { + let converted = match node.entry.kind { + tracking::EntryKind::Tree => { + let sub = Self::build_from_entry(builder, node.entry); + let first_offset = sub.first().expect("should always have a root entry"); + let wip_data = builder.unfinished_data(); + // WIPOffset is relative to the end of the buffer + let loc = wip_data.len() - first_offset.value() as usize; + let sub_root = unsafe { + // Safety: follow requires the offset to be valid + // and we trust the one that was just
created + spfs_proto::Tree::follow(wip_data, loc) + }; + let sub_root_digest = Tree(sub_root) + .digest() + .expect("entry should have a valid digest"); + sub_manifests.push(sub); + Entry::build( + builder, + node.path.as_str(), + node.entry.kind, + node.entry.mode, + node.entry.size, + &sub_root_digest, + ) + } + _ => Entry::from(builder, node.path.as_str(), node.entry), + }; + roots.push(converted); + } + let root_entries = builder.create_vector(&roots); + let root = spfs_proto::Tree::create( + builder, + &spfs_proto::TreeArgs { + entries: Some(root_entries), + }, + ); + let mut seen_trees = std::collections::HashSet::new(); + std::iter::once(vec![root]) + .chain(sub_manifests) + .flatten() + .filter(|t| { + let wip_data = builder.unfinished_data(); + // WIPOffset is relative to the end of the buffer + let loc = wip_data.len() - t.value() as usize; + let t = unsafe { + // Safety: follow requires the offset to be valid + // and we trust the one that was just created + spfs_proto::Tree::follow(wip_data, loc) + }; + seen_trees.insert(Tree(t).digest().expect("tree should have a valid digest")) + }) + .collect() } } diff --git a/crates/spfs/src/graph/manifest_test.rs b/crates/spfs/src/graph/manifest_test.rs index fda457a77b..49f4f08595 100644 --- a/crates/spfs/src/graph/manifest_test.rs +++ b/crates/spfs/src/graph/manifest_test.rs @@ -4,84 +4,84 @@ use rstest::rstest; -use super::Entry; +use crate::graph::entry::EntryBuf; use crate::{encoding, tracking}; #[rstest] fn test_entry_blobs_compare_name() { - let a = Entry { - name: "a".to_string(), - kind: tracking::EntryKind::Blob, - mode: 0, - object: encoding::EMPTY_DIGEST.into(), - size: 0, - }; - let b = Entry { - name: "b".to_string(), - kind: tracking::EntryKind::Blob, - mode: 0, - object: encoding::EMPTY_DIGEST.into(), - size: 0, - }; - assert!(a < b); - assert!(b > a); + let a = EntryBuf::build( + "a", + tracking::EntryKind::Blob, + 0, + 0, + &encoding::EMPTY_DIGEST.into(), + ); + let b = EntryBuf::build( + "b", + tracking::EntryKind::Blob, + 0, + 0, + &encoding::EMPTY_DIGEST.into(), + ); + assert!(a.as_entry() < b.as_entry()); + assert!(b.as_entry() > a.as_entry()); } #[rstest] fn test_entry_trees_compare_name() { - let a = Entry { - name: "a".to_string(), - kind: tracking::EntryKind::Tree, - mode: 0, - object: encoding::EMPTY_DIGEST.into(), - size: 0, - }; - let b = Entry { - name: "b".to_string(), - kind: tracking::EntryKind::Tree, - mode: 0, - object: encoding::EMPTY_DIGEST.into(), - size: 0, - }; - assert!(a < b); - assert!(b > a); + let a = EntryBuf::build( + "a", + tracking::EntryKind::Tree, + 0, + 0, + &encoding::EMPTY_DIGEST.into(), + ); + let b = EntryBuf::build( + "b", + tracking::EntryKind::Tree, + 0, + 0, + &encoding::EMPTY_DIGEST.into(), + ); + assert!(a.as_entry() < b.as_entry()); + assert!(b.as_entry() > a.as_entry()); } #[rstest] fn test_entry_compare_kind() { - let blob = Entry { - name: "a".to_string(), - kind: tracking::EntryKind::Blob, - mode: 0, - object: encoding::EMPTY_DIGEST.into(), - size: 0, - }; - let tree = Entry { - name: "b".to_string(), - kind: tracking::EntryKind::Tree, - mode: 0, - object: encoding::EMPTY_DIGEST.into(), - size: 0, - }; - assert!(tree > blob); - assert!(blob < tree); + let blob = EntryBuf::build( + "a", + tracking::EntryKind::Blob, + 0, + 0, + &encoding::EMPTY_DIGEST.into(), + ); + let tree = EntryBuf::build( + "b", + tracking::EntryKind::Tree, + 0, + 0, + &encoding::EMPTY_DIGEST.into(), + ); + assert!(tree.as_entry() > blob.as_entry()); + assert!(blob.as_entry() < tree.as_entry()); } 
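These builder internals are exercised most directly by round-tripping: converting to the tracking form and back should preserve the digest, since `to_tracking_manifest` carries the header along via `set_header`. A sketch of that check, with a hypothetical `manifest` value:

fn assert_round_trip(manifest: &spfs::graph::Manifest) {
    use spfs::encoding::prelude::*;
    // to_tracking_manifest preserves the header, so converting to the
    // editable form and back keeps the digest stable
    let rebuilt = manifest.to_tracking_manifest().to_graph_manifest();
    assert_eq!(rebuilt.digest().unwrap(), manifest.digest().unwrap());
}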
#[rstest] fn test_entry_compare() { - let root_file = Entry { - name: "file".to_string(), - kind: tracking::EntryKind::Blob, - mode: 0, - object: encoding::NULL_DIGEST.into(), - size: 0, - }; - let root_dir = Entry { - name: "xdir".to_string(), - kind: tracking::EntryKind::Tree, - mode: 0, - object: encoding::NULL_DIGEST.into(), - size: 0, - }; - assert!(root_dir > root_file); + let root_file = EntryBuf::build( + "file", + tracking::EntryKind::Blob, + 0, + 0, + &encoding::NULL_DIGEST.into(), + ); + let root_dir = EntryBuf::build( + "xdir", + tracking::EntryKind::Tree, + 0, + 0, + &encoding::NULL_DIGEST.into(), + ); + assert!(root_dir.as_entry() > root_file.as_entry()); } diff --git a/crates/spfs/src/graph/mod.rs b/crates/spfs/src/graph/mod.rs index eea5350f56..87de2962b8 100644 --- a/crates/spfs/src/graph/mod.rs +++ b/crates/spfs/src/graph/mod.rs @@ -7,13 +7,17 @@ mod blob; mod database; mod entry; +pub mod error; +mod kind; mod layer; mod manifest; -mod object; +pub mod object; mod platform; pub mod stack; mod tree; +use std::cell::RefCell; + pub use blob::Blob; pub use database::{ Database, @@ -23,9 +27,16 @@ pub use database::{ DigestSearchCriteria, }; pub use entry::Entry; +pub use kind::{HasKind, Kind, ObjectKind}; pub use layer::Layer; pub use manifest::Manifest; -pub use object::{Object, ObjectKind}; +pub use object::{FlatObject, Object, ObjectProto}; pub use platform::Platform; pub use stack::Stack; pub use tree::Tree; + +thread_local! { + /// A shared, thread-local builder to avoid extraneous allocations + /// when creating new instances of objects via [`flatbuffers`]. + static BUILDER: RefCell<flatbuffers::FlatBufferBuilder<'static>> = RefCell::new(flatbuffers::FlatBufferBuilder::with_capacity(256)); +} diff --git a/crates/spfs/src/graph/object.rs b/crates/spfs/src/graph/object.rs index d7d6fb46ed..8789135ca5 100644 --- a/crates/spfs/src/graph/object.rs +++ b/crates/spfs/src/graph/object.rs @@ -2,51 +2,143 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use std::io::BufRead; +use std::io::{BufRead, Write}; +use std::marker::PhantomData; -use strum::Display; +use bytes::BufMut; +use encoding::prelude::*; +use serde::{Deserialize, Serialize}; -use super::{Blob, Layer, Manifest, Platform, Tree}; +use super::error::{ObjectError, ObjectResult}; +use super::{Blob, DatabaseView, HasKind, Kind, Layer, Manifest, ObjectKind, Platform}; +use crate::encoding; use crate::storage::RepositoryHandle; -use crate::{encoding, Error}; -#[derive(Debug, Display, Eq, PartialEq, Clone)] -pub enum Object { - Platform(Platform), - Layer(Layer), - Manifest(Manifest), - Tree(Tree), - Blob(Blob), - Mask, +#[cfg(test)] +#[path = "./object_test.rs"] +mod object_test; + +/// A node in the spfs object graph +pub type Object = FlatObject<spfs_proto::AnyObject<'static>>; + +impl<T> From<FlatObject<T>> for Object +where + T: Kind, +{ + fn from(value: FlatObject<T>) -> Self { + let FlatObject { buf, offset, _t } = value; + Self { + buf, + offset, + _t: PhantomData, + } + } +} + +impl std::fmt::Display for Object { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Object") + .field(&self.kind()) + .field(&self.digest().unwrap()) + .finish() + } +} + +impl std::fmt::Debug for Object { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.to_enum().fmt(f) + } } impl Object { - pub fn child_objects(&self) -> Vec { - match self { - Self::Platform(platform) => platform.child_objects(), - Self::Layer(layer) => layer.child_objects(), - Self::Manifest(manifest) => manifest.child_objects(), - Self::Tree(tree) =>
tree.entries.iter().map(|e| e.object).collect(), - Self::Blob(_blob) => Vec::new(), - Self::Mask => Vec::new(), + /// A salt used to prime the digest calculation so it is less likely to + /// collide with digests produced for user content (blobs and payloads) + const DIGEST_SALT: &'static [u8] = b"spfs digest da8d8e62-9459-11ee-adab-00155dcb338b\0"; + + /// Create an object from the encoded bytes. + /// + /// In memory, objects always use the latest flatbuffer + /// format. The given bytes may be discarded or reconstructed + /// if a conversion is necessary, but the header is preserved + /// in order to ensure that the object does not change its + /// digest unless explicitly marked to do so. + pub fn new<B: Into<bytes::Bytes>>(buf: B) -> crate::Result<Self> { + let bytes = buf.into(); + let header = Header::new(&bytes)?; + let Some(kind) = header.object_kind() else { + return Err(ObjectError::UnexpectedKind(header.object_kind_number()).into()); + }; + let Some(format) = header.encoding_format() else { + return Err(ObjectError::UnknownEncoding(header.encoding_format_number()).into()); + }; + match format { + EncodingFormat::Legacy => { + let mut reader = std::io::BufReader::new(&bytes[Header::SIZE..]); + let object = match kind { + ObjectKind::Blob => Blob::builder() + .with_header(|h| h.copy_from(header)) + .legacy_decode(&mut reader)? + .into_object(), + ObjectKind::Manifest => Manifest::builder() + .with_header(|h| h.copy_from(header)) + .legacy_decode(&mut reader)? + .into_object(), + ObjectKind::Layer => Layer::builder() + .with_header(|h| h.copy_from(header)) + .legacy_decode(&mut reader)? + .into_object(), + ObjectKind::Platform => Platform::builder() + .with_header(|h| h.copy_from(header)) + .legacy_decode(&mut reader)? + .into_object(), + ObjectKind::Tree | ObjectKind::Mask => { + // although these kinds used to be supported, they were never actually encoded + // separately into files and so should not appear in this context + return Err(ObjectError::UnexpectedKind(kind as u8).into()); + } + }; + Ok(object) + } + EncodingFormat::FlatBuffers => { + // all we need to do with a flatbuffer is validate it, without + // any need to change or reallocate the buffer + flatbuffers::root::<spfs_proto::AnyObject>(&bytes[Header::SIZE..]) + .map_err(ObjectError::InvalidFlatbuffer)?; + Ok(Object { + buf: bytes, + offset: 0, + _t: PhantomData, + }) + } } } - /// Identifies the kind of object this is for the purposes of encoding - pub fn kind(&self) -> ObjectKind { - match self { - Self::Blob(_) => ObjectKind::Blob, - Self::Manifest(_) => ObjectKind::Manifest, - Self::Layer(_) => ObjectKind::Layer, - Self::Platform(_) => ObjectKind::Platform, - Self::Tree(_) => ObjectKind::Tree, - Self::Mask => ObjectKind::Mask, + /// Constructs a new [`Object`] instance from the provided flatbuffer. + /// + /// # Safety + /// `buf` must contain a valid flatbuffer with an [`spfs_proto::AnyObject`] + /// at its root of the provided kind.
+ pub unsafe fn new_with_default_header(buf: &[u8], kind: ObjectKind) -> Self { + unsafe { + // Safety: We are building a valid header and passing the other + // requirements up to the caller + Self::new_with_header(Header::builder(kind).build(), buf, 0) + } + } + + pub fn child_objects(&self) -> Vec { + match self.to_enum() { + Enum::Platform(platform) => platform.child_objects(), + Enum::Layer(layer) => layer.child_objects(), + Enum::Manifest(manifest) => manifest.child_objects(), + Enum::Blob(_blob) => Vec::new(), } } /// Return true if this Object kind also has a payload + #[inline] pub fn has_payload(&self) -> bool { - matches!(self, Self::Blob(_)) + self.kind() == ObjectKind::Blob } /// Calculates the total size of the object and all children, recursively @@ -57,29 +149,23 @@ impl Object { while !items_to_process.is_empty() { let mut next_iter_objects: Vec = Vec::new(); for object in items_to_process.iter() { - match object { - Object::Platform(object) => { - for digest in object.stack.iter_bottom_up() { - let item = repo.read_object(digest).await?; + match object.to_enum() { + Enum::Platform(object) => { + for digest in object.iter_bottom_up() { + let item = repo.read_object(*digest).await?; next_iter_objects.push(item); } } - Object::Layer(object) => { - let item = repo.read_object(object.manifest).await?; + Enum::Layer(object) => { + let item = repo.read_object(*object.manifest()).await?; next_iter_objects.push(item); } - Object::Manifest(object) => { + Enum::Manifest(object) => { for node in object.to_tracking_manifest().walk_abs("/spfs") { total_size += node.entry.size } } - Object::Tree(object) => { - for entry in object.entries.iter() { - total_size += entry.size - } - } - Object::Blob(object) => total_size += object.size, - Object::Mask => (), + Enum::Blob(object) => total_size += object.size(), } } items_to_process = std::mem::take(&mut next_iter_objects); @@ -88,99 +174,648 @@ impl Object { } } -impl From for Object { - fn from(platform: Platform) -> Self { - Self::Platform(platform) +impl HasKind for Object { + fn kind(&self) -> super::ObjectKind { + self.header() + .object_kind() + .expect("buffer already validated") } } -impl From for Object { - fn from(layer: Layer) -> Self { - Self::Layer(layer) + +impl encoding::Digestible for FlatObject { + type Error = crate::Error; + + fn digest(&self) -> crate::Result { + let header = self.header(); + let strategy = header.digest_strategy().ok_or_else(|| { + super::error::ObjectError::UnknownDigestStrategy(header.digest_strategy_number()) + })?; + let variant = self.to_enum(); + if let Enum::Blob(b) = variant { + // blobs share a digest with the payload that they represent. 
+ // Much of the codebase leverages this fact to skip additional + // steps, just as we are doing here to avoid running the hasher + return Ok(*b.payload()); + }; + let mut hasher = encoding::Hasher::new_sync(); + match strategy { + DigestStrategy::Legacy => { + // the original digest strategy did + // not include the kind or any special salting + } + DigestStrategy::WithKindAndSalt => { + hasher + .write_all(Object::DIGEST_SALT) + .map_err(encoding::Error::FailedWrite)?; + hasher + .write_all(&[header.object_kind_number()]) + .map_err(encoding::Error::FailedWrite)?; + } + } + match variant { + Enum::Platform(obj) => obj.legacy_encode(&mut hasher)?, + Enum::Layer(obj) => obj.legacy_encode(&mut hasher)?, + Enum::Manifest(obj) => obj.legacy_encode(&mut hasher)?, + Enum::Blob(_obj) => unreachable!("handled above"), + } + Ok(hasher.digest()) } } -impl From for Object { - fn from(manifest: Manifest) -> Self { - Self::Manifest(manifest) + +impl encoding::Encodable for FlatObject { + type Error = crate::Error; + + fn encode(&self, mut writer: &mut impl std::io::Write) -> crate::Result<()> { + let format = self + .header() + .encoding_format() + .expect("an already validated header"); + match format { + EncodingFormat::Legacy => { + writer + .write_all(&self.buf[..Header::SIZE]) + .map_err(encoding::Error::FailedWrite)?; + match self.to_enum() { + Enum::Blob(obj) => obj.legacy_encode(&mut writer), + Enum::Manifest(obj) => obj.legacy_encode(&mut writer), + Enum::Layer(obj) => obj.legacy_encode(&mut writer), + Enum::Platform(obj) => obj.legacy_encode(&mut writer), + } + } + EncodingFormat::FlatBuffers => { + // the flatbuffer format is useful exactly because it does + // not require the data to be encoded or decoded from the wire + // format + writer + .write_all(&self.buf) + .map_err(encoding::Error::FailedWrite)?; + Ok(()) + } + } } } -impl From for Object { - fn from(tree: Tree) -> Self { - Self::Tree(tree) + +impl encoding::Decodable for Object { + fn decode(reader: &mut impl BufRead) -> crate::Result { + let mut bytes = bytes::BytesMut::new().writer(); + std::io::copy(reader, &mut bytes).map_err(encoding::Error::FailedRead)?; + Self::new(bytes.into_inner()) + } } -impl From for Object { - fn from(blob: Blob) -> Self { - Self::Blob(blob) + +/// A cheaply clone-able disambiguation of an [`Object`] +#[derive(Debug, Clone, strum::Display)] +pub enum Enum { + Platform(super::Platform), + Layer(super::Layer), + Manifest(super::Manifest), + Blob(super::Blob), +} + +impl HasKind for Enum { + fn kind(&self) -> super::ObjectKind { + match self { + Enum::Platform(_) => super::ObjectKind::Platform, + Enum::Layer(_) => super::ObjectKind::Layer, + Enum::Manifest(_) => super::ObjectKind::Manifest, + Enum::Blob(_) => super::ObjectKind::Blob, + } } } -/// Identifies the kind of object this is for the purposes of encoding -#[derive(Debug)] -pub enum ObjectKind { - Blob = 0, - Manifest = 1, - Layer = 2, - Platform = 3, - Tree = 4, - Mask = 5, +pub struct FlatObject { + /// The underlying flatbuffer is set up to always contain + /// an [`spfs_proto::AnyObject`] with the generic `T` used + /// to disambiguate the actual object kind stored within for + /// easier API usage and adding specific behaviors to each kind. + buf: bytes::Bytes, + /// For any specific object type (not `Object`) this + /// field stores the pre-validated offset to the underlying + /// flatbuffer table for the more specific type.
+ offset: usize, + // using an fn type allows this type to still be Send/Sync even + // if T is not, which is appropriate because it does not actually + // contain an instance of T + _t: PhantomData T>, } -impl ObjectKind { - pub fn from_u64(kind: u64) -> Option { - match kind { - 0 => Some(Self::Blob), - 1 => Some(Self::Manifest), - 2 => Some(Self::Layer), - 3 => Some(Self::Platform), - 4 => Some(Self::Tree), - 5 => Some(Self::Mask), - _ => None, +impl Clone for FlatObject { + fn clone(&self) -> Self { + Self { + buf: self.buf.clone(), + offset: self.offset, + _t: PhantomData, } } } -const OBJECT_HEADER: &[u8] = "--SPFS--".as_bytes(); +impl FlatObject { + /// Constructs a new [`FlatObject`] instance from the provided + /// flatbuffer and offset value. + /// + /// # Safety + /// `buf` must contain a valid flatbuffer with an [`spfs_proto::AnyObject`] + /// at its root. Additionally, offset must point to the start of a + /// valid instance of `T` within the flatbuffer. + pub unsafe fn new_with_default_header(buf: &[u8], offset: usize) -> Self { + unsafe { + // Safety: we are ensuring a good header and pass the other + // requirements up to our caller + Self::new_with_header(Header::builder(T::kind()).build(), buf, offset) + } + } +} -impl encoding::Encodable for Object { - type Error = Error; +impl FlatObject { + /// Constructs a new [`FlatObject`] instance from the provided + /// header, flatbuffer and offset value. + /// + /// # Safety + /// `buf` must contain a valid flatbuffer with an [`spfs_proto::AnyObject`] + /// at its root. Additionally, offset must point to the start of a + /// valid instance of `T` within the flatbuffer and the header must + /// be valid and contain the appropriate type of `T` + pub unsafe fn new_with_header(header: H, buf: &[u8], offset: usize) -> Self + where + H: AsRef
<Header>
, + { + let mut bytes = bytes::BytesMut::with_capacity(buf.len() + Header::SIZE); + bytes.put(&header.as_ref()[..]); + bytes.put(buf); + Self { + buf: bytes.freeze(), + offset, + _t: PhantomData, + } + } - fn digest(&self) -> crate::Result { - match self { - Self::Platform(obj) => obj.digest(), - Self::Layer(obj) => obj.digest(), - Self::Manifest(obj) => obj.digest(), - Self::Tree(obj) => obj.digest(), - Self::Blob(obj) => Ok(obj.digest()), - Self::Mask => Ok(encoding::EMPTY_DIGEST.into()), + #[inline] + pub fn header(&self) -> &'_ Header { + #[cfg(debug_assertions)] + { + Header::new(&self.buf[..]).expect("header should be already validated") + } + #[cfg(not(debug_assertions))] + // Safety: the header is validated when the object is built + unsafe { + Header::new_unchecked(&self.buf[..]) } } - fn encode(&self, mut writer: &mut impl std::io::Write) -> crate::Result<()> { - encoding::write_header(&mut writer, OBJECT_HEADER)?; - encoding::write_uint(&mut writer, self.kind() as u64)?; - match self { - Self::Blob(obj) => obj.encode(&mut writer), - Self::Manifest(obj) => obj.encode(&mut writer), - Self::Layer(obj) => obj.encode(&mut writer), - Self::Platform(obj) => obj.encode(&mut writer), - Self::Tree(obj) => obj.encode(&mut writer), - Self::Mask => Ok(()), + pub fn into_object(self) -> super::Object { + let Self { buf, offset: _, _t } = self; + super::Object { + buf, + offset: 0, + _t: PhantomData, + } + } + + pub fn into_enum(self) -> Enum { + let proto = self.root_proto(); + let offset = proto.object().loc(); + match proto.object_type() { + spfs_proto::Object::Blob => Enum::Blob(Blob { + buf: self.buf, + offset, + _t: PhantomData, + }), + spfs_proto::Object::Layer => Enum::Layer(Layer { + buf: self.buf, + offset, + _t: PhantomData, + }), + spfs_proto::Object::Manifest => Enum::Manifest(Manifest { + buf: self.buf, + offset, + _t: PhantomData, + }), + spfs_proto::Object::Platform => Enum::Platform(Platform { + buf: self.buf, + offset, + _t: PhantomData, + }), + spfs_proto::Object::NONE | spfs_proto::Object(spfs_proto::Object::ENUM_MAX..) 
=> { + unreachable!("already recognized kind") + } + } + } + + pub fn into_layer(self) -> Option { + if let Enum::Layer(l) = self.into_enum() { + Some(l) + } else { + None + } + } + + pub fn into_manifest(self) -> Option { + if let Enum::Manifest(l) = self.into_enum() { + Some(l) + } else { + None + } + } + + pub fn into_blob(self) -> Option { + if let Enum::Blob(l) = self.into_enum() { + Some(l) + } else { + None + } + } + + pub fn into_platform(self) -> Option { + if let Enum::Platform(l) = self.into_enum() { + Some(l) + } else { + None + } + } + + /// Clone (cheaply) this object and make a generic one + #[inline] + pub fn to_object(&self) -> Object { + self.clone().into_object() + } + + /// Clone (cheaply) this object and identify its type + #[inline] + pub fn to_enum(&self) -> Enum { + self.clone().into_enum() + } + + /// Read the underlying [`spfs_proto::AnyObject`] flatbuffer + #[inline] + pub fn root_proto(&self) -> spfs_proto::AnyObject { + let buf = &self.buf[Header::SIZE..]; + #[cfg(debug_assertions)] + { + flatbuffers::root::<'_, spfs_proto::AnyObject>(buf) + .expect("object should already be validated") + } + #[cfg(not(debug_assertions))] + // Safety: root_unchecked does no validation, but this type + // promises that the internal buffer is already valid + unsafe { + flatbuffers::root_unchecked::<'_, spfs_proto::AnyObject>(buf) } } } -impl encoding::Decodable for Object { - fn decode(mut reader: &mut impl BufRead) -> crate::Result { - encoding::consume_header(&mut reader, OBJECT_HEADER)?; - let type_id = encoding::read_uint(&mut reader)?; - match ObjectKind::from_u64(type_id) { - Some(ObjectKind::Blob) => Ok(Self::Blob(Blob::decode(&mut reader)?)), - Some(ObjectKind::Manifest) => Ok(Self::Manifest(Manifest::decode(&mut reader)?)), - Some(ObjectKind::Layer) => Ok(Self::Layer(Layer::decode(&mut reader)?)), - Some(ObjectKind::Platform) => Ok(Self::Platform(Platform::decode(&mut reader)?)), - Some(ObjectKind::Tree) => Ok(Self::Tree(Tree::decode(&mut reader)?)), - Some(ObjectKind::Mask) => Ok(Self::Mask), - None => Err(format!("Cannot read object: unknown object kind {type_id}").into()), +impl<'buf, T> FlatObject +where + T: flatbuffers::Follow<'buf> + ObjectProto, + T::Inner: 'buf, +{ + #[inline] + pub fn proto(&'buf self) -> T::Inner { + use flatbuffers::Follow; + // Safety: we trust that the buffer and offset have been + // validated already when this instance was created + unsafe { ::follow(&self.buf[..], Header::SIZE + self.offset) } + } +} + +impl Kind for FlatObject +where + T: ObjectProto + Kind, +{ + #[inline] + fn kind() -> super::ObjectKind { + T::kind() + } +} + +impl HasKind for FlatObject +where + T: ObjectProto + Kind, +{ + #[inline] + fn kind(&self) -> super::ObjectKind { + T::kind() + } +} + +/// Each encoded object consists of the magic header string +/// followed by 8 bytes containing information about the rest +/// of the encoded data: +/// +/// ```txt +/// | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +/// | digest strategy | encoding format | _ | _ | _ | _ | _ | object kind | +/// ``` +/// +/// - digest strategy +/// The strategy used to compute this object's digest. If the strategy +/// is not known then the library cannot faithfully recompute the digest +/// for this object and should consider it unusable. +/// - encoding format +/// The format that the rest of the data is encoded with. If this is not +/// known then the library cannot safely interpret the data that follows +/// the header and should consider the object unusable. 
+/// - bytes 2..6 are reserved for future use
+/// - object kind
+///   Denotes the kind of the object that is encoded after the header. This
+///   byte may not be used by all encoding formats.
+///
+/// The original header format was a single 8-byte u64 to denote
+/// the kind of the object, but never defined more than 6 kinds.
+#[derive(Eq, PartialEq, Hash)]
+pub struct Header([u8]);
+
+impl std::fmt::Debug for Header {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Header")
+            .field("prefix", &Self::PREFIX)
+            .field("body", &&self.0[Self::PREFIX.len()..])
+            .field("digest_strategy_num", &self.digest_strategy_number())
+            .field("digest_strategy", &self.digest_strategy())
+            .field("encoding_format_num", &self.encoding_format_number())
+            .field("encoding_format", &self.encoding_format())
+            .field("object_kind_num", &self.object_kind_number())
+            .field("object_kind", &self.object_kind())
+            .finish()
+    }
+}
+
+impl std::borrow::ToOwned for Header {
+    type Owned = HeaderBuf;
+
+    fn to_owned(&self) -> Self::Owned {
+        let mut buf = HeaderBuf(Default::default());
+        buf.0[..].clone_from_slice(&self.0);
+        buf
+    }
+}
+
+impl std::ops::Deref for Header {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        // dereferences to bytes, but only the header portion
+        &self.0[..Self::SIZE]
+    }
+}
+
+impl Header {
+    /// A special string required for all headers
+    const PREFIX: &'static [u8] = "--SPFS--\n".as_bytes();
+    /// A fixed-size prefix followed by 8 separated byte fields
+    /// (this was originally a single u64)
+    const SIZE: usize = Self::PREFIX.len() + std::mem::size_of::<u64>();
+    const DIGEST_OFFSET: usize = Self::PREFIX.len();
+    const ENCODING_OFFSET: usize = Self::PREFIX.len() + 1;
+    const KIND_OFFSET: usize = Self::PREFIX.len() + 7;
+
+    #[inline]
+    pub fn builder(kind: ObjectKind) -> HeaderBuilder {
+        HeaderBuilder::new(kind)
+    }
+
+    /// Read the first bytes of `buf` as a header
+    ///
+    /// The buffer can be longer than just a header,
+    /// as only the initial header bytes will be validated
+    pub fn new(buf: &[u8]) -> ObjectResult<&Self> {
+        if buf.len() < Self::SIZE {
+            return Err(ObjectError::HeaderTooShort);
+        }
+        if &buf[..Self::PREFIX.len()] != Self::PREFIX {
+            return Err(ObjectError::HeaderMissingPrefix);
+        }
+        Ok(unsafe {
+            // Safety: we have just validated the buffer above
+            Self::new_unchecked(buf)
+        })
+    }
+
+    /// Read the first bytes of `buf` as a header
+    ///
+    /// # Safety
+    /// This function does not validate that the data buffer
+    /// is long enough or has the right shape to be a header.
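+    ///
+    /// Prefer [`Header::new`] unless the buffer is already known to be
+    /// valid. A minimal sketch of the intended pattern (illustrative
+    /// only, mirroring how [`FlatObject::header`] uses it):
+    ///
+    /// ```ignore
+    /// let checked = Header::new(&buf)?; // validate once, up front
+    /// // later, on hot paths where `buf` is known-good:
+    /// let header = unsafe { Header::new_unchecked(&buf) };
+    /// ```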
+    pub unsafe fn new_unchecked(buf: &[u8]) -> &Self {
+        // Safety: raw pointer casting is usually unsafe but our type
+        // wraps/is exactly a slice of bytes
+        unsafe { &*(&buf[..Self::SIZE] as *const [u8] as *const Self) }
+    }
+
+    /// The [`DigestStrategy`] in this header, if recognized
+    #[inline]
+    pub fn digest_strategy(&self) -> Option<DigestStrategy> {
+        DigestStrategy::from_u8(self.digest_strategy_number())
+    }
+
+    #[inline]
+    fn digest_strategy_number(&self) -> u8 {
+        self.0[Self::DIGEST_OFFSET]
+    }
+
+    /// The [`EncodingFormat`] in this header, if recognized
+    #[inline]
+    pub fn encoding_format(&self) -> Option<EncodingFormat> {
+        EncodingFormat::from_u8(self.encoding_format_number())
+    }
+
+    #[inline]
+    fn encoding_format_number(&self) -> u8 {
+        self.0[Self::ENCODING_OFFSET]
+    }
+
+    /// The [`ObjectKind`] in this header, if recognized
+    #[inline]
+    pub fn object_kind(&self) -> Option<ObjectKind> {
+        ObjectKind::from_u8(self.object_kind_number())
+    }
+
+    #[inline]
+    fn object_kind_number(&self) -> u8 {
+        self.0[Self::KIND_OFFSET]
+    }
+}
+
+/// An owned, mutable [`Header`]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct HeaderBuf([u8; Header::SIZE]);
+
+impl std::ops::Deref for HeaderBuf {
+    type Target = Header;
+
+    fn deref(&self) -> &Self::Target {
+        // Safety: we always contain a valid header
+        unsafe { Header::new_unchecked(&self.0) }
+    }
+}
+
+impl std::borrow::Borrow<Header> for HeaderBuf {
+    fn borrow(&self) -> &Header {
+        self
+    }
+}
+
+impl AsRef<Header>
 for HeaderBuf {
+    #[inline]
+    fn as_ref(&self) -> &Header {
+        self
+    }
+}
+
+impl HeaderBuf {
+    #[inline]
+    pub fn new(kind: ObjectKind) -> Self {
+        HeaderBuilder::new(kind).build()
+    }
+
+    #[inline]
+    pub fn set_object_kind(&mut self, object_kind: ObjectKind) {
+        self.0[Header::KIND_OFFSET] = object_kind as u8;
+    }
+
+    #[inline]
+    pub fn set_digest_strategy(&mut self, digest_strategy: DigestStrategy) {
+        self.0[Header::DIGEST_OFFSET] = digest_strategy as u8;
+    }
+
+    #[inline]
+    pub fn set_encoding_format(&mut self, encoding_format: EncodingFormat) {
+        self.0[Header::ENCODING_OFFSET] = encoding_format as u8;
+    }
+}
+
+#[derive(Debug)]
+pub struct HeaderBuilder {
+    digest_strategy: DigestStrategy,
+    encoding_format: EncodingFormat,
+    object_kind: ObjectKind,
+}
+
+impl HeaderBuilder {
+    pub fn new(object_kind: ObjectKind) -> Self {
+        let config = crate::get_config();
+        Self {
+            digest_strategy: config
+                .as_ref()
+                .map(|s| s.storage.digest_strategy)
+                // for safety, default to the oldest supported format
+                .unwrap_or(DigestStrategy::Legacy),
+            encoding_format: config
+                .as_ref()
+                .map(|s| s.storage.encoding_format)
+                // for safety, default to the oldest supported format
+                .unwrap_or(EncodingFormat::Legacy),
+            object_kind,
+        }
+    }
+
+    pub fn with_object_kind(mut self, object_kind: ObjectKind) -> Self {
+        self.object_kind = object_kind;
+        self
+    }
+
+    pub fn with_digest_strategy(mut self, digest_strategy: DigestStrategy) -> Self {
+        self.digest_strategy = digest_strategy;
+        self
+    }
+
+    pub fn with_encoding_format(mut self, encoding_format: EncodingFormat) -> Self {
+        self.encoding_format = encoding_format;
+        self
+    }
+
+    /// Copy valid and known components from another header
+    pub fn copy_from(mut self, other: &Header) -> Self {
+        if let Some(digest_strategy) = other.digest_strategy() {
+            self = self.with_digest_strategy(digest_strategy);
+        }
+        if let Some(encoding_format) = other.encoding_format() {
+            self = self.with_encoding_format(encoding_format);
+        }
+        if let Some(object_kind) = other.object_kind() {
+            self = self.with_object_kind(object_kind);
+        }
+        self
+    }
+
+    /// Build the header bytes for the current settings
+    pub fn build(&self) -> HeaderBuf {
+        let mut bytes = [0_u8; Header::SIZE];
+        bytes[..Header::PREFIX.len()].copy_from_slice(Header::PREFIX);
+        let mut buf = HeaderBuf(bytes);
+        buf.set_object_kind(self.object_kind);
+        buf.set_digest_strategy(self.digest_strategy);
+        buf.set_encoding_format(self.encoding_format);
+        buf
+    }
+}
+
+/// See [`Header`].
+#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Deserialize, Serialize)]
+#[repr(u8)]
+pub enum DigestStrategy {
+    /// Hash the output of the original spfs encoding, which
+    /// has known collision issues. Not recommended for use
+    /// except for backwards-compatibility
+    Legacy = 0,
+    /// Encodes with the original spfs encoding, but adds salt
+    /// and the [`ObjectKind`] to mitigate issues found in the
+    /// original encoding mechanism
+    #[default]
+    WithKindAndSalt = 1,
+}
+
+impl DigestStrategy {
+    pub fn from_u8(value: u8) -> Option<Self> {
+        match value {
+            0 => Some(Self::Legacy),
+            1 => Some(Self::WithKindAndSalt),
+            2.. => None,
+        }
+    }
+}
+
+/// See [`Header`].
+#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Deserialize, Serialize)]
+#[repr(u8)]
+pub enum EncodingFormat {
+    /// Encode using the original spfs encoding, which uses
+    /// a bespoke binary format
+    Legacy = 0,
+    /// Encode using the [`spfs_proto::AnyObject`] flatbuffers
+    /// schema.
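+    ///
+    /// Objects stored this way can be used directly from the read
+    /// buffer without a decode step. A minimal sketch of opting into
+    /// this format explicitly (mirrors the tests in `object_test.rs`
+    /// below):
+    ///
+    /// ```ignore
+    /// let platform = Platform::builder()
+    ///     .with_header(|h| h.with_encoding_format(EncodingFormat::FlatBuffers))
+    ///     .build();
+    /// assert_eq!(
+    ///     platform.header().encoding_format(),
+    ///     Some(EncodingFormat::FlatBuffers)
+    /// );
+    /// ```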
+    #[default]
+    FlatBuffers = 1,
+}
+
+impl EncodingFormat {
+    pub fn from_u8(value: u8) -> Option<Self> {
+        match value {
+            0 => Some(Self::Legacy),
+            1 => Some(Self::FlatBuffers),
+            2.. => None,
+        }
+    }
+}
+
+/// A valid variant of the [`spfs_proto::Object`] union type
+/// that can be loaded from an [`spfs_proto::AnyObject`] flatbuffer.
+#[allow(private_bounds)]
+pub trait ObjectProto: private::Sealed {}
+
+mod private {
+    /// Seals the [`super::FlatObject`] type from being created
+    /// for invalid `flatbuffer` types.
+    ///
+    /// The [`super::FlatObject`] type is only valid to be generic over
+    /// types that are a variant of the [`spfs_proto::Object`] union type
+    /// and the higher-level [`spfs_proto::AnyObject`].
+    pub(super) trait Sealed {}
+
+    impl<'buf> Sealed for spfs_proto::AnyObject<'buf> {}
+    impl<'buf> Sealed for spfs_proto::Platform<'buf> {}
+    impl<'buf> Sealed for spfs_proto::Layer<'buf> {}
+    impl<'buf> Sealed for spfs_proto::Manifest<'buf> {}
+    impl<'buf> Sealed for spfs_proto::Blob<'buf> {}
+
+    impl<T> super::ObjectProto for T where T: Sealed {}
+}
diff --git a/crates/spfs/src/graph/object_test.rs b/crates/spfs/src/graph/object_test.rs
new file mode 100644
index 0000000000..8d3d4c9884
--- /dev/null
+++ b/crates/spfs/src/graph/object_test.rs
@@ -0,0 +1,158 @@
+// Copyright (c) Sony Pictures Imageworks, et al.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/imageworks/spk
+
+use rstest::rstest;
+use strum::IntoEnumIterator;
+
+use super::{DigestStrategy, EncodingFormat, HeaderBuilder};
+use crate::encoding;
+use crate::fixtures::*;
+use crate::graph::{ObjectKind, Platform};
+use crate::prelude::*;
+
+#[rstest]
+fn test_legacy_header_compat() {
+    init_logging();
+
+    // the old spfs codebase used a single u64 instead of 8 x u8
+    // in the header, so make sure that objects saved in the legacy
+    // format can still be read by the new code and vice-versa
+
+    for kind in ObjectKind::iter() {
+        let mut old_style = Vec::new();
+        encoding::write_header(
+            &mut old_style,
+            // this prefix includes the newline that was previously written and
+            // validated separately
+            &super::Header::PREFIX[..super::Header::PREFIX.len() - 1],
+        )
+        .unwrap();
+        encoding::write_uint64(&mut old_style, kind as u8 as u64).unwrap();
+        let old_style = super::Header::new(old_style.as_slice())
+            .expect("old encoding should create a valid header");
+
+        let new_style = HeaderBuilder::new(kind)
+            .with_digest_strategy(DigestStrategy::Legacy)
+            .with_encoding_format(EncodingFormat::Legacy)
+            .build();
+
+        tracing::info!("{kind:?}");
+        tracing::info!("old: {old_style:?}");
+        tracing::info!("new: {new_style:?}");
+
+        assert_eq!(
+            old_style.object_kind(),
+            Some(kind),
+            "kind should read as u8 when saved via legacy encoding"
+        );
+
+        let mut reader = std::io::Cursor::new(&new_style[..]);
+        encoding::consume_header(
+            &mut reader,
+            // this prefix includes the newline that was previously written and
+            // validated separately
+            &super::Header::PREFIX[..super::Header::PREFIX.len() - 1],
+        )
+        .expect("header prefix should be consumable");
+        let result = encoding::read_uint64(&mut reader).expect("header kind should read as a u64");
+        assert_eq!(
+            kind as u8 as u64, result,
+            "kind should read as u64 when saved via legacy modes"
+        );
+    }
+}
+
+#[rstest]
+fn test_digest_with_salting() {
+    // the digest based on legacy encoding for a platform could easily
+    // collide with eight null bytes.
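+    // (an empty platform legacy-encodes to nothing but its u64 layer
+    // count, which is exactly eight null bytes, so the two hashes match)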
+ let legacy_platform = Platform::builder() + .with_header(|h| h.with_digest_strategy(DigestStrategy::Legacy)) + .build() + .digest() + .unwrap(); + let nulls_digest = [0, 0, 0, 0, 0, 0, 0, 0].as_slice().digest().unwrap(); + assert_eq!(legacy_platform, nulls_digest); + + // the newer digest method adds the kind and salt to make + // such cases less likely + let salted_platform = Platform::builder() + .with_header(|h| h.with_digest_strategy(DigestStrategy::WithKindAndSalt)) + .build() + .digest() + .unwrap(); + assert_ne!(salted_platform, nulls_digest); +} + +#[rstest] +fn test_digest_with_encoding() { + // check that two objects with the same digest strategy + // can be saved with two different encoding methods and + // still yield the same result + let legacy_platform = Platform::builder() + .with_header(|h| h.with_encoding_format(EncodingFormat::Legacy)) + .build() + .digest() + .unwrap(); + let flatbuf_platform = Platform::builder() + .with_header(|h| h.with_encoding_format(EncodingFormat::FlatBuffers)) + .build() + .digest() + .unwrap(); + assert_eq!(legacy_platform, flatbuf_platform); +} + +#[rstest] +#[case::legacy(EncodingFormat::Legacy)] +#[case::flatbuf(EncodingFormat::FlatBuffers)] +#[tokio::test] +async fn test_encoding_round_trip( + #[case] encoding_format: EncodingFormat, + #[future] tmprepo: TempRepo, +) { + // check that each encoding format can save and load back + // the same object data + + init_logging(); + let tmprepo = tmprepo.await; + + let mut manifest = generate_tree(&tmprepo).await; + manifest.set_header( + HeaderBuilder::new(ObjectKind::Manifest) + .with_encoding_format(encoding_format) + .build(), + ); + // generate tree stores the object using the current configured + // digest and encoding format, so we will store it again in the + // format that is being tested + let storable = manifest.to_graph_manifest(); + let digest = storable.digest().unwrap(); + tmprepo.remove_object(digest).await.unwrap(); + tmprepo.write_object(&storable).await.unwrap(); + + let loaded = tmprepo.read_manifest(digest).await.unwrap(); + assert_eq!( + loaded.header().encoding_format().unwrap(), + encoding_format, + "should retain config encoding format" + ); + let result = loaded.to_tracking_manifest(); + let mut diffs = crate::tracking::compute_diff(&manifest, &result); + diffs.retain(|d| !d.mode.is_unchanged()); + tracing::info!("Diffs:"); + for diff in diffs.iter() { + tracing::info!(" {diff}"); + } + assert!( + diffs.is_empty(), + "should generate, save and reload manifest with no changes to content" + ); + + let second = result.to_graph_manifest(); + assert_eq!( + second.digest().unwrap(), + digest, + "save, load and convert should have no effect on digest" + ); +} diff --git a/crates/spfs/src/graph/platform.rs b/crates/spfs/src/graph/platform.rs index 25386e0279..f43eb08422 100644 --- a/crates/spfs/src/graph/platform.rs +++ b/crates/spfs/src/graph/platform.rs @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use super::Stack; -use crate::encoding::Encodable; +use super::object::HeaderBuilder; +use super::{ObjectKind, Stack}; use crate::{encoding, Error, Result}; #[cfg(test)] @@ -15,67 +15,66 @@ mod platform_test; /// Platforms capture an entire runtime stack of layers or other platforms /// as a single, identifiable object which can be applied/installed to /// future runtimes. 
-#[derive(Debug, Eq, PartialEq, Default, Clone)] -pub struct Platform { - /// Items in the platform, where the first element is the bottom of the - /// stack, and may be overridden by later elements higher in the stack - pub stack: Stack, +pub type Platform = super::object::FlatObject>; + +impl std::fmt::Debug for Platform { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Platform") + .field("stack", &self.to_stack()) + .finish() + } } impl Platform { - pub fn from_encodable(layers: I) -> Result + pub fn from_digestible(layers: I) -> Result where - E: encoding::Encodable, - Error: std::convert::From, - I: IntoIterator, + D: encoding::Digestible, + Error: std::convert::From, + I: IntoIterator, { - Stack::from_encodable(layers).map(|stack| Self { stack }) + Stack::from_digestible(layers).map(Into::into) + } + + #[inline] + pub fn builder() -> PlatformBuilder { + PlatformBuilder::default() + } + + /// Reconstruct a mutable stack from this platform's layers + pub fn to_stack(&self) -> Stack { + self.iter_bottom_up().copied().collect() + } + + /// Iterate the stack lazily from bottom to top + pub fn iter_bottom_up(&self) -> impl Iterator { + self.proto().layers().iter() } /// Return the digests of objects that this manifest refers to. pub fn child_objects(&self) -> Vec { - self.stack.iter_bottom_up().collect() + self.iter_bottom_up().copied().collect() } -} -impl Encodable for Platform { - type Error = Error; - - fn encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { + pub(super) fn legacy_encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { // use a vec to know the name ahead of time and // avoid iterating the stack twice - let digests = self.stack.iter_bottom_up().collect::>(); - encoding::write_uint(&mut writer, digests.len() as u64)?; + let digests = self.iter_bottom_up().collect::>(); + encoding::write_uint64(&mut writer, digests.len() as u64)?; // for historical reasons, and to remain backward-compatible, platform // stacks are stored in reverse (top-down) order for digest in digests.into_iter().rev() { - encoding::write_digest(&mut writer, &digest)?; + encoding::write_digest(&mut writer, digest)?; } Ok(()) } } -impl encoding::Decodable for Platform { - fn decode(mut reader: &mut impl std::io::Read) -> Result { - let num_layers = encoding::read_uint(&mut reader)?; - let mut layers = Vec::with_capacity(num_layers as usize); - for _ in 0..num_layers { - layers.push(encoding::read_digest(&mut reader)?); - } - // for historical reasons, and to remain backward-compatible, platform - // stacks are stored in reverse (top-down) order - Ok(Self::from_iter(layers.into_iter().rev())) - } -} - impl From for Platform where T: Into, { fn from(value: T) -> Self { - Self { - stack: value.into(), - } + Self::builder().with_stack(value.into()).build() } } @@ -84,8 +83,88 @@ where Stack: FromIterator, { fn from_iter>(iter: I) -> Self { + Self::builder().with_stack(Stack::from_iter(iter)).build() + } +} + +#[derive(Debug)] +pub struct PlatformBuilder { + header: HeaderBuilder, + stack: Stack, +} + +impl Default for PlatformBuilder { + fn default() -> Self { Self { - stack: Stack::from_iter(iter), + header: HeaderBuilder::new(ObjectKind::Platform), + stack: Stack::default(), } } } + +impl PlatformBuilder { + pub fn with_stack(mut self, stack: Stack) -> Self { + self.stack.extend(stack.iter_bottom_up()); + self + } + + pub fn with_header(mut self, mut header: F) -> Self + where + F: FnMut(HeaderBuilder) -> HeaderBuilder, + { + self.header 
= header(self.header).with_object_kind(ObjectKind::Platform); + self + } + + pub fn build(self) -> Platform { + super::BUILDER.with_borrow_mut(|builder| { + let stack: Vec<_> = self.stack.iter_bottom_up().collect(); + let stack = builder.create_vector(&stack); + let platform = spfs_proto::Platform::create( + builder, + &spfs_proto::PlatformArgs { + layers: Some(stack), + }, + ); + let any = spfs_proto::AnyObject::create( + builder, + &spfs_proto::AnyObjectArgs { + object_type: spfs_proto::Object::Platform, + object: Some(platform.as_union_value()), + }, + ); + builder.finish_minimal(any); + let offset = unsafe { + // Safety: we have just created this buffer + // so already know the root type with certainty + flatbuffers::root_unchecked::(builder.finished_data()) + .object_as_platform() + .unwrap() + ._tab + .loc() + }; + let obj = unsafe { + // Safety: the provided buf and offset mut contain + // a valid object and point to the contained layer + // which is what we've done + Platform::new_with_header(self.header.build(), builder.finished_data(), offset) + }; + builder.reset(); // to be used again + obj + }) + } + + /// Read a data encoded using the legacy format, and + /// use the data to fill and complete this builder + pub fn legacy_decode(self, mut reader: &mut impl std::io::Read) -> Result { + let num_layers = encoding::read_uint64(&mut reader)?; + tracing::error!("read {} layers in platform", num_layers); + let mut layers = Vec::with_capacity(num_layers as usize); + for _ in 0..num_layers { + layers.push(encoding::read_digest(&mut reader)?); + } + // for historical reasons, and to remain backward-compatible, platform + // stacks are stored in reverse (top-down) order + Ok(Platform::from_iter(layers.into_iter().rev())) + } +} diff --git a/crates/spfs/src/graph/platform_test.rs b/crates/spfs/src/graph/platform_test.rs index 51fd99330b..2a995462b5 100644 --- a/crates/spfs/src/graph/platform_test.rs +++ b/crates/spfs/src/graph/platform_test.rs @@ -6,7 +6,7 @@ use rstest::rstest; use super::Platform; use crate::encoding; -use crate::encoding::{Decodable, Encodable}; +use crate::encoding::prelude::*; #[rstest] fn test_platform_encoding() { @@ -16,6 +16,9 @@ fn test_platform_encoding() { let mut stream = Vec::new(); expected.encode(&mut stream).unwrap(); - let actual = Platform::decode(&mut stream.as_slice()).unwrap(); - assert_eq!(actual, expected); + let actual = crate::graph::Object::decode(&mut stream.as_slice()) + .unwrap() + .into_platform() + .unwrap(); + assert_eq!(actual.digest().unwrap(), expected.digest().unwrap()); } diff --git a/crates/spfs/src/graph/stack.rs b/crates/spfs/src/graph/stack.rs index d1ac22cb1a..4e8c7bc128 100644 --- a/crates/spfs/src/graph/stack.rs +++ b/crates/spfs/src/graph/stack.rs @@ -21,12 +21,18 @@ pub struct Stack { bottom: Option>, } -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Clone, Eq, PartialEq)] struct Entry { value: Digest, next: Option>, } +impl std::fmt::Debug for Entry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}", self.value)) + } +} + impl Entry { fn new(value: Digest) -> Box { Box::new(Self { value, next: None }) @@ -40,11 +46,11 @@ impl std::fmt::Debug for Stack { } impl Stack { - pub fn from_encodable(items: I) -> Result + pub fn from_digestible(items: I) -> Result where - E: encoding::Encodable, - Error: std::convert::From, - I: IntoIterator, + D: encoding::Digestible, + Error: std::convert::From, + I: IntoIterator, { let mut stack = Self { bottom: None }; for item in 
items.into_iter() { @@ -53,10 +59,12 @@ impl Stack { Ok(stack) } + #[inline] pub fn is_empty(&self) -> bool { self.bottom.is_none() } + #[inline] pub fn clear(&mut self) { self.bottom.take(); } @@ -64,7 +72,7 @@ impl Stack { /// Add an item to the top of the stack. /// /// If the digest already exists in this stack, the previous - /// one is removed from it's position. + /// one is removed from its position. /// /// False is returned if no change was made to the stack /// because the digest was already at the top. @@ -85,7 +93,7 @@ impl Stack { if entry.next.is_none() { return false; } - // replace this node with it's next entry, + // replace this node with its next entry, // removing it from the stack let replace = entry.next.take(); *node = replace; diff --git a/crates/spfs/src/graph/tree.rs b/crates/spfs/src/graph/tree.rs index dc180e7c90..11febb57b0 100644 --- a/crates/spfs/src/graph/tree.rs +++ b/crates/spfs/src/graph/tree.rs @@ -2,116 +2,141 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk -use std::collections::BTreeSet; use std::io::BufRead; -use super::Entry; -use crate::encoding::Encodable; -use crate::{encoding, Error, Result}; +use super::{Entry, HasKind, ObjectKind}; +use crate::encoding::prelude::*; +use crate::{encoding, Result}; #[cfg(test)] #[path = "./tree_test.rs"] mod tree_test; -/// Tree is an ordered collection of entries. -/// -/// Only one entry of a given name is allowed at a time. -#[derive(Default, Clone)] -pub struct Tree { - pub entries: BTreeSet, -} +// Tree is an ordered collection of entries. +// +// Only one entry of a given name is allowed at a time. +#[derive(Copy, Clone)] +pub struct Tree<'buf>(pub(super) spfs_proto::Tree<'buf>); -impl Tree { - pub fn new(entries: impl Iterator) -> Self { - Self { - entries: entries.collect(), - } +impl<'buf> From> for Tree<'buf> { + fn from(value: spfs_proto::Tree<'buf>) -> Self { + Self(value) } +} - pub fn get>(&self, name: S) -> Option<&Entry> { - self.entries - .iter() - .find(|&entry| entry.name == name.as_ref()) +impl<'buf> Tree<'buf> { + pub fn entries(&self) -> impl Iterator> { + self.0.entries().iter().map(Into::into) } - pub fn is_empty(&self) -> bool { - self.entries.is_empty() + pub fn into_entries(self) -> impl Iterator> { + self.0.entries().iter().map(Into::into) } - pub fn len(&self) -> usize { - self.entries.len() + pub fn get>(&self, name: S) -> Option> { + self.entries().find(|entry| entry.name() == name.as_ref()) } - ///Add an entry to this tree. 
- /// - /// # Errors: - /// - if an entry with the same name exists - pub fn add(&mut self, entry: Entry) -> Result<()> { - if !self.entries.insert(entry) { - Err(Error::String("Tree entry already exists".to_owned())) - } else { - Ok(()) + pub(super) fn legacy_encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { + let mut entries: Vec<_> = self.entries().collect(); + encoding::write_uint64(&mut writer, entries.len() as u64)?; + // this is not the default sort mode for entries but + // matches the existing compatible encoding order + entries.sort_unstable_by_key(|e| e.name()); + for entry in entries.into_iter() { + entry.legacy_encode(writer)?; } + Ok(()) } - pub fn update(&mut self, entry: Entry) -> Result<()> { - let _ = self.remove(entry.name.as_str()); - self.add(entry) + pub(super) fn legacy_decode<'builder>( + builder: &mut flatbuffers::FlatBufferBuilder<'builder>, + mut reader: &mut impl BufRead, + ) -> Result>> { + let mut entries = Vec::new(); + let entry_count = encoding::read_uint64(&mut reader)?; + for _ in 0..entry_count { + entries.push(Entry::legacy_decode(builder, reader)?); + } + let entries = builder.create_vector(&entries); + Ok(spfs_proto::Tree::create( + builder, + &spfs_proto::TreeArgs { + entries: Some(entries), + }, + )) } +} - pub fn remove>(&mut self, name: S) -> Option<&Entry> { - self.entries - .iter() - .find(|&entry| entry.name == name.as_ref()) - } +impl<'buf> encoding::Digestible for Tree<'buf> { + type Error = crate::Error; - pub fn iter(&self) -> impl Iterator { - self.entries.iter() + fn digest(&self) -> std::result::Result { + let mut hasher = encoding::Hasher::new_sync(); + self.legacy_encode(&mut hasher)?; + Ok(hasher.digest()) } } -impl std::fmt::Debug for Tree { +impl<'buf> std::fmt::Debug for Tree<'buf> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("Tree {{ {:?} }}", self.digest().unwrap())) } } -impl PartialEq for Tree { - fn eq(&self, other: &Self) -> bool { - self.digest() - .unwrap_or_else(|_| encoding::NULL_DIGEST.into()) - == other - .digest() - .unwrap_or_else(|_| encoding::NULL_DIGEST.into()) +impl<'buf> HasKind for Tree<'buf> { + #[inline] + fn kind(&self) -> ObjectKind { + ObjectKind::Tree } } -impl Eq for Tree {} -impl encoding::Encodable for Tree { - type Error = Error; +/// A wrapper type that holds an owned buffer to an [`Tree`]. +/// +/// Trees are usually only constructed as part of a larger +/// type, such as a [`super::Manifest`], but for testing it helps +/// to be able to create one on its own. 
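+///
+/// A minimal usage sketch (illustrative; mirrors `tree_test.rs` below):
+///
+/// ```ignore
+/// let tree = TreeBuf::build(vec![/* EntryBuf values */]);
+/// let digest = tree.as_tree().digest()?;
+/// ```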
+#[cfg(test)] +pub struct TreeBuf(Box<[u8]>); - fn encode(&self, mut writer: &mut impl std::io::Write) -> Result<()> { - encoding::write_uint(&mut writer, self.len() as u64)?; - let mut entries: Vec<_> = self.entries.iter().collect(); - // this is not the default sort mode for entries but - // matches the existing compatible encoding order - entries.sort_unstable_by_key(|e| &e.name); - for entry in entries.into_iter() { - entry.encode(writer)?; - } - Ok(()) +#[cfg(test)] +impl TreeBuf { + pub fn build(entries: Vec) -> Self { + crate::graph::BUILDER.with_borrow_mut(|builder| { + let entries = entries + .into_iter() + .map(|entry| { + let entry = entry.as_entry(); + let name = builder.create_string(entry.name()); + spfs_proto::Entry::create( + builder, + &spfs_proto::EntryArgs { + kind: entry.kind().into(), + object: Some(entry.object()), + mode: entry.mode(), + size_: entry.size(), + name: Some(name), + }, + ) + }) + .collect::>(); + let entries = builder.create_vector(&entries); + let tree = spfs_proto::Tree::create( + builder, + &spfs_proto::TreeArgs { + entries: Some(entries), + }, + ); + builder.finish_minimal(tree); + let bytes = builder.finished_data().into(); + builder.reset(); + Self(bytes) + }) } -} -impl encoding::Decodable for Tree { - fn decode(mut reader: &mut impl BufRead) -> Result { - let mut tree = Tree { - entries: Default::default(), - }; - let entry_count = encoding::read_uint(&mut reader)?; - for _ in 0..entry_count { - tree.entries.insert(Entry::decode(reader)?); - } - Ok(tree) + pub fn as_tree(&self) -> Tree<'_> { + let e = + flatbuffers::root::>(&self.0[..]).expect("valid internal buffer"); + Tree(e) } } diff --git a/crates/spfs/src/graph/tree_test.rs b/crates/spfs/src/graph/tree_test.rs index b2f986bf6e..9d7c037dc6 100644 --- a/crates/spfs/src/graph/tree_test.rs +++ b/crates/spfs/src/graph/tree_test.rs @@ -4,73 +4,63 @@ use rstest::rstest; -use super::{Entry, Tree}; -use crate::encoding::{self, Encodable}; +use super::TreeBuf; +use crate::encoding; +use crate::encoding::prelude::*; use crate::fixtures::*; -use crate::graph::Object; +use crate::graph::entry::EntryBuf; use crate::tracking::EntryKind; -#[rstest(entries, digest, - case(vec![ - Entry{ - name: "pkg".into(), - mode: 0o40755, - size: 1, - kind: EntryKind::Tree, - object: "CLWYXZVIKLJ2YUQC32ZLMZGDCVUOL577YAMRABDTZYTJNU6O6SYA====".parse().unwrap(), - }, - ], - "CHDST3RTAIOJWFBV3OXFB2QZ4U5FTCCJIZXPEQJFCESZMIFZSPTA====".parse().unwrap() - ), - case(vec![ - Entry{ - name: ".helmignore".into(), - mode: 0o100644, - size: 342, - kind: EntryKind::Blob, - object: "NJDVDBWMXKU2BJG6L2LKLS3N3T47VUGXNPBY4BCHBLEEIRNSDILA====".parse().unwrap(), - }, - Entry{ - name: "Chart.yaml".into(), - mode: 0o100644, - size: 911, - kind: EntryKind::Blob, - object: "ULAX2BMLX3WKVI7YKRQLJEQDEWJRSDPCFPZPGBJCIQZJ4FIVZIKA====".parse().unwrap(), - }, - Entry{ - name: "templates".into(), - mode: 0o40755, - size: 1, - kind: EntryKind::Tree, - object: "6LMAERTKGND5WAA4VQQLDAVPXZIHGBOVUCVS7WEGHQIZPWSRJ5FA====".parse().unwrap(), - }, - Entry{ - name: "values.yaml".into(), - mode: 0o100644, - size: 1699, - kind: EntryKind::Blob, - object: "IZFXS6UQJTHYBVYK3KYPPZC3FYX6NL3L3MWXAJUULAJMFTGZPODQ====".parse().unwrap(), - }, - ], - "KP7FNGMD5XRT5KGZRDT5R33M3BGFS2SJG5DHFKJV3KKWZG3AGVXA====".parse().unwrap()), +#[rstest] +#[case( + TreeBuf::build(vec![ + EntryBuf::build( + "pkg", + EntryKind::Tree, + 0o40755, + 1, + &"CLWYXZVIKLJ2YUQC32ZLMZGDCVUOL577YAMRABDTZYTJNU6O6SYA====".parse().unwrap(), + ), + ]), + 
"CHDST3RTAIOJWFBV3OXFB2QZ4U5FTCCJIZXPEQJFCESZMIFZSPTA====".parse().unwrap() )] -fn test_tree_encoding_compat(entries: Vec, digest: encoding::Digest) { +#[case( + TreeBuf::build(vec![ + EntryBuf::build( + ".helmignore", + EntryKind::Blob, + 0o100644, + 342, + &"NJDVDBWMXKU2BJG6L2LKLS3N3T47VUGXNPBY4BCHBLEEIRNSDILA====".parse().unwrap(), + ), + EntryBuf::build( + "Chart.yaml", + EntryKind::Blob, + 0o100644, + 911, + &"ULAX2BMLX3WKVI7YKRQLJEQDEWJRSDPCFPZPGBJCIQZJ4FIVZIKA====".parse().unwrap(), + ), + EntryBuf::build( + "templates", + EntryKind::Tree, + 0o40755, + 1, + &"6LMAERTKGND5WAA4VQQLDAVPXZIHGBOVUCVS7WEGHQIZPWSRJ5FA====".parse().unwrap(), + ), + EntryBuf::build( + "values.yaml", + EntryKind::Blob, + 0o100644, + 1699, + &"IZFXS6UQJTHYBVYK3KYPPZC3FYX6NL3L3MWXAJUULAJMFTGZPODQ====".parse().unwrap(), + ), + ]), + "KP7FNGMD5XRT5KGZRDT5R33M3BGFS2SJG5DHFKJV3KKWZG3AGVXA====".parse().unwrap(), +)] +fn test_tree_encoding_compat(#[case] tree: TreeBuf, #[case] digest: encoding::Digest) { init_logging(); - let mut tree = Tree::default(); - for entry in entries.into_iter() { - tree.add(entry).unwrap(); - } - - let actual_digest = tree.digest().unwrap(); - assert_eq!( - actual_digest, digest, - "expected encoding to match existing result" - ); - - // Also check via `Object` - let tree_object = Object::Tree(tree); - let actual_digest = tree_object.digest().unwrap(); + let actual_digest = tree.as_tree().digest().unwrap(); assert_eq!( actual_digest, digest, "expected encoding to match existing result" diff --git a/crates/spfs/src/io.rs b/crates/spfs/src/io.rs index 2b9a7e6897..756edf9d68 100644 --- a/crates/spfs/src/io.rs +++ b/crates/spfs/src/io.rs @@ -3,12 +3,11 @@ // https://github.com/imageworks/spk use colored::*; -use spfs_encoding::Encodable; +use spfs_encoding::prelude::*; use crate::find_path::{ObjectPath, ObjectPathEntry}; -use crate::graph::Object; use crate::prelude::*; -use crate::{encoding, storage, tracking, Result}; +use crate::{encoding, graph, storage, tracking, Result}; /// Specifies how a digest should be formatted /// @@ -167,13 +166,11 @@ pub async fn pretty_print_filepath( match item { ObjectPathEntry::Parent(obj) => { - let name = match obj { - Object::Platform(_) => "platform", - Object::Layer(_) => "layer", - Object::Manifest(_) => "manifest", - Object::Blob(_) => "blob", - Object::Tree(_) => "tree", - Object::Mask => "mask", + let name = match obj.to_enum() { + graph::object::Enum::Platform(_) => "platform", + graph::object::Enum::Layer(_) => "layer", + graph::object::Enum::Manifest(_) => "manifest", + graph::object::Enum::Blob(_) => "blob", }; println!( diff --git a/crates/spfs/src/prelude.rs b/crates/spfs/src/prelude.rs index aa9b9581fe..44a23869ed 100644 --- a/crates/spfs/src/prelude.rs +++ b/crates/spfs/src/prelude.rs @@ -3,5 +3,6 @@ // https://github.com/imageworks/spk pub use crate::encoding::prelude::*; +pub use crate::graph::{HasKind, Kind}; pub use crate::storage::prelude::*; pub use crate::tracking::{BlobRead, BlobReadExt}; diff --git a/crates/spfs/src/proto/conversions.rs b/crates/spfs/src/proto/conversions.rs index bda2faaf0b..c3ec42321b 100644 --- a/crates/spfs/src/proto/conversions.rs +++ b/crates/spfs/src/proto/conversions.rs @@ -32,7 +32,7 @@ pub fn convert_digest(source: Option) -> Result impl TryFrom for encoding::Digest { type Error = Error; fn try_from(source: super::Digest) -> Result { - Self::from_bytes(source.bytes.as_slice()).map_err(Error::Encoding) + Ok(Self::from_bytes(source.bytes.as_slice())?) 
} } @@ -136,17 +136,15 @@ impl From for Error { } } -impl From<&graph::Object> for super::Object { - fn from(source: &graph::Object) -> Self { +impl From<&graph::object::Enum> for super::Object { + fn from(source: &graph::object::Enum) -> Self { use super::object::Kind; super::Object { kind: Some(match source { - graph::Object::Platform(o) => Kind::Platform(o.into()), - graph::Object::Layer(o) => Kind::Layer(o.into()), - graph::Object::Manifest(o) => Kind::Manifest(o.into()), - graph::Object::Tree(o) => Kind::Tree(o.into()), - graph::Object::Blob(o) => Kind::Blob(o.into()), - graph::Object::Mask => Kind::Mask(true), + graph::object::Enum::Platform(o) => Kind::Platform(o.into()), + graph::object::Enum::Layer(o) => Kind::Layer(o.into()), + graph::object::Enum::Manifest(o) => Kind::Manifest(o.into()), + graph::object::Enum::Blob(o) => Kind::Blob(o.into()), }), } } @@ -166,12 +164,14 @@ impl TryFrom for graph::Object { fn try_from(source: super::Object) -> Result { use super::object::Kind; match source.kind { - Some(Kind::Platform(o)) => Ok(graph::Object::Platform(o.try_into()?)), - Some(Kind::Layer(o)) => Ok(graph::Object::Layer(o.try_into()?)), - Some(Kind::Manifest(o)) => Ok(graph::Object::Manifest(o.try_into()?)), - Some(Kind::Tree(o)) => Ok(graph::Object::Tree(o.try_into()?)), - Some(Kind::Blob(o)) => Ok(graph::Object::Blob(o.try_into()?)), - Some(Kind::Mask(_)) => Ok(graph::Object::Mask), + Some(Kind::Platform(o)) => Ok(graph::Platform::try_from(o)?.into_object()), + Some(Kind::Layer(o)) => Ok(graph::Layer::try_from(o)?.into_object()), + Some(Kind::Manifest(o)) => Ok(graph::Manifest::try_from(o)?.into_object()), + Some(Kind::Blob(o)) => Ok(graph::Blob::try_from(o)?.into_object()), + Some(Kind::Tree(_)) | Some(Kind::Mask(_)) => Err(Error::String(format!( + "Unexpected and unsupported object kind {:?}", + source.kind + ))), None => Err(Error::String( "Expected non-empty object kind in rpc message".to_string(), )), @@ -182,7 +182,7 @@ impl TryFrom for graph::Object { impl From<&graph::Platform> for super::Platform { fn from(source: &graph::Platform) -> Self { Self { - stack: source.stack.iter_bottom_up().map(Into::into).collect(), + stack: source.iter_bottom_up().map(Into::into).collect(), } } } @@ -191,20 +191,20 @@ impl TryFrom for graph::Platform { type Error = Error; fn try_from(source: super::Platform) -> Result { - Ok(Self { - stack: source + Ok(Self::from( + source .stack .into_iter() .map(TryInto::try_into) - .collect::>()?, - }) + .collect::>()?, + )) } } impl From<&graph::Layer> for super::Layer { fn from(source: &graph::Layer) -> Self { Self { - manifest: Some((&source.manifest).into()), + manifest: Some(source.manifest().into()), } } } @@ -212,19 +212,17 @@ impl From<&graph::Layer> for super::Layer { impl TryFrom for graph::Layer { type Error = Error; fn try_from(source: super::Layer) -> Result { - Ok(Self { - manifest: convert_digest(source.manifest)?, - }) + Ok(Self::new(convert_digest(source.manifest)?)) } } impl From<&graph::Manifest> for super::Manifest { fn from(source: &graph::Manifest) -> Self { - let mut trees = source.iter_trees(); - let root = trees.next().map(Into::into); + let mut trees = source.iter_trees().map(|t| (&t).into()); + let root = trees.next(); Self { root, - trees: trees.map(Into::into).collect(), + trees: trees.collect(), } } } @@ -232,85 +230,101 @@ impl From<&graph::Manifest> for super::Manifest { impl TryFrom for graph::Manifest { type Error = Error; fn try_from(source: super::Manifest) -> Result { - let mut out = Self::new(source.root.try_into()?); 
- for tree in source.trees.into_iter() { - out.insert_tree(tree.try_into()?)?; + let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(256); + let make_tree = |entry: super::Tree| { + let entries = entry + .entries + .into_iter() + .map(|entry: super::Entry| { + let kind = match super::EntryKind::try_from(entry.kind) { + Ok(super::EntryKind::Tree) => spfs_proto::EntryKind::Tree, + Ok(super::EntryKind::Blob) => spfs_proto::EntryKind::Blob, + Ok(super::EntryKind::Mask) => spfs_proto::EntryKind::Mask, + Err(_) => return Err("Received unknown entry kind in rpc data".into()), + }; + let name = builder.create_string(&entry.name); + Ok(spfs_proto::Entry::create( + &mut builder, + &spfs_proto::EntryArgs { + kind, + object: Some(&convert_digest(entry.object)?), + mode: entry.mode, + size_: entry.size, + name: Some(name), + }, + )) + }) + .collect::>>()?; + let entries = builder.create_vector(&entries); + Ok(spfs_proto::Tree::create( + &mut builder, + &spfs_proto::TreeArgs { + entries: Some(entries), + }, + )) + }; + let trees = source + .root + .into_iter() + .chain(source.trees) + .map(make_tree) + .collect::>>()?; + let trees = builder.create_vector(&trees); + let manifest = spfs_proto::Manifest::create( + &mut builder, + &spfs_proto::ManifestArgs { trees: Some(trees) }, + ); + let any = spfs_proto::AnyObject::create( + &mut builder, + &spfs_proto::AnyObjectArgs { + object_type: spfs_proto::Object::Manifest, + object: Some(manifest.as_union_value()), + }, + ); + builder.finish_minimal(any); + Ok(unsafe { + // Safety: buf must contain an AnyObject of the provided + // type, which is what we just constructed + graph::Object::new_with_default_header( + builder.finished_data(), + graph::ObjectKind::Manifest, + ) } - Ok(out) + .into_manifest() + .expect("known to be a manifest")) } } -impl From<&graph::Tree> for super::Tree { +impl<'buf> From<&graph::Tree<'buf>> for super::Tree { fn from(source: &graph::Tree) -> Self { Self { - entries: source.entries.iter().map(Into::into).collect(), + entries: source.entries().map(|e| (&e).into()).collect(), } } } -impl TryFrom> for graph::Tree { - type Error = Error; - fn try_from(source: Option) -> Result { - source - .ok_or_else(|| Error::String("Expected non-null tree in rpc message".into()))? 
- .try_into() - } -} - -impl TryFrom for graph::Tree { - type Error = Error; - fn try_from(source: super::Tree) -> Result { - Ok(Self { - entries: source - .entries - .into_iter() - .map(TryInto::try_into) - .collect::>()?, - }) - } -} - -impl From<&graph::Entry> for super::Entry { +impl<'buf> From<&graph::Entry<'buf>> for super::Entry { fn from(source: &graph::Entry) -> Self { - let kind = match source.kind { + let kind = match source.kind() { tracking::EntryKind::Tree => super::EntryKind::Tree as i32, tracking::EntryKind::Blob => super::EntryKind::Blob as i32, tracking::EntryKind::Mask => super::EntryKind::Mask as i32, }; Self { - object: Some((&source.object).into()), + object: Some((source.object()).into()), kind, - mode: source.mode, - size: source.size, - name: source.name.clone(), + mode: source.mode(), + size: source.size(), + name: source.name().to_owned(), } } } -impl TryFrom for graph::Entry { - type Error = Error; - fn try_from(source: super::Entry) -> Result { - let kind = match super::EntryKind::try_from(source.kind) { - Ok(super::EntryKind::Tree) => tracking::EntryKind::Tree, - Ok(super::EntryKind::Blob) => tracking::EntryKind::Blob, - Ok(super::EntryKind::Mask) => tracking::EntryKind::Mask, - Err(_) => return Err("Received unknown entry kind in rpm data".into()), - }; - Ok(Self { - object: convert_digest(source.object)?, - kind, - mode: source.mode, - size: source.size, - name: source.name, - }) - } -} - impl From<&graph::Blob> for super::Blob { fn from(source: &graph::Blob) -> Self { Self { - payload: Some((&source.payload).into()), - size: source.size, + payload: Some(source.payload().into()), + size: source.size(), } } } @@ -318,10 +332,7 @@ impl From<&graph::Blob> for super::Blob { impl TryFrom for graph::Blob { type Error = Error; fn try_from(source: super::Blob) -> Result { - Ok(Self { - payload: convert_digest(source.payload)?, - size: source.size, - }) + Ok(Self::new(convert_digest(source.payload)?, source.size)) } } diff --git a/crates/spfs/src/proto/defs/types.proto b/crates/spfs/src/proto/defs/types.proto index 834b500038..c056310c67 100644 --- a/crates/spfs/src/proto/defs/types.proto +++ b/crates/spfs/src/proto/defs/types.proto @@ -18,9 +18,9 @@ message Object { Platform platform = 1; Layer layer = 2; Manifest manifest = 3; - Tree tree = 4; + Tree tree = 4 [deprecated = true]; Blob blob = 5; - bool mask = 6; + bool mask = 6 [deprecated = true]; } } diff --git a/crates/spfs/src/resolve.rs b/crates/spfs/src/resolve.rs index 77fc6e488a..4088bd41a7 100644 --- a/crates/spfs/src/resolve.rs +++ b/crates/spfs/src/resolve.rs @@ -5,7 +5,6 @@ use std::collections::HashSet; use std::path::{Path, PathBuf}; -use encoding::Encodable; use futures::{FutureExt, TryFutureExt, TryStreamExt}; use itertools::Itertools; use nonempty::NonEmpty; @@ -15,7 +14,7 @@ use super::config::get_config; use crate::prelude::*; use crate::storage::fallback::FallbackProxy; use crate::storage::fs::{ManifestRenderPath, RenderSummary}; -use crate::{encoding, graph, runtime, storage, tracking, Error, Result}; +use crate::{graph, runtime, storage, tracking, Error, Result}; #[cfg(test)] #[path = "./resolve_test.rs"] @@ -158,7 +157,7 @@ pub async fn compute_environment_manifest( for layer in layers { manifest.update( &repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await? 
.to_tracking_manifest(), ) @@ -170,21 +169,21 @@ pub async fn compute_object_manifest( obj: graph::Object, repo: &storage::RepositoryHandle, ) -> Result { - match obj { - graph::Object::Layer(obj) => Ok(repo - .read_manifest(obj.manifest) + match obj.into_enum() { + graph::object::Enum::Layer(obj) => Ok(repo + .read_manifest(*obj.manifest()) .await? .to_tracking_manifest()), - graph::Object::Platform(obj) => { - let layers = resolve_stack_to_layers(&obj.stack, Some(repo)).await?; + graph::object::Enum::Platform(obj) => { + let layers = resolve_stack_to_layers(&obj.to_stack(), Some(repo)).await?; let mut manifest = tracking::Manifest::default(); for layer in layers.iter() { - let layer_manifest = repo.read_manifest(layer.manifest).await?; + let layer_manifest = repo.read_manifest(*layer.manifest()).await?; manifest.update(&layer_manifest.to_tracking_manifest()); } Ok(manifest) } - graph::Object::Manifest(obj) => Ok(obj.to_tracking_manifest()), + graph::object::Enum::Manifest(obj) => Ok(obj.to_tracking_manifest()), obj => Err(format!("Resolve: Unhandled object of type {:?}", obj.kind()).into()), } } @@ -250,7 +249,7 @@ where for (index, layer) in layers.iter().enumerate() { manifests.push(ResolvedManifest::Existing { order: index, - manifest: repo.read_manifest(layer.manifest).await?, + manifest: repo.read_manifest(*layer.manifest()).await?, }); } @@ -313,17 +312,12 @@ where manifest.update(&next.to_tracking_manifest()); } } - let manifest = graph::Manifest::from(&manifest); + let manifest = manifest.to_graph_manifest(); // Store the newly created manifest so that the render process - // can read it back. This little dance avoid an expensive - // (300 ms) clone. - let object = manifest.into(); - repo.write_object(&object).await?; - flattened_layers.insert(object.digest().expect("Object has valid digest")); - match object { - graph::Object::Manifest(m) => resolved_manifests.push(m), - _ => unreachable!(), - } + // can read it back. 
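+            // (write_object is now generic over FlatObject, so no
+            // conversion into the Object enum is needed here anymore)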
+ repo.write_object(&manifest).await?; + flattened_layers.insert(manifest.digest().expect("Object has valid digest")); + resolved_manifests.push(manifest); } } } @@ -416,16 +410,17 @@ where let mut layers = Vec::new(); for digest in stack.iter_bottom_up() { let entry = repo.read_object(digest).await?; - match entry { - graph::Object::Layer(layer) => layers.push(layer), - graph::Object::Platform(platform) => { - let mut expanded = resolve_stack_to_layers_with_repo(&platform.stack, repo).await?; + match entry.into_enum() { + graph::object::Enum::Layer(layer) => layers.push(layer), + graph::object::Enum::Platform(platform) => { + let mut expanded = + resolve_stack_to_layers_with_repo(&platform.to_stack(), repo).await?; layers.append(&mut expanded); } - graph::Object::Manifest(manifest) => { + graph::object::Enum::Manifest(manifest) => { layers.push(graph::Layer::new(manifest.digest().unwrap())) } - obj => { + obj @ graph::object::Enum::Blob(_) => { return Err(format!( "Cannot resolve object into a mountable filesystem layer: {:?}", obj.kind() diff --git a/crates/spfs/src/resolve_test.rs b/crates/spfs/src/resolve_test.rs index 5fa8286e35..514584ec50 100644 --- a/crates/spfs/src/resolve_test.rs +++ b/crates/spfs/src/resolve_test.rs @@ -16,15 +16,11 @@ use crate::{encoding, graph}; async fn test_stack_to_layers_dedupe(#[future] tmprepo: TempRepo) { let repo = tmprepo.await; let layer = graph::Layer::new(encoding::EMPTY_DIGEST.into()); - let platform = graph::Platform::from_encodable([&layer, &layer]).unwrap(); - let mut stack = graph::Stack::from_encodable([&layer]).unwrap(); + let platform = graph::Platform::from_digestible([&layer, &layer]).unwrap(); + let mut stack = graph::Stack::from_digestible([&layer]).unwrap(); stack.push(platform.digest().unwrap()); - repo.write_object(&graph::Object::Layer(layer)) - .await - .unwrap(); - repo.write_object(&graph::Object::Platform(platform)) - .await - .unwrap(); + repo.write_object(&layer).await.unwrap(); + repo.write_object(&platform).await.unwrap(); let resolved = resolve_stack_to_layers(&stack, Some(&repo)).await.unwrap(); assert_eq!(resolved.len(), 1, "should deduplicate layers in resolve"); } @@ -52,7 +48,7 @@ async fn test_auto_merge_layers(tmpdir: tempfile::TempDir) { .await .unwrap(); let layer = repo - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); layers.push(layer); diff --git a/crates/spfs/src/runtime/storage.rs b/crates/spfs/src/runtime/storage.rs index fa8b52a278..1a2b65b1e8 100644 --- a/crates/spfs/src/runtime/storage.rs +++ b/crates/spfs/src/runtime/storage.rs @@ -27,12 +27,11 @@ use tokio::io::AsyncReadExt; use super::startup_ps; #[cfg(unix)] use super::{startup_csh, startup_sh}; -use crate::encoding::{self, Encodable}; use crate::env::SPFS_DIR_PREFIX; use crate::prelude::*; use crate::storage::fs::DURABLE_EDITS_DIR; use crate::storage::RepositoryHandle; -use crate::{bootstrap, graph, storage, tracking, Error, Result}; +use crate::{bootstrap, encoding, graph, storage, tracking, Error, Result}; #[cfg(test)] #[path = "./storage_test.rs"] @@ -882,11 +881,9 @@ impl Runtime { /// Generate a platform with all the layers from this runtime /// properly stacked. 
pub fn to_platform(&self) -> graph::Platform { - let mut platform = graph::Platform { - stack: self.status.stack.clone(), - }; - platform.stack.extend(self.status.flattened_layers.iter()); - platform + let mut stack = self.status.stack.clone(); + stack.extend(self.status.flattened_layers.iter()); + stack.into() } /// Write out the startup script data to disk, ensuring @@ -1223,7 +1220,7 @@ impl Storage { pub async fn save_runtime(&self, rt: &Runtime) -> Result<()> { let payload_tag = runtime_tag(RuntimeDataType::Payload, rt.name())?; let meta_tag = runtime_tag(RuntimeDataType::Metadata, rt.name())?; - let platform: graph::Object = rt.to_platform().into(); + let platform = rt.to_platform(); let platform_digest = platform.digest()?; let config_data = serde_json::to_string(&rt.data)?; let (_, config_digest) = tokio::try_join!( diff --git a/crates/spfs/src/server/database.rs b/crates/spfs/src/server/database.rs index c4e2dffcf2..46de0c89fc 100644 --- a/crates/spfs/src/server/database.rs +++ b/crates/spfs/src/server/database.rs @@ -48,7 +48,7 @@ impl proto::database_service_server::DatabaseService for DatabaseService { let request = request.into_inner(); let digest = proto::handle_error!(convert_digest(request.digest)); let object = { proto::handle_error!(self.repo.read_object(digest).await) }; - let result = proto::ReadObjectResponse::ok((&object).into()); + let result = proto::ReadObjectResponse::ok((&object.into_enum()).into()); Ok(Response::new(result)) } diff --git a/crates/spfs/src/server/payload.rs b/crates/spfs/src/server/payload.rs index d5967d99f8..7a2e3a06a2 100644 --- a/crates/spfs/src/server/payload.rs +++ b/crates/spfs/src/server/payload.rs @@ -175,7 +175,7 @@ async fn handle_uncompressed_upload( repo: Arc, reader: Pin>, ) -> crate::Result> { - // Safety: it is unsafe to create a payload without it's corresponding + // Safety: it is unsafe to create a payload without its corresponding // blob, but this payload http server is part of a larger repository // and does not intend to be responsible for ensuring the integrity // of the object graph - only the up/down of payload data diff --git a/crates/spfs/src/storage/blob.rs b/crates/spfs/src/storage/blob.rs index d6585dbc26..73a62f9161 100644 --- a/crates/spfs/src/storage/blob.rs +++ b/crates/spfs/src/storage/blob.rs @@ -15,12 +15,8 @@ pub type BlobStreamItem = Result<(encoding::Digest, graph::Blob)>; pub trait BlobStorage: graph::Database + Sync + Send { /// Iterate the objects in this storage which are blobs. fn iter_blobs<'db>(&'db self) -> Pin + 'db>> { - use graph::Object; let stream = self.iter_objects().filter_map(|res| match res { - Ok((digest, obj)) => match obj { - Object::Blob(manifest) => Some(Ok((digest, manifest))), - _ => None, - }, + Ok((digest, obj)) => obj.into_blob().map(|b| Ok((digest, b))), Err(err) => Some(Err(err)), }); Box::pin(stream) @@ -28,17 +24,19 @@ pub trait BlobStorage: graph::Database + Sync + Send { /// Return the blob identified by the given digest. 
async fn read_blob(&self, digest: encoding::Digest) -> Result { - use graph::Object; - match self.read_object(digest).await { + match self.read_object(digest).await.map(graph::Object::into_blob) { Err(err) => Err(err), - Ok(Object::Blob(blob)) => Ok(blob), - Ok(object) => Err(Error::ObjectNotABlob(object, digest)), + Ok(Some(blob)) => Ok(blob), + Ok(None) => Err(Error::NotCorrectKind { + desired: graph::ObjectKind::Blob, + digest, + }), } } /// Store the given blob async fn write_blob(&self, blob: graph::Blob) -> Result<()> { - self.write_object(&graph::Object::Blob(blob)).await + self.write_object(&blob).await } } diff --git a/crates/spfs/src/storage/database_test.rs b/crates/spfs/src/storage/database_test.rs index d6e89d2ac7..50b0a97e8a 100644 --- a/crates/spfs/src/storage/database_test.rs +++ b/crates/spfs/src/storage/database_test.rs @@ -20,7 +20,7 @@ async fn test_object_existence( ) { let tmprepo = tmprepo.await; let digest = encoding::EMPTY_DIGEST.into(); - let obj = graph::Blob::new(digest, 0).into(); + let obj = graph::Blob::new(digest, 0); tmprepo .write_object(&obj) .await diff --git a/crates/spfs/src/storage/fallback/repository.rs b/crates/spfs/src/storage/fallback/repository.rs index da1ff9ad19..3b405db54e 100644 --- a/crates/spfs/src/storage/fallback/repository.rs +++ b/crates/spfs/src/storage/fallback/repository.rs @@ -11,6 +11,7 @@ use futures::Stream; use relative_path::RelativePath; use crate::config::ToAddress; +use crate::graph::ObjectProto; use crate::prelude::*; use crate::storage::fs::{FsHashStore, ManifestRenderPath, OpenFsRepository, RenderStore}; use crate::storage::tag::TagSpecAndTagStream; @@ -113,7 +114,7 @@ impl graph::DatabaseView for FallbackProxy { // missing object. Best effort; ignore errors. if let Err(err) = self.primary.write_object(obj).await { #[cfg(feature = "sentry")] - tracing::error!(target: "sentry", ?err, ?digest, "Failed to repair missing object"); + tracing::error!(target: "sentry", ?err, %digest, "Failed to repair missing object"); tracing::warn!("Failed to repair missing object: {err}"); } else { @@ -155,7 +156,7 @@ impl graph::DatabaseView for FallbackProxy { #[async_trait::async_trait] impl graph::Database for FallbackProxy { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { self.primary.write_object(obj).await?; Ok(()) } diff --git a/crates/spfs/src/storage/fs/database.rs b/crates/spfs/src/storage/fs/database.rs index b2680cb243..1964afb65a 100644 --- a/crates/spfs/src/storage/fs/database.rs +++ b/crates/spfs/src/storage/fs/database.rs @@ -8,12 +8,11 @@ use std::pin::Pin; use chrono::{DateTime, Utc}; use close_err::Closable; -use encoding::{Decodable, Encodable}; +use encoding::prelude::*; use futures::{Stream, StreamExt, TryFutureExt}; -use graph::DatabaseView; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use crate::graph::Object; +use crate::graph::{DatabaseView, Object, ObjectProto}; use crate::{encoding, graph, Error, Result}; #[async_trait::async_trait] @@ -57,7 +56,7 @@ impl DatabaseView for super::FsRepository { #[async_trait::async_trait] impl graph::Database for super::FsRepository { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { self.opened().await?.write_object(obj).await } @@ -97,7 +96,10 @@ impl DatabaseView for super::OpenFsRepository { file.read_to_end(&mut buf).await.map_err(|err| { Error::StorageReadError("read_to_end on object file", 
filepath.clone(), err) })?; - Object::decode(&mut buf.as_slice()) + // if the capacity of the vec already equals the length then a conversion + // to the Bytes type in the `new` call will avoid reallocating the data + buf.shrink_to_fit(); + Object::new(buf) } fn find_digests( @@ -125,14 +127,14 @@ impl DatabaseView for super::OpenFsRepository { #[async_trait::async_trait] impl graph::Database for super::OpenFsRepository { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { let digest = obj.digest()?; let filepath = self.objects.build_digest_path(&digest); if filepath.exists() { - tracing::trace!(?digest, "object already exists"); + tracing::trace!(%digest, kind=%std::any::type_name::(), "object already exists"); return Ok(()); } - tracing::trace!(?digest, kind = ?obj.kind(), "writing object to db"); + tracing::trace!(%digest, kind=%std::any::type_name::(), "writing object to db"); // we need to use a temporary file here, so that // other processes don't try to read our incomplete @@ -223,7 +225,7 @@ impl graph::Database for super::OpenFsRepository { )), }; } - tracing::trace!(?digest, "removed object from db"); + tracing::trace!(%digest, "removed object from db"); Ok(()) } diff --git a/crates/spfs/src/storage/fs/hash_store.rs b/crates/spfs/src/storage/fs/hash_store.rs index 16649e4da8..ea6192677f 100644 --- a/crates/spfs/src/storage/fs/hash_store.rs +++ b/crates/spfs/src/storage/fs/hash_store.rs @@ -310,7 +310,7 @@ impl FsHashStore { object_permissions, } => { tracing::trace!( - ?digest, + %digest, ?working_file, ?copied, ?object_permissions, diff --git a/crates/spfs/src/storage/fs/payloads.rs b/crates/spfs/src/storage/fs/payloads.rs index 64001b6f04..38fc6d3882 100644 --- a/crates/spfs/src/storage/fs/payloads.rs +++ b/crates/spfs/src/storage/fs/payloads.rs @@ -11,7 +11,7 @@ use futures::{Stream, StreamExt, TryFutureExt}; use super::{FsRepository, OpenFsRepository}; use crate::storage::prelude::*; use crate::tracking::BlobRead; -use crate::{encoding, Error, Result}; +use crate::{encoding, graph, Error, Result}; #[async_trait::async_trait] impl crate::storage::PayloadStorage for FsRepository { @@ -82,7 +82,12 @@ impl crate::storage::PayloadStorage for OpenFsRepository { // blob is really unknown or just the payload is missing. match self.read_blob(digest).await { Ok(blob) => Err(Error::ObjectMissingPayload(blob.into(), digest)), - Err(err @ Error::ObjectNotABlob(_, _)) => Err(err), + Err( + err @ Error::NotCorrectKind { + desired: graph::ObjectKind::Blob, + .. + }, + ) => Err(err), Err(_) => Err(Error::UnknownObject(digest)), } } diff --git a/crates/spfs/src/storage/fs/render_reporter.rs b/crates/spfs/src/storage/fs/render_reporter.rs index 61374dc2d1..37172a5848 100644 --- a/crates/spfs/src/storage/fs/render_reporter.rs +++ b/crates/spfs/src/storage/fs/render_reporter.rs @@ -39,18 +39,18 @@ pub trait RenderReporter: Send + Sync { fn rendered_layer(&self, _manifest: &graph::Manifest) {} /// Called when an entry has been identified to render - fn visit_entry(&self, _entry: &graph::Entry) {} + fn visit_entry(&self, _entry: graph::Entry<'_>) {} /// Called when a blob has finished rendering. /// /// [`Self::rendered_entry`] will also be called for the same entry. - fn rendered_blob(&self, _entry: &graph::Entry, _render_blob_result: &RenderBlobResult) {} + fn rendered_blob(&self, _entry: graph::Entry<'_>, _render_blob_result: &RenderBlobResult) {} /// Called when an entry has finished rendering. 
/// /// [`Self::rendered_blob`] will also be called for the same entry when the entry /// is a blob. - fn rendered_entry(&self, _entry: &graph::Entry) {} + fn rendered_entry(&self, _entry: graph::Entry<'_>) {} } #[derive(Default)] @@ -80,19 +80,19 @@ impl RenderReporter for ConsoleRenderReporter { bars.layers.inc(1); } - fn visit_entry(&self, entry: &graph::Entry) { + fn visit_entry(&self, entry: graph::Entry<'_>) { let bars = self.get_bars(); bars.entries.inc_length(1); - if entry.kind.is_blob() { - bars.bytes.inc_length(entry.size); + if entry.kind().is_blob() { + bars.bytes.inc_length(entry.size()); } } - fn rendered_entry(&self, entry: &graph::Entry) { + fn rendered_entry(&self, entry: graph::Entry<'_>) { let bars = self.get_bars(); bars.entries.inc(1); - if entry.kind.is_blob() { - bars.bytes.inc(entry.size); + if entry.kind().is_blob() { + bars.bytes.inc(entry.size()); } } } @@ -143,19 +143,19 @@ impl<'a> RenderReporter for MultiReporter<'a> { } } - fn visit_entry(&self, entry: &graph::Entry) { + fn visit_entry(&self, entry: graph::Entry<'_>) { for reporter in self.reporters.iter() { reporter.visit_entry(entry) } } - fn rendered_blob(&self, entry: &graph::Entry, render_blob_result: &RenderBlobResult) { + fn rendered_blob(&self, entry: graph::Entry<'_>, render_blob_result: &RenderBlobResult) { for reporter in self.reporters.iter() { reporter.rendered_blob(entry, render_blob_result) } } - fn rendered_entry(&self, entry: &graph::Entry) { + fn rendered_entry(&self, entry: graph::Entry<'_>) { for reporter in self.reporters.iter() { reporter.rendered_entry(entry) } diff --git a/crates/spfs/src/storage/fs/render_summary.rs b/crates/spfs/src/storage/fs/render_summary.rs index 3267957ce0..05c33dd439 100644 --- a/crates/spfs/src/storage/fs/render_summary.rs +++ b/crates/spfs/src/storage/fs/render_summary.rs @@ -134,10 +134,10 @@ impl RenderSummaryReporter { } impl RenderReporter for RenderSummaryReporter { - fn rendered_blob(&self, entry: &crate::graph::Entry, render_blob_result: &RenderBlobResult) { + fn rendered_blob(&self, entry: crate::graph::Entry<'_>, render_blob_result: &RenderBlobResult) { self.render_summary.add(RenderBlobResultWithEntrySize( render_blob_result, - entry.size as usize, + entry.size() as usize, )); } } diff --git a/crates/spfs/src/storage/fs/renderer_test.rs b/crates/spfs/src/storage/fs/renderer_test.rs index 0dd0b34c4f..0a7ebd8561 100644 --- a/crates/spfs/src/storage/fs/renderer_test.rs +++ b/crates/spfs/src/storage/fs/renderer_test.rs @@ -7,9 +7,8 @@ use std::sync::Arc; use rstest::rstest; use super::was_render_completed; -use crate::encoding::Encodable; +use crate::encoding::prelude::*; use crate::fixtures::*; -use crate::graph::Manifest; use crate::storage::fs::{FsRepository, OpenFsRepository}; use crate::storage::{Repository, RepositoryHandle}; use crate::tracking; @@ -41,12 +40,15 @@ async fn test_render_manifest(tmpdir: tempfile::TempDir) { } } - let expected = Manifest::from(&manifest); + let expected = manifest.to_graph_manifest(); let rendered_path = crate::storage::fs::Renderer::new(&storage) .render_manifest(&expected, None) .await .expect("should successfully render manifest"); - let actual = Manifest::from(&tracking::compute_manifest(rendered_path).await.unwrap()); + let actual = tracking::compute_manifest(rendered_path) + .await + .unwrap() + .to_graph_manifest(); assert_eq!(actual.digest().unwrap(), expected.digest().unwrap()); } @@ -69,7 +71,7 @@ async fn test_render_manifest_with_repo(tmpdir: tempfile::TempDir) { .commit_dir(&src_dir) .await .unwrap(); 
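    // A sketch of the conversion convention adopted below, assuming a
    // `tracking::Manifest` value `m`: `let storable = m.to_graph_manifest();`
    // followed by `storable.digest()?` for its content address.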
- let manifest = Manifest::from(&expected_manifest); + let manifest = expected_manifest.to_graph_manifest(); // Safety: tmprepo was created as an FsRepository let tmprepo = match &*tmprepo { @@ -95,7 +97,7 @@ async fn test_render_manifest_with_repo(tmpdir: tempfile::TempDir) { println!("DIFFS:"); println!("{}", crate::io::format_diffs(diffs.iter())); assert_eq!( - Manifest::from(&expected_manifest).digest().unwrap(), - Manifest::from(&rendered_manifest).digest().unwrap() + expected_manifest.to_graph_manifest().digest().unwrap(), + rendered_manifest.to_graph_manifest().digest().unwrap() ); } diff --git a/crates/spfs/src/storage/fs/renderer_unix.rs b/crates/spfs/src/storage/fs/renderer_unix.rs index 01b798b0f6..a8d7d8fef1 100644 --- a/crates/spfs/src/storage/fs/renderer_unix.rs +++ b/crates/spfs/src/storage/fs/renderer_unix.rs @@ -20,7 +20,7 @@ use rand::seq::SliceRandom; use tokio::io::AsyncReadExt; use tokio::sync::Semaphore; -use crate::encoding::{self, Encodable}; +use crate::encoding::prelude::*; use crate::runtime::makedirs_with_perms; use crate::storage::fs::render_reporter::RenderBlobResult; use crate::storage::fs::{ @@ -31,7 +31,7 @@ use crate::storage::fs::{ }; use crate::storage::prelude::*; use crate::storage::LocalRepository; -use crate::{get_config, graph, tracking, Error, OsError, Result}; +use crate::{encoding, get_config, graph, tracking, Error, OsError, Result}; #[cfg(test)] #[path = "./renderer_test.rs"] @@ -261,14 +261,15 @@ where .map_err(|err| err.wrap("resolve stack to layers"))?; let mut futures = futures::stream::FuturesOrdered::new(); for layer in layers { + let digest = *layer.manifest(); let fut = self .repo - .read_manifest(layer.manifest) - .map_err(move |err| err.wrap(format!("read manifest {}", layer.manifest))) + .read_manifest(digest) + .map_err(move |err| err.wrap(format!("read manifest {digest}"))) .and_then(move |manifest| async move { self.render_manifest(&manifest, render_type) .await - .map_err(move |err| err.wrap(format!("render manifest {}", layer.manifest))) + .map_err(move |err| err.wrap(format!("render manifest {digest}"))) }); futures.push_back(fut); } @@ -292,13 +293,13 @@ where let layers = crate::resolve::resolve_stack_to_layers_with_repo(&stack, self.repo).await?; let mut manifests = Vec::with_capacity(layers.len()); for layer in layers { - manifests.push(self.repo.read_manifest(layer.manifest).await?); + manifests.push(self.repo.read_manifest(*layer.manifest()).await?); } let mut manifest = tracking::Manifest::default(); for next in manifests.into_iter() { manifest.update(&next.to_tracking_manifest()); } - let manifest = graph::Manifest::from(&manifest); + let manifest = manifest.to_graph_manifest(); self.render_manifest_into_dir(&manifest, target_dir, render_type) .await } @@ -408,7 +409,7 @@ where } let mut res = self - .render_into_dir_fd(root_dir, root_node.clone(), manifest, render_type) + .render_into_dir_fd(root_dir, root_node, manifest, render_type) .await; if let Err(Error::StorageWriteError(_, p, _)) = &mut res { *p = target_dir.join(p.as_path()); @@ -422,7 +423,7 @@ where async fn render_into_dir_fd( &self, root_dir_fd: Fd, - tree: graph::Tree, + tree: graph::Tree<'async_recursion>, manifest: &graph::Manifest, render_type: RenderType, ) -> Result<()> @@ -441,42 +442,43 @@ where // the same files as one another at the same time. 
This can happen, // for example, when multiple frames land on the same machine in a // render farm and they both start to render the same env at the same time - let mut entries = tree.entries.into_iter().collect::>(); + let mut entries = tree.entries().collect::>(); entries.shuffle(&mut rand::thread_rng()); let root_dir_fd = root_dir_fd.as_raw_fd(); let mut stream = futures::stream::iter(entries) .then(move |entry| { let fut = async move { - let mut root_path = PathBuf::from(&entry.name); - match entry.kind { + let mut root_path = PathBuf::from(entry.name()); + match entry.kind() { tracking::EntryKind::Tree => { - let tree = manifest.get_tree(&entry.object).ok_or_else(|| { - Error::String(format!("Failed to render: manifest is internally inconsistent (missing child tree {})", entry.object)) + let tree = manifest.get_tree(entry.object()).ok_or_else(|| { + Error::String(format!("Failed to render: manifest is internally inconsistent (missing child tree {})", *entry.object())) })?; - let child_dir = create_and_open_dir_at(root_dir_fd, entry.name.clone()) + let child_dir = create_and_open_dir_at(root_dir_fd, entry.name().to_owned()) .await .map_err(|err| { Error::StorageWriteError( "create dir during render", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err, ) })?; let mut res = self .render_into_dir_fd( child_dir.as_raw_fd(), - tree.clone(), + tree, manifest, render_type, ) .await; if res.is_ok() { + let mode = Mode::from_bits_truncate(entry.mode()); res = tokio::task::spawn_blocking(move || { nix::sys::stat::fchmod( child_dir.as_raw_fd(), - Mode::from_bits_truncate(entry.mode), + mode, ) }) .await @@ -493,11 +495,11 @@ where root_path.push(p.as_path()); *p = root_path; } - res.map(|_| None).map_err(|err| err.wrap(format!("render_into_dir '{}'", entry.name))) + res.map(|_| None).map_err(|err| err.wrap(format!("render_into_dir '{}'", entry.name()))) } tracking::EntryKind::Mask => Ok(None), tracking::EntryKind::Blob => { - self.render_blob(root_dir_fd, &entry, render_type).await.map(Some).map_err(|err| err.wrap(format!("render blob '{}'", entry.name))) + self.render_blob(root_dir_fd, entry, render_type).await.map(Some).map_err(|err| err.wrap(format!("render blob '{}'", entry.name()))) } }.map(|render_blob_result_opt| (entry, render_blob_result_opt)) }; @@ -510,10 +512,10 @@ where match res { Err(error) => return Err(error), Ok((entry, Some(render_blob_result))) => { - self.reporter.rendered_blob(&entry, &render_blob_result); - self.reporter.rendered_entry(&entry); + self.reporter.rendered_blob(entry, &render_blob_result); + self.reporter.rendered_entry(entry); } - Ok((entry, _)) => self.reporter.rendered_entry(&entry), + Ok((entry, _)) => self.reporter.rendered_entry(entry), } } @@ -525,7 +527,7 @@ where async fn render_blob( &self, dir_fd: Fd, - entry: &graph::Entry, + entry: graph::Entry<'_>, render_type: RenderType, ) -> Result where @@ -542,7 +544,7 @@ where async fn render_blob_with_permit<'a, Fd>( &self, dir_fd: Fd, - entry: &graph::Entry, + entry: graph::Entry<'async_recursion>, render_type: RenderType, permit: BlobSemaphorePermit<'a>, ) -> Result @@ -559,7 +561,7 @@ where // a payload that could have been repaired. 
let (mut reader, filename) = self .repo - .open_payload(entry.object) + .open_payload(*entry.object()) .await .map_err(|err| err.wrap("open payload"))?; let target_dir_fd = dir_fd.as_raw_fd(); @@ -571,13 +573,13 @@ where })?; } return if let Err(err) = - nix::unistd::symlinkat(target.as_str(), Some(target_dir_fd), entry.name.as_str()) + nix::unistd::symlinkat(target.as_str(), Some(target_dir_fd), entry.name()) { match err { nix::errno::Errno::EEXIST => Ok(RenderBlobResult::SymlinkAlreadyExists), _ => Err(Error::StorageWriteError( "symlink on rendered blob", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err.into(), )), } @@ -588,7 +590,7 @@ where // Free up file resources as early as possible. drop(reader); - let mut committed_path = self.repo.payloads().build_digest_path(&entry.object); + let mut committed_path = self.repo.payloads().build_digest_path(entry.object()); Ok(match render_type { RenderType::HardLink | RenderType::HardLinkNoProxy => { let mut retry_count = 0; @@ -607,8 +609,8 @@ where } else if let Ok(render_store) = self.repo.render_store() { let proxy_path = render_store .proxy - .build_digest_path(&entry.object) - .join(entry.mode.to_string()); + .build_digest_path(entry.object()) + .join(entry.mode().to_string()); tracing::trace!(?proxy_path, "proxy"); let render_blob_result = if !proxy_path.exists() { let path_to_create = proxy_path.parent().unwrap(); @@ -638,7 +640,7 @@ where Ok(metadata) => metadata, }; - let has_correct_mode = metadata.permissions().mode() == entry.mode; + let has_correct_mode = metadata.permissions().mode() == entry.mode(); let mut has_correct_owner = metadata.uid() == geteuid().as_raw(); // Can we still share this payload if it doesn't @@ -646,7 +648,7 @@ where if has_correct_mode && !has_correct_owner && { // require that a file has the "other" read bit // enabled before sharing it with other users. 
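                    // For example, mode 0o644 keeps the other-read bit set
                    // (0o644 & 0o004 != 0) and may still be shared, while mode
                    // 0o640 fails this check and the payload is proxied per
                    // user instead.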
- (entry.mode & 0o004) != 0 + (entry.mode() & 0o004) != 0 } { if let Ok(config) = get_config() { if config.storage.allow_payload_sharing_between_users { @@ -715,7 +717,7 @@ where } } else { if !has_correct_mode { - tracing::debug!(actual_mode = ?metadata.permissions().mode(), expected_mode = ?entry.mode, ?payload_path, "couldn't skip proxy copy; payload had wrong mode"); + tracing::debug!(actual_mode = ?metadata.permissions().mode(), expected_mode = ?entry.mode(), ?payload_path, "couldn't skip proxy copy; payload had wrong mode"); } else if !has_correct_owner { tracing::debug!(actual_uid = ?metadata.uid(), expected_uid = ?geteuid().as_raw(), ?payload_path, "couldn't skip proxy copy; payload had wrong uid"); } @@ -737,7 +739,7 @@ where tokio::fs::File::open(&payload_path).await.map_err(|err| { if err.kind() == std::io::ErrorKind::NotFound { // in the case of a corrupt repository, this is a more appropriate error - Error::UnknownObject(entry.object) + Error::UnknownObject(*entry.object()) } else { Error::StorageReadError( "open payload for proxying", @@ -762,12 +764,12 @@ where })?; nix::sys::stat::fchmod( proxy_file_fd, - Mode::from_bits_truncate(entry.mode), + Mode::from_bits_truncate(entry.mode()), ) .map_err(|err| { Error::StorageWriteError( "set permissions on proxy payload", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err.into(), ) })?; @@ -811,7 +813,7 @@ where None, committed_path.as_path(), Some(target_dir_fd), - std::path::Path::new(&entry.name), + std::path::Path::new(entry.name()), nix::unistd::LinkatFlags::NoSymlinkFollow, ) { match err { @@ -830,7 +832,7 @@ where nix::errno::Errno::ENOENT if !committed_path.exists() => { return Err(if committed_path == payload_path { // in the case of a corrupt repository, this is a more appropriate error - Error::UnknownObject(entry.object) + Error::UnknownObject(*entry.object()) } else { Error::StorageWriteError( "hard_link from committed path", @@ -858,14 +860,14 @@ where _ if matches!(render_type, RenderType::HardLink) => { return Err(Error::StorageWriteError( "hard_link of blob proxy to rendered path", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err.into(), )) } _ => { return Err(Error::StorageWriteError( "hard_link of blob to rendered path", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err.into(), )) } @@ -876,7 +878,7 @@ where } } RenderType::Copy => { - let name = entry.name.clone(); + let name = entry.name().to_owned(); let mut payload_file = tokio::fs::File::open(&committed_path) .await @@ -904,7 +906,7 @@ where .map_err(|err| { Error::StorageWriteError( "creation of rendered blob file", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err, ) })?; @@ -913,11 +915,11 @@ where .map_err(|err| { Error::StorageWriteError( "copy of blob to rendered file", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err, ) })?; - let mode = entry.mode; + let mode = entry.mode(); return tokio::task::spawn_blocking(move || { nix::sys::stat::fchmod( rendered_file.as_raw_fd(), @@ -930,7 +932,7 @@ where .map_err(|err| { Error::StorageWriteError( "set permissions on copied payload", - PathBuf::from(&entry.name), + PathBuf::from(entry.name()), err.into(), ) }); diff --git a/crates/spfs/src/storage/fs/renderer_win.rs b/crates/spfs/src/storage/fs/renderer_win.rs index 12e31ee97d..b0ef948a3e 100644 --- a/crates/spfs/src/storage/fs/renderer_win.rs +++ b/crates/spfs/src/storage/fs/renderer_win.rs @@ -11,12 +11,11 @@ use chrono::{DateTime, Utc}; use futures::{Stream, TryFutureExt, 
TryStreamExt}; use tokio::sync::Semaphore; -use crate::encoding::{self, Encodable}; +use crate::prelude::*; use crate::runtime::makedirs_with_perms; use crate::storage::fs::{OpenFsRepository, RenderReporter, SilentRenderReporter}; -use crate::storage::prelude::*; use crate::storage::LocalRepository; -use crate::{graph, tracking, Error, OsError, Result}; +use crate::{encoding, graph, tracking, Error, OsError, Result}; #[cfg(test)] #[path = "./renderer_test.rs"] @@ -241,14 +240,15 @@ where .map_err(|err| err.wrap("resolve stack to layers"))?; let mut futures = futures::stream::FuturesOrdered::new(); for layer in layers { + let digest = *layer.manifest(); let fut = self .repo - .read_manifest(layer.manifest) - .map_err(move |err| err.wrap(format!("read manifest {}", layer.manifest))) + .read_manifest(digest) + .map_err(move |err| err.wrap(format!("read manifest {digest}"))) .and_then(move |manifest| async move { self.render_manifest(&manifest, render_type) .await - .map_err(move |err| err.wrap(format!("render manifest {}", layer.manifest))) + .map_err(move |err| err.wrap(format!("render manifest {digest}"))) }); futures.push_back(fut); } @@ -272,13 +272,13 @@ where let layers = crate::resolve::resolve_stack_to_layers_with_repo(&stack, self.repo).await?; let mut manifests = Vec::with_capacity(layers.len()); for layer in layers { - manifests.push(self.repo.read_manifest(layer.manifest).await?); + manifests.push(self.repo.read_manifest(*layer.manifest()).await?); } let mut manifest = tracking::Manifest::default(); for next in manifests.into_iter() { manifest.update(&next.to_tracking_manifest()); } - let manifest = graph::Manifest::from(&manifest); + let manifest = manifest.to_graph_manifest(); self.render_manifest_into_dir(&manifest, target_dir, render_type) .await } diff --git a/crates/spfs/src/storage/handle.rs b/crates/spfs/src/storage/handle.rs index 697c049610..cafd8a7a4b 100644 --- a/crates/spfs/src/storage/handle.rs +++ b/crates/spfs/src/storage/handle.rs @@ -16,6 +16,7 @@ use super::prelude::*; use super::repository::Ref; use super::tag::TagSpecAndTagStream; use super::{RepositoryHandle, TagNamespace, TagNamespaceBuf, TagStorageMut}; +use crate::graph::ObjectProto; use crate::tracking::{self, BlobRead}; use crate::{graph, Error, Result}; @@ -227,7 +228,7 @@ impl DatabaseView for RepositoryHandle { #[async_trait::async_trait] impl Database for RepositoryHandle { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { each_variant!(self, repo, { repo.write_object(obj).await }) } @@ -258,7 +259,7 @@ impl Repository for Arc { each_variant!(&**self, repo, { repo.has_ref(reference).await }) } - /// Resolve a tag or digest string into it's absolute digest. + /// Resolve a tag or digest string into its absolute digest. 
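    /// (Accepts either form; for example `repo.resolve_ref("my-tag")` and
    /// `repo.resolve_ref(&digest.to_string())` both produce the same
    /// `encoding::Digest` when the tag points at that digest.)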
async fn resolve_ref(&self, reference: &str) -> Result { each_variant!(&**self, repo, { repo.resolve_ref(reference).await }) } @@ -430,7 +431,7 @@ impl DatabaseView for Arc { #[async_trait::async_trait] impl Database for Arc { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { each_variant!(&**self, repo, { repo.write_object(obj).await }) } diff --git a/crates/spfs/src/storage/layer.rs b/crates/spfs/src/storage/layer.rs index 94957c8fde..84052d2923 100644 --- a/crates/spfs/src/storage/layer.rs +++ b/crates/spfs/src/storage/layer.rs @@ -4,7 +4,7 @@ use std::pin::Pin; -use encoding::Encodable; +use encoding::prelude::*; use futures::Stream; use tokio_stream::StreamExt; @@ -16,12 +16,8 @@ pub type LayerStreamItem = Result<(encoding::Digest, graph::Layer)>; pub trait LayerStorage: graph::Database + Sync + Send { /// Iterate the objects in this storage which are layers. fn iter_layers<'db>(&'db self) -> Pin + 'db>> { - use graph::Object; let stream = self.iter_objects().filter_map(|res| match res { - Ok((digest, obj)) => match obj { - Object::Layer(layer) => Some(Ok((digest, layer))), - _ => None, - }, + Ok((digest, obj)) => obj.into_layer().map(|b| Ok((digest, b))), Err(err) => Some(Err(err)), }); Box::pin(stream) } /// Return the layer identified by the given digest. async fn read_layer(&self, digest: encoding::Digest) -> Result { - use graph::Object; - match self.read_object(digest).await { + match self + .read_object(digest) + .await + .map(graph::Object::into_layer) + { Err(err) => Err(err), - Ok(Object::Layer(layer)) => Ok(layer), - Ok(_) => Err(format!("Object is not a layer: {digest:?}").into()), + Ok(Some(layer)) => Ok(layer), + Ok(None) => Err(crate::Error::NotCorrectKind { + desired: graph::ObjectKind::Layer, + digest, + }), } } /// Create and store a new layer for the given manifest. async fn create_layer(&self, manifest: &graph::Manifest) -> Result { let layer = graph::Layer::new(manifest.digest()?); - let storable = graph::Object::Layer(layer); - self.write_object(&storable).await?; - if let graph::Object::Layer(layer) = storable { - Ok(layer) - } else { - panic!("this is impossible!"); - } + self.write_object(&layer).await?; + Ok(layer) } /// Create a new layer from an arbitrary manifest async fn create_layer_from_manifest( &self, manifest: &tracking::Manifest, ) -> Result { - let storable_manifest = graph::Manifest::from(manifest); - self.write_object(&graph::Object::Manifest(storable_manifest.clone())) .await?; + let storable_manifest = manifest.to_graph_manifest(); + self.write_object(&storable_manifest).await?; self.create_layer(&storable_manifest).await } } diff --git a/crates/spfs/src/storage/manifest.rs b/crates/spfs/src/storage/manifest.rs index 326388f397..48ccad7daf 100644 --- a/crates/spfs/src/storage/manifest.rs +++ b/crates/spfs/src/storage/manifest.rs @@ -19,12 +19,8 @@ pub type ManifestStreamItem = Result<(encoding::Digest, graph::Manifest)>; pub trait ManifestStorage: graph::Database + Sync + Send { /// Iterate the objects in this storage which are manifests.
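    /// (A filtered view of `iter_objects` that keeps only items whose object
    /// narrows via `into_manifest`; this mirrors `iter_layers` and
    /// `iter_blobs` in the sibling traits.)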
fn iter_manifests<'db>(&'db self) -> Pin + 'db>> { - use graph::Object; let stream = self.iter_objects().filter_map(|res| match res { - Ok((digest, obj)) => match obj { - Object::Manifest(manifest) => Some(Ok((digest, manifest))), - _ => None, - }, + Ok((digest, obj)) => obj.into_manifest().map(|b| Ok((digest, b))), Err(err) => Some(Err(err)), }); Box::pin(stream) @@ -32,11 +28,17 @@ pub trait ManifestStorage: graph::Database + Sync + Send { /// Return the manifest identified by the given digest. async fn read_manifest(&self, digest: encoding::Digest) -> Result { - use graph::Object; - match self.read_object(digest).await { + match self + .read_object(digest) + .await + .map(graph::Object::into_manifest) + { Err(err) => Err(err), - Ok(Object::Manifest(manifest)) => Ok(manifest), - Ok(_) => Err(format!("Object is not a manifest: {digest:?}").into()), + Ok(Some(manifest)) => Ok(manifest), + Ok(None) => Err(crate::Error::NotCorrectKind { + desired: graph::ObjectKind::Manifest, + digest, + }), } } } diff --git a/crates/spfs/src/storage/manifest_test.rs b/crates/spfs/src/storage/manifest_test.rs index cf53fe52ab..1f6c02212b 100644 --- a/crates/spfs/src/storage/manifest_test.rs +++ b/crates/spfs/src/storage/manifest_test.rs @@ -5,9 +5,7 @@ use rstest::rstest; use tokio_stream::StreamExt; -use crate::encoding::Encodable; use crate::fixtures::*; -use crate::graph::Manifest; use crate::prelude::*; use crate::tracking; @@ -25,15 +23,21 @@ async fn test_read_write_manifest( let dir = tmpdir.path(); let repo = repo.await; std::fs::File::create(dir.join("file.txt")).unwrap(); - let manifest = Manifest::from(&tracking::compute_manifest(&dir).await.unwrap()); + let manifest = tracking::compute_manifest(&dir) + .await + .unwrap() + .to_graph_manifest(); let expected = manifest.digest().unwrap(); - repo.write_object(&manifest.into()) + repo.write_object(&manifest) .await .expect("failed to write manifest"); std::fs::write(dir.join("file.txt"), "newrootdata").unwrap(); - let manifest2 = Manifest::from(&tracking::compute_manifest(dir).await.unwrap()); - repo.write_object(&manifest2.into()).await.unwrap(); + let manifest2 = tracking::compute_manifest(dir) + .await + .unwrap() + .to_graph_manifest(); + repo.write_object(&manifest2).await.unwrap(); let digests: crate::Result> = repo .find_digests(crate::graph::DigestSearchCriteria::All) @@ -62,9 +66,9 @@ async fn test_manifest_parity( std::fs::create_dir(dir.join("dir")).unwrap(); std::fs::write(dir.join("dir/file.txt"), "").unwrap(); let expected = tracking::compute_manifest(&dir).await.unwrap(); - let storable = Manifest::from(&expected); + let storable = expected.to_graph_manifest(); let digest = storable.digest().unwrap(); - repo.write_object(&storable.into()) + repo.write_object(&storable) .await .expect("failed to store manifest object"); let out = repo diff --git a/crates/spfs/src/storage/mod.rs b/crates/spfs/src/storage/mod.rs index 680c09fd92..205478f69b 100644 --- a/crates/spfs/src/storage/mod.rs +++ b/crates/spfs/src/storage/mod.rs @@ -99,45 +99,6 @@ impl RepositoryHandle { RepositoryHandle::Pinned(_) => Err(Error::RepositoryIsPinned), } } - - pub fn to_repo(self) -> Box { - match self { - Self::FS(repo) => Box::new(repo), - Self::Tar(repo) => Box::new(repo), - Self::Rpc(repo) => Box::new(repo), - Self::FallbackProxy(repo) => repo, - Self::Proxy(repo) => repo, - Self::Pinned(repo) => repo, - } - } -} - -impl std::ops::Deref for RepositoryHandle { - type Target = dyn Repository; - - fn deref(&self) -> &Self::Target { - match self { - 
RepositoryHandle::FS(repo) => repo, - RepositoryHandle::Tar(repo) => repo, - RepositoryHandle::Rpc(repo) => repo, - RepositoryHandle::FallbackProxy(repo) => &**repo, - RepositoryHandle::Proxy(repo) => &**repo, - RepositoryHandle::Pinned(repo) => &**repo, - } - } -} - -impl std::ops::DerefMut for RepositoryHandle { - fn deref_mut(&mut self) -> &mut Self::Target { - match self { - RepositoryHandle::FS(repo) => repo, - RepositoryHandle::Tar(repo) => repo, - RepositoryHandle::Rpc(repo) => repo, - RepositoryHandle::FallbackProxy(repo) => &mut **repo, - RepositoryHandle::Proxy(repo) => &mut **repo, - RepositoryHandle::Pinned(repo) => &mut **repo, - } - } } impl From for RepositoryHandle { diff --git a/crates/spfs/src/storage/pinned/repository.rs b/crates/spfs/src/storage/pinned/repository.rs index e7ac438780..66fc04d6ce 100644 --- a/crates/spfs/src/storage/pinned/repository.rs +++ b/crates/spfs/src/storage/pinned/repository.rs @@ -9,6 +9,7 @@ use chrono::{DateTime, Utc}; use futures::Stream; use spfs_encoding as encoding; +use crate::graph::ObjectProto; use crate::storage::prelude::*; use crate::tracking::BlobRead; use crate::{graph, Error, Result}; @@ -89,7 +90,7 @@ impl graph::Database for super::PinnedRepository where T: graph::Database + 'static, { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { // objects are stored by digest, not time, and so can still // be safely written to a past repository view. In practice, // this allows some recovery and sync operations to still function @@ -185,6 +186,7 @@ mod test { let build = || -> super::PinnedRepository { unimplemented!() }; let repo = build(); - let _: &dyn super::Repository = &repo; + fn needs_repo(_: &impl super::Repository) {} + needs_repo(&repo); } } diff --git a/crates/spfs/src/storage/platform.rs b/crates/spfs/src/storage/platform.rs index 918924a8f2..59d989589b 100644 --- a/crates/spfs/src/storage/platform.rs +++ b/crates/spfs/src/storage/platform.rs @@ -15,12 +15,8 @@ pub type PlatformStreamItem = Result<(encoding::Digest, graph::Platform)>; pub trait PlatformStorage: graph::Database + Sync + Send { /// Iterate the objects in this storage which are platforms. fn iter_platforms<'db>(&'db self) -> Pin + 'db>> { - use graph::Object; let stream = self.iter_objects().filter_map(|res| match res { - Ok((digest, obj)) => match obj { - Object::Platform(platform) => Some(Ok((digest, platform))), - _ => None, - }, + Ok((digest, obj)) => obj.into_platform().map(|b| Ok((digest, b))), Err(err) => Some(Err(err)), }); Box::pin(stream) @@ -28,11 +24,17 @@ pub trait PlatformStorage: graph::Database + Sync + Send { /// Return the platform identified by the given digest. async fn read_platform(&self, digest: encoding::Digest) -> Result { - use graph::Object; - match self.read_object(digest).await { + match self + .read_object(digest) + .await + .map(graph::Object::into_platform) + { Err(err) => Err(err), - Ok(Object::Platform(platform)) => Ok(platform), - Ok(_) => Err(format!("Object is not a platform: {digest:?}").into()), + Ok(Some(platform)) => Ok(platform), + Ok(None) => Err(crate::Error::NotCorrectKind { + desired: graph::ObjectKind::Platform, + digest, + }), } } @@ -40,13 +42,8 @@ pub trait PlatformStorage: graph::Database + Sync + Send { /// Layers are ordered bottom to top. 
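    /// (For example, assuming an existing `stack: graph::Stack`:
    /// `let platform = repo.create_platform(stack).await?;` writes the
    /// platform object and returns it.)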
async fn create_platform(&self, layers: graph::Stack) -> Result { let platform = graph::Platform::from(layers); - let storable = graph::Object::Platform(platform); - self.write_object(&storable).await?; - if let graph::Object::Platform(platform) = storable { - Ok(platform) - } else { - panic!("this is impossible!"); - } + self.write_object(&platform).await?; + Ok(platform) } } diff --git a/crates/spfs/src/storage/proxy/repository.rs b/crates/spfs/src/storage/proxy/repository.rs index e2652c27ef..7d2b3d3d9e 100644 --- a/crates/spfs/src/storage/proxy/repository.rs +++ b/crates/spfs/src/storage/proxy/repository.rs @@ -10,6 +10,7 @@ use futures::Stream; use relative_path::RelativePath; use crate::config::ToAddress; +use crate::graph::ObjectProto; use crate::prelude::*; use crate::storage::tag::TagSpecAndTagStream; use crate::storage::{ @@ -164,7 +165,7 @@ impl graph::DatabaseView for ProxyRepository { #[async_trait::async_trait] impl graph::Database for ProxyRepository { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { self.primary.write_object(obj).await?; Ok(()) } diff --git a/crates/spfs/src/storage/repository.rs b/crates/spfs/src/storage/repository.rs index 898184c786..0b7348b6ff 100644 --- a/crates/spfs/src/storage/repository.rs +++ b/crates/spfs/src/storage/repository.rs @@ -6,8 +6,7 @@ use std::collections::HashSet; use std::pin::Pin; use async_trait::async_trait; -use encoding::Encodable; -use graph::Blob; +use encoding::prelude::*; use tokio_stream::StreamExt; use super::fs::{FsHashStore, RenderStore}; @@ -60,7 +59,7 @@ pub trait Repository: self.read_ref(reference).await.is_ok() } - /// Resolve a tag or digest string into it's absolute digest. + /// Resolve a tag or digest string into its absolute digest. async fn resolve_ref(&self, reference: &str) -> Result { if let Ok(tag_spec) = tracking::TagSpec::parse(reference) { if let Ok(tag) = self.resolve_tag(&tag_spec).await { @@ -108,8 +107,8 @@ pub trait Repository: // Safety: it is unsafe to write data without also creating a blob // to track that payload, which is exactly what this function is doing let (digest, size) = unsafe { self.write_data(reader).await? 
}; - let blob = Blob::new(digest, size); - self.write_object(&graph::Object::Blob(blob)).await?; + let blob = graph::Blob::new(digest, size); + self.write_object(&blob).await?; Ok(digest) } } diff --git a/crates/spfs/src/storage/repository_test.rs b/crates/spfs/src/storage/repository_test.rs index ae08bea325..21b86f39fc 100644 --- a/crates/spfs/src/storage/repository_test.rs +++ b/crates/spfs/src/storage/repository_test.rs @@ -10,9 +10,8 @@ use std::sync::Arc; use rstest::rstest; use super::Ref; -use crate::encoding::Encodable; +use crate::encoding::prelude::*; use crate::fixtures::*; -use crate::graph::Manifest; use crate::storage::fs; use crate::storage::prelude::*; use crate::tracking::TagSpec; @@ -39,7 +38,7 @@ async fn test_find_aliases( .await .unwrap(); let layer = tmprepo - .create_layer(&Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let test_tag = TagSpec::parse("test-tag").unwrap(); @@ -92,7 +91,7 @@ async fn test_commit_mode_fs(tmpdir: tempfile::TempDir) { }; let rendered_dir = fs::Renderer::new(&*tmprepo) - .render_manifest(&Manifest::from(&manifest), None) + .render_manifest(&manifest.to_graph_manifest(), None) .await .expect("failed to render manifest"); let rendered_symlink = rendered_dir.join(symlink_path); @@ -155,17 +154,15 @@ async fn test_commit_dir( ensure(src_dir.join("dir2.0/file.txt"), "evenmoredata"); ensure(src_dir.join("file.txt"), "rootdata"); - let manifest = Manifest::from( - &crate::Committer::new(&tmprepo) - .commit_dir(&src_dir) - .await - .unwrap(), - ); - let manifest2 = Manifest::from( - &crate::Committer::new(&tmprepo) - .commit_dir(&src_dir) - .await - .unwrap(), - ); - assert_eq!(manifest, manifest2); + let manifest = crate::Committer::new(&tmprepo) + .commit_dir(&src_dir) + .await + .unwrap() + .to_graph_manifest(); + let manifest2 = crate::Committer::new(&tmprepo) + .commit_dir(&src_dir) + .await + .unwrap() + .to_graph_manifest(); + assert_eq!(manifest.digest().unwrap(), manifest2.digest().unwrap()); } diff --git a/crates/spfs/src/storage/rpc/database.rs b/crates/spfs/src/storage/rpc/database.rs index 11dd9effb6..37b5188788 100644 --- a/crates/spfs/src/storage/rpc/database.rs +++ b/crates/spfs/src/storage/rpc/database.rs @@ -8,7 +8,8 @@ use std::pin::Pin; use futures::{Stream, TryStreamExt}; use proto::RpcResult; -use crate::{encoding, graph, proto, storage, Result}; +use crate::graph::{self, ObjectProto}; +use crate::{encoding, proto, storage, Result}; #[async_trait::async_trait] impl graph::DatabaseView for super::RpcRepository { @@ -67,9 +68,9 @@ impl graph::DatabaseView for super::RpcRepository { #[async_trait::async_trait] impl graph::Database for super::RpcRepository { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { let request = proto::WriteObjectRequest { - object: Some(obj.into()), + object: Some((&obj.to_enum()).into()), }; self.db_client .clone() diff --git a/crates/spfs/src/storage/tag.rs b/crates/spfs/src/storage/tag.rs index 1de081563a..c4a2f91c8b 100644 --- a/crates/spfs/src/storage/tag.rs +++ b/crates/spfs/src/storage/tag.rs @@ -6,12 +6,12 @@ use std::borrow::Cow; use std::fmt::Display; use std::pin::Pin; -use encoding::Encodable; use futures::Stream; use relative_path::RelativePath; use tokio_stream::StreamExt; use super::{TagNamespace, TagNamespaceBuf, TAG_NAMESPACE_MARKER}; +use crate::prelude::*; use crate::{encoding, tracking, Error, Result}; pub(crate) type TagStream = Pin> + Send>>; diff 
--git a/crates/spfs/src/storage/tar/repository.rs b/crates/spfs/src/storage/tar/repository.rs index 9de7deb519..cbb6b6bcf3 100644 --- a/crates/spfs/src/storage/tar/repository.rs +++ b/crates/spfs/src/storage/tar/repository.rs @@ -14,6 +14,7 @@ use relative_path::RelativePath; use tar::{Archive, Builder}; use crate::config::ToAddress; +use crate::graph::ObjectProto; use crate::prelude::*; use crate::storage::fs::DURABLE_EDITS_DIR; use crate::storage::tag::TagSpecAndTagStream; @@ -248,7 +249,7 @@ impl graph::DatabaseView for TarRepository { #[async_trait::async_trait] impl graph::Database for TarRepository { - async fn write_object(&self, obj: &graph::Object) -> Result<()> { + async fn write_object(&self, obj: &graph::FlatObject) -> Result<()> { self.repo.write_object(obj).await?; self.up_to_date.store(false, Ordering::Release); Ok(()) diff --git a/crates/spfs/src/sync.rs b/crates/spfs/src/sync.rs index bf2b2b2c54..f645481d73 100644 --- a/crates/spfs/src/sync.rs +++ b/crates/spfs/src/sync.rs @@ -268,15 +268,13 @@ where #[async_recursion::async_recursion] pub async fn sync_object(&self, obj: graph::Object) -> Result { - use graph::Object; + use graph::object::Enum; self.reporter.visit_object(&obj); - let res = match obj { - Object::Layer(obj) => SyncObjectResult::Layer(self.sync_layer(obj).await?), - Object::Platform(obj) => SyncObjectResult::Platform(self.sync_platform(obj).await?), - Object::Blob(obj) => SyncObjectResult::Blob(self.sync_blob(obj).await?), - Object::Manifest(obj) => SyncObjectResult::Manifest(self.sync_manifest(obj).await?), - Object::Tree(obj) => SyncObjectResult::Tree(obj), - Object::Mask => SyncObjectResult::Mask, + let res = match obj.into_enum() { + Enum::Layer(obj) => SyncObjectResult::Layer(self.sync_layer(obj).await?), + Enum::Platform(obj) => SyncObjectResult::Platform(self.sync_platform(obj).await?), + Enum::Blob(obj) => SyncObjectResult::Blob(self.sync_blob(&obj).await?), + Enum::Manifest(obj) => SyncObjectResult::Manifest(self.sync_manifest(obj).await?), }; self.reporter.synced_object(&res); Ok(res) @@ -293,15 +291,15 @@ where self.reporter.visit_platform(&platform); let mut futures = FuturesUnordered::new(); - for digest in platform.stack.iter_bottom_up() { - futures.push(self.sync_digest(digest)); + for digest in platform.iter_bottom_up() { + futures.push(self.sync_digest(*digest)); } let mut results = Vec::with_capacity(futures.len()); while let Some(result) = futures.try_next().await? 
{ results.push(result); } - let platform = self.dest.create_platform(platform.stack).await?; + self.dest.write_object(&platform).await?; let res = SyncPlatformResult::Synced { platform, results }; self.reporter.synced_platform(&res); @@ -318,11 +316,9 @@ } self.reporter.visit_layer(&layer); - let manifest = self.src.read_manifest(layer.manifest).await?; + let manifest = self.src.read_manifest(*layer.manifest()).await?; let result = self.sync_manifest(manifest).await?; - self.dest - .write_object(&graph::Object::Layer(layer.clone())) - .await?; + self.dest.write_object(&layer).await?; let res = SyncLayerResult::Synced { layer, result }; self.reporter.synced_layer(&res); Ok(res) } @@ -345,8 +341,7 @@ let entries: Vec<_> = manifest .iter_entries() - .filter(|e| e.kind.is_blob()) - .cloned() + .filter(|e| e.kind().is_blob()) .collect(); let mut results = Vec::with_capacity(entries.len()); let mut futures = FuturesUnordered::new(); @@ -357,66 +352,65 @@ results.push(res); } - self.dest - .write_object(&graph::Object::Manifest(manifest.clone())) - .await?; + self.dest.write_object(&manifest).await?; + drop(futures); let res = SyncManifestResult::Synced { manifest, results }; self.reporter.synced_manifest(&res); Ok(res) } - async fn sync_entry(&self, entry: graph::Entry) -> Result { - if !entry.kind.is_blob() { + async fn sync_entry(&self, entry: graph::Entry<'_>) -> Result { - if !entry.kind.is_blob() { + if !entry.kind().is_blob() { return Ok(SyncEntryResult::Skipped); } self.reporter.visit_entry(&entry); - let blob = graph::Blob { - payload: entry.object, - size: entry.size, - }; + let blob = graph::Blob::new(*entry.object(), entry.size()); let result = self - .sync_blob_with_perms_opt(blob, Some(entry.mode)) + .sync_blob_with_perms_opt(&blob, Some(entry.mode())) .await?; - let res = SyncEntryResult::Synced { entry, result }; + let res = SyncEntryResult::Synced { result }; self.reporter.synced_entry(&res); Ok(res) } /// Sync the identified blob to the destination repository. - pub async fn sync_blob(&self, blob: graph::Blob) -> Result { + pub async fn sync_blob(&self, blob: &graph::Blob) -> Result { self.sync_blob_with_perms_opt(blob, None).await } async fn sync_blob_with_perms_opt( &self, - blob: graph::Blob, + blob: &graph::Blob, perms: Option, ) -> Result { let digest = blob.digest(); - if self.processed_digests.contains(&digest) { + if self.processed_digests.contains(digest) { // do not insert here because blobs share a digest with payloads // which must also be visited at least once if needed return Ok(SyncBlobResult::Duplicate); } if self.policy.check_existing_objects() - && self.dest.has_object(digest).await - && self.dest.has_payload(blob.payload).await + && self.dest.has_object(*digest).await + && self.dest.has_payload(*blob.payload()).await { - self.processed_digests.insert(digest); + self.processed_digests.insert(*digest); return Ok(SyncBlobResult::Skipped); } - self.reporter.visit_blob(&blob); + self.reporter.visit_blob(blob); // Safety: sync_payload is unsafe to call unless the blob // is synced with it, which is the purpose of this function. let result = unsafe { - self.sync_payload_with_perms_opt(blob.payload, perms) + self.sync_payload_with_perms_opt(*blob.payload(), perms) .await?
}; - self.dest.write_blob(blob.clone()).await?; - self.processed_digests.insert(digest); - let res = SyncBlobResult::Synced { blob, result }; + self.dest.write_blob(blob.to_owned()).await?; + self.processed_digests.insert(*digest); + let res = SyncBlobResult::Synced { + blob: blob.to_owned(), + result, + }; self.reporter.synced_blob(&res); Ok(res) } @@ -541,7 +535,7 @@ pub trait SyncReporter: Send + Sync { fn synced_manifest(&self, _result: &SyncManifestResult) {} /// Called when an entry has been identified to sync - fn visit_entry(&self, _entry: &graph::Entry) {} + fn visit_entry(&self, _entry: &graph::Entry<'_>) {} /// Called when an entry has finished syncing fn synced_entry(&self, _result: &SyncEntryResult) {} @@ -587,7 +581,7 @@ impl SyncReporter for ConsoleSyncReporter { fn visit_blob(&self, blob: &graph::Blob) { let bars = self.get_bars(); bars.payloads.inc_length(1); - bars.bytes.inc_length(blob.size); + bars.bytes.inc_length(blob.size()); } fn synced_blob(&self, result: &SyncBlobResult) { @@ -749,24 +743,21 @@ pub enum SyncObjectResult { Layer(SyncLayerResult), Blob(SyncBlobResult), Manifest(SyncManifestResult), - Tree(graph::Tree), - Mask, } impl SyncObjectResult { pub fn summary(&self) -> SyncSummary { - use SyncObjectResult::*; + use SyncObjectResult as R; match self { - Duplicate => SyncSummary { + R::Duplicate => SyncSummary { skipped_objects: 1, ..Default::default() }, - Ignorable => SyncSummary::default(), - Platform(res) => res.summary(), - Layer(res) => res.summary(), - Blob(res) => res.summary(), - Manifest(res) => res.summary(), - Mask | Tree(_) => SyncSummary::default(), + R::Ignorable => SyncSummary::default(), + R::Platform(res) => res.summary(), + R::Layer(res) => res.summary(), + R::Blob(res) => res.summary(), + R::Manifest(res) => res.summary(), } } } @@ -856,10 +847,7 @@ pub enum SyncEntryResult { /// The entry was already synced in this session Duplicate, /// The entry was synced - Synced { - entry: graph::Entry, - result: SyncBlobResult, - }, + Synced { result: SyncBlobResult }, } impl SyncEntryResult { diff --git a/crates/spfs/src/sync_test.rs b/crates/spfs/src/sync_test.rs index 9a0e512e7b..b6455924bf 100644 --- a/crates/spfs/src/sync_test.rs +++ b/crates/spfs/src/sync_test.rs @@ -11,7 +11,7 @@ use super::Syncer; use crate::config::Config; use crate::fixtures::*; use crate::prelude::*; -use crate::{encoding, graph, storage, tracking, Error}; +use crate::{encoding, storage, tracking, Error}; #[rstest] #[tokio::test] @@ -54,7 +54,7 @@ async fn test_push_ref(#[future] config: (tempfile::TempDir, Config)) { .await .unwrap(); let layer = local - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let tag = tracking::TagSpec::parse("testing").unwrap(); @@ -100,7 +100,7 @@ async fn test_sync_ref( .await .unwrap(); let layer = repo_a - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let platform = repo_a @@ -167,7 +167,7 @@ async fn test_sync_missing_from_source( .await .unwrap(); let layer = repo_b - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let platform = repo_b @@ -236,7 +236,7 @@ async fn test_sync_through_tar( .await .unwrap(); let layer = repo_a - .create_layer(&graph::Manifest::from(&manifest)) + .create_layer(&manifest.to_graph_manifest()) .await .unwrap(); let platform = repo_a diff --git a/crates/spfs/src/tracking/entry.rs b/crates/spfs/src/tracking/entry.rs 
index ccd0a76c93..386318d92d 100644 --- a/crates/spfs/src/tracking/entry.rs +++ b/crates/spfs/src/tracking/entry.rs @@ -73,6 +73,16 @@ impl FromStr for EntryKind { } } +impl From for spfs_proto::EntryKind { + fn from(val: EntryKind) -> spfs_proto::EntryKind { + match val { + EntryKind::Blob => spfs_proto::EntryKind::Blob, + EntryKind::Tree => spfs_proto::EntryKind::Tree, + EntryKind::Mask => spfs_proto::EntryKind::Mask, + } + } +} + impl encoding::Encodable for EntryKind { type Error = Error; diff --git a/crates/spfs/src/tracking/manifest.rs b/crates/spfs/src/tracking/manifest.rs index 8ebbe2e607..2419ea6964 100644 --- a/crates/spfs/src/tracking/manifest.rs +++ b/crates/spfs/src/tracking/manifest.rs @@ -20,7 +20,7 @@ use tokio::sync::Semaphore; use super::entry::{Entry, EntryKind}; use super::{BlobRead, BlobReadExt, Diff}; -use crate::{encoding, runtime, Error, Result}; +use crate::{encoding, graph, runtime, Error, Result}; #[cfg(test)] #[path = "./manifest_test.rs"] @@ -36,6 +36,9 @@ pub const DEFAULT_MAX_CONCURRENT_BRANCHES: usize = 5; #[derive(Clone)] pub struct Manifest { + /// retains the original header values/configuration + /// of the constructed or loaded data + header: graph::object::HeaderBuf, root: Entry, } @@ -45,6 +48,7 @@ where { fn default() -> Self { Self { + header: graph::object::HeaderBuf::new(graph::ObjectKind::Manifest), root: Entry::empty_dir_with_open_perms(), } } @@ -74,13 +78,26 @@ impl std::cmp::Eq for Manifest where T: std::cmp::Eq {} impl From> for Manifest { fn from(root: Entry) -> Self { - Self { root } + Self::new(root) } } impl Manifest { pub fn new(root: Entry) -> Self { - Self { root } + Self { + header: graph::object::HeaderBuf::new(graph::ObjectKind::Manifest), + root, + } + } + + pub fn header(&self) -> &graph::object::Header { + &self.header + } + + pub fn set_header(&mut self, mut header: graph::object::HeaderBuf) { + // a different object kind would cause bugs and should never be allowed + header.set_object_kind(graph::ObjectKind::Manifest); + self.header = header; } pub fn root(&self) -> &Entry { @@ -96,7 +113,7 @@ impl Manifest { self.root.entries.len() == 0 } - /// Get an entry in this manifest given it's filepath. + /// Get an entry in this manifest given its filepath. pub fn get_path>(&self, path: P) -> Option<&Entry> { const TRIM_START: &[char] = &['/', '.']; const TRIM_END: &[char] = &['/']; @@ -155,7 +172,9 @@ where /// Convert this manifest into its encodable, /// hashable form for storage. pub fn to_graph_manifest(&self) -> crate::graph::Manifest { - self.into() + crate::graph::Manifest::builder() + .with_header(|h| h.copy_from(&self.header)) + .build(self.root()) } } @@ -369,7 +388,7 @@ pub trait BlobHasher { #[tonic::async_trait] impl BlobHasher for () { async fn hash_blob(&self, reader: Pin>) -> Result { - Ok(encoding::Digest::from_async_reader(reader).await?) + Ok(encoding::Hasher::hash_async_reader(reader).await?) 
} } @@ -532,11 +551,10 @@ where path: P, ) -> Result { tracing::trace!("computing manifest for {:?}", path.as_ref()); - let manifest = Manifest { - root: self - .compute_tree_node(Arc::new(path.as_ref().to_owned()), path.as_ref()) + let manifest = Manifest::new( + self.compute_tree_node(Arc::new(path.as_ref().to_owned()), path.as_ref()) .await?, - }; + ); Ok(manifest) } diff --git a/crates/spfs/src/tracking/manifest_test.rs b/crates/spfs/src/tracking/manifest_test.rs index 726856074b..38cda506b6 100644 --- a/crates/spfs/src/tracking/manifest_test.rs +++ b/crates/spfs/src/tracking/manifest_test.rs @@ -5,8 +5,8 @@ use rstest::rstest; use super::{compute_manifest, EntryKind, Manifest}; +use crate::encoding::prelude::*; use crate::fixtures::*; -use crate::graph; #[rstest] #[tokio::test] @@ -100,7 +100,10 @@ async fn test_layer_manifests(tmpdir: tempfile::TempDir) { a.update(&b); assert_eq!(a, both); - assert_eq!(graph::Manifest::from(&a), graph::Manifest::from(&both)); + assert_eq!( + a.to_graph_manifest().digest().unwrap(), + both.to_graph_manifest().digest().unwrap() + ); } #[rstest] #[tokio::test] diff --git a/crates/spfs/src/tracking/tag.rs b/crates/spfs/src/tracking/tag.rs index a6e4b8ac86..4c141052b5 100644 --- a/crates/spfs/src/tracking/tag.rs +++ b/crates/spfs/src/tracking/tag.rs @@ -6,7 +6,7 @@ use std::io::BufRead; use chrono::prelude::*; -use crate::encoding::Encodable; +use crate::encoding::prelude::*; use crate::{encoding, Error, Result}; #[cfg(test)] @@ -127,6 +127,16 @@ impl std::fmt::Display for Tag { } } +impl Digestible for Tag { + type Error = Error; + + fn digest(&self) -> std::result::Result { + let mut hasher = encoding::Hasher::new_sync(); + self.encode(&mut hasher)?; + Ok(hasher.digest()) + } +} + impl Encodable for Tag { type Error = Error; diff --git a/crates/spfs/src/tracking/time_spec_test.rs b/crates/spfs/src/tracking/time_spec_test.rs index e2ea93e5ce..dd8880e4db 100644 --- a/crates/spfs/src/tracking/time_spec_test.rs +++ b/crates/spfs/src/tracking/time_spec_test.rs @@ -26,6 +26,6 @@ fn test_parsing(#[case] source: &str) { let spec2 = TimeSpec::parse(out).expect("Should re-parse formatted spec"); assert_eq!( spec2, spec, - "Re-parsed spec should be the same as it's source" + "Re-parsed spec should be the same as its source" ); } diff --git a/crates/spfs/tests/integration/unprivileged/test_manifest_digest_collision.sh b/crates/spfs/tests/integration/unprivileged/test_manifest_digest_collision.sh new file mode 100644 index 0000000000..6450e8d7a8 --- /dev/null +++ b/crates/spfs/tests/integration/unprivileged/test_manifest_digest_collision.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright (c) Sony Pictures Imageworks, et al. +# SPDX-License-Identifier: Apache-2.0 +# https://github.com/imageworks/spk + +set -o errexit + +# test that an empty platform's digest doesn't collide with a specially crafted +# blob + +# Commit an empty platform. At the time of writing with the current digest +# calculation strategy, this should produce a digest of +# V5KXB5NBQEFXV54MV5F4OCTGB4G7KHSCXL4R2TPFWIZI3YHIHX6A==== +spfs run - -- bash -c "spfs commit --allow-empty platform -t test/empty_platform" + +# Commit a blob containing 8 null bytes. This blob will also have the digest +# V5KXB5NBQEFXV54MV5F4OCTGB4G7KHSCXL4R2TPFWIZI3YHIHX6A==== +spfs run - -- bash -c "dd if=/dev/zero bs=1 count=8 2>/dev/null | spfs write -t test/blob" + +# It should be possible to `spfs read` the blob; this command fails if the +# object is not a blob. 
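+# (This collision appears to be possible because an empty platform
+# encodes as a single zero u64, i.e. eight null bytes, while a blob's
+# digest is the hash of its payload bytes, so both hash the same input.)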
+spfs read test/blob + +# Reading the blob should succeed and have the expected contents. +expected_hash=$(dd if=/dev/zero bs=1 count=8 2>/dev/null | sha256sum | cut -d' ' -f1) +actual_hash=$(spfs read test/blob 2>/dev/null | sha256sum | cut -d' ' -f1) + +if [ "$expected_hash" != "$actual_hash" ]; then + echo "Expected hash $expected_hash but got $actual_hash" + exit 1 +fi diff --git a/crates/spk-build/src/build/binary.rs b/crates/spk-build/src/build/binary.rs index c3d2c80a3d..bbd8e824c5 100644 --- a/crates/spk-build/src/build/binary.rs +++ b/crates/spk-build/src/build/binary.rs @@ -844,7 +844,7 @@ where .commit_layer(&mut runtime) .await?; let collected_layer = repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await? .to_tracking_manifest(); let manifests = split_manifest_by_component( @@ -854,15 +854,13 @@ where )?; let mut components = HashMap::new(); for (component, manifest) in manifests { - let storable_manifest = spfs::graph::Manifest::from(&manifest); - let layer = spfs::graph::Layer { - manifest: storable_manifest.digest().unwrap(), - }; + let storable_manifest = manifest.to_graph_manifest(); + let layer = spfs::graph::Layer::new(storable_manifest.digest().unwrap()); let layer_digest = layer.digest().unwrap(); #[rustfmt::skip] tokio::try_join!( - async { repo.write_object(&storable_manifest.into()).await }, - async { repo.write_object(&layer.into()).await } + async { repo.write_object(&storable_manifest).await }, + async { repo.write_object(&layer).await } )?; components.insert( component, @@ -888,6 +886,9 @@ fn split_manifest_by_component( let mut manifests = HashMap::with_capacity(components.len()); for component in components.iter() { let mut component_manifest = spfs::tracking::Manifest::default(); + // ensure we are storing things with the same settings as the + // original manifest that was generated by the build + component_manifest.set_header(manifest.header().to_owned()); // identify all the file paths that we will replicate // first so that we can also identify necessary diff --git a/crates/spk-build/src/build/binary_test.rs b/crates/spk-build/src/build/binary_test.rs index 1baab0ec3f..7c6401b5e5 100644 --- a/crates/spk-build/src/build/binary_test.rs +++ b/crates/spk-build/src/build/binary_test.rs @@ -598,7 +598,7 @@ async fn test_build_package_source_cleanup() { let repo = config.get_local_repository().await.unwrap(); let layer = repo.read_layer(digest).await.unwrap(); let manifest = repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await .unwrap() .to_tracking_manifest(); @@ -688,7 +688,7 @@ async fn test_build_filters_reset_files() { let repo = config.get_local_repository().await.unwrap(); let layer = repo.read_layer(digest).await.unwrap(); let manifest = repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await .unwrap() .to_tracking_manifest(); diff --git a/crates/spk-build/src/build/sources.rs b/crates/spk-build/src/build/sources.rs index d80779b123..032a5e39e2 100644 --- a/crates/spk-build/src/build/sources.rs +++ b/crates/spk-build/src/build/sources.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; use relative_path::{RelativePath, RelativePathBuf}; -use spfs::prelude::Encodable; +use spfs::prelude::*; use spk_schema::foundation::env::data_path; use spk_schema::foundation::ident_component::Component; use spk_schema::{Package, PackageMut}; diff --git a/crates/spk-cli/cmd-build/Cargo.toml b/crates/spk-cli/cmd-build/Cargo.toml index 11883b056b..8e2fb81d6a 100644 
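The header copying and the integration test above guard the same collision: under the legacy digest strategy, an empty platform and a blob of eight NUL bytes hash to the identical digest (the `V5KX...` value in the script), plausibly because the empty platform's payload is nothing but an eight-byte, all-zero length field. The sketch below illustrates the failure mode with plain SHA-256 via `ring`; the payload layout and the four-byte kind prefix are illustrative assumptions, not the real spfs wire format:

```rust
use ring::digest::{digest, SHA256};

fn main() {
    // assumed legacy payload of an empty platform: a u64 stack length
    // of zero, which is eight NUL bytes on the wire
    let empty_platform = 0u64.to_be_bytes();
    // a blob whose literal content is eight NUL bytes
    let crafted_blob = [0u8; 8];
    // with no object-kind header, the two payloads hash identically
    assert_eq!(
        digest(&SHA256, &empty_platform).as_ref(),
        digest(&SHA256, &crafted_blob).as_ref()
    );

    // prefixing each payload with a per-kind header, as the new
    // header-carrying format does, separates the two digests
    let mut platform = b"PLAT".to_vec();
    platform.extend_from_slice(&empty_platform);
    let mut blob = b"BLOB".to_vec();
    blob.extend_from_slice(&crafted_blob);
    assert_ne!(
        digest(&SHA256, &platform).as_ref(),
        digest(&SHA256, &blob).as_ref()
    );
}
```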
--- a/crates/spk-cli/cmd-build/Cargo.toml +++ b/crates/spk-cli/cmd-build/Cargo.toml @@ -18,6 +18,7 @@ migration-to-components = [ miette = { workspace = true, features = ["fancy"] } async-trait = { workspace = true } clap = { workspace = true } +spfs = { workspace = true } spk-cli-common = { workspace = true } spk-cmd-make-binary = { workspace = true } spk-cmd-make-source = { workspace = true } diff --git a/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs b/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs index ebad556e06..b9b65b01e1 100644 --- a/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs +++ b/crates/spk-cli/cmd-build/src/cmd_build_test/mod.rs @@ -7,6 +7,7 @@ use std::io::Write; use clap::Parser; use rstest::rstest; +use spfs::storage::prelude::*; use spk_cli_common::Run; use spk_schema::foundation::fixtures::*; use spk_schema::foundation::option_map; @@ -473,7 +474,7 @@ build: let layer = repo.read_layer(digest).await.unwrap(); let manifest = repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await .unwrap() .to_tracking_manifest(); @@ -573,7 +574,7 @@ build: let layer = repo.read_layer(digest).await.unwrap(); let manifest = repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await .unwrap() .to_tracking_manifest(); diff --git a/crates/spk-cli/cmd-debug/src/cmd_debug.rs b/crates/spk-cli/cmd-debug/src/cmd_debug.rs index e31e333ef4..880d4bca3a 100644 --- a/crates/spk-cli/cmd-debug/src/cmd_debug.rs +++ b/crates/spk-cli/cmd-debug/src/cmd_debug.rs @@ -8,6 +8,7 @@ use std::convert::TryInto; use clap::Args; use futures::TryFutureExt; use miette::Result; +use spfs::prelude::*; use spk_cli_common::{current_env, flags, CommandArgs, Run}; use spk_schema::foundation::format::FormatIdent; use spk_schema::ident_build::Build; diff --git a/crates/spk-cli/cmd-du/src/cmd_du.rs b/crates/spk-cli/cmd-du/src/cmd_du.rs index ecedcd10ec..c5b0962921 100644 --- a/crates/spk-cli/cmd-du/src/cmd_du.rs +++ b/crates/spk-cli/cmd-du/src/cmd_du.rs @@ -13,7 +13,8 @@ use colored::Colorize; use futures::{Stream, TryStreamExt}; use itertools::Itertools; use miette::Result; -use spfs::graph::Object; +use spfs::graph::object::Enum; +use spfs::prelude::*; use spfs::tracking::Entry; use spfs::Digest; use spk_cli_common::{flags, CommandArgs, Run}; @@ -340,23 +341,23 @@ impl Du { let spk_storage::RepositoryHandle::SPFS(repo) = repo else { continue; }; - let mut item = repo.read_ref(digest.to_string().as_str()).await?; + let mut item = repo.read_object(digest).await?; let mut items_to_process: Vec<Object> = vec![item]; while !items_to_process.is_empty() { let mut next_iter_objects: Vec<Object> = Vec::new(); for object in items_to_process.iter() { - match object { - Object::Platform(object) => { - for digest in object.stack.iter_bottom_up() { - item = repo.read_object(digest).await?; + match object.to_enum() { + Enum::Platform(object) => { + for digest in object.iter_bottom_up() { + item = repo.read_object(*digest).await?; next_iter_objects.push(item); } } - Object::Layer(object) => { - item = repo.read_object(object.manifest).await?; + Enum::Layer(object) => { + item = repo.read_object(*object.manifest()).await?; next_iter_objects.push(item); } - Object::Manifest(object) => { + Enum::Manifest(object) => { let tracking_manifest = object.to_tracking_manifest(); let root_entry = tracking_manifest.take_root(); let mut walked_entries = root_entry.walk(); @@ -369,9 +370,7 @@ impl Du { } } } - Object::Tree(_) => self.output.warn(format_args!("Tree object cannot have disk usage generated")), -
Object::Blob(_) => self.output.warn(format_args!("Blob object cannot have disk usage generated")), - Object::Mask => () + Enum::Blob(_) => self.output.warn(format_args!("Blob object cannot have disk usage generated")), } } items_to_process = std::mem::take(&mut next_iter_objects); diff --git a/crates/spk-cli/cmd-render/src/cmd_render.rs b/crates/spk-cli/cmd-render/src/cmd_render.rs index f44775fa90..80c8af1cc1 100644 --- a/crates/spk-cli/cmd-render/src/cmd_render.rs +++ b/crates/spk-cli/cmd-render/src/cmd_render.rs @@ -69,7 +69,7 @@ impl Run for Render { let path = dunce::canonicalize(&self.target).into_diagnostic()?; tracing::info!("Rendering into dir: {path:?}"); - let config = spfs::load_config().wrap_err("Failed to load spfs config")?; + let config = spfs::get_config().wrap_err("Failed to load spfs config")?; let local = config .get_opened_local_repository() .await diff --git a/crates/spk-cli/common/src/env.rs b/crates/spk-cli/common/src/env.rs index da1f3bdec8..cc43eae325 100644 --- a/crates/spk-cli/common/src/env.rs +++ b/crates/spk-cli/common/src/env.rs @@ -257,7 +257,7 @@ pub fn configure_logging(verbosity: u8) -> Result<()> { } // this is not ideal, because it can propagate annoyingly into // created environments, but without it the spfs logging configuration - // takes over in it's current setup/state. + // takes over in its current setup/state. std::env::set_var("RUST_LOG", &directives); let env_filter = tracing_subscriber::filter::EnvFilter::new(directives); let stderr_log = tracing_subscriber::fmt::layer() diff --git a/crates/spk-cli/group2/src/cmd_ls_test.rs b/crates/spk-cli/group2/src/cmd_ls_test.rs index bd550d3397..3c1ebebe0f 100644 --- a/crates/spk-cli/group2/src/cmd_ls_test.rs +++ b/crates/spk-cli/group2/src/cmd_ls_test.rs @@ -6,6 +6,7 @@ use clap::Parser; use futures::prelude::*; use relative_path::RelativePathBuf; use spfs::config::Remote; +use spfs::prelude::*; use spfs::storage::EntryType; use spfs::RemoteAddress; use spk_schema::foundation::ident_component::Component; diff --git a/crates/spk-cli/group2/src/cmd_publish_test.rs b/crates/spk-cli/group2/src/cmd_publish_test.rs index da5d05c220..0d7bcbf6a8 100644 --- a/crates/spk-cli/group2/src/cmd_publish_test.rs +++ b/crates/spk-cli/group2/src/cmd_publish_test.rs @@ -7,6 +7,7 @@ use futures::prelude::*; use relative_path::RelativePathBuf; use rstest::rstest; use spfs::config::Remote; +use spfs::prelude::*; use spfs::storage::EntryType; use spfs::RemoteAddress; use spk_schema::foundation::ident_component::Component; diff --git a/crates/spk-cli/group2/src/cmd_remove_test.rs b/crates/spk-cli/group2/src/cmd_remove_test.rs index 6c0dbcf50a..ea715060fa 100644 --- a/crates/spk-cli/group2/src/cmd_remove_test.rs +++ b/crates/spk-cli/group2/src/cmd_remove_test.rs @@ -7,6 +7,7 @@ use futures::prelude::*; use relative_path::RelativePathBuf; use rstest::rstest; use spfs::config::Remote; +use spfs::prelude::*; use spfs::storage::EntryType; use spfs::RemoteAddress; use spk_schema::foundation::ident_component::Component; diff --git a/crates/spk-cli/group3/src/cmd_export_test.rs b/crates/spk-cli/group3/src/cmd_export_test.rs index 667853d268..7739ed14e7 100644 --- a/crates/spk-cli/group3/src/cmd_export_test.rs +++ b/crates/spk-cli/group3/src/cmd_export_test.rs @@ -4,6 +4,7 @@ use rstest::rstest; use spfs::config::Remote; +use spfs::prelude::*; use spfs::RemoteAddress; use spk_build::{BinaryPackageBuilder, BuildSource}; use spk_schema::foundation::option_map; diff --git a/crates/spk-cli/group4/src/cmd_view.rs 
b/crates/spk-cli/group4/src/cmd_view.rs index 1ce6258d17..6c06349f04 100644 --- a/crates/spk-cli/group4/src/cmd_view.rs +++ b/crates/spk-cli/group4/src/cmd_view.rs @@ -12,7 +12,7 @@ use futures::{StreamExt, TryStreamExt}; use miette::{bail, Context, IntoDiagnostic, Result}; use serde::Serialize; use spfs::find_path::ObjectPathEntry; -use spfs::graph::Object; +use spfs::graph::{HasKind, ObjectKind}; use spfs::io::Pluralize; use spfs::Digest; use spk_cli_common::with_version_and_build_set::WithVersionSet; @@ -275,10 +275,9 @@ impl View { let mut layers_that_contain_filepath = BTreeMap::new(); let mut stack_order: Vec<Digest> = Vec::new(); for pathlist in found.iter() { - let layer_digest = match pathlist - .iter() - .find(|item| matches!(item, ObjectPathEntry::Parent(Object::Layer(_)))) - { + let layer_digest = match pathlist.iter().find( + |item| matches!(item, ObjectPathEntry::Parent(o) if o.kind() == ObjectKind::Layer), + ) { Some(l) => l.digest()?, None => { return Err(spk_cli_common::Error::String( diff --git a/crates/spk-exec/src/exec.rs b/crates/spk-exec/src/exec.rs index 10a85f4b07..2592f64f05 100644 --- a/crates/spk-exec/src/exec.rs +++ b/crates/spk-exec/src/exec.rs @@ -9,7 +9,6 @@ use async_stream::try_stream; use futures::Stream; use relative_path::RelativePathBuf; use spfs::encoding::Digest; -use spfs::graph::Object; use spfs::prelude::*; use spfs::tracking::Entry; use spk_schema::foundation::format::{FormatIdent, FormatOptionMap}; @@ -41,19 +40,20 @@ impl ResolvedLayers { pub fn iter_entries( &self, ) -> impl Stream<Item = Result<(RelativePathBuf, Entry, &ResolvedLayer)>> + '_ { + use spfs::graph::object::Enum; try_stream! { for resolved_layer in self.0.iter() { let manifest = match &*resolved_layer.repo { RepositoryHandle::SPFS(repo) => { let object = repo.read_object(resolved_layer.digest).await?; - match object { - Object::Layer(obj) => { - match repo.read_object(obj.manifest).await?
{ - Object::Manifest(obj) => obj, + match object.into_enum() { + Enum::Layer(obj) => { + match repo.read_object(*obj.manifest()).await?.into_enum() { + Enum::Manifest(obj) => obj, _ => continue, } } - Object::Manifest(obj) => obj, + Enum::Manifest(obj) => obj, _ => continue, } } diff --git a/crates/spk-launcher/src/main.rs b/crates/spk-launcher/src/main.rs index 0b2ba71c5b..b62f8cf2b4 100644 --- a/crates/spk-launcher/src/main.rs +++ b/crates/spk-launcher/src/main.rs @@ -214,7 +214,7 @@ impl<'a> Dynamic<'a> { unreachable!(); } - let config = spfs::load_config().expect("loaded spfs config"); + let config = spfs::get_config().expect("loaded spfs config"); let local_repo = config .get_opened_local_repository() .await @@ -230,7 +230,11 @@ impl<'a> Dynamic<'a> { SPFS_TAG_SUBDIR, bin_tag.to_string_lossy(), ); - match remote_repo.read_ref(&spfs_tag).await { + match remote_repo + .read_ref(&spfs_tag) + .await + .map(spfs::graph::Object::into_platform) + { Err(spfs::Error::UnknownReference(_)) => { bail!( "Unable to resolve ${} == \"{}\"", @@ -239,8 +243,8 @@ impl<'a> Dynamic<'a> { ); } Err(err) => bail!(err.to_string()), - Ok(spfs::graph::Object::Platform(platform)) => { - if platform.stack.is_empty() { + Ok(Some(platform)) => { + if !platform.iter_bottom_up().any(|_| true) { bail!("Unexpected empty platform stack"); } @@ -276,7 +280,7 @@ impl<'a> Dynamic<'a> { .wrap_err("process replaced")?; unreachable!(); } - Ok(obj) => bail!("Expected platform object from spfs; found: {}", obj), + Ok(None) => bail!("Expected platform object from spfs"), } } } diff --git a/crates/spk-schema/crates/ident/src/request.rs b/crates/spk-schema/crates/ident/src/request.rs index 0cbb3d6c64..93c52adb76 100644 --- a/crates/spk-schema/crates/ident/src/request.rs +++ b/crates/spk-schema/crates/ident/src/request.rs @@ -1035,7 +1035,7 @@ pub fn is_false(value: &bool) -> bool { } /// A deserializable name and optional value where -/// the value it identified by it's position following +/// the value is identified by its position following /// a forward slash (eg: `/`) pub struct NameAndValue<Name>(pub Name, pub Option<String>) where diff --git a/crates/spk-solve/crates/package-iterator/src/package_iterator.rs b/crates/spk-solve/crates/package-iterator/src/package_iterator.rs index 7637b3c841..9be833d0e5 100644 --- a/crates/spk-solve/crates/package-iterator/src/package_iterator.rs +++ b/crates/spk-solve/crates/package-iterator/src/package_iterator.rs @@ -526,7 +526,7 @@ impl SortedBuildIterator { let mut changes: HashMap = HashMap::new(); for (build, _) in self.builds.iter().flat_map(|hm| hm.values()) { - // Skip this if it's '/src' build because '/src' builds + // Skip this if it's a '/src' build because '/src' builds // won't use the build option values in their key, they // don't need to be looked at. They have a type of key // that always puts them last in the build order. diff --git a/crates/spk-solve/src/solver_test.rs b/crates/spk-solve/src/solver_test.rs index 891bc8de03..24b558be64 100644 --- a/crates/spk-solve/src/solver_test.rs +++ b/crates/spk-solve/src/solver_test.rs @@ -1304,7 +1304,7 @@ async fn test_solver_initial_request_impossible_masks_embedded_package_solution( // - the solver will find a solution using the embedded package init_logging(); - // Needs a repo with an embedded package, it's parent package, and + // Needs a repo with an embedded package, its parent package, and // a non-embedded different version of the same package name as // the embedded package.
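The rewrites in this stretch all migrate off direct `graph::Object` pattern matching and onto the new accessors: kind checks via `HasKind`, `Object::into_platform` for narrowing to an `Option`, and `to_enum()`/`into_enum()` for exhaustive walks. A sketch of a traversal in the new style, echoing the `spk du` loop above; the `total_size` helper is hypothetical, and the shape of the nodes yielded by `walk()` (an `entry` field carrying a `size`) is an assumption:

```rust
use spfs::graph::object::Enum;
use spfs::prelude::*;

/// Hypothetical helper: sum the recorded entry sizes reachable from
/// a digest by walking platforms -> layers -> manifests.
async fn total_size(
    repo: &spfs::storage::RepositoryHandle,
    digest: spfs::Digest,
) -> spfs::Result<u64> {
    let mut total = 0;
    let mut pending = vec![repo.read_object(digest).await?];
    while let Some(object) = pending.pop() {
        match object.to_enum() {
            Enum::Platform(platform) => {
                // platforms contribute their whole layer stack
                for digest in platform.iter_bottom_up() {
                    pending.push(repo.read_object(*digest).await?);
                }
            }
            Enum::Layer(layer) => {
                // a layer points at exactly one manifest
                pending.push(repo.read_object(*layer.manifest()).await?);
            }
            Enum::Manifest(manifest) => {
                let tracking = manifest.to_tracking_manifest();
                let root = tracking.take_root();
                let mut walked = root.walk();
                while let Some(node) = walked.next() {
                    // assumed: each walked node exposes its tracking entry
                    total += node.entry.size;
                }
            }
            // blob sizes are accounted for by their manifest entries
            Enum::Blob(_) => (),
        }
    }
    Ok(total)
}
```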
let repo = make_repo!( @@ -1986,7 +1986,7 @@ async fn test_solver_component_availability(mut solver: Solver) { #[rstest] #[tokio::test] async fn test_solver_component_requirements(mut solver: Solver) { - // test when a component has it's own list of requirements + // test when a component has its own list of requirements // - the requirements are added to the existing set of requirements // - the additional requirements are resolved // - even if it's a component that's only used by the one that was requested @@ -2067,7 +2067,7 @@ async fn test_solver_component_requirements_extending(mut solver: Solver) { #[rstest] #[tokio::test] async fn test_solver_component_embedded(mut solver: Solver) { - // test when a component has it's own list of embedded packages + // test when a component has its own list of embedded packages // - the embedded package is immediately selected // - it must be compatible with any previous requirements diff --git a/crates/spk-storage/src/fixtures.rs b/crates/spk-storage/src/fixtures.rs index f10cda2b8e..ec84068f10 100644 --- a/crates/spk-storage/src/fixtures.rs +++ b/crates/spk-storage/src/fixtures.rs @@ -75,9 +75,7 @@ impl std::ops::Deref for TempRepo { /// Returns an empty spfs layer object for easy testing pub fn empty_layer() -> spfs::graph::Layer { - spfs::graph::Layer { - manifest: Default::default(), - } + spfs::graph::Layer::new(spfs::encoding::EMPTY_DIGEST.into()) } /// Returns the digest for an empty spfs layer. @@ -132,11 +130,11 @@ where let empty_manifest = spfs::graph::Manifest::default(); let empty_layer = empty_layer(); spfs_repo - .write_object(&empty_layer.into()) + .write_object(&empty_layer) .await .expect("failed to save empty layer to spfs repo"); spfs_repo - .write_object(&empty_manifest.into()) + .write_object(&empty_manifest) .await .expect("failed to save empty manifest to spfs repo"); assert_eq!(written, spfs::encoding::EMPTY_DIGEST.into()); diff --git a/crates/spk-storage/src/storage/runtime.rs b/crates/spk-storage/src/storage/runtime.rs index e6a89bac3c..f6f4987a51 100644 --- a/crates/spk-storage/src/storage/runtime.rs +++ b/crates/spk-storage/src/storage/runtime.rs @@ -491,7 +491,7 @@ async fn find_layer_by_filename>(path: S) -> Result>( let layers = spfs::resolve_stack_to_layers(&runtime.status.stack, Some(&repo)).await?; for layer in layers.iter().rev() { let manifest = repo - .read_manifest(layer.manifest) + .read_manifest(*layer.manifest()) .await? 
.to_tracking_manifest(); diff --git a/cspell.json b/cspell.json index e9dbd59dd7..b89b1d4312 100644 --- a/cspell.json +++ b/cspell.json @@ -99,6 +99,7 @@ "Errno", "FETCHCONTENT", "FIVZIKA", + "FLATC", "FMQA", "FNGMD", "FOPEN", @@ -364,7 +365,10 @@ "filesequence", "filesystems", "filesytem", + "flatbuf", + "flatbuffer", "flatbuffers", + "flatc", "flifetime", "fowner", "freetype", @@ -373,6 +377,7 @@ "fsname", "fullpath", "functools", + "funzip", "fuser", "fusermount", "gdkpixbuf", @@ -512,6 +517,7 @@ "noclobber", "nodev", "noexec", + "nofile", "nonminimal", "nonperm", "norc", @@ -688,6 +694,7 @@ "stdfs", "stdlib", "stepback", + "struct", "styfle", "subdir", "subsecs", diff --git a/rpmbuild.Dockerfile b/rpmbuild.Dockerfile index 38f9866a48..680d45a9fb 100644 --- a/rpmbuild.Dockerfile +++ b/rpmbuild.Dockerfile @@ -8,9 +8,18 @@ RUN yum install -y \ && yum clean all RUN ln -s cmake3 /usr/bin/cmake - -# Rust Toolchain +# install rustup for the cargo tool and compile toolchain RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh /dev/stdin -y --default-toolchain=1.76.0 +# install protobuf compiler (protoc command) +ENV PB_REL="https://github.com/protocolbuffers/protobuf/releases" +RUN curl --proto '=https' --tlsv1.2 -sSfLO ${PB_REL}/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && \ + unzip -o protoc-3.15.8-linux-x86_64.zip -d "/usr" && \ + rm protoc-3.15.8-linux-x86_64.zip +RUN chmod +x /usr/bin/protoc +# install flatbuffers compiler (flatc command) +ENV FB_REL=https://github.com/google/flatbuffers/releases/ +RUN curl --proto '=https' --tlsv1.2 -sSfL ${FB_REL}/download/v23.5.26/Linux.flatc.binary.g++-10.zip | funzip > /usr/bin/flatc +RUN chmod +x /usr/bin/flatc ENV PATH $PATH:/root/.cargo/bin # Protobuf compiler (more recent than yum package) diff --git a/spfs.spec b/spfs.spec index 558497f7cb..d480874533 100644 --- a/spfs.spec +++ b/spfs.spec @@ -13,6 +13,7 @@ BuildRequires: make BuildRequires: cmake3 BuildRequires: openssl-devel BuildRequires: fuse3-devel +BuildRequires: flatbuffers-compiler BuildRequires: m4 Requires: fuse3 Requires: rsync diff --git a/spk.spec b/spk.spec index 964afe8df1..0ce321aee9 100644 --- a/spk.spec +++ b/spk.spec @@ -17,6 +17,8 @@ BuildRequires: fuse3-devel BuildRequires: m4 BuildRequires: cmake3 BuildRequires: make +# not available in CentOS +# BuildRequires: flatbuffers-compiler Requires: bash Requires: fuse3 Requires: rsync
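The packaging changes above, the `funzip` install of `flatc` in rpmbuild.Dockerfile and the `flatbuffers-compiler` BuildRequires in the spec files, all exist to put the FlatBuffers compiler on the build PATH so that the workspace's new `spfs-proto` crate can generate its Rust bindings from a schema at build time via the `flatc-rust` helper crate. A minimal sketch of such a `build.rs`, assuming a hypothetical `schema/spfs.fbs` path rather than the crate's actual layout:

```rust
// build.rs - sketch of a flatc-rust driven code generation step; the
// schema path and file name here are assumptions for illustration
use std::path::Path;

fn main() {
    // regenerate whenever the schema changes
    println!("cargo:rerun-if-changed=schema/spfs.fbs");
    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR is set by cargo");
    // requires the `flatc` binary on PATH, which is exactly what the
    // Dockerfile and spec file changes above provide
    flatc_rust::run(flatc_rust::Args {
        inputs: &[Path::new("schema/spfs.fbs")],
        out_dir: Path::new(&out_dir),
        ..Default::default()
    })
    .expect("flatc code generation failed");
}
```

Keeping generation in `build.rs` (rather than checking generated sources in) is why `flatc` has to be installed in every build environment, including the RPM build container and the CentOS case where the system package is unavailable and the binary release is fetched instead.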