From 72f41649c50fe639ebd73fb3f07b0480c055e7af Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Mon, 24 Jul 2023 11:13:53 -0700 Subject: [PATCH] Adds extra key-value string data storage to runtime objects Allows external processes to store data in durable runtimes. Signed-off-by: David Gilligan-Cook --- Cargo.lock | 3 + crates/spfs-cli/main/Cargo.toml | 5 + crates/spfs-cli/main/src/cmd_run.rs | 101 +++++++++++- crates/spfs-cli/main/src/cmd_run_test.rs | 196 +++++++++++++++++++++++ crates/spfs-cli/main/src/cmd_shell.rs | 5 + crates/spfs-cli/main/src/fixtures.rs | 10 ++ crates/spfs/src/runtime/storage.rs | 18 ++- crates/spfs/src/runtime/storage_test.rs | 36 +++++ docs/spfs/usage.md | 10 ++ 9 files changed, 382 insertions(+), 2 deletions(-) create mode 100644 crates/spfs-cli/main/src/cmd_run_test.rs create mode 100644 crates/spfs-cli/main/src/fixtures.rs diff --git a/Cargo.lock b/Cargo.lock index dab936b680..fd07116323 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3253,10 +3253,13 @@ dependencies = [ "number_prefix", "procfs", "relative-path", + "rstest", "serde_json", + "serde_yaml 0.8.25", "spfs", "spfs-cli-common", "strum", + "tempfile", "tokio", "tokio-stream", "tonic", diff --git a/crates/spfs-cli/main/Cargo.toml b/crates/spfs-cli/main/Cargo.toml index 5c93381437..7082d5bac7 100644 --- a/crates/spfs-cli/main/Cargo.toml +++ b/crates/spfs-cli/main/Cargo.toml @@ -27,6 +27,7 @@ nix = { workspace = true } number_prefix = "*" # we hope to match versions with indicatif relative-path = "1.3" serde_json = { workspace = true } +serde_yaml = "0.8.17" spfs = { path = "../../spfs" } spfs-cli-common = { path = "../common" } strum = { workspace = true, features = ["derive"] } @@ -47,3 +48,7 @@ features = [ "Win32_System_SystemInformation", "Win32_System_Threading", ] + +[dev-dependencies] +rstest = { workspace = true } +tempfile = { workspace = true } diff --git a/crates/spfs-cli/main/src/cmd_run.rs b/crates/spfs-cli/main/src/cmd_run.rs index a699dd612c..184918a7b8 100644 --- a/crates/spfs-cli/main/src/cmd_run.rs +++ b/crates/spfs-cli/main/src/cmd_run.rs @@ -2,16 +2,103 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/imageworks/spk +use std::collections::BTreeMap; use std::ffi::OsString; +use std::io; use std::time::Instant; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; use clap::{ArgGroup, Args}; use spfs::runtime::BindMount; use spfs::storage::FromConfig; use spfs::tracking::{EnvSpec, EnvSpecItem}; use spfs_cli_common as cli; +#[cfg(test)] +#[path = "./cmd_run_test.rs"] +mod cmd_run_test; +#[cfg(test)] +#[path = "./fixtures.rs"] +mod fixtures; + +#[derive(Args, Clone, Debug)] +pub struct ExtraData { + /// Adds extra key-value string data to the new runtime. + /// + /// This allows external processes to store arbitrary data in the + /// runtimes they create. This is most useful with durable runtimes. + /// The data can be retrieved by parsing the output of `spfs runtime + /// info` + /// + /// Extra data is key/value string pairs separated by either an + /// equals sign or colon (--extra-data name=value --extra-data + /// other:value). Additionally, many pair of extra data can be + /// specified at once in yaml or json format (--extra-data '{name: + /// value, other: value}'). + /// + /// Extra data can also be given in a json or yaml file via the + /// --extra-data--file flag. If given, --extra-data will supersede + /// anything in the options file(s). + #[clap(long, value_name = "KEY:VALUE")] + pub extra_data: Vec, + + /// Specify extra key-value data from a json or yaml file (see --extra-data) + #[clap(long)] + pub extra_data_file: Vec, +} + +type KeyValuePair = (String, String); + +impl ExtraData { + // Returns a list of extra data key-value pairs gathered from all + // the extra data related command line arguments. The same keys, + // and values, can appear multiple times in the list if specified + // multiple times in various command line arguments. + pub fn get_data(&self) -> Result> { + let mut data: Vec = Vec::new(); + + for filename in self.extra_data_file.iter() { + let reader: Box = if Ok("-".to_string()) + == filename.clone().into_os_string().into_string() + { + // Treat '-' as "read from stdin" + Box::new(io::stdin()) + } else { + Box::new( + std::fs::File::open(filename) + .with_context(|| format!("Failed to open extra data file: {filename:?}"))?, + ) + }; + let extra_data: BTreeMap = serde_yaml::from_reader(reader) + .with_context(|| { + format!("Failed to parse as extra data key-value pairs: {filename:?}") + })?; + data.extend(extra_data); + } + + for pair in self.extra_data.iter() { + let pair = pair.trim(); + if pair.starts_with('{') { + let given: BTreeMap = serde_yaml::from_str(pair) + .context("--extra-data value looked like yaml, but could not be parsed")?; + data.extend(given); + continue; + } + + let (name, value) = pair + .split_once('=') + .or_else(|| pair.split_once(':')) + .ok_or_else(|| { + anyhow!("Invalid option: -extra-data {pair} (should be in the form name=value)") + })?; + + data.push((name.to_string(), value.to_string())); + } + + Ok(data) + } +} + /// Run a program in a configured spfs environment #[derive(Debug, Args)] #[clap(group( @@ -57,9 +144,13 @@ pub struct CmdRun { /// '/spfs/MOUNTPOINT' under /spfs. Mounts are specified as /// key/value pairs separated by either an equals sign or colon /// (--extra-mount path=mountpoint --extra-mount path:mountpoint). + #[clap(long, value_name = "SRC_PATH:MOUNTPOINT_UNDER_SPFS")] pub extra_mount: Option>, + #[clap(flatten)] + pub external_data: ExtraData, + /// The tag or id of the desired runtime /// /// Use '-' to request an empty environment @@ -159,6 +250,14 @@ impl CmdRun { tracing::debug!("with extra mounts: {extras:?}"); } + let data = self.external_data.get_data()?; + if !data.is_empty() { + tracing::debug!("with extra data: {data:?}"); + for (key, value) in data { + runtime.insert_extra_data(key, value); + } + } + let start_time = Instant::now(); runtime.config.mount_backend = config.filesystem.backend; runtime.config.secondary_repositories = config.get_secondary_runtime_repositories(); diff --git a/crates/spfs-cli/main/src/cmd_run_test.rs b/crates/spfs-cli/main/src/cmd_run_test.rs new file mode 100644 index 0000000000..c0bd35ec4b --- /dev/null +++ b/crates/spfs-cli/main/src/cmd_run_test.rs @@ -0,0 +1,196 @@ +// Copyright (c) Sony Pictures Imageworks, et al. +// SPDX-License-Identifier: Apache-2.0 +// https://github.com/imageworks/spk + +use std::fs; + +use rstest::rstest; + +use super::fixtures::*; +use super::ExtraData; + +// Test - --extra-data field1:value1 --extra-data field2:value2 +// Test - --extra-data field1=value1 --extra-data field2=value2 +// Test - --extra-data field1:value1 --extra-data field2=value2 +// Test - --extra-data field1=value1 --extra-data field2:value2 +// Test - --extra-data '{ field1: value1, field2: value2 }' +#[rstest] +#[case(vec!["field1:value1".to_string(), "field2:value2".to_string()])] +#[case(vec!["field1=value1".to_string(), "field2=value2".to_string()])] +#[case(vec!["field1:value1".to_string(), "field2=value2".to_string()])] +#[case(vec!["field1=value1".to_string(), "field2:value2".to_string()])] +#[case(vec!["{field1: value1, field2: value2}".to_string()])] +fn test_cmd_run_create_extra_data(#[case] values: Vec) { + // Setup some data for the key value pairs + let field1 = "field1".to_string(); + let field2 = "field2".to_string(); + let value1 = "value1".to_string(); + let value2 = "value2".to_string(); + + let filenames = Vec::new(); + + // Test - --extra-data field1:value1 --extra-data field2:value2 + let data = ExtraData { + extra_data: values, + extra_data_file: filenames, + }; + + let result = data + .get_data() + .expect("unable to parse cmd_run --extra-data values"); + assert!(!result.is_empty()); + assert!(result.len() == 2); + assert!(result[0] == (field1, value1)); + assert!(result[1] == (field2, value2)); +} + +#[rstest] +#[should_panic] +#[case(vec!["field1,value1".to_string(), "field2/value2".to_string()])] +#[should_panic] +#[case(vec!["field1 value1".to_string(), "field2 value2".to_string()])] +#[should_panic] +#[case(vec!["{field1: value1, field2: - value2 - value1}".to_string()])] +fn test_cmd_run_create_extra_data_invalid(#[case] invalid_values: Vec) { + let filenames = Vec::new(); + + // Test - --extra-data with_an_invalid_argument_value + let data = ExtraData { + extra_data: invalid_values, + extra_data_file: filenames, + }; + + let _result = data + .get_data() + .expect("unable to parse cmd_run --extra-data values"); +} + +#[rstest] +fn test_cmd_run_create_extra_data_from_file(tmpdir: tempfile::TempDir) { + // Setup some data for the key value pairs + let field1 = "field1".to_string(); + let field2 = "field2".to_string(); + let value1 = "value1".to_string(); + let value2 = "value2".to_string(); + let extra_data = format!("{field1}: {value1}\n{field2}: {value2}\n"); + + let filename = tmpdir.path().join("filename.yaml"); + fs::write(filename.clone(), extra_data) + .expect("Unable to write extra data to file during setup"); + let filenames = vec![filename]; + + let values = Vec::new(); + + // Test - --extra-data-file filename.yaml + let data = ExtraData { + extra_data: values, + extra_data_file: filenames, + }; + + let result = data + .get_data() + .expect("unable to parse cmd_run --extra-data values"); + assert!(!result.is_empty()); + assert!(result.len() == 2); + assert!(result[0] == (field1, value1)); + assert!(result[1] == (field2, value2)); +} + +#[rstest] +#[should_panic] +fn test_cmd_run_create_extra_data_from_file_not_exist(tmpdir: tempfile::TempDir) { + // Setup a file name that does not exist + let filename = tmpdir.path().join("nosuchfile.yaml"); + let filenames = vec![filename]; + + let values = Vec::new(); + + // Test - --extra-data-file nosuchfile.yaml + let data = ExtraData { + extra_data: values, + extra_data_file: filenames, + }; + + let _result = data + .get_data() + .expect("unable to parse cmd_run --extra-data values"); +} + +#[rstest] +#[should_panic] +fn test_cmd_run_create_extra_data_from_file_invalid_keyvalues(tmpdir: tempfile::TempDir) { + // Setup some data for the key value pairs + let field1 = "field1".to_string(); + let field2 = "field2".to_string(); + let value1 = "value1".to_string(); + let value2 = "value2".to_string(); + let extra_data = format!("{field1}: {value1}\n{field2}:\n - {value2}\n - {value1}\n"); + + let filename = tmpdir.path().join("filename.yaml"); + fs::write(filename.clone(), extra_data) + .expect("Unable to write extra data to file during setup"); + let filenames = vec![filename]; + + let values = Vec::new(); + + // Test - --extra-data-file filename.yaml that contains more than key-value string pairs + let data = ExtraData { + extra_data: values, + extra_data_file: filenames, + }; + + let _result = data + .get_data() + .expect("unable to parse cmd_run --extra-data values"); +} + +#[rstest] +fn test_cmd_run_create_extra_data_all(tmpdir: tempfile::TempDir) { + // Setup some data for the key value pairs + let field1 = "field1".to_string(); + let field2 = "field2".to_string(); + let field3 = "field3".to_string(); + let field4 = "field4".to_string(); + + let value1 = "value1".to_string(); + let value2 = "value2".to_string(); + let value3 = "value3".to_string(); + let value4 = "value4".to_string(); + + let values: Vec = vec![ + format!("{field1}:{value1}"), + format!("{field2}={value2}"), + "{field3: value3, field4: value4}".to_string(), + ]; + + let extra_data = format!("{field4}: {value4}\n{field3}: {value3}\n"); + + let filename = tmpdir.path().join("filename.yaml"); + fs::write(filename.clone(), extra_data) + .expect("Unable to write extra data to file during setup"); + let filenames = vec![filename]; + + // Test - --extra-data field1:value1 --extra-data field2=value2 + // --extra-data '{ field1: value1, field2: value2 }' + // --extra-data-file filename.yaml + let data = ExtraData { + extra_data: values, + extra_data_file: filenames, + }; + + let result = data + .get_data() + .expect("unable to parse cmd_run --extra-data values"); + + assert!(!result.is_empty()); + assert!(result.len() == 6); + // from --extra-data-file filename.taml + assert!(result[0] == (field3.clone(), value3.clone())); + assert!(result[1] == (field4.clone(), value4.clone())); + // from --extra-data field1:value1 extra-data field2:value2 + assert!(result[2] == (field1, value1)); + assert!(result[3] == (field2, value2)); + // from --extra-data '{field3: value3, field4: value4}' + assert!(result[4] == (field3, value3)); + assert!(result[5] == (field4, value4)); +} diff --git a/crates/spfs-cli/main/src/cmd_shell.rs b/crates/spfs-cli/main/src/cmd_shell.rs index 613c14a120..7ddeac0bc9 100644 --- a/crates/spfs-cli/main/src/cmd_shell.rs +++ b/crates/spfs-cli/main/src/cmd_shell.rs @@ -8,6 +8,7 @@ use spfs::runtime::BindMount; use spfs_cli_common as cli; use super::cmd_run; +use super::cmd_run::ExtraData; /// Enter a subshell in a configured spfs environment #[derive(Debug, Args)] @@ -54,6 +55,9 @@ pub struct CmdShell { #[clap(long, value_name = "SRCPATH:SPFSLOCATION")] pub extra_mount: Option>, + #[clap(flatten)] + pub external_data: ExtraData, + /// The tag or id of the desired runtime /// /// Use '-' or nothing to request an empty environment @@ -74,6 +78,7 @@ impl CmdShell { reference: self.reference.clone(), keep_runtime: self.keep_runtime, extra_mount: self.extra_mount.clone(), + external_data: self.external_data.clone(), command: Default::default(), }; run_cmd.run(config).await diff --git a/crates/spfs-cli/main/src/fixtures.rs b/crates/spfs-cli/main/src/fixtures.rs new file mode 100644 index 0000000000..96eee98c40 --- /dev/null +++ b/crates/spfs-cli/main/src/fixtures.rs @@ -0,0 +1,10 @@ +use rstest::fixture; +use tempfile::TempDir; + +#[fixture] +pub fn tmpdir() -> TempDir { + tempfile::Builder::new() + .prefix("spfs-test-") + .tempdir() + .expect("failed to create dir for test") +} diff --git a/crates/spfs/src/runtime/storage.rs b/crates/spfs/src/runtime/storage.rs index db9aa6a776..cd2a6f6e1c 100644 --- a/crates/spfs/src/runtime/storage.rs +++ b/crates/spfs/src/runtime/storage.rs @@ -4,7 +4,7 @@ //! Definition and persistent storage of runtimes. -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; #[cfg(unix)] use std::os::unix::fs::{MetadataExt, PermissionsExt}; #[cfg(windows)] @@ -312,6 +312,11 @@ pub struct Data { pub status: Status, /// Parameters for this runtime's execution (should not change over time) pub config: Config, + /// A collection of extra key-value string data for libraries and + /// tools external to spfs to use to store arbitrary data in the + /// runtimes they create. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub(crate) extra_data: HashMap, } impl Data { @@ -321,6 +326,7 @@ impl Data { status: Default::default(), author: Default::default(), config: Default::default(), + extra_data: HashMap::new(), } } @@ -473,6 +479,16 @@ impl Runtime { self.data.keep_runtime() } + /// Storage an arbitrary key-value string pair in the runtime + pub fn insert_extra_data(&mut self, key: String, value: String) -> Option { + self.data.extra_data.insert(key, value) + } + + /// Get the string data stored as extra data under the given key. + pub fn extra_data(&self, key: &String) -> Option<&String> { + self.data.extra_data.get(key) + } + /// Reset parts of the runtime's state so it can be reused in /// another process run. Note: this saves the runtime to storage. pub async fn reinit_for_reuse(&mut self) -> Result<()> { diff --git a/crates/spfs/src/runtime/storage_test.rs b/crates/spfs/src/runtime/storage_test.rs index b0ebb375f7..aaa2469736 100644 --- a/crates/spfs/src/runtime/storage_test.rs +++ b/crates/spfs/src/runtime/storage_test.rs @@ -93,6 +93,42 @@ async fn test_storage_create_runtime(tmpdir: tempfile::TempDir) { .is_err()); } +#[rstest] +#[tokio::test] +async fn test_storage_runtime_with_extra_data(tmpdir: tempfile::TempDir) { + let root = tmpdir.path().to_string_lossy().to_string(); + let repo = crate::storage::RepositoryHandle::from( + crate::storage::fs::FSRepository::create(root) + .await + .unwrap(), + ); + let storage = Storage::new(repo); + + let keep_runtime = false; + let extra_mounts = None; + let mut runtime = storage + .create_named_runtime("test-with-extra-data", keep_runtime, extra_mounts) + .await + .expect("failed to create runtime in storage"); + + // Test - insert new data + let key = "somefield".to_string(); + let value = "somevalue".to_string(); + assert!(runtime.insert_extra_data(key.clone(), value).is_none()); + + // Test - insert over existing data + let value2 = "someothervalue".to_string(); + assert!(runtime + .insert_extra_data(key.clone(), value2.clone()) + .is_some()); + + // Test - retrieve data + let result = runtime.extra_data(&key); + assert!(result.is_some()); + + assert!(value2 == *result.unwrap()); +} + #[rstest] #[tokio::test] async fn test_storage_remove_runtime(tmpdir: tempfile::TempDir) { diff --git a/docs/spfs/usage.md b/docs/spfs/usage.md index a4dd8f3623..eba748eee3 100644 --- a/docs/spfs/usage.md +++ b/docs/spfs/usage.md @@ -110,3 +110,13 @@ The spfs runtime uses a temporary, in-memory filesystem, which means that large ## Extra bind mounts Spfs supports adding extra bind mounts over the top directories under /spfs. You can use the `--extra-mount ` argument with `spfs run ...` to bind mount the `SRCS_PATH` at the `MOUNTPOINT_UNDER_SPFS` location. If the mountpoint location doesn't exist under /spfs, spfs will create it. You do not have to start the mountpoint path with /spfs, it will be added if it is not present in the given path. + +## Storing external program data + +spfs runtimes can be created by using `spfs run` with a stack of layers. However most of the time an external program above spfs will be used to create the `spfs run` command, or a spfs runtime via the `spfs` api (`spk` does this when you use `spk`). + +Sometimes the external program would benefit from storing some of its own data in the spfs runtime environment so it can be retrieved later, e.g. running `spk info` inside a spfs runtime generated by `spk` accessing the original spk solver related data. + +spfs supports this by allowing external processes, or programs that uses the spfs library, to store some of their own data by using the `--extra-data ` and `--extra-data-file ` arguments to `spfs run...`. The keys and values must be strings. Complex structured data should be encoded into strings by the external program. The key-value data is stored internally in spfs and can be accessed by parsing the json output of `spfs runtime info` inside the /spfs environment. + +External data is useful for tools with commands often run inside and outside a /spfs environment that rely on data normally lost when spfs operations are performed, e.g. spk and others. This is particularly useful when working with durable runtimes (`--keep-runtime`).