Skip to content

Commit

Permalink
Adds external key-value string data storage to runtime via ExternalDa…
Browse files Browse the repository at this point in the history
…taLayer objects.

Adds --external-data argument to spfs run/shell, --get and --get-all
arguments to spfs info, spfs runtime info, and adds support for
reading the value from an ExternalDataLayer using spfs read.

Signed-off-by: David Gilligan-Cook <dcook@imageworks.com>
  • Loading branch information
dcookspi committed Oct 31, 2023
1 parent fb7695e commit 1c75710
Show file tree
Hide file tree
Showing 29 changed files with 1,258 additions and 11 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/spfs-cli/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ sentry = { workspace = true, optional = true }
sentry-anyhow = { workspace = true, optional = true }
sentry-tracing = { workspace = true, optional = true }
serde_json = { version = "1.0.57", optional = true }
serde_yaml = { workspace = true }
spfs = { path = "../../spfs" }
strip-ansi-escapes = { workspace = true, optional = true }
syslog-tracing = "0.2.0"
Expand Down
54 changes: 53 additions & 1 deletion crates/spfs-cli/common/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ use std::panic::catch_unwind;
#[cfg(feature = "sentry")]
use std::sync::Mutex;

use anyhow::Error;
use anyhow::{Error, Result};
#[cfg(feature = "sentry")]
use once_cell::sync::OnceCell;
use spfs::io::Pluralize;
use spfs::storage::LocalRepository;
use tracing_subscriber::prelude::*;

Expand Down Expand Up @@ -416,6 +417,57 @@ impl Logging {
}
}

/// Command line flags for viewing external data in a runtime
///
/// Flattened into commands that can report on the active runtime. The
/// field doc comments below are used by clap as the `--help` text.
#[derive(Debug, Clone, clap::Args)]
pub struct ExternalDataViewing {
    /// Output the data value for the given external data key(s) from
    /// the active runtime. Each value is output on its own line
    /// without its key.
    #[clap(long, alias = "external_data")]
    pub get: Option<Vec<String>>,

    /// Output all the external data keys and values from the active
    /// runtime as a yaml dictionary
    #[clap(long, alias = "all_external_data")]
    pub get_all: bool,
}

impl ExternalDataViewing {
    /// Print the external data selected by the command line flags to stdout.
    ///
    /// When `get_all` is set, every key and value in the runtime is printed
    /// as yaml and any `get` keys are ignored. Otherwise each key in `get`
    /// is looked up in turn and its value printed on its own line; a key
    /// with no stored value logs a warning and prints an empty line, so the
    /// output always has one line per requested key. If neither flag was
    /// given nothing is printed.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading the runtime's external data or
    /// from serializing it to yaml.
    pub async fn print_data(&self, runtime: &spfs::runtime::Runtime) -> Result<()> {
        if self.get_all {
            let data = runtime.all_external_data().await?;
            let keys: Vec<String> = data.keys().map(ToString::to_string).collect();
            let num_keys = keys.len();
            tracing::debug!(
                "{num_keys} external data {}: {}",
                "key".pluralize(num_keys),
                keys.join(", ")
            );
            let rendered = serde_yaml::to_string(&data)?;
            println!("{}", rendered);
            return Ok(());
        }

        if let Some(keys) = self.get.as_ref() {
            tracing::debug!("--get these keys: {}", keys.join(", "));
            for key in keys {
                if let Some(value) = runtime.external_data(key).await? {
                    tracing::debug!("{key} = {value}");
                    println!("{value}");
                } else {
                    tracing::warn!("No external data stored under: {key}");
                    // Keep the output aligned with the requested keys.
                    println!();
                }
            }
        }

        Ok(())
    }
}

/// Trait all spfs cli command parsers must implement to provide the
/// name of the spfs command that has been parsed. This method will be
/// called when configuring sentry.
Expand Down
2 changes: 1 addition & 1 deletion crates/spfs-cli/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ pub mod __private {
pub use {libc, spfs};
}

pub use args::{capture_if_relevant, CommandName, Logging, Render, Sync};
pub use args::{capture_if_relevant, CommandName, ExternalDataViewing, Logging, Render, Sync};
#[cfg(feature = "sentry")]
pub use args::{configure_sentry, shutdown_sentry};
5 changes: 5 additions & 0 deletions crates/spfs-cli/main/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ nix = { workspace = true }
number_prefix = "*" # we hope to match versions with indicatif
relative-path = "1.3"
serde_json = { workspace = true }
serde_yaml = { workspace = true }
spfs = { path = "../../spfs" }
spfs-cli-common = { path = "../common" }
strum = { workspace = true, features = ["derive"] }
Expand All @@ -49,3 +50,7 @@ features = [
"Win32_System_SystemInformation",
"Win32_System_Threading",
]

[dev-dependencies]
rstest = { workspace = true }
tempfile = { workspace = true }
16 changes: 16 additions & 0 deletions crates/spfs-cli/main/src/cmd_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ pub struct CmdInfo {
#[clap(flatten)]
logging: cli::Logging,

#[clap(flatten)]
external_data: cli::ExternalDataViewing,

/// Lists file sizes in human readable format
#[clap(long, short = 'H')]
human_readable: bool,
Expand Down Expand Up @@ -179,6 +182,15 @@ impl CmdInfo {
);
}
Object::Tree(_) | Object::Mask => println!("{obj:?}"),
Object::ExternalDataLayer(obj) => {
println!(
"{}:\n{}",
self.format_digest(obj.digest()?, repo).await?,
"external data layer:".green()
);
println!(" {} {}", "key:".bright_blue(), obj.key);
println!(" {} {:?}", "value:".bright_blue(), obj.value);
}
}
Ok(())
}
Expand All @@ -187,6 +199,10 @@ impl CmdInfo {
async fn print_global_info(&self, repo: &spfs::storage::RepositoryHandle) -> Result<()> {
let runtime = spfs::active_runtime().await?;

if self.external_data.get_all || self.external_data.get.is_some() {
return self.external_data.print_data(&runtime).await;
}

println!("{}", "Active Runtime:".green());
println!(" {}: {}", "id:".bright_blue(), runtime.name());
println!(
Expand Down
10 changes: 10 additions & 0 deletions crates/spfs-cli/main/src/cmd_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ impl CmdRead {
use spfs::graph::Object;
let blob = match item {
Object::Blob(blob) => blob,
Object::ExternalDataLayer(external_data_layer) => {
use spfs::graph::ExternalDataValue;
match &external_data_layer.value {
ExternalDataValue::String(value) => {
println!("{value}");
return Ok(0);
}
ExternalDataValue::Blob(digest) => repo.read_blob(*digest).await?,
}
}
_ => {
let path = match &self.path {
None => {
Expand Down
105 changes: 104 additions & 1 deletion crates/spfs-cli/main/src/cmd_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,105 @@
// SPDX-License-Identifier: Apache-2.0
// https://github.com/imageworks/spk

use std::collections::BTreeMap;
use std::ffi::OsString;
use std::io;
use std::time::Instant;

use anyhow::{Context, Result};
use anyhow::{anyhow, Context, Result};
use clap::{ArgGroup, Args};
use spfs::runtime::KeyValuePair;
use spfs::storage::FromConfig;
use spfs::tracking::EnvSpec;
use spfs_cli_common as cli;

#[cfg(test)]
#[path = "./cmd_run_test.rs"]
mod cmd_run_test;
#[cfg(test)]
#[path = "./fixtures.rs"]
mod fixtures;

// Command line flags for attaching external key-value data to a new
// runtime. The field doc comments below are used by clap as the
// `--help` text, so they are written for the end user.
#[derive(Args, Clone, Debug)]
pub struct ExternalData {
    /// Adds extra external key-value string data to the new runtime.
    ///
    /// This allows external processes to store arbitrary data in the
    /// runtimes they create. This is most useful with durable runtimes.
    /// The data can be retrieved by running `spfs runtime info` or
    /// `spfs info` with the `--get <KEY>` or `--get-all` arguments.
    ///
    /// External data is key/value string pairs separated by either an
    /// equals sign or colon (--external-data name=value --external-data
    /// other:value). Additionally, many pairs of external data can be
    /// specified at once in yaml or json format (--external-data '{name:
    /// value, other: value}').
    ///
    /// External data can also be given in a json or yaml file via the
    /// `--external-data-file <FILE>` argument. If given, `--external-data`
    /// will supersede anything given in external data files.
    ///
    /// If the same key is used more than once, the last key-value pair
    /// will override the earlier values for the same key.
    #[clap(long, value_name = "KEY:VALUE")]
    pub external_data: Vec<String>,

    /// Specify external extra key-value data from a json or yaml file
    /// (see --external-data). Use '-' to read the data from stdin.
    #[clap(long)]
    pub external_data_file: Vec<std::path::PathBuf>,
}

impl ExternalData {
    /// Returns a list of external data key-value pairs gathered from all
    /// the external data related command line arguments.
    ///
    /// Pairs read from the `--external-data-file` files come first, in
    /// the order the files were given, followed by the pairs from each
    /// `--external-data` argument. The same keys, and values, can appear
    /// multiple times in the list if specified multiple times in various
    /// command line arguments; no de-duplication is done here.
    ///
    /// # Errors
    ///
    /// Fails if an external data file cannot be opened or parsed as a
    /// mapping of strings, if a `--external-data` value that starts with
    /// `{` is not a valid yaml/json mapping of strings, or if a plain
    /// `--external-data` value contains neither `=` nor `:`.
    pub fn get_data(&self) -> Result<Vec<KeyValuePair>> {
        let mut data: Vec<KeyValuePair> = Vec::new();

        for filename in self.external_data_file.iter() {
            // Compare the path directly rather than cloning it into a String.
            let reader: Box<dyn io::Read> = if filename.as_os_str() == "-" {
                // Treat '-' as "read from stdin"
                Box::new(io::stdin())
            } else {
                Box::new(std::fs::File::open(filename).with_context(|| {
                    format!("Failed to open external data file: {filename:?}")
                })?)
            };
            let external_data: BTreeMap<String, String> = serde_yaml::from_reader(reader)
                .with_context(|| {
                    format!("Failed to parse as external data key-value pairs: {filename:?}")
                })?;
            data.extend(external_data);
        }

        for pair in self.external_data.iter() {
            let pair = pair.trim();
            if pair.starts_with('{') {
                // A leading brace means an inline yaml/json mapping that
                // may hold several pairs at once.
                let given: BTreeMap<String, String> = serde_yaml::from_str(pair)
                    .context("--external-data value looked like yaml, but could not be parsed")?;
                data.extend(given);
                continue;
            }

            // '=' takes precedence as the separator: the pair is split on
            // its first '=' if it has one, and only falls back to the
            // first ':' when it contains no '=' at all. This lets keys
            // contain ':' when the '=' form is used.
            let (name, value) = pair
                .split_once('=')
                .or_else(|| pair.split_once(':'))
                .ok_or_else(|| {
                    anyhow!(
                        "Invalid option: --external-data {pair} (should be in the form name=value)"
                    )
                })?;

            data.push((name.to_string(), value.to_string()));
        }

        Ok(data)
    }
}

/// Run a program in a configured spfs environment
#[derive(Debug, Args)]
#[clap(group(
Expand Down Expand Up @@ -51,6 +141,9 @@ pub struct CmdRun {
#[clap(long, value_name = "RUNTIME_NAME")]
pub rerun: Option<String>,

#[clap(flatten)]
pub external_data: ExternalData,

/// The tag or id of the desired runtime
///
/// Use '-' to request an empty environment
Expand Down Expand Up @@ -153,6 +246,16 @@ impl CmdRun {
tracing::debug!("with extra mounts: {extras:?}");
}

let data = self.external_data.get_data()?;
if !data.is_empty() {
tracing::debug!("with extra external data: {data:?}");
for (key, value) in data {
runtime
.add_external_data(key, value, config.filesystem.external_data_size_limit)
.await?;
}
}

let start_time = Instant::now();
runtime.config.mount_backend = config.filesystem.backend;
runtime.config.secondary_repositories = config.get_secondary_runtime_repositories();
Expand Down
Loading

0 comments on commit 1c75710

Please sign in to comment.