From 920beb2dc1d548f5514bdac01e52e09c217ad704 Mon Sep 17 00:00:00 2001 From: wlandau Date: Tue, 4 Feb 2025 11:19:00 -0500 Subject: [PATCH] tar_workspace_download() --- NAMESPACE | 1 + R/class_builder.R | 2 + R/class_database.R | 6 +++ R/class_database_aws.R | 33 +++++++++++++ R/class_reporter.R | 2 + R/class_timestamp.R | 9 ++++ R/class_verbose.R | 5 ++ R/tar_workspace.R | 10 +++- R/tar_workspace_download.R | 85 +++++++++++++++++++++++++++++++ R/tar_workspaces.R | 7 ++- R/utils_cli.R | 6 +++ _pkgdown.yml | 1 + man/tar_load_globals.Rd | 1 + man/tar_traceback.Rd | 1 + man/tar_workspace.Rd | 11 ++++- man/tar_workspace_download.Rd | 88 +++++++++++++++++++++++++++++++++ man/tar_workspaces.Rd | 10 ++-- tests/aws/test-aws-workspaces.R | 37 ++++++++++++++ 18 files changed, 306 insertions(+), 9 deletions(-) create mode 100644 R/tar_workspace_download.R create mode 100644 man/tar_workspace_download.Rd create mode 100644 tests/aws/test-aws-workspaces.R diff --git a/NAMESPACE b/NAMESPACE index c463e2d8..17f76096 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -551,6 +551,7 @@ export(tar_watch_app_ui) export(tar_watch_server) export(tar_watch_ui) export(tar_workspace) +export(tar_workspace_download) export(tar_workspaces) export(target_run_worker) export(use_targets) diff --git a/R/class_builder.R b/R/class_builder.R index 5404abca..05ea543a 100644 --- a/R/class_builder.R +++ b/R/class_builder.R @@ -419,6 +419,8 @@ builder_save_workspace <- function(target, pipeline, scheduler, meta) { path_store = meta$store ) scheduler$reporter$report_workspace(target) + meta$database$upload_workspace(target, meta) + scheduler$reporter$report_workspace_upload(target) } builder_record_error_meta <- function(target, pipeline, meta) { diff --git a/R/class_database.R b/R/class_database.R index e2efe7a8..191183e3 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -435,12 +435,18 @@ database_class <- R6::R6Class( } "upload" }, + upload_workspace = function(target, meta) { + "upload_workspace" + 
}, download = function(verbose = TRUE) { if (verbose) { tar_message_run("downloading") } "download" }, + download_workspace = function(name, store) { + "download_workspace" + }, head = function() { file <- file_init(path = "path_cloud") file_ensure_hash(file) diff --git a/R/class_database_aws.R b/R/class_database_aws.R index f0196b66..8be7cc0d 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -67,6 +67,22 @@ database_aws_class <- R6::R6Class( ) invisible() }, + download_workspace = function(name, store) { + path <- path_workspace(store, name) + key <- path_workspace(dirname(self$key), name) + aws <- self$resources$aws + dir_create(dirname(path)) + aws_s3_download( + file = path, + key = key, + bucket = aws$bucket, + region = aws$region, + endpoint = aws$endpoint, + args = aws$args, + max_tries = aws$max_tries %|||% 5L + ) + invisible() + }, upload = function(verbose = TRUE) { if (verbose) { tar_print( @@ -96,6 +112,23 @@ database_aws_class <- R6::R6Class( ) invisible() }, + upload_workspace = function(target, meta) { + name <- target_get_name(target) + path <- path_workspace(meta$store, name) + key <- path_workspace(dirname(self$key), name) + aws <- self$resources$aws + aws_s3_upload( + file = path, + key = key, + bucket = aws$bucket, + region = aws$region, + endpoint = aws$endpoint, + part_size = aws$part_size, + args = aws$args, + max_tries = aws$max_tries %|||% 5L + ) + invisible() + }, head = function() { aws <- self$resources$aws head <- aws_s3_head( diff --git a/R/class_reporter.R b/R/class_reporter.R index 12c5aa12..10b04be4 100644 --- a/R/class_reporter.R +++ b/R/class_reporter.R @@ -73,6 +73,8 @@ reporter_class <- R6::R6Class( }, report_workspace = function(target) { }, + report_workspace_upload = function(target) { + }, report_retry = function(target = NULL, progress = NULL) { }, report_finalize = function(progress = NULL) { diff --git a/R/class_timestamp.R b/R/class_timestamp.R index e61f45cd..c8ee83d3 100644 --- a/R/class_timestamp.R 
+++ b/R/class_timestamp.R @@ -80,6 +80,15 @@ timestamp_class <- R6::R6Class( ) ) }, + report_workspace_upload = function(target) { + self$buffer_message( + cli_workspace_upload( + target_get_name(target), + time_stamp = TRUE, + print = FALSE + ) + ) + }, report_retry = function(target, progress = NULL) { self$buffer_message( cli_retry( diff --git a/R/class_verbose.R b/R/class_verbose.R index 17d17197..4eb2358e 100644 --- a/R/class_verbose.R +++ b/R/class_verbose.R @@ -69,6 +69,11 @@ verbose_class <- R6::R6Class( report_workspace = function(target) { self$buffer_message(cli_workspace(target_get_name(target), print = FALSE)) }, + report_workspace_upload = function(target) { + self$buffer_message( + cli_workspace_upload(target_get_name(target), print = FALSE) + ) + }, report_retry = function(target, progress = NULL) { self$buffer_message( cli_retry( diff --git a/R/tar_workspace.R b/R/tar_workspace.R index 6fe4d741..f1a1c02b 100644 --- a/R/tar_workspace.R +++ b/R/tar_workspace.R @@ -1,4 +1,4 @@ -#' @title Load a saved workspace and seed for debugging. +#' @title Load a locally saved workspace and seed for debugging. #' @export #' @family debug #' @description Load the packages, environment, and random number generator @@ -12,6 +12,13 @@ #' are still in the data store (usually files in `_targets/objects/`). #' When you are done debugging, you can remove the workspace files #' using `tar_destroy(destroy = "workspaces")`. +#' +#' If `tar_option_get("repository_meta")` is `"aws"` or `"gcp"`, then +#' [tar_make()] uploads workspaces to the bucket and prefix provided. +#' Download one of these workspaces with [tar_workspace_download()]. +#' Downloaded workspaces can be loaded the usual way with +#' [tar_workspace()], and you should see them in the +#' character vector returned by [tar_workspaces()]. 
#' @return This function returns `NULL`, but it does load #' the target's required packages, as well as multiple objects #' into the environment (`envir` argument) in order to replicate the @@ -35,7 +42,6 @@ #' tar_script({ #' library(targets) #' library(tarchetypes) -#' tar_option_set(workspace_on_error = TRUE) #' list( #' tar_target(x, "loaded"), #' tar_target(y, stop(x)) diff --git a/R/tar_workspace_download.R b/R/tar_workspace_download.R new file mode 100644 index 00000000..50785e02 --- /dev/null +++ b/R/tar_workspace_download.R @@ -0,0 +1,85 @@ +#' @title Download a workspace from the cloud. +#' @export +#' @family debug +#' @description Download a workspace file from the cloud +#' so it can be loaded with [tar_workspace()]. +#' @details If `tar_option_get("repository_meta")` is `"aws"` or `"gcp"`, then +#' [tar_make()] uploads workspaces to the bucket and prefix provided. +#' Download one of these workspaces with [tar_workspace_download()]. +#' Downloaded workspaces can be loaded the usual way with +#' [tar_workspace()], and you should see them in the +#' character vector returned by [tar_workspaces()]. +#' @return `NULL` (invisibly). Returns an error if the workspace +#' cannot be downloaded. +#' @inheritParams tar_validate +#' @param name Symbol, name of the target whose workspace to download. +#' @param script Character string, file path to the `_targets.R` file +#' defining the pipeline. Must be configured with the right `aws` +#' and `repository_meta` options (in [tar_option_set()]) +#' to support downloading workspaces from the cloud. +#' @examples +#' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN +#' tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
+#' tmp <- sample(1) +#' tar_script({ +#' library(targets) +#' library(tarchetypes) +#' # Replace YOUR_AWS_BUCKET with the name of your own bucket. +#' tar_option_set( +#' resources = tar_resources( +#' tar_resources_aws( +#' bucket = "YOUR_AWS_BUCKET", +#' prefix = "_targets" +#' ) +#' ), +#' repository = "aws", +#' repository_meta = "aws" +#' ) +#' f <- function() stop("this is an error and thus triggers a workspace") +#' list( +#' tar_target(x, f()) +#' ) +#' }, ask = FALSE) +#' # The following code throws an error for demonstration purposes. +#' try(tar_make()) +#' # Say the workspace file for target x does not exist. +#' unlink("_targets/workspaces/x") +#' file.exists("_targets/workspaces/x") +#' # We can download it with tar_workspace_download() +#' tar_workspace_download(x) +#' file.exists("_targets/workspaces/x") +#' tar_workspace(x) +#' }) +#' } +tar_workspace_download <- function( + name, + script = targets::tar_config_get("script"), + store = targets::tar_config_get("store") +) { + name <- tar_deparse_language(substitute(name)) + tar_assert_chr(name) + tar_assert_scalar(name) + tar_assert_nzchar(name) + tar_assert_scalar(store) + tar_assert_chr(store) + tar_assert_nzchar(store) + options <- tar_script_options(script = script) + old_repository_meta <- tar_options$get_repository_meta() + old_resources <- tar_options$get_resources() + on.exit({ + tar_options$set_repository_meta(old_repository_meta) + tar_options$set_resources(old_resources) + }) + tar_options$set_repository_meta(options$repository_meta) + tar_options$set_resources(options$resources) + tar_assert_not_in( + x = options$repository_meta, + choices = "local", + msg = paste( + "tar_workspace_download() is not supported for", + "tar_option_get(\"repository_meta\") == \"local\"." 
+ ) + ) + database_meta(path_store = store)$download_workspace(name, store) + invisible() +} diff --git a/R/tar_workspaces.R b/R/tar_workspaces.R index 701f0703..894119cc 100644 --- a/R/tar_workspaces.R +++ b/R/tar_workspaces.R @@ -1,8 +1,11 @@ -#' @title List saved target workspaces. +#' @title List locally saved target workspaces. #' @export #' @family debug #' @description List target workspaces currently saved to -#' `_targets/workspaces/`. See [tar_workspace()] for more information. +#' `_targets/workspaces/` locally. +#' Does not include workspaces saved to the cloud. +#' See [tar_workspace()] and [tar_workspace_download()] +#' for more information. #' @return Character vector of available workspaces to load with #' [tar_workspace()]. #' @inheritParams tar_validate diff --git a/R/utils_cli.R b/R/utils_cli.R index a451a4dc..a2e6ff4b 100644 --- a/R/utils_cli.R +++ b/R/utils_cli.R @@ -126,6 +126,12 @@ cli_workspace <- function(name, time_stamp = FALSE, print = TRUE) { cli_blue_play(msg, print = print) } +cli_workspace_upload <- function(name, time_stamp = FALSE, print = TRUE) { + time <- if_any(time_stamp, time_stamp_cli(), NULL) + msg <- paste(c(time, "uploaded workspace", name), collapse = " ") + cli_blue_play(msg, print = print) +} + cli_retry <- function(name, prefix = NULL, time_stamp = FALSE, print = TRUE) { time <- if_any(time_stamp, time_stamp_cli(), NULL) msg <- paste(c(time, "retrying", prefix, name), collapse = " ") diff --git a/_pkgdown.yml b/_pkgdown.yml index 02fa0c5a..92aa622a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,6 +52,7 @@ reference: - 'tar_load_globals' - 'tar_traceback' - 'tar_workspace' + - 'tar_workspace_download' - 'tar_workspaces' - title: Storage contents: diff --git a/man/tar_load_globals.Rd b/man/tar_load_globals.Rd index d8800861..96360886 100644 --- a/man/tar_load_globals.Rd +++ b/man/tar_load_globals.Rd @@ -80,6 +80,7 @@ print("callr" \%in\% (.packages())) Other debug: \code{\link{tar_traceback}()}, 
\code{\link{tar_workspace}()}, +\code{\link{tar_workspace_download}()}, \code{\link{tar_workspaces}()} } \concept{debug} diff --git a/man/tar_traceback.Rd b/man/tar_traceback.Rd index 353abce6..bf67199b 100644 --- a/man/tar_traceback.Rd +++ b/man/tar_traceback.Rd @@ -70,6 +70,7 @@ tar_traceback(y, characters = 60) Other debug: \code{\link{tar_load_globals}()}, \code{\link{tar_workspace}()}, +\code{\link{tar_workspace_download}()}, \code{\link{tar_workspaces}()} } \concept{debug} diff --git a/man/tar_workspace.Rd b/man/tar_workspace.Rd index 05e6f856..f94641ae 100644 --- a/man/tar_workspace.Rd +++ b/man/tar_workspace.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tar_workspace.R \name{tar_workspace} \alias{tar_workspace} -\title{Load a saved workspace and seed for debugging.} +\title{Load a locally saved workspace and seed for debugging.} \usage{ tar_workspace( name, @@ -68,6 +68,13 @@ and random number generator seed as long as the data objects are still in the data store (usually files in \verb{_targets/objects/}). When you are done debugging, you can remove the workspace files using \code{tar_destroy(destroy = "workspaces")}. + +If \code{tar_option_get("repository_meta")} is \code{"aws"} or \code{"gcp"}, then +\code{\link[=tar_make]{tar_make()}} uploads workspaces to the bucket and prefix provided. +Download one of these workspaces with \code{\link[=tar_workspace_download]{tar_workspace_download()}}. +Downloaded workspaces can be loaded the usual way with +\code{\link[=tar_workspace]{tar_workspace()}}, and you should see them in the +character vector returned by \code{\link[=tar_workspaces]{tar_workspaces()}}. 
} \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN @@ -76,7 +83,6 @@ tmp <- sample(1) tar_script({ library(targets) library(tarchetypes) - tar_option_set(workspace_on_error = TRUE) list( tar_target(x, "loaded"), tar_target(y, stop(x)) @@ -99,6 +105,7 @@ tail(.Random.seed) Other debug: \code{\link{tar_load_globals}()}, \code{\link{tar_traceback}()}, +\code{\link{tar_workspace_download}()}, \code{\link{tar_workspaces}()} } \concept{debug} diff --git a/man/tar_workspace_download.Rd b/man/tar_workspace_download.Rd new file mode 100644 index 00000000..eff0fc2c --- /dev/null +++ b/man/tar_workspace_download.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_workspace_download.R +\name{tar_workspace_download} +\alias{tar_workspace_download} +\title{Download a workspace from the cloud.} +\usage{ +tar_workspace_download( + name, + script = targets::tar_config_get("script"), + store = targets::tar_config_get("store") +) +} +\arguments{ +\item{name}{Symbol, name of the target whose workspace to download.} + +\item{script}{Character string, file path to the \verb{_targets.R} file +defining the pipeline. Must be configured with the right \code{aws} +and \code{repository_meta} options (in \code{\link[=tar_option_set]{tar_option_set()}}) +to support downloading workspaces from the cloud.} + +\item{store}{Character of length 1, path to the +\code{targets} data store. Defaults to \code{tar_config_get("store")}, +which in turn defaults to \verb{_targets/}. +When you set this argument, the value of \code{tar_config_get("store")} +is temporarily changed for the current function call. +See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details +about how to set the data store path persistently +for a project.} +} +\value{ +\code{NULL} (invisibly). Returns an error if the workspace +cannot be downloaded. 
+} +\description{ +Download a workspace file from the cloud +so it can be loaded with \code{\link[=tar_workspace]{tar_workspace()}}. +} +\details{ +If \code{tar_option_get("repository_meta")} is \code{"aws"} or \code{"gcp"}, then +\code{\link[=tar_make]{tar_make()}} uploads workspaces to the bucket and prefix provided. +Download one of these workspaces with \code{\link[=tar_workspace_download]{tar_workspace_download()}}. +Downloaded workspaces can be loaded the usual way with +\code{\link[=tar_workspace]{tar_workspace()}}, and you should see them in the +character vector returned by \code{\link[=tar_workspaces]{tar_workspaces()}}. +} +\examples{ +if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN +tar_dir({ # tar_dir() runs code from a temp dir for CRAN. +tmp <- sample(1) +tar_script({ + library(targets) + library(tarchetypes) + # Replace YOUR_AWS_BUCKET with the name of your own bucket. + tar_option_set( + resources = tar_resources( + tar_resources_aws( + bucket = "YOUR_AWS_BUCKET", + prefix = "_targets" + ) + ), + repository = "aws", + repository_meta = "aws" + ) + f <- function() stop("this is an error and thus triggers a workspace") + list( + tar_target(x, f()) + ) +}, ask = FALSE) +# The following code throws an error for demonstration purposes. +try(tar_make()) +# Say the workspace file for target x does not exist. 
+unlink("_targets/workspaces/x") +file.exists("_targets/workspaces/x") +# We can download it with tar_workspace_download() +tar_workspace_download(x) +file.exists("_targets/workspaces/x") +tar_workspace(x) +}) +} +} +\seealso{ +Other debug: +\code{\link{tar_load_globals}()}, +\code{\link{tar_traceback}()}, +\code{\link{tar_workspace}()}, +\code{\link{tar_workspaces}()} +} +\concept{debug} diff --git a/man/tar_workspaces.Rd b/man/tar_workspaces.Rd index bc5e33bb..edfe9898 100644 --- a/man/tar_workspaces.Rd +++ b/man/tar_workspaces.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tar_workspaces.R \name{tar_workspaces} \alias{tar_workspaces} -\title{List saved target workspaces.} +\title{List locally saved target workspaces.} \usage{ tar_workspaces(names = NULL, store = targets::tar_config_get("store")) } @@ -30,7 +30,10 @@ Character vector of available workspaces to load with } \description{ List target workspaces currently saved to -\verb{_targets/workspaces/}. See \code{\link[=tar_workspace]{tar_workspace()}} for more information. +\verb{_targets/workspaces/} locally. +Does not include workspaces saved to the cloud. +See \code{\link[=tar_workspace]{tar_workspace()}} and \code{\link[=tar_workspace_download]{tar_workspace_download()}} +for more information. } \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN @@ -54,6 +57,7 @@ tar_workspaces(contains("x")) Other debug: \code{\link{tar_load_globals}()}, \code{\link{tar_traceback}()}, -\code{\link{tar_workspace}()} +\code{\link{tar_workspace}()}, +\code{\link{tar_workspace_download}()} } \concept{debug} diff --git a/tests/aws/test-aws-workspaces.R b/tests/aws/test-aws-workspaces.R new file mode 100644 index 00000000..7e5b612a --- /dev/null +++ b/tests/aws/test-aws-workspaces.R @@ -0,0 +1,37 @@ +# Use sparingly to minimize AWS costs. +# Verify all `targets` buckets are deleted afterwards. 
+tar_test("aws workspaces are uploaded and downloaded", { + skip_if_no_aws() + skip_if_not_installed("arrow") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + expr <- quote({ + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + bucket = !!bucket_name, + prefix = "_targets", + max_tries = 20 + ) + ), + format = "parquet", + repository = "aws", + repository_meta = "aws" + ) + list( + tar_target(x, stop("this is an error")) + ) + }) + expr <- tar_tidy_eval(expr, environment(), TRUE) + eval(as.call(list(`tar_script`, expr, ask = FALSE))) + expect_error(tar_make(callr_function = NULL), class = "tar_condition_run") + path <- "_targets/workspaces/x" + unlink(path) + expect_false(file.exists(path)) + expect_error(tar_workspace(x), class = "tar_condition_validate") + tar_workspace_download(x) + expect_true(file.exists(path)) + expect_silent(tar_workspace(x)) +})