Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Imports:
rlang,
tibble,
tidyr,
ulid,
yaml
Suggests:
argparse,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(list_files_dir)
export(nemo_cli)
export(nemo_log)
export(nemo_log_date)
export(nemo_osfx)
export(nemo_out_formats)
export(nemo_write)
export(nemoverse_wf_dispatch)
Expand Down
43 changes: 29 additions & 14 deletions R/Tool.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#' @examples
#' \dontrun{
#' path <- here::here("inst/extdata/tool1")
#' x <- Tool$new("tool1", pkg = "nemo", path = path)
#' # demo filter + tidy
#' x <- Tool1$new(path = path)$
#' filter_files(exclude = "alignments_dupfreq")$
Expand Down Expand Up @@ -351,27 +350,34 @@ Tool <- R6::R6Class(
#' Directory path to output tidy files. Ignored if format is db.
#' @param format (`character(1)`)\cr
#' Format of output files.
#' @param id (`character(1)`)\cr
#' ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
#' @param input_id (`character(1)`)\cr
#' Input ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
#' @param dbconn (`DBIConnection`)\cr
#' Database connection object (see `DBI::dbConnect`).
#' @param output_id (`character(1)`)\cr
#' Output ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). Defaults to a ULID generated by `ulid::ulid()`.
#' @return A tibble with the tidy data and their output location prefix.
write = function(odir = ".", format = "tsv", id = NULL, dbconn = NULL) {
write = function(
odir = ".",
format = "tsv",
input_id = NULL,
dbconn = NULL,
output_id = ulid::ulid()
) {
if (format != "db") {
if (is.null(odir)) {
stop("Output directory must be specified when format is not 'db'.")
}
fs::dir_create(odir)
odir <- normalizePath(odir)
}
stopifnot(!is.null(id))
stopifnot(!is.null(input_id), !is.null(output_id))
stopifnot("Did you forget to tidy?" = !private$needs_tidying)
if (is.null(self$tbls)) {
# even though tidying is not needed, there must be no files detected
# for tidying (and therefore writing). So return NULL.
return(NULL)
}

d_write <- self$tbls |>
dplyr::select(
"tool_parser",
Expand All @@ -385,8 +391,9 @@ Tool <- R6::R6Class(
tidy_data = list(
tidy_data |>
tibble::add_column(
nemo_id = as.character(id),
nemo_pfix = as.character(prefix),
input_id = as.character(input_id),
input_pfix = as.character(prefix),
output_id = as.character(output_id),
.before = 1
)
),
Expand All @@ -411,18 +418,26 @@ Tool <- R6::R6Class(
dbconn = dbconn,
dbtab = .data$dbtab
)
)
),
outpath = attr(out, "outpath")
) |>
dplyr::ungroup()
dplyr::ungroup() |>
dplyr::select(
"tool_parser",
"prefix",
"tidy_data",
"tbl_name",
"outpath"
)
invisible(d_write)
},
#' @description Parse, filter, tidy and write files.
#' @param odir (`character(1)`)\cr
#' Directory path to output tidy files.
#' @param format (`character(1)`)\cr
#' Format of output files.
#' @param id (`character(1)`)\cr
#' ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
#' @param input_id (`character(1)`)\cr
#' Input ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
#' @param dbconn (`DBIConnection`)\cr
#' Database connection object (see `DBI::dbConnect`).
#' @param include (`character(n)`)\cr
Expand All @@ -433,7 +448,7 @@ Tool <- R6::R6Class(
nemofy = function(
odir = ".",
format = "tsv",
id = NULL,
input_id = NULL,
dbconn = NULL,
include = NULL,
exclude = NULL
Expand All @@ -445,7 +460,7 @@ Tool <- R6::R6Class(
write(
odir = odir,
format = format,
id = id,
input_id = input_id,
dbconn = dbconn
)
}
Expand Down
50 changes: 35 additions & 15 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,20 @@ nemo_write <- function(d, fpfix = NULL, format = "tsv", dbconn = NULL, dbtab = N
} else {
stopifnot(!is.null(fpfix))
fpfix <- as.character(fpfix)
sfx <- c(tsv = "tsv.gz", csv = "csv.gz", parquet = "parquet", rds = "rds")
osfx <- function(s) glue("{fpfix}.{sfx[s]}")
osfx <- nemo_osfx(fpfix, format)
fs::dir_create(dirname(fpfix))
if (format == "tsv") {
readr::write_tsv(d, osfx("tsv"))
} else if (format == "csv") {
readr::write_csv(d, osfx("csv"))
} else if (format == "parquet") {
arrow::write_parquet(d, osfx("parquet"))
} else if (format == "rds") {
readr::write_rds(d, osfx("rds"))
} else {
stop("No where else to go, check your output format!")
}
w <- list(
tsv = list(fun = "write_tsv", pkg = "readr"),
csv = list(fun = "write_csv", pkg = "readr"),
parquet = list(fun = "write_parquet", pkg = "arrow"),
rds = list(fun = "write_rds", pkg = "readr")
)
x <- w[[format]]
fun <- getExportedValue(x[["pkg"]], x[["fun"]])
fun(d, osfx)
}
# also gets returned in case of NULL format
return(invisible(d))
attr(d, "outpath") <- if (format == "db") NULL else osfx
invisible(d)
}

#' Output Format is Valid
Expand Down Expand Up @@ -93,3 +90,26 @@ valid_out_fmt <- function(x, choices = nemo_out_formats()) {
nemo_out_formats <- function() {
  # Output format names; valid_out_fmt() uses this vector as its default
  # `choices`.
  formats <- c("parquet", "db", "tsv", "csv", "rds")
  formats
}

#' Construct Output File Paths with Format Suffix
#'
#' Appends the file extension that corresponds to `format` to each prefix in
#' `fpfix` (`tsv` -> `.tsv.gz`, `csv` -> `.csv.gz`, `parquet` -> `.parquet`,
#' `rds` -> `.rds`).
#'
#' @param fpfix (`character(n)`)\cr
#' Vector of one or more file prefixes e.g. /path/to/foo
#' @param format (`character(1)`)\cr
#' Output format. One of tsv, csv, parquet, rds, or db.
#' @return Plain character vector of output file paths
#'
#' @examples
#' fpfix <- "path/to/foo"
#' format <- "tsv"
#' o <- nemo_osfx(fpfix, format)
#' @testexamples
#' expect_equal(o, "path/to/foo.tsv.gz")
#'
#' @export
nemo_osfx <- function(fpfix, format) {
  # Reject unsupported formats up front.
  valid_out_fmt(format)
  # Format name -> file extension (without the leading dot).
  suffixes <- c(
    tsv = "tsv.gz",
    csv = "csv.gz",
    parquet = "parquet",
    rds = "rds"
  )
  # NOTE(review): "db" has no entry above, so it would index to NA and
  # paste0() would render it as ".NA"; confirm callers never ask for a db
  # path.
  paste0(as.character(fpfix), ".", suffixes[format])
}
26 changes: 26 additions & 0 deletions man/nemo_osfx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion tests/testthat/test-roxytest-testexamples-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,20 @@ test_that("Function nemo_write() @ L34", {
})


test_that("Function valid_out_fmt() @ L81", {
test_that("Function valid_out_fmt() @ L77", {

# A single supported format is accepted and the call yields TRUE.
valid_out_fmt("tsv")
expect_true(valid_out_fmt("tsv"))
# A format name that is not among the choices is rejected.
expect_error(valid_out_fmt("foo"))
# More than one format at a time is rejected as well.
expect_error(valid_out_fmt(c("tsv", "csv")))
})


test_that("Function nemo_osfx() @ L109", {

  fpfix <- "path/to/foo"
  format <- "tsv"
  o <- nemo_osfx(fpfix, format)
  # nemo_osfx() assembles its result with paste0(), i.e. a plain character
  # vector. Compare against a plain string rather than a glue object so the
  # class comparison inside expect_equal() cannot fail.
  expect_equal(o, "path/to/foo.tsv.gz")
})