From 7817017d45c1bfb853ff5863b11f77c4ae3bb951 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 24 Jan 2026 00:34:30 +1100
Subject: [PATCH 1/4] refactor nemo_osfx

---
 NAMESPACE                                     |  1 +
 R/write.R                                     | 47 +++++++++++++------
 man/nemo_osfx.Rd                              | 26 ++++++++++
 .../test-roxytest-testexamples-write.R        | 11 ++++-
 4 files changed, 70 insertions(+), 15 deletions(-)
 create mode 100644 man/nemo_osfx.Rd

diff --git a/NAMESPACE b/NAMESPACE
index c3658e2..58aedb5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,7 @@ export(list_files_dir)
 export(nemo_cli)
 export(nemo_log)
 export(nemo_log_date)
+export(nemo_osfx)
 export(nemo_out_formats)
 export(nemo_write)
 export(nemoverse_wf_dispatch)
diff --git a/R/write.R b/R/write.R
index c176772..661d371 100644
--- a/R/write.R
+++ b/R/write.R
@@ -47,22 +47,18 @@ nemo_write <- function(d, fpfix = NULL, format = "tsv", dbconn = NULL, dbtab = N
   } else {
     stopifnot(!is.null(fpfix))
     fpfix <- as.character(fpfix)
-    sfx <- c(tsv = "tsv.gz", csv = "csv.gz", parquet = "parquet", rds = "rds")
-    osfx <- function(s) glue("{fpfix}.{sfx[s]}")
+    osfx <- nemo_osfx(fpfix, format)
     fs::dir_create(dirname(fpfix))
-    if (format == "tsv") {
-      readr::write_tsv(d, osfx("tsv"))
-    } else if (format == "csv") {
-      readr::write_csv(d, osfx("csv"))
-    } else if (format == "parquet") {
-      arrow::write_parquet(d, osfx("parquet"))
-    } else if (format == "rds") {
-      readr::write_rds(d, osfx("rds"))
-    } else {
-      stop("No where else to go, check your output format!")
-    }
+    w <- list(
+      tsv = list(fun = "write_tsv", pkg = "readr"),
+      csv = list(fun = "write_csv", pkg = "readr"),
+      parquet = list(fun = "write_parquet", pkg = "arrow"),
+      rds = list(fun = "write_rds", pkg = "readr")
+    )
+    x <- w[[format]]
+    fun <- getExportedValue(x[["pkg"]], x[["fun"]])
+    fun(d, osfx)
   }
-  # also gets returned in case of NULL format
   return(invisible(d))
 }
 
@@ -93,3 +89,26 @@ valid_out_fmt <- function(x, choices = nemo_out_formats()) {
 nemo_out_formats <- function() {
   c("parquet", "db", "tsv", "csv", "rds")
 }
+
+#' Construct Output File Paths with Format Suffix
+#'
+#' @param fpfix (`character(n)`)\cr
+#' Vector of one or more file prefixes e.g. /path/to/foo
+#' @param format (`character(1)`)\cr
+#' Output format. One of tsv, csv, parquet, or rds (db has no file path).
+#' @return Character vector of output file paths
+#'
+#' @examples
+#' fpfix <- "path/to/foo"
+#' format <- "tsv"
+#' o <- nemo_osfx(fpfix, format)
+#' @testexamples
+#' expect_equal(o, paste0(fpfix, ".tsv.gz"))
+#'
+#' @export
+nemo_osfx <- function(fpfix, format) {
+  valid_out_fmt(format)
+  sfx <- c(tsv = "tsv.gz", csv = "csv.gz", parquet = "parquet", rds = "rds")
+  stopifnot("Format has no file suffix (e.g. db)." = format %in% names(sfx))
+  paste0(as.character(fpfix), ".", sfx[[format]])
+}
diff --git a/man/nemo_osfx.Rd b/man/nemo_osfx.Rd
new file mode 100644
index 0000000..fa0796a
--- /dev/null
+++ b/man/nemo_osfx.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write.R
+\name{nemo_osfx}
+\alias{nemo_osfx}
+\title{Construct Output File Paths with Format Suffix}
+\usage{
+nemo_osfx(fpfix, format)
+}
+\arguments{
+\item{fpfix}{(\code{character(n)})\cr
+Vector of one or more file prefixes e.g. /path/to/foo}
+
+\item{format}{(\code{character(1)})\cr
+Output format. One of tsv, csv, parquet, or rds (db has no file path).}
+}
+\value{
+Character vector of output file paths
+}
+\description{
+Construct Output File Paths with Format Suffix
+}
+\examples{
+fpfix <- "path/to/foo"
+format <- "tsv"
+o <- nemo_osfx(fpfix, format)
+}
diff --git a/tests/testthat/test-roxytest-testexamples-write.R b/tests/testthat/test-roxytest-testexamples-write.R
index a9955f4..18c02b9 100644
--- a/tests/testthat/test-roxytest-testexamples-write.R
+++ b/tests/testthat/test-roxytest-testexamples-write.R
@@ -13,7 +13,7 @@ test_that("Function nemo_write() @ L34", {
 })
 
 
-test_that("Function valid_out_fmt() @ L81", {
+test_that("Function valid_out_fmt() @ L77", {
   
   valid_out_fmt("tsv")
   expect_true(valid_out_fmt("tsv"))
@@ -21,3 +21,12 @@ test_that("Function valid_out_fmt() @ L81", {
   expect_error(valid_out_fmt(c("tsv", "csv")))
 })
 
+
+test_that("Function nemo_osfx() @ L109", {
+  
+  fpfix <- "path/to/foo"
+  format <- "tsv"
+  o <- nemo_osfx(fpfix, format)
+  expect_equal(o, paste0(fpfix, ".tsv.gz"))
+})
+

From 5aa731ec386ddda0cbc4107d3676cc15baca4261 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sun, 25 Jan 2026 15:44:56 +1100
Subject: [PATCH 2/4] rename nemo_id + nemo_pfix; keep outpaths in writer

---
 R/Tool.R  | 18 ++++++++++++------
 R/write.R |  3 ++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/R/Tool.R b/R/Tool.R
index 14768bd..33a1f30 100644
--- a/R/Tool.R
+++ b/R/Tool.R
@@ -5,7 +5,6 @@
 #' @examples
 #' \dontrun{
 #' path <- here::here("inst/extdata/tool1")
-#' x <- Tool$new("tool1", pkg = "nemo", path = path)
 #' # demo filter + tidy
 #' x <- Tool1$new(path = path)$
 #'   filter_files(exclude = "alignments_dupfreq")$
@@ -371,7 +370,6 @@ Tool <- R6::R6Class(
         # for tidying (and therefore writing). So return NULL.
         return(NULL)
       }
-
       d_write <- self$tbls |>
         dplyr::select(
           "tool_parser",
@@ -385,8 +383,8 @@ Tool <- R6::R6Class(
           tidy_data = list(
             tidy_data |>
               tibble::add_column(
-                nemo_id = as.character(id),
-                nemo_pfix = as.character(prefix),
+                input_id = as.character(id),
+                input_pfix = as.character(prefix),
                 .before = 1
               )
           ),
@@ -411,9 +409,17 @@ Tool <- R6::R6Class(
               dbconn = dbconn,
               dbtab = .data$dbtab
             )
-          )
+          ),
+          outpath = attr(out, "outpath")
         ) |>
-        dplyr::ungroup()
+        dplyr::ungroup() |>
+        dplyr::select(
+          "tool_parser",
+          "prefix",
+          "tidy_data",
+          "tbl_name",
+          "outpath"
+        )
       invisible(d_write)
     },
     #' @description Parse, filter, tidy and write files.
diff --git a/R/write.R b/R/write.R
index 661d371..08207a4 100644
--- a/R/write.R
+++ b/R/write.R
@@ -59,7 +59,8 @@ nemo_write <- function(d, fpfix = NULL, format = "tsv", dbconn = NULL, dbtab = N
     fun <- getExportedValue(x[["pkg"]], x[["fun"]])
     fun(d, osfx)
   }
-  return(invisible(d))
+  attr(d, "outpath") <- if (format == "db") NA_character_ else osfx
+  invisible(d)
 }
 
 #' Output Format is Valid

From 0d5d93c178cb05908a767c56316c92c3cccf90b1 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 26 Jan 2026 23:59:26 +1100
Subject: [PATCH 3/4] add output_id

---
 DESCRIPTION |  1 +
 R/Tool.R    | 27 ++++++++++++++++++---------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1f8aad1..250b76e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -29,6 +29,7 @@ Imports:
     rlang,
     tibble,
     tidyr,
+    ulid,
     yaml
 Suggests:
     argparse,
diff --git a/R/Tool.R b/R/Tool.R
index 33a1f30..dc97487 100644
--- a/R/Tool.R
+++ b/R/Tool.R
@@ -350,12 +350,20 @@ Tool <- R6::R6Class(
     #' Directory path to output tidy files. Ignored if format is db.
     #' @param format (`character(1)`)\cr
     #' Format of output files.
-    #' @param id (`character(1)`)\cr
-    #' ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
+    #' @param input_id (`character(1)`)\cr
+    #' Input ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
     #' @param dbconn (`DBIConnection`)\cr
     #' Database connection object (see `DBI::dbConnect`).
+    #' @param output_id (`character(1)`)\cr
+    #' Output ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
     #' @return A tibble with the tidy data and their output location prefix.
-    write = function(odir = ".", format = "tsv", id = NULL, dbconn = NULL) {
+    write = function(
+      odir = ".",
+      format = "tsv",
+      input_id = NULL,
+      dbconn = NULL,
+      output_id = ulid::ulid()
+    ) {
       if (format != "db") {
         if (is.null(odir)) {
           stop("Output directory must be specified when format is not 'db'.")
@@ -363,7 +371,7 @@ Tool <- R6::R6Class(
         fs::dir_create(odir)
         odir <- normalizePath(odir)
       }
-      stopifnot(!is.null(id))
+      stopifnot(!is.null(input_id), !is.null(output_id))
       stopifnot("Did you forget to tidy?" = !private$needs_tidying)
       if (is.null(self$tbls)) {
         # even though tidying is not needed, there must be no files detected
@@ -383,8 +391,9 @@ Tool <- R6::R6Class(
           tidy_data = list(
             tidy_data |>
               tibble::add_column(
-                input_id = as.character(id),
+                input_id = as.character(input_id),
                 input_pfix = as.character(prefix),
+                output_id = as.character(output_id),
                 .before = 1
               )
           ),
@@ -427,8 +436,8 @@ Tool <- R6::R6Class(
     #' Directory path to output tidy files.
     #' @param format (`character(1)`)\cr
     #' Format of output files.
-    #' @param id (`character(1)`)\cr
-    #' ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
+    #' @param input_id (`character(1)`)\cr
+    #' Input ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
     #' @param dbconn (`DBIConnection`)\cr
     #' Database connection object (see `DBI::dbConnect`).
     #' @param include (`character(n)`)\cr
@@ -439,7 +448,7 @@ Tool <- R6::R6Class(
     nemofy = function(
       odir = ".",
       format = "tsv",
-      id = NULL,
+      input_id = NULL,
       dbconn = NULL,
       include = NULL,
       exclude = NULL
@@ -451,7 +460,7 @@ Tool <- R6::R6Class(
         write(
           odir = odir,
           format = format,
-          id = id,
+          input_id = input_id,
           dbconn = dbconn
       )
     }

From 7f87d73fa22b201ddaa8fa6bf6f1e385abd4425b Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Thu, 29 Jan 2026 23:56:17 +1100
Subject: [PATCH 4/4] Tool/Workflow minor refac

---
 R/Tool.R     | 40 ++++++++++++-----------
 R/Workflow.R | 89 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 90 insertions(+), 39 deletions(-)

diff --git a/R/Tool.R b/R/Tool.R
index dc97487..f2393c8 100644
--- a/R/Tool.R
+++ b/R/Tool.R
@@ -19,7 +19,7 @@
 #'   user = "orcabus"
 #' )
 #' lx$nemofy(
-#'     odir = "nogit/test_data",
+#'     diro = "nogit/test_data",
 #'     format = "db", # "parquet",
 #'     id = "run2",
 #'     dbconn = dbconn,
@@ -346,30 +346,30 @@ Tool <- R6::R6Class(
       return(invisible(self))
     },
     #' @description Write tidy tibbles.
-    #' @param odir (`character(1)`)\cr
+    #' @param diro (`character(1)`)\cr
     #' Directory path to output tidy files. Ignored if format is db.
     #' @param format (`character(1)`)\cr
-    #' Format of output files.
+    #' Format of output.
     #' @param input_id (`character(1)`)\cr
-    #' Input ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
+    #' Input ID to use for the dataset (e.g. `run123`).
+    #' @param output_id (`character(1)`)\cr
+    #' Output ID to use for the dataset (e.g. `run123`).
     #' @param dbconn (`DBIConnection`)\cr
     #' Database connection object (see `DBI::dbConnect`).
-    #' @param output_id (`character(1)`)\cr
-    #' Output ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
     #' @return A tibble with the tidy data and their output location prefix.
     write = function(
-      odir = ".",
+      diro = ".",
       format = "tsv",
       input_id = NULL,
-      dbconn = NULL,
-      output_id = ulid::ulid()
+      output_id = ulid::ulid(),
+      dbconn = NULL
     ) {
       if (format != "db") {
-        if (is.null(odir)) {
+        if (is.null(diro)) {
           stop("Output directory must be specified when format is not 'db'.")
         }
-        fs::dir_create(odir)
-        odir <- normalizePath(odir)
+        fs::dir_create(diro)
+        diro <- normalizePath(diro)
       }
       stopifnot(!is.null(input_id), !is.null(output_id))
       stopifnot("Did you forget to tidy?" = !private$needs_tidying)
@@ -404,7 +404,7 @@ Tool <- R6::R6Class(
             paste(.data$tool_parser, .data$tidy_name, sep = "_")
           ),
           # used to write when non-db format
-          fpfix = paste(file.path(odir, .data$prefix), .data$tbl_name, sep = "_"),
+          fpfix = paste(file.path(diro, .data$prefix), .data$tbl_name, sep = "_"),
           dbtab = ifelse(
             format == "db",
             list(.data$tbl_name),
@@ -432,12 +432,14 @@ Tool <- R6::R6Class(
       invisible(d_write)
     },
     #' @description Parse, filter, tidy and write files.
-    #' @param odir (`character(1)`)\cr
+    #' @param diro (`character(1)`)\cr
     #' Directory path to output tidy files.
     #' @param format (`character(1)`)\cr
-    #' Format of output files.
+    #' Format of output.
     #' @param input_id (`character(1)`)\cr
-    #' Input ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
+    #' Input ID to use for the dataset (e.g. `run123`).
+    #' @param output_id (`character(1)`)\cr
+    #' Output ID to use for the dataset (e.g. `run123`).
     #' @param dbconn (`DBIConnection`)\cr
     #' Database connection object (see `DBI::dbConnect`).
     #' @param include (`character(n)`)\cr
@@ -446,9 +448,10 @@ Tool <- R6::R6Class(
     #' Files to exclude.
     #' @return A tibble with the tidy data and their output location prefix.
     nemofy = function(
-      odir = ".",
+      diro = ".",
       format = "tsv",
       input_id = NULL,
+      output_id = ulid::ulid(),
       dbconn = NULL,
       include = NULL,
       exclude = NULL
@@ -458,9 +461,10 @@ Tool <- R6::R6Class(
         filter_files(include = include, exclude = exclude)$
         tidy()$
         write(
-          odir = odir,
+          diro = diro,
           format = format,
           input_id = input_id,
+          output_id = output_id,
           dbconn = dbconn
       )
     }
diff --git a/R/Workflow.R b/R/Workflow.R
index cff5421..785cfaf 100644
--- a/R/Workflow.R
+++ b/R/Workflow.R
@@ -6,10 +6,10 @@
 #' path <- system.file("extdata/tool1", package = "nemo")
 #' tools <- list(tool1 = Tool1)
 #' wf1 <- Workflow$new(name = "foo", path = path, tools = tools)
-#' odir <- tempdir()
+#' diro <- tempdir()
 #' wf1$list_files()
-#' wf1$nemofy(odir = odir, format = "parquet", id = "run1")
-#' (lf <- list.files(odir, pattern = "tool1.*parquet", full.names = FALSE))
+#' wf1$nemofy(diro = diro, format = "parquet", id = "run1")
+#' (lf <- list.files(diro, pattern = "tool1.*parquet", full.names = FALSE))
 #' #dbconn <- DBI::dbConnect(drv = RPostgres::Postgres(), dbname = "nemo", user = "orcabus")
 #' #wf1$nemofy(format = "db", id = "runABC", dbconn = dbconn)
 #' @testexamples
@@ -31,6 +31,9 @@ Workflow <- R6::R6Class(
     #' @field files_tbl (`tibble(n)`)\cr
     #' Tibble of files from [list_files_dir()].
     files_tbl = NULL,
+    #' @field written_files (`tibble(n)`)\cr
+    #' Tibble of files written from `self$write()`.
+    written_files = NULL,
 
     #' @description Create a new Workflow object.
     #' @param name (`character(1)`)\cr
@@ -42,6 +45,8 @@ Workflow <- R6::R6Class(
     initialize = function(name = NULL, path = NULL, tools = NULL) {
       self$name <- name
       private$validate_tools(tools)
+      private$is_tidied <- FALSE
+      private$is_written <- FALSE
       self$path <- normalizePath(path)
       self$files_tbl <- list_files_dir(self$path)
       # handle everything in a list of Tools
@@ -52,10 +57,12 @@ Workflow <- R6::R6Class(
     #' @param ... (ignored).
     print = function(...) {
       res <- tibble::tribble(
-        ~var     , ~value                                     ,
-        "name"   , self$name                                  ,
-        "path"   , glue::glue_collapse(self$path, sep = ", ") ,
-        "ntools" , as.character(length(self$tools))
+        ~var      , ~value                                     ,
+        "name"    , self$name                                  ,
+        "path"    , glue::glue_collapse(self$path, sep = ", ") ,
+        "ntools"  , as.character(length(self$tools))           ,
+        "tidied"  , as.character(private$is_tidied)            ,
+        "written" , as.character(private$is_written)
       )
       cat("#--- Workflow ---#\n")
       print(res)
@@ -88,30 +95,66 @@ Workflow <- R6::R6Class(
     #' Should the raw parsed tibbles be kept in the final output?
     #' @return self invisibly.
     tidy = function(tidy = TRUE, keep_raw = FALSE) {
+      # is_tidied is set below after the tools are tidied: skip repeat calls
+      if (private$is_tidied) {
+        return(invisible(self))
+      }
       self$tools <- self$tools |>
         purrr::map(\(x) x$tidy(tidy = tidy, keep_raw = keep_raw))
-      invisible(self)
+      private$is_tidied <- TRUE
+      return(invisible(self))
     },
     #' @description Write tidy tibbles.
-    #' @param odir (`character(1)`)\cr
+    #' @param diro (`character(1)`)\cr
     #' Directory path to output tidy files.
     #' @param format (`character(1)`)\cr
-    #' Format of output files.
-    #' @param id (`character(1)`)\cr
-    #' ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
+    #' Format of output.
+    #' @param input_id (`character(1)`)\cr
+    #' Input ID to use for the dataset (e.g. `run123`).
+    #' @param output_id (`character(1)`)\cr
+    #' Output ID to use for the dataset (e.g. `run123`).
     #' @param dbconn (`DBIConnection`)\cr
     #' Database connection object (see `DBI::dbConnect`).
-    #' @return A tibble with the tidy data and their output location prefix.
-    write = function(odir = ".", format = "tsv", id = NULL, dbconn = NULL) {
-      self$tools |>
-        purrr::map(\(x) x$write(odir = odir, format = format, id = id, dbconn = dbconn)) |>
+    #' @return self invisibly.
+    write = function(
+      diro = ".",
+      format = "tsv",
+      input_id = NULL,
+      output_id = ulid::ulid(),
+      dbconn = NULL
+    ) {
+      res <- self$tools |>
+        purrr::map(\(x) {
+          x$write(
+            diro = diro,
+            format = format,
+            input_id = input_id,
+            output_id = output_id,
+            dbconn = dbconn
+          )
+        }) |>
         dplyr::bind_rows()
+      private$is_written <- TRUE
+      self$written_files <- res
+      # Write metadata
+      # if (format != "db" && !is.null(res) && nrow(res) > 0) {
+      #   meta <- Metadata$new(
+      #     workflow = self,
+      #     write_result = res,
+      #     output_dir = diro,
+      #     format = format,
+      #     input_id = input_id,
+      #     output_id = output_id
+      #   )
+      #   meta$write()
+      # }
+      return(invisible(self))
     },
     #' @description Parse, filter, tidy and write files.
-    #' @param odir (`character(1)`)\cr
+    #' @param diro (`character(1)`)\cr
     #' Directory path to output tidy files.
     #' @param format (`character(1)`)\cr
-    #' Format of output files.
+    #' Format of output.
     #' @param id (`character(1)`)\cr
     #' ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
     #' @param dbconn (`DBIConnection`)\cr
@@ -122,7 +165,7 @@ Workflow <- R6::R6Class(
     #' Files to exclude.
     #' @return A tibble with the tidy data and their output location prefix.
     nemofy = function(
-      odir = ".",
+      diro = ".",
       format = "tsv",
       id = NULL,
       dbconn = NULL,
@@ -133,7 +176,7 @@ Workflow <- R6::R6Class(
       self$
         filter_files(include = include, exclude = exclude)$
         tidy()$
-        write(odir = odir, format = format, id = id, dbconn = dbconn)
+        write(diro = diro, format = format, input_id = id, dbconn = dbconn)
     },
     #' @description Get raw schemas for all Tools.
     #' @return Tibble with names of tool and file, schema and its version.
@@ -172,6 +215,10 @@ Workflow <- R6::R6Class(
       tool_nms <- purrr::map_chr(x, "classname") |> tolower()
       stopifnot(!is.null(tool_nms))
       stopifnot(all(purrr::map(x, "inherit") == as.symbol("Tool")))
-    }
+    },
+    # Progress flags: FALSE at initialize(); tidy() sets is_tidied (and then
+    # returns early on repeat calls), write() sets is_written.
+    is_tidied = NULL,
+    is_written = NULL
   ) # private end
 )