diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..de20c61
Binary files /dev/null and b/.DS_Store differ
diff --git a/DESCRIPTION b/DESCRIPTION
index 69063f8..ef7ed31 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,6 +13,8 @@ Suggests:
testthat (>= 3.0.0),
crayon,
dplyr,
+ ggplot2,
+ stringr,
tictoc,
knitr,
rmarkdown
diff --git a/NAMESPACE b/NAMESPACE
index 39c3598..5884dc8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,3 +3,4 @@
export(get_log)
export(rm_log)
export(time_pipe)
+importFrom(stats,setNames)
diff --git a/R/emit.R b/R/emit_time.R
similarity index 56%
rename from R/emit.R
rename to R/emit_time.R
index d61566e..b54e5c8 100644
--- a/R/emit.R
+++ b/R/emit_time.R
@@ -2,8 +2,8 @@
#'
#' Prints and/or logs the execution time of an operation.
#'
-#' @param start POSIXct. Operation start time.
-#' @param end POSIXct. Operation end time.
+#' @param start_time POSIXct. Pipeline start time.
+#' @param duration Numeric. Duration since pipeline start.
#' @param label Character. Operation label.
#' @param unit Character. Time unit ("secs", "mins", "hours", "days", or "weeks").
#' @param console Logical. Print timing to the console?
@@ -12,40 +12,36 @@
#' @return Invisibly, the numeric duration of the operation.
#' @keywords internal
#' @noRd
-emit <- function(start, end, label, unit, console, log) {
- duration <- as.numeric(difftime(end, start, units = unit))
- duration <- sprintf("%.4f", duration)
- timestamp <- format(start, "%Y-%m-%d %H:%M:%OS3")
+emit_time <- function(start_time, duration, label, unit, console, log) {
+ duration_fmt <- sprintf("%.4f", duration)
+ timestamp_fmt <- format(start_time, "%Y-%m-%d %H:%M:%OS3")
- build_msg <- function(timestamp, label, duration, unit) {
- paste0("[", timestamp, "] ", label, ": ", duration, " ", unit)
+ build_msg <- function(ts, label, dur, unit) {
+ paste0("[", ts, "] ", label, ": ", dur, " ", unit)
}
- # Console message
+ # Console
if (isTRUE(console)) {
if (requireNamespace("crayon", quietly = TRUE)) {
- console_msg <- build_msg(
- timestamp,
+ msg <- build_msg(
+ timestamp_fmt,
crayon::blue(label),
- crayon::green(duration),
+ crayon::green(paste0("+", duration_fmt)),
crayon::green(unit)
)
} else {
- console_msg <- build_msg(timestamp, label, duration, unit)
+ msg <- build_msg(timestamp_fmt, label, paste0("+", duration_fmt), unit)
}
- message(console_msg)
+ message(msg)
}
- # Log to .pipetime_env
+ # Log
if (!is.null(log)) {
- if (!is.character(log)) {
- stop("'log' must be a character string.")
- }
-
+ stopifnot(is.character(log), length(log) == 1)
new_row <- data.frame(
- timestamp = timestamp,
+ timestamp = start_time,
label = label,
- duration = as.numeric(duration),
+ duration = duration,
unit = unit,
stringsAsFactors = FALSE
)
diff --git a/R/get_log.R b/R/get_log.R
index b9e23e6..405bc0c 100644
--- a/R/get_log.R
+++ b/R/get_log.R
@@ -1,13 +1,40 @@
-#' Retrieve a stored timing log
+#' Retrieve a timing log (or all logs)
#'
-#' @param log Character. Name of the data frame to load from `.pipetime_env`.
+#' Return a stored timing log from `.pipetime_env`.
+#' If `log = NULL`, return all logs as a named list.
+#'
+#' @param log Character string or `NULL`. Name of the log to retrieve. If `NULL`, all logs are returned.
+#'
+#' @return Either:
+#' - A data frame with columns:
+#' - `timestamp` (`POSIXct`): Pipeline start time
+#' - `label` (`character`): Operation label
+#' - `duration` (`numeric`): Elapsed time since pipeline start
+#' - `unit` (`character`): Time unit used
+#' - Or, if `log = NULL`, a named list of such data frames.
+#'
+#' @seealso [rm_log()]
+#'
+#' @importFrom stats setNames
#'
-#' @return A data frame of timing logs.
#' @export
-get_log <- function(log) {
- if (exists(log, envir = .pipetime_env, inherits = FALSE)) {
- get(log, envir = .pipetime_env)
+get_log <- function(log = NULL) {
+ logs <- setdiff(ls(envir = .pipetime_env), "start_times")
+ if (!length(logs)) {
+ return(list())
+ }
+
+ if (is.null(log)) {
+ # Return all logs
+ stats::setNames(
+ lapply(logs, function(x) get(x, envir = .pipetime_env)),
+ logs
+ )
} else {
- stop("No data frame named '", log, "' found in .pipetime_env.")
+ stopifnot(is.character(log), length(log) == 1)
+ if (!exists(log, envir = .pipetime_env, inherits = FALSE)) {
+ stop("No log named '", log, "' found in .pipetime_env.")
+ }
+ get(log, envir = .pipetime_env)
}
}
diff --git a/R/rm_log.R b/R/rm_log.R
index 3e093c3..ca56c39 100644
--- a/R/rm_log.R
+++ b/R/rm_log.R
@@ -1,17 +1,36 @@
-#' Remove a stored timing log
+#' Remove a timing log (or all logs)
#'
-#' @param log Character. Name of the timing log to delete from `.pipetime_env`.
+#' Delete a timing log from `.pipetime_env`.
+#' If `log = NULL`, all logs are removed, but only when `force = TRUE`.
#'
+#' @param log Character string or `NULL`. Name of the log to remove. If `NULL`, all logs are targeted.
+#' @param force Logical. To remove all logs, `force` must be `TRUE`. Default: `FALSE`.
+#'
+#' @return Invisibly, `TRUE`.
+#' @seealso [get_log()]
#' @export
-rm_log <- function(log) {
- if (!is.character(log) || length(log) != 1) {
- stop("`log` must be a single character string.")
+rm_log <- function(log = NULL, force = FALSE) {
+ logs <- setdiff(ls(envir = .pipetime_env), "start_times")
+ if (!length(logs)) {
+ warning("No logs to remove.")
+ return(invisible(FALSE))
}
- if (exists(log, envir = .pipetime_env, inherits = FALSE)) {
- rm(list = log, envir = .pipetime_env)
- invisible(TRUE)
+
+ if (is.null(log)) {
+ if (!force) {
+ stop("To remove all logs, set force = TRUE.")
+ }
+ rm(list = logs, envir = .pipetime_env)
+ .pipetime_env$start_times <- list()
} else {
- warning("No data frame named '", log, "' found in pipetime environment.")
- invisible(FALSE)
+ if (!is.character(log) || length(log) != 1) {
+ stop("`log` must be a single character string.")
+ }
+ if (!exists(log, envir = .pipetime_env, inherits = FALSE)) {
+ stop("No log named '", log, "' found in .pipetime_env.")
+ }
+ rm(list = log, envir = .pipetime_env)
+ .pipetime_env$start_times[[log]] <- NULL
}
+ invisible(TRUE)
}
diff --git a/R/time_pipe.R b/R/time_pipe.R
index 6b898b5..afd0c4f 100644
--- a/R/time_pipe.R
+++ b/R/time_pipe.R
@@ -1,19 +1,26 @@
#' Measure execution time in a pipeline
#'
-#' Records the runtime of pipeline (|>) operation.
-#' Can print the timing to the console and optionally log it to a data frame in `.pipetime_env`.
-#' Defaults can be set via `options()`.
+#' Records the runtime of a pipeline (`|>`) from its start to the point where `time_pipe()` is called.
+#' Prints results to the console and/or logs them in `.pipetime_env`.
+#' Defaults can be set via `options(pipetime.*)`.
#'
#' @param .data Input object passed through the pipeline.
-#' @param label Optional. Name for the operation. Defaults to the expression if not provided.
-#' @param log Character or NULL. Name of a data frame to store logs in `.pipetime_env`. Defaults to NULL (no storage).
-#' @param console Logical. Print timing to the console? Defaults to TRUE.
-#' @param unit Character. Time unit passed to [base::difftime()]. One of `"secs"`, `"mins"`, `"hours"`, `"days"`, or `"weeks"`. Defaults to `"secs"`.
+#' @param label Character string. Operation name. Defaults to the expression if `NULL`.
+#' @param log Character string or `NULL`. Name of a log data frame in `.pipetime_env`. Default: `NULL`.
+#' @param console Logical. Print timing to console? Default: `TRUE`.
+#' @param unit Character string. Time unit for [base::difftime()]. One of `"secs"`, `"mins"`, `"hours"`, `"days"`, `"weeks"`. Default: `"secs"`.
#'
-#' @return The input object, unchanged. Timing information is printed or stored separately.
+#' @return `.data`, unchanged. Timing information is printed and/or stored separately.
#'
#' @details
-#' `time_pipe()` measures the elapsed time of the pipeline from its start to the point where `time_pipe()` is called.
+#' `time_pipe()` measures elapsed time from pipeline start to the call.
+#' If `log` is set, results are appended to a data frame in `.pipetime_env` with columns:
+#' - `timestamp`: Pipeline start time (`POSIXct`)
+#' - `label`: Operation label
+#' - `duration`: Elapsed time since pipeline start (`numeric`)
+#' - `unit`: Time unit used
+#'
+#' Stored logs can be retrieved with [get_log()].
#'
#' @examples
#' library(dplyr)
@@ -24,7 +31,6 @@
#' time_pipe("total pipeline")
#'
#' @export
-#'
time_pipe <- function(
.data,
label = NULL,
@@ -32,21 +38,30 @@ time_pipe <- function(
console = getOption("pipetime.console", TRUE),
unit = getOption("pipetime.unit", "secs")
) {
- unit <- match.arg(
- unit,
- choices = c("secs", "mins", "hours", "days", "weeks")
- )
+ # Track pipeline start
+ if (!is.null(log)) {
+ if (is.null(.pipetime_env$start_times[[log]])) {
+ .pipetime_env$start_times[[log]] <- Sys.time()
+ on.exit(.pipetime_env$start_times[[log]] <- NULL, add = TRUE)
+ }
+ start_time <- .pipetime_env$start_times[[log]]
+ } else {
+ start_time <- Sys.time()
+ }
- start <- Sys.time()
+ # Force evaluation and calculate duration
result <- .data
- end <- Sys.time()
+ end_time <- Sys.time()
+ duration <- as.numeric(difftime(end_time, start_time, units = unit))
+ # Generate label if not provided
if (is.null(label)) {
- expr <- substitute(.data)
- label <- gsub("\\s+", "", paste(deparse(expr), collapse = ""))
+ label <- paste(deparse(substitute(.data)), collapse = "")
+ label <- gsub("\\s+", " ", trimws(label))
}
- emit(start, end, label, unit, console, log)
+ # Output results
+ emit_time(start_time, duration, label, unit, console, log)
result
}
diff --git a/R/zzz.R b/R/zzz.R
index 4056707..a978632 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1 +1,3 @@
+# Environment for pipetime
.pipetime_env <- new.env(parent = emptyenv())
+.pipetime_env$start_times <- list()
diff --git a/README.Rmd b/README.Rmd
index 176c7aa..7228b3e 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -9,15 +9,22 @@ knitr::opts_chunk$set(
)
```
-# pipetime
+# pipetime
+
-[](https://github.com/CyGei/pipetime/actions/workflows/R-CMD-check.yaml)
-[](https://www.codefactor.io/repository/github/cygei/pipetime)
+
+[](https://github.com/CyGei/pipetime/actions/workflows/R-CMD-check.yaml) [](https://www.codefactor.io/repository/github/cygei/pipetime)
+
-`pipetime` measures the runtime of a pipeline from its start up to the `time_pipe()` call. It works with the native R pipe (`|>`) and integrates seamlessly into '*tidy workflows*'.
+⏳ `pipetime` measures elapsed time in R pipelines.
+
+Insert `time_pipe()` anywhere in a pipeline to print or log the time since the pipeline started. It works with the native R pipe (`|>`) and fits naturally into [tidyverse](https://www.tidyverse.org/) workflows.
# Installation
+
+Install from GitHub and load alongside `dplyr` for examples:
+
```{r setup, message=FALSE, warning=FALSE}
# devtools::install_github("CyGei/pipetime")
library(pipetime)
@@ -25,46 +32,67 @@ library(dplyr)
```
# Example
-Place `time_pipe()` anywhere in a pipeline to measure elapsed time from the start up to that point.
+
+Place `time_pipe()` at the end of a pipeline to measure total elapsed time:
+
```{r}
+slow_op <- function(delay, x) {
+ Sys.sleep(delay) # Simulate a time-consuming operation
+ rnorm(n = length(x), mean = x, sd = 1)
+}
+
data.frame(x = 1:3) |>
- mutate(sleep = Sys.sleep(0.1)) |> # e.g. a complex operation
+ mutate(sleep = slow_op(0.1, x)) |>
summarise(mean_x = mean(x)) |>
- time_pipe("total pipeline") # ~0.1 sec
+ time_pipe("total pipeline") # ~+0.1 sec
```
-Insert multiple `time_pipe()` calls to add *timestamps* along the pipeline:
-```{r}
-complex_fn <- function(duration,x) {
- Sys.sleep(duration) # Simulate a time-consuming operation
- rnorm(n = length(x), mean = x, sd = 1)
-}
+Use multiple `time_pipe()` calls to mark steps along a pipeline:
+```{r}
data.frame(x = 1:5) |>
- mutate(y = complex_fn(0.5, x)) |>
+ mutate(y = slow_op(0.5, x)) |>
time_pipe("compute y") |>
- mutate(z = complex_fn(0.5, y)) |>
+ mutate(z = slow_op(0.5, y)) |>
time_pipe("compute z") |>
summarise(mean_z = mean(z)) |>
time_pipe("total pipeline")
```
-Each `time_pipe()` reports the cumulative time since the start of the pipeline.
+⏱️ **Each `time_pipe()` reports the cumulative time since the pipeline started.**
-# Logging to a dataframe
-Save timings to a dataframe in the package’s private environment (`.pipetime_env`) with the `log` argument:
-```{r}
-df_1 <- data.frame(x = 1:5) |>
- mutate(y = complex_fn(0.5, x)) |>
- time_pipe("compute y", log = "timings")
+# Logging
+
+📝 Use `log` to save timings to a hidden environment (`.pipetime_env`):
-df_2 <- df_1 |>
- mutate(z = complex_fn(0.5, y)) |>
+```{r}
+df <- data.frame(x = 1:5) |>
+ mutate(y = slow_op(0.5, x)) |>
+ time_pipe("compute y", log = "timings") |>
+ mutate(z = slow_op(0.5, y)) |>
time_pipe("compute z", log = "timings")
get_log("timings")
-rm_log("timings") # delete "timings" from .pipetime_env
+rm_log("timings") #delete the dataframe in .pipetime_env
```
-Set a global default for the session using: `options(pipetime.log = "timings")`.
\ No newline at end of file
+## Managing logs
+
+- `get_log("name")` → return one log
+
+- `get_log(NULL)` → return all logs as a named list
+
+- `rm_log("name")` → remove one log
+
+- `rm_log(NULL, force = TRUE)` → remove all logs
+
+# Options
+
+You can also set **session‑wide** defaults:
+
+```{r}
+options(pipetime.log = "timings",
+ pipetime.console = TRUE,
+ pipetime.unit = "secs")
+```
\ No newline at end of file
diff --git a/README.md b/README.md
index cdf5049..2f70608 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,23 @@
-# pipetime
+# pipetime
[](https://github.com/CyGei/pipetime/actions/workflows/R-CMD-check.yaml)
[](https://www.codefactor.io/repository/github/cygei/pipetime)
+
-`pipetime` measures the runtime of a pipeline from its start up to the
-`time_pipe()` call. It works with the native R pipe (`|>`) and
-integrates seamlessly into ‘*tidy workflows*’.
+⏳ `pipetime` measures elapsed time in R pipelines.
+
+Insert `time_pipe()` anywhere in a pipeline to print or log the time
+since the pipeline started. It works with the native R pipe (`|>`) and
+fits naturally into [tidyverse](https://www.tidyverse.org/) workflows.
# Installation
+Install from GitHub and load alongside `dplyr` for examples:
+
``` r
# devtools::install_github("CyGei/pipetime")
library(pipetime)
@@ -21,67 +26,80 @@ library(dplyr)
# Example
-Place `time_pipe()` anywhere in a pipeline to measure elapsed time from
-the start up to that point.
+Place `time_pipe()` at the end of a pipeline to measure total elapsed
+time:
``` r
+slow_op <- function(delay, x) {
+ Sys.sleep(delay) # Simulate a time-consuming operation
+ rnorm(n = length(x), mean = x, sd = 1)
+}
+
data.frame(x = 1:3) |>
- mutate(sleep = Sys.sleep(0.1)) |> # e.g. a complex operation
+ mutate(sleep = slow_op(0.1, x)) |>
summarise(mean_x = mean(x)) |>
- time_pipe("total pipeline") # ~0.1 sec
-#> [2025-09-22 14:30:18.343] total pipeline: 0.1094 secs
+ time_pipe("total pipeline") # ~+0.1 sec
+#> [2025-09-28 22:40:14.332] total pipeline: +0.1061 secs
#> mean_x
#> 1 2
```
-Insert multiple `time_pipe()` calls to add *timestamps* along the
-pipeline:
+Use multiple `time_pipe()` calls to mark steps along a pipeline:
``` r
-complex_fn <- function(duration,x) {
- Sys.sleep(duration) # Simulate a time-consuming operation
- rnorm(n = length(x), mean = x, sd = 1)
-}
-
data.frame(x = 1:5) |>
- mutate(y = complex_fn(0.5, x)) |>
+ mutate(y = slow_op(0.5, x)) |>
time_pipe("compute y") |>
- mutate(z = complex_fn(0.5, y)) |>
+ mutate(z = slow_op(0.5, y)) |>
time_pipe("compute z") |>
summarise(mean_z = mean(z)) |>
time_pipe("total pipeline")
-#> [2025-09-22 14:30:18.462] compute y: 0.5066 secs
-#> [2025-09-22 14:30:18.462] compute z: 1.0120 secs
-#> [2025-09-22 14:30:18.462] total pipeline: 1.0171 secs
+#> [2025-09-28 22:40:14.444] compute y: +0.5055 secs
+#> [2025-09-28 22:40:14.444] compute z: +1.0114 secs
+#> [2025-09-28 22:40:14.444] total pipeline: +1.0122 secs
#> mean_z
-#> 1 3.117084
+#> 1 2.710958
```
-Each `time_pipe()` reports the cumulative time since the start of the
-pipeline.
+⏱️ **Each `time_pipe()` reports the cumulative time since the pipeline
+started.**
-# Logging to a dataframe
+# Logging
-Save timings to a dataframe in the package’s private environment
-(`.pipetime_env`) with the `log` argument:
+📝 Use `log` to save timings to a hidden environment (`.pipetime_env`):
``` r
-df_1 <- data.frame(x = 1:5) |>
- mutate(y = complex_fn(0.5, x)) |>
- time_pipe("compute y", log = "timings")
-#> [2025-09-22 14:30:19.494] compute y: 0.5073 secs
-
-df_2 <- df_1 |>
- mutate(z = complex_fn(0.5, y)) |>
+df <- data.frame(x = 1:5) |>
+ mutate(y = slow_op(0.5, x)) |>
+ time_pipe("compute y", log = "timings") |>
+ mutate(z = slow_op(0.5, y)) |>
time_pipe("compute z", log = "timings")
-#> [2025-09-22 14:30:20.005] compute z: 0.5077 secs
+#> [2025-09-28 22:40:15.460] compute y: +0.5055 secs
+#> [2025-09-28 22:40:15.460] compute z: +1.0116 secs
get_log("timings")
-#> timestamp label duration unit
-#> 1 2025-09-22 14:30:19.494 compute y 0.5073 secs
-#> 2 2025-09-22 14:30:20.005 compute z 0.5077 secs
-rm_log("timings") # delete "timings" from .pipetime_env
+#> timestamp label duration unit
+#> 1 2025-09-28 22:40:15 compute y 0.5054879 secs
+#> 2 2025-09-28 22:40:15 compute z 1.0115728 secs
+rm_log("timings") #delete the dataframe in .pipetime_env
```
-Set a global default for the session using:
-`options(pipetime.log = "timings")`.
+## Managing logs
+
+- `get_log("name")` → return one log
+
+- `get_log(NULL)` → return all logs as a named list
+
+- `rm_log("name")` → remove one log
+
+- `rm_log(NULL, force = TRUE)` → remove all logs
+
+# Options
+
+You can also set **session‑wide** defaults:
+
+``` r
+options(pipetime.log = "timings",
+ pipetime.console = TRUE,
+ pipetime.unit = "secs")
+```
diff --git a/man/figures/logo.png b/man/figures/logo.png
index c2ddab7..a00a45d 100644
Binary files a/man/figures/logo.png and b/man/figures/logo.png differ
diff --git a/man/get_log.Rd b/man/get_log.Rd
index 4ee0649..1d21e8e 100644
--- a/man/get_log.Rd
+++ b/man/get_log.Rd
@@ -2,16 +2,30 @@
% Please edit documentation in R/get_log.R
\name{get_log}
\alias{get_log}
-\title{Retrieve a stored timing log}
+\title{Retrieve a timing log (or all logs)}
\usage{
-get_log(log)
+get_log(log = NULL)
}
\arguments{
-\item{log}{Character. Name of the data frame to load from \code{.pipetime_env}.}
+\item{log}{Character string or \code{NULL}. Name of the log to retrieve. If \code{NULL}, all logs are returned.}
}
\value{
-A data frame of timing logs.
+Either:
+\itemize{
+\item A data frame with columns:
+\itemize{
+\item \code{timestamp} (\code{POSIXct}): Pipeline start time
+\item \code{label} (\code{character}): Operation label
+\item \code{duration} (\code{numeric}): Elapsed time since pipeline start
+\item \code{unit} (\code{character}): Time unit used
+}
+\item Or, if \code{log = NULL}, a named list of such data frames.
+}
}
\description{
-Retrieve a stored timing log
+Return a stored timing log from \code{.pipetime_env}.
+If \code{log = NULL}, return all logs as a named list.
+}
+\seealso{
+\code{\link[=rm_log]{rm_log()}}
}
diff --git a/man/rm_log.Rd b/man/rm_log.Rd
index ded525d..295533b 100644
--- a/man/rm_log.Rd
+++ b/man/rm_log.Rd
@@ -2,13 +2,22 @@
% Please edit documentation in R/rm_log.R
\name{rm_log}
\alias{rm_log}
-\title{Remove a stored timing log}
+\title{Remove a timing log (or all logs)}
\usage{
-rm_log(log)
+rm_log(log = NULL, force = FALSE)
}
\arguments{
-\item{log}{Character. Name of the timing log to delete from \code{.pipetime_env}.}
+\item{log}{Character string or \code{NULL}. Name of the log to remove. If \code{NULL}, all logs are targeted.}
+
+\item{force}{Logical. To remove all logs, \code{force} must be \code{TRUE}. Default: \code{FALSE}.}
+}
+\value{
+Invisibly, \code{TRUE}.
}
\description{
-Remove a stored timing log
+Delete a timing log from \code{.pipetime_env}.
+If \code{log = NULL}, all logs are removed, but only when \code{force = TRUE}.
+}
+\seealso{
+\code{\link[=get_log]{get_log()}}
}
diff --git a/man/time_pipe.Rd b/man/time_pipe.Rd
index f72245d..27f4f29 100644
--- a/man/time_pipe.Rd
+++ b/man/time_pipe.Rd
@@ -15,24 +15,33 @@ time_pipe(
\arguments{
\item{.data}{Input object passed through the pipeline.}
-\item{label}{Optional. Name for the operation. Defaults to the expression if not provided.}
+\item{label}{Character string. Operation name. Defaults to the expression if \code{NULL}.}
-\item{log}{Character or NULL. Name of a data frame to store logs in \code{.pipetime_env}. Defaults to NULL (no storage).}
+\item{log}{Character string or \code{NULL}. Name of a log data frame in \code{.pipetime_env}. Default: \code{NULL}.}
-\item{console}{Logical. Print timing to the console? Defaults to TRUE.}
+\item{console}{Logical. Print timing to console? Default: \code{TRUE}.}
-\item{unit}{Character. Time unit passed to \code{\link[base:difftime]{base::difftime()}}. One of \code{"secs"}, \code{"mins"}, \code{"hours"}, \code{"days"}, or \code{"weeks"}. Defaults to \code{"secs"}.}
+\item{unit}{Character string. Time unit for \code{\link[base:difftime]{base::difftime()}}. One of \code{"secs"}, \code{"mins"}, \code{"hours"}, \code{"days"}, \code{"weeks"}. Default: \code{"secs"}.}
}
\value{
-The input object, unchanged. Timing information is printed or stored separately.
+\code{.data}, unchanged. Timing information is printed and/or stored separately.
}
\description{
-Records the runtime of pipeline (|>) operation.
-Can print the timing to the console and optionally log it to a data frame in \code{.pipetime_env}.
-Defaults can be set via \code{options()}.
+Records the runtime of a pipeline (\verb{|>}) from its start to the point where \code{time_pipe()} is called.
+Prints results to the console and/or logs them in \code{.pipetime_env}.
+Defaults can be set via \verb{options(pipetime.*)}.
}
\details{
-\code{time_pipe()} measures the elapsed time of the pipeline from its start to the point where \code{time_pipe()} is called.
+\code{time_pipe()} measures elapsed time from pipeline start to the call.
+If \code{log} is set, results are appended to a data frame in \code{.pipetime_env} with columns:
+\itemize{
+\item \code{timestamp}: Pipeline start time (\code{POSIXct})
+\item \code{label}: Operation label
+\item \code{duration}: Elapsed time since pipeline start (\code{numeric})
+\item \code{unit}: Time unit used
+}
+
+Stored logs can be retrieved with \code{\link[=get_log]{get_log()}}.
}
\examples{
library(dplyr)
diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png
index 8734449..a761e85 100644
Binary files a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ
diff --git a/pkgdown/favicon/favicon-96x96.png b/pkgdown/favicon/favicon-96x96.png
index 0319ed9..5f21cd9 100644
Binary files a/pkgdown/favicon/favicon-96x96.png and b/pkgdown/favicon/favicon-96x96.png differ
diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico
index f2c463f..dc51c26 100644
Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ
diff --git a/pkgdown/favicon/favicon.svg b/pkgdown/favicon/favicon.svg
index 0d9aa02..586d735 100644
--- a/pkgdown/favicon/favicon.svg
+++ b/pkgdown/favicon/favicon.svg
@@ -1,3 +1,3 @@
-
\ No newline at end of file
diff --git a/pkgdown/favicon/web-app-manifest-192x192.png b/pkgdown/favicon/web-app-manifest-192x192.png
index fb0e53b..a7841a4 100644
Binary files a/pkgdown/favicon/web-app-manifest-192x192.png and b/pkgdown/favicon/web-app-manifest-192x192.png differ
diff --git a/pkgdown/favicon/web-app-manifest-512x512.png b/pkgdown/favicon/web-app-manifest-512x512.png
index c79fccc..e4af4d5 100644
Binary files a/pkgdown/favicon/web-app-manifest-512x512.png and b/pkgdown/favicon/web-app-manifest-512x512.png differ
diff --git a/tests/testthat/test-time_pipe.R b/tests/testthat/test-time_pipe.R
index 4324353..a5807e7 100644
--- a/tests/testthat/test-time_pipe.R
+++ b/tests/testthat/test-time_pipe.R
@@ -4,80 +4,64 @@ test_that("time_pipe returns input unchanged", {
expect_identical(out, df)
})
-test_that("time_pipe generates default label", {
- mydataframe <- data.frame(x = 1:3)
- expect_message(mydataframe |> time_pipe(), regexp = "mydataframe")
-})
-
-test_that("time_pipe generates custom label", {
+test_that("labels are generated correctly", {
df <- data.frame(x = 1:3)
- expect_message(df |> time_pipe("custom label"), regexp = "custom label")
+ expect_message(df |> time_pipe(), regexp = "df") # default label
+ expect_message(df |> time_pipe("custom"), regexp = "custom") # custom label
})
-test_that("invalid unit raises error", {
- df <- data.frame(x = 1:3)
- expect_error(
- df |> time_pipe("test", unit = "microsec"),
- regexp = "should be one of"
- )
-})
-
-test_that("different time units are accepted", {
+test_that("invalid and valid units behave as expected", {
df <- data.frame(x = 1:3)
+ expect_error(df |> time_pipe("bad", unit = "microsec"))
for (u in c("secs", "mins", "hours", "days", "weeks")) {
- expect_message(df |> time_pipe("time unit test", unit = u), regexp = u)
+ expect_message(df |> time_pipe("ok", unit = u), regexp = u)
}
})
-test_that("time_pipe throws error if log is not character", {
- df_input <- data.frame(x = 1:3)
- expect_error(
- df_input |> time_pipe("test", log = data.frame()),
- regexp = "'log' must be a character string"
- )
+test_that("log must be character", {
+ df <- data.frame(x = 1:3)
+ expect_error(df |> time_pipe("bad", log = data.frame()))
})
-test_that("time_pipe stores timings in a data frame", {
+test_that("timings are stored in a log", {
data.frame(x = 1:3) |>
dplyr::mutate(y = x * 2) |>
- time_pipe("step 1", console = FALSE, log = "times") |>
+ time_pipe("step1", console = FALSE, log = "log1") |>
dplyr::mutate(z = y / 2) |>
- time_pipe("step 2", console = FALSE, log = "times")
-
- expect_true(exists("times", envir = .pipetime_env))
+ time_pipe("step2", console = FALSE, log = "log1")
- # Load the stored data frame
- stored <- get_log("times")
+ stored <- get_log("log1")
expect_s3_class(stored, "data.frame")
expect_equal(nrow(stored), 2)
- expect_equal(stored$label, c("step 1", "step 2"))
+ expect_equal(stored$label, c("step1", "step2"))
- # Remove the stored data frame
- rm_log("times")
- expect_false(exists("times", envir = .pipetime_env))
+ rm_log("log1")
+ expect_false(exists("log1", envir = .pipetime_env))
})
-
-test_that("time_pipe time is as expected", {
+test_that("durations reflect elapsed time", {
data.frame(x = 1:3) |>
- time_pipe(
- "pre-sleep",
- unit = "secs",
- log = "times",
- console = FALSE
- ) |>
+ time_pipe("pre", unit = "secs", log = "log2", console = FALSE) |>
dplyr::mutate(result = Sys.sleep(0.5)) |>
- time_pipe(
- "post-sleep",
- unit = "secs",
- log = "times",
- console = FALSE
- )
- times <- get_log("times")
- pre_time <- times$duration[1]
- post_time <- times$duration[2]
+ time_pipe("post", unit = "secs", log = "log2", console = FALSE)
+
+ times <- get_log("log2")
+ expect_true(times$duration[2] >= 0.5)
+ expect_true(times$duration[1] < 0.5)
+
+ rm_log("log2")
+})
+
+test_that("multiple logs can be used independently", {
+ df <- data.frame(x = 1:3)
+ df |> time_pipe("a1", log = "loga", console = FALSE)
+ df |> time_pipe("b1", log = "logb", console = FALSE)
+
+ logs <- get_log(NULL) # all logs
+ expect_named(logs, c("loga", "logb"))
+ expect_equal(nrow(logs$loga), 1)
+ expect_equal(nrow(logs$logb), 1)
- expect_true(post_time >= 0.5)
- expect_true(pre_time < 0.5)
- rm_log("times")
+ rm_log(NULL, force = TRUE) # clear all
+ expect_length(get_log(NULL), 0)
})
diff --git a/vignettes/timing_R_pipelines.Rmd b/vignettes/timing_R_pipelines.Rmd
new file mode 100644
index 0000000..8026599
--- /dev/null
+++ b/vignettes/timing_R_pipelines.Rmd
@@ -0,0 +1,125 @@
+---
+title: "Timing R Pipelines"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{timing_R_pipelines}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>"
+)
+library(pipetime)
+library(dplyr)
+library(stringr)
+library(ggplot2)
+```
+
+# Overview
+`pipetime` enables **inline** timing of R pipelines (`|>`), helping identify performance bottlenecks and compare different approaches without disrupting your workflow.
+
+We illustrate this with a text processing example, comparing base R string functions against the optimized `stringr` package for common data cleaning tasks.
+
+- **Workflow A** 🐢 : Uses base R string functions (`gsub`, `substr`, `grepl`).
+
+- **Workflow B** 🚀: Uses `stringr`'s optimised functions.
+
+## Example Data
+```{r}
+set.seed(123)
+make_str <- function(n) paste(sample(letters, n, TRUE), collapse = "")
+
+text_data <- data.frame(
+ id = 1:1e5,
+ email = paste0(
+ sapply(sample(5:15, 1e5, TRUE), make_str),
+ sample(c("@gmail.com", "@yahoo.com", "@hotmail.com"), 1e5, TRUE)
+ ),
+ phone = paste0(
+ "(",
+ sample(100:999, 1e5, TRUE),
+ ") ",
+ sample(100:999, 1e5, TRUE),
+ "-",
+ sample(1000:9999, 1e5, TRUE)
+ ),
+ text = sapply(sample(20:100, 1e5, TRUE), make_str)
+)
+head(text_data, n = 3)
+```
+
+## Timing Workflows
+We use the `log` argument so each workflow stores its timings separately.
+
+```{r}
+library(dplyr)
+library(pipetime)
+options(pipetime.console = FALSE)
+# Workflow A: Base R
+wf_A <- text_data |>
+ mutate(
+ domain = sub(".*@", "", email),
+ clean_phone = gsub("[^0-9]", "", phone),
+ word_count = lengths(strsplit(text, " "))
+ ) |>
+ time_pipe("extract & clean", log = "base") |>
+ filter(grepl("^[a-m]", text)) |>
+ time_pipe("filter", log = "base") |>
+ mutate(
+ text_upper = toupper(text),
+ truncated = substr(text, 1, 50)
+ ) |>
+ time_pipe("transform", log = "base")
+
+# Workflow B: stringr (optimized)
+wf_B <- text_data |>
+ mutate(
+ domain = str_extract(email, "(?<=@).*"),
+ clean_phone = str_remove_all(phone, "[^0-9]"),
+ word_count = str_count(text, "\\S+")
+ ) |>
+ time_pipe("extract & clean", log = "stringr") |>
+ filter(str_detect(text, "^[a-m]")) |>
+ time_pipe("filter", log = "stringr") |>
+ mutate(
+ text_upper = str_to_upper(text),
+ truncated = str_sub(text, 1, 50)
+ ) |>
+ time_pipe("transform", log = "stringr")
+
+```
+
+# Results
+```{r, dpi = 500}
+# Collect both logs
+logs <- get_log() |>
+ bind_rows(.id = "workflow") |>
+ group_by(workflow) |>
+ # Add a starting point
+ group_modify(~ add_row(.x, duration = 0, label = "start", .before = 1)) |>
+ mutate(step = factor(row_number()))
+
+library(ggplot2)
+logs |>
+ ggplot(
+ aes(
+ x = step,
+ y = duration,
+ colour = workflow,
+ group = workflow
+ )
+ ) +
+ geom_line(linewidth = 1) +
+ geom_point(size = 3) +
+ geom_text(aes(label = label), vjust = -0.7, size = 3.5, show.legend = FALSE) +
+ labs(
+ x = "Step",
+ y = "Cumulative time (sec)",
+ title = "Base R vs stringr",
+ colour = "Workflow"
+ ) +
+ theme_classic()
+```
\ No newline at end of file
diff --git a/vignettes/timing_operations_in_pipelines.Rmd b/vignettes/why_pipetime.Rmd
similarity index 65%
rename from vignettes/timing_operations_in_pipelines.Rmd
rename to vignettes/why_pipetime.Rmd
index 510d2f1..2da8c20 100644
--- a/vignettes/timing_operations_in_pipelines.Rmd
+++ b/vignettes/why_pipetime.Rmd
@@ -1,8 +1,8 @@
---
-title: "Timing Operations in R Pipelines"
+title: "Why pipetime?"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{timing_operations_in_pipelines}
+ %\VignetteIndexEntry{why_pipetime}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
@@ -21,14 +21,11 @@ library(pipetime)
library(dplyr)
```
-# Motivation
-
-R pipelines (`|>`) allow chaining operations in a readable, sequential way. Existing timing tools (e.g. `system.time()`, `tictoc`) do not integrate naturally with pipelines and tidy workflows.
-
-# Demonstration
+R pipelines (`|>`) allow chaining operations in a readable, sequential way. Existing timing tools (e.g. `system.time()`, `tictoc`) do not integrate naturally with pipelines and tidy workflows. `pipetime` solves this by letting you measure time inline, without interrupting the pipeline.
+# Examples
```{r}
-complex_fn <- function(x) {
+slow_op <- function(x) {
Sys.sleep(0.1) # Simulate a time-consuming operation
x^2
}
@@ -37,19 +34,19 @@ complex_fn <- function(x) {
## `system.time()`
```{r, error=TRUE}
-# Must wrap entire pipeline to record the time, breaking the flow
+# Must wrap the entire pipeline, breaking the flow
the_time <- system.time({
df <- data.frame(x = 1:3) |>
- mutate(y = complex_fn(x)) |>
+ mutate(y = slow_op(x)) |>
summarise(mean_y = mean(y))
})
the_time
df
-# Doesn't return the result of the pipeline, so will give an error
+# system.time() cannot be inserted inline in a pipeline:
data.frame(x = 1:3) |>
- mutate(y = complex_fn(x)) |>
- system_time() |>
+ mutate(y = slow_op(x)) |>
+ # system.time() would break the pipeline here
summarise(mean_y = mean(y))
```
@@ -58,10 +55,10 @@ data.frame(x = 1:3) |>
```{r}
library(tictoc)
-# Manual start/stop, not inline
+# Requires manual start/stop
tic("total pipeline")
df <- data.frame(x = 1:3) |>
- mutate(y = complex_fn(x)) |>
+ mutate(y = slow_op(x)) |>
summarise(mean_y = mean(y))
toc()
df
@@ -70,18 +67,19 @@ df
## `time_pipe`
```{r}
-# Inline timing checkpoints, keeps pipeline flow
+# Inline timing checkpoints, pipeline stays intact
data.frame(x = 1:3) |>
- mutate(y = complex_fn(x)) |>
+ mutate(y = slow_op(x)) |>
time_pipe("after mutate") |>
summarise(mean_y = mean(y)) |>
time_pipe("total pipeline")
+
```
# Why `pipetime`?
- Works directly inside pipelines.
-- Supports multiple calls within the pipeline.
+- Supports multiple checkpoints.
-- Records timing operations in `.pipetime_env`.
+- Prints or logs timings in `.pipetime_env` (see `?get_log`).
\ No newline at end of file