From 903f14992ba55434f18cf3f18bad8588f1ac62a5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 7 Feb 2024 17:18:04 +0100 Subject: [PATCH] Newly created variables in `...` can be processed in `.at` or `.if` (#480) --- R/data_modify.R | 30 ++++++++++++++---------------- man/data_modify.Rd | 18 ++++++++---------- tests/testthat/test-data_modify.R | 15 ++++++++++++--- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/R/data_modify.R b/R/data_modify.R index 30b21e460..40a186736 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -23,17 +23,17 @@ #' - Using `NULL` as right-hand side removes a variable from the data frame. #' Example: `Petal.Width = NULL`. #' -#' Note that newly created variables can be used in subsequent expressions. -#' See also 'Examples'. +#' Note that newly created variables can be used in subsequent expressions, +#' including `.at` or `.if`. See also 'Examples'. #' #' @param .at A character vector of variable names that should be modified. This #' argument is used in combination with the `.modify` argument. Note that only one #' of `.at` or `.if` can be provided, but not both at the same time. Newly created -#' variables in `...` are not affected by `.at`. +#' variables in `...` can also be selected, see 'Examples'. #' @param .if A function that returns `TRUE` for columns in the data frame where #' `.if` applies. This argument is used in combination with the `.modify` argument. #' Note that only one of `.at` or `.if` can be provided, but not both at the same -#' time. Newly created variables in `...` are not affected by `.if`. +#' time. Newly created variables in `...` can also be selected, see 'Examples'. #' @param .modify A function that modifies the variables defined in `.at` or `.if`. #' This argument is used in combination with either the `.at` or the `.if` argument. #' Note that the modified variable (i.e. 
the result from `.modify`) must be either @@ -113,15 +113,13 @@ #' # can be combined with dots #' data_modify(d, new_length = Petal.Length * 2, .at = "Species", .modify = as.numeric) #' -#' # note that new variables cannot be used in `.at` or `.if` arguments -#' # this example would throw an error -#' \dontrun{ +#' # new variables used in `.at` or `.if` #' data_modify( #' d, #' new_length = Petal.Length * 2, -#' .at = c("Species", "new_length"), -#' .modify = as.numeric -#' )} +#' .at = c("Petal.Length", "new_length"), +#' .modify = round +#' ) #' #' # combine "data_find()" and ".at" argument #' out <- data_modify( @@ -146,7 +144,6 @@ data_modify.default <- function(data, ...) { #' @export data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = NULL) { dots <- eval(substitute(alist(...))) - column_names <- colnames(data) # check if we have dots, or only at/modify ---- @@ -248,7 +245,7 @@ data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = } # check if we have at/modify ---- - data <- .modify_at(data, .at, .if, .modify, column_names) + data <- .modify_at(data, .at, .if, .modify) data } @@ -258,7 +255,6 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # we need to evaluate dots here, and pass them with "do.call" to # the data.frame method later... 
dots <- match.call(expand.dots = FALSE)[["..."]] - column_names <- colnames(data) # works only for dplyr >= 0.8.0 grps <- attr(data, "groups", exact = TRUE) @@ -313,7 +309,7 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = } # check if we have at/modify ---- - data <- .modify_at(data, .at, .if, .modify, column_names) + data <- .modify_at(data, .at, .if, .modify) # set back attributes and class data <- .replace_attrs(data, attr_data) @@ -324,7 +320,7 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # helper ------------- -.modify_at <- function(data, .at, .if, .modify, column_names) { +.modify_at <- function(data, .at, .if, .modify) { # check if ".at" or ".if" is defined, but not ".modify" if (is.null(.modify)) { if (!is.null(.at) || !is.null(.if)) { @@ -345,9 +341,11 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = insight::format_error("You need to specify either `.at` or `.if`.") } + column_names <- colnames(data) + # if we have ".if" defined, specify ".at" if (!is.null(.if)) { - .at <- column_names[vapply(data[column_names], .if, logical(1))] + .at <- column_names[vapply(data, .if, logical(1))] } # check for valid defined column names if (!all(.at %in% column_names)) { diff --git a/man/data_modify.Rd b/man/data_modify.Rd index 4cba88dd3..8cac82205 100644 --- a/man/data_modify.Rd +++ b/man/data_modify.Rd @@ -32,18 +32,18 @@ character vector is provided, you may not add further elements to \code{...}. Example: \code{Petal.Width = NULL}. } -Note that newly created variables can be used in subsequent expressions. -See also 'Examples'.} +Note that newly created variables can be used in subsequent expressions, +including \code{.at} or \code{.if}. See also 'Examples'.} \item{.if}{A function that returns \code{TRUE} for columns in the data frame where \code{.if} applies. This argument is used in combination with the \code{.modify} argument. 
Note that only one of \code{.at} or \code{.if} can be provided, but not both at the same -time. Newly created variables in \code{...} are not affected by \code{.if}.} +time. Newly created variables in \code{...} can also be selected, see 'Examples'.} \item{.at}{A character vector of variable names that should be modified. This argument is used in combination with the \code{.modify} argument. Note that only one of \code{.at} or \code{.if} can be provided, but not both at the same time. Newly created -variables in \code{...} are not affected by \code{.at}.} +variables in \code{...} can also be selected, see 'Examples'.} \item{.modify}{A function that modifies the variables defined in \code{.at} or \code{.if}. This argument is used in combination with either the \code{.at} or the \code{.if} argument. @@ -130,15 +130,13 @@ data_modify(d, .if = is.factor, .modify = as.numeric) # can be combined with dots data_modify(d, new_length = Petal.Length * 2, .at = "Species", .modify = as.numeric) -# note that new variables cannot be used in `.at` or `.if` arguments -# this example would throw an error -\dontrun{ +# new variables used in `.at` or `.if` data_modify( d, new_length = Petal.Length * 2, - .at = c("Species", "new_length"), - .modify = as.numeric -)} + .at = c("Petal.Length", "new_length"), + .modify = round +) # combine "data_find()" and ".at" argument out <- data_modify( diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 0a641c11b..9bb0a92d6 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -495,6 +495,7 @@ test_that("data_modify works with functions that return character vectors", { test_that("data_modify .if/.at arguments", { data(iris) d <- iris[1:5, ] + # validate results out <- data_modify(d, .at = "Species", .modify = as.numeric) expect_identical(out$Species, c(1, 1, 1, 1, 1)) out <- data_modify(d, .if = is.factor, .modify = as.numeric) @@ -505,39 +506,47 @@ test_that("data_modify 
.if/.at arguments", { "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species", "new_length" )) + # .at and .if cannot be used at the same time expect_error( data_modify(d, .at = "Species", .if = is.factor, .modify = as.numeric), regex = "You cannot use both" ) + # modify must be a function expect_error( data_modify(d, .at = "Species", .modify = "a"), regex = "`.modify` must" ) + # unknown variable expect_error( data_modify(d, .at = c("Species", "Test"), .modify = as.numeric), regex = "Variable \"Test\"" ) + # unknown variables expect_error( data_modify(d, .at = c("Species", "Hi", "Test"), .modify = as.numeric), regex = "Variables \"Hi\" and \"Test\"" ) + # one of .at or .if must be specified expect_error( data_modify(d, .modify = as.numeric), regex = "You need to specify" ) + # function uses `y`, which is not defined in the function expect_error( data_modify(d, .at = "Species", .modify = function(x) 2 / y + x), regex = "Error in modifying variable" ) + # function not applicable to factors expect_error( data_modify(d, .at = "Species", .modify = function(x) 2 * x), regex = "Error in modifying variable" ) + # .modify needs to be specified expect_error( data_modify(d, .at = "Species", .if = is.factor), regex = "You need to specify" ) - # newly created variables are not modified by if/at - out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = as.factor) - expect_identical(out$new_length, c(2.8, 2.8, 2.6, 3, 2.8)) + # newly created variables are processed by if/at + out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = round) + expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE) })