Skip to content

Commit

Permalink
Newly created variables in ... can be processed in .at or .if (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke authored Feb 7, 2024
1 parent 3be20ad commit 903f149
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 29 deletions.
30 changes: 14 additions & 16 deletions R/data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@
#' - Using `NULL` as right-hand side removes a variable from the data frame.
#' Example: `Petal.Width = NULL`.
#'
#' Note that newly created variables can be used in subsequent expressions.
#' See also 'Examples'.
#' Note that newly created variables can be used in subsequent expressions,
#' including `.at` or `.if`. See also 'Examples'.
#'
#' @param .at A character vector of variable names that should be modified. This
#' argument is used in combination with the `.modify` argument. Note that only one
#' of `.at` or `.if` can be provided, but not both at the same time. Newly created
#' variables in `...` are not affected by `.at`.
#' variables in `...` can also be selected, see 'Examples'.
#' @param .if A function that returns `TRUE` for columns in the data frame where
#' `.if` applies. This argument is used in combination with the `.modify` argument.
#' Note that only one of `.at` or `.if` can be provided, but not both at the same
#' time. Newly created variables in `...` are not affected by `.if`.
#' time. Newly created variables in `...` can also be selected, see 'Examples'.
#' @param .modify A function that modifies the variables defined in `.at` or `.if`.
#' This argument is used in combination with either the `.at` or the `.if` argument.
#' Note that the modified variable (i.e. the result from `.modify`) must be either
Expand Down Expand Up @@ -113,15 +113,13 @@
#' # can be combined with dots
#' data_modify(d, new_length = Petal.Length * 2, .at = "Species", .modify = as.numeric)
#'
#' # note that new variables cannot be used in `.at` or `.if` arguments
#' # this example would throw an error
#' \dontrun{
#' # new variables used in `.at` or `.if`
#' data_modify(
#' d,
#' new_length = Petal.Length * 2,
#' .at = c("Species", "new_length"),
#' .modify = as.numeric
#' )}
#' .at = c("Petal.Length", "new_length"),
#' .modify = round
#' )
#'
#' # combine "data_find()" and ".at" argument
#' out <- data_modify(
Expand All @@ -146,7 +144,6 @@ data_modify.default <- function(data, ...) {
#' @export
data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = NULL) {
dots <- eval(substitute(alist(...)))
column_names <- colnames(data)

# check if we have dots, or only at/modify ----

Expand Down Expand Up @@ -248,7 +245,7 @@ data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify =
}

# check if we have at/modify ----
data <- .modify_at(data, .at, .if, .modify, column_names)
data <- .modify_at(data, .at, .if, .modify)

data
}
Expand All @@ -258,7 +255,6 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
# we need to evaluate dots here, and pass them with "do.call" to
# the data.frame method later...
dots <- match.call(expand.dots = FALSE)[["..."]]
column_names <- colnames(data)

# works only for dplyr >= 0.8.0
grps <- attr(data, "groups", exact = TRUE)
Expand Down Expand Up @@ -313,7 +309,7 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
}

# check if we have at/modify ----
data <- .modify_at(data, .at, .if, .modify, column_names)
data <- .modify_at(data, .at, .if, .modify)

# set back attributes and class
data <- .replace_attrs(data, attr_data)
Expand All @@ -324,7 +320,7 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =

# helper -------------

.modify_at <- function(data, .at, .if, .modify, column_names) {
.modify_at <- function(data, .at, .if, .modify) {
# check if ".at" or ".if" is defined, but not ".modify"
if (is.null(.modify)) {
if (!is.null(.at) || !is.null(.if)) {
Expand All @@ -345,9 +341,11 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
insight::format_error("You need to specify either `.at` or `.if`.")
}

column_names <- colnames(data)

# if we have ".if" defined, specify ".at"
if (!is.null(.if)) {
.at <- column_names[vapply(data[column_names], .if, logical(1))]
.at <- column_names[vapply(data, .if, logical(1))]
}
# check for valid defined column names
if (!all(.at %in% column_names)) {
Expand Down
18 changes: 8 additions & 10 deletions man/data_modify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 12 additions & 3 deletions tests/testthat/test-data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ test_that("data_modify works with functions that return character vectors", {
test_that("data_modify .if/.at arguments", {
data(iris)
d <- iris[1:5, ]
# validate results
out <- data_modify(d, .at = "Species", .modify = as.numeric)
expect_identical(out$Species, c(1, 1, 1, 1, 1))
out <- data_modify(d, .if = is.factor, .modify = as.numeric)
Expand All @@ -505,39 +506,47 @@ test_that("data_modify .if/.at arguments", {
"Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
"Species", "new_length"
))
# .at and .if cannot be used at same timne
expect_error(
data_modify(d, .at = "Species", .if = is.factor, .modify = as.numeric),
regex = "You cannot use both"
)
# modify must be a function
expect_error(
data_modify(d, .at = "Species", .modify = "a"),
regex = "`.modify` must"
)
# unknown variable
expect_error(
data_modify(d, .at = c("Species", "Test"), .modify = as.numeric),
regex = "Variable \"Test\""
)
# unknown variables
expect_error(
data_modify(d, .at = c("Species", "Hi", "Test"), .modify = as.numeric),
regex = "Variables \"Hi\" and \"Test\""
)
# one of .at or .if must be specified
expect_error(
data_modify(d, .modify = as.numeric),
regex = "You need to specify"
)
# function not applicable to factors
expect_error(
data_modify(d, .at = "Species", .modify = function(x) 2 / y + x),
regex = "Error in modifying variable"
)
# function not applicable to factors
expect_error(
data_modify(d, .at = "Species", .modify = function(x) 2 * x),
regex = "Error in modifying variable"
)
# .modify needs to be specified
expect_error(
data_modify(d, .at = "Species", .if = is.factor),
regex = "You need to specify"
)
# newly created variables are not modified by if/at
out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = as.factor)
expect_identical(out$new_length, c(2.8, 2.8, 2.6, 3, 2.8))
# newly created variables are processed by if/at
out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = round)
expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE)
})

0 comments on commit 903f149

Please sign in to comment.