diff --git a/DESCRIPTION b/DESCRIPTION index 5562d25..9067d58 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tidyREDCap Title: Helper Functions for Working with 'REDCap' Data -Version: 1.2.0 +Version: 2.0.0 Authors@R: c(person( given = "Raymond", @@ -72,6 +72,7 @@ Imports: dplyr, janitor, labelVector, + lifecycle, magrittr, purrr, REDCapR, diff --git a/NAMESPACE b/NAMESPACE index 478f9ea..f2b4935 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,6 +47,7 @@ importFrom(janitor,adorn_pct_formatting) importFrom(janitor,tabyl) importFrom(labelVector,is_labelled) importFrom(labelVector,set_label) +importFrom(lifecycle,deprecate_soft) importFrom(magrittr,"%>%") importFrom(purrr,map_chr) importFrom(purrr,map_df) diff --git a/NEWS.md b/NEWS.md index 69340af..9483ecd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,17 @@ +# tidyREDCap 2.0.0 (CRAN release) + +## Breaking changes + +* `drop_label()` now preserves variable class attributes (e.g., `"character"`, `"numeric"`) while removing only the `"labelled"` class and `label` attribute. Previous versions removed all attributes. + +## New features + +* `drop_label()` now can be used on data frames. When called without arguments on a data frame, it removes labels from all variables. + +## Fixes/Changes + +* `drop_labels()` is now deprecated in favor of `drop_label()`. Use `drop_label()` without arguments to remove labels from all variables. + # tidyREDCap 1.2.0 (CRAN release) * Updated `drop_label()` now accepts multiple variables and uses `tidyselect` helpers. diff --git a/R/drop_label.R b/R/drop_label.R new file mode 100644 index 0000000..d079af7 --- /dev/null +++ b/R/drop_label.R @@ -0,0 +1,84 @@ + +#' Drop attributes/labels from variables or data frames +#' +#' @description Some functions don't work with labelled variables. As a solution, +#' this function can be used to drop labels (and all other attributes) from +#' one or more variables within a data frame, from all variables if none are +#' specified, or from a vector directly. +#' +#' @param x A data frame or a vector/column. +#' @param ... When `x` is a data frame, select variables using tidyselect +#' helpers (e.g., `contains()`, `starts_with()`) or unquoted names. +#' If empty, removes labels from ALL variables. Ignored when `x` is a vector. +#' +#' @return The modified data frame or vector with attributes removed. +#' +#' @examples +#' \dontrun{ +#' # Dataset-level: Remove labels from ALL variables +#' df |> drop_label() +#' +#' # Dataset-level: Remove labels from specific variables +#' df |> drop_label(employment, starts_with("dem_")) +#' +#' # Variable-level: Use inside mutate +#' df |> mutate(name_first = drop_label(name_first)) +#' +#' # Variable-level: Use with across() +#' df |> mutate(across(c(age, income), drop_label)) +#' } +#' +#' @export +drop_label <- function(x, ...) { + # 1. Dataset-level logic + if (is.data.frame(x)) { + old_class_char <- class(x) + + # Check if any variables were selected + vars_idx <- tidyselect::eval_select(rlang::expr(c(...)), x) + + # If no variables selected, process ALL columns + if (length(vars_idx) == 0) { + # seq_along will either capture the variable column number + vars_idx <- seq_along(x) + } + + # Remove attributes from selected columns + for (col_idx in vars_idx) { + # col_idx evaluates to: + # 1. variable name(s) if passed with the `...` argument + # 2. variable column position number if the whole dataset was passed for dropping + + # Get all current attributes + col_attrs <- attributes(x[[col_idx]]) + + # Remove the label attribute + col_attrs$label <- NULL + + # Remove the "labelled" from class if it exists + if (!is.null(col_attrs$class)) { + col_attrs$class <- col_attrs$class[col_attrs$class != "labelled"] + # If the class now becomes empty, remove it to use R's default + if (length(col_attrs$class) == 0) col_attrs$class <- NULL + } + + # Reapply the modified attributes + attributes(x[[col_idx]]) <- col_attrs + } + + # Preserve original class structure (data.frame vs tibble) + if (("data.frame" %in% old_class_char) && !("tbl_df" %in% old_class_char)) { + class(x) <- "data.frame" + } + + return(x) + } + + # 2. Variable-level logic + if (is.character(substitute(x))) { + stop('It looks like you quoted your variable. The variable must be unquoted when used inside mutate().') + } + + attributes(x) <- NULL + return(x) +} diff --git a/R/drop_labels.R b/R/drop_labels.R index 901a9b1..865978b 100644 --- a/R/drop_labels.R +++ b/R/drop_labels.R @@ -5,6 +5,7 @@ #' @param df The data frame with column labels that you want to drop #' #' @importFrom purrr map_df +#' @importFrom lifecycle deprecate_soft #' #' @export #' @@ -17,6 +18,13 @@ #' skimr::skim() #' } drop_labels <- function(df) { + # Add deprecate message: + lifecycle::deprecate_soft( + when = "1.2.0", # Version when this was deprecated + what = "drop_labels()", + with = "drop_label()" + ) + old_class_char <- class(df) if (!("data.frame" %in% old_class_char)) { stop("df must have class data.frame", call. = FALSE) diff --git a/R/make_instrument_auto.R b/R/make_instrument_auto.R index a73e906..2ddf8e4 100644 --- a/R/make_instrument_auto.R +++ b/R/make_instrument_auto.R @@ -171,51 +171,3 @@ fix_class_bug <- function(df) { return(df) } "fix_class_bug" - - -#' Drop attributes/labels from variables or data frames -#' -#' @description Some functions don't work with labelled variables. As a solution, -#' this function can be used to drop labels (and all other attributes) from -#' one or more variables within a data frame, or from a vector directly. -#' -#' @param x A data frame or a vector/column. -#' @param ... When `x` is a data frame, select variables using tidyselect -#' helpers (e.g., `contains()`, `starts_with()`) or unquoted names. -#' Ignored when `x` is a vector. -#' -#' @return The modified data frame or vector with attributes removed. -#' -#' @examples -#' \dontrun{ -#' # Dataset-level: Remove labels from specific variables -#' df |> drop_label(employment, starts_with("dem_")) -#' -#' # Variable-level: Use inside mutate -#' df |> mutate(name_first = drop_label(name_first)) -#' -#' # Variable-level: Use with across() -#' df |> mutate(across(c(age, income), drop_label)) -#' } -#' -#' @export -drop_label <- function(x, ...) { - # 1. Dataset-level logic - if (is.data.frame(x)) { - vars_idx <- tidyselect::eval_select(rlang::expr(c(...)), x) - if (length(vars_idx) == 0) return(x) - - for (col_idx in vars_idx) { - attributes(x[[col_idx]]) <- NULL - } - return(x) - } - - # 2. Variable-level logic - if (is.character(substitute(x))) { - stop('It looks like you quoted your variable. The variable must be unquoted when used inside mutate().') - } - - attributes(x) <- NULL - return(x) -} diff --git a/README.md b/README.md index 030ce15..343e391 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ tidyREDCap is an R package with functions for processing REDCap data. * After loading data into R using RStudio with the `import_instruments()` function, you can see both the variable name and the text that appears to users of REDCap. All you need to do is click on the dataset's name in the **Environment** tab or use the `View()` function. The column headings will include both the Variable Name and the Field Label from REDCap. -* 💥 NEW in Version 1.1 💥 Functions coming from packages outside of `tidyREDCap` may not understand what to do with labeled variables. So, `tidyREDCap` includes a new `drop_labels()` function that will allow you to strip the labels before using functions that want unlabeled data. +* 💥 NEW in Version 2.0.0 💥 Functions coming from packages outside of `tidyREDCap` may not understand what to do with labeled variables. So, `tidyREDCap` includes a new `drop_label()` function that will allow you to strip the labels before using functions that want unlabeled data. #### Working with Choose One Questions diff --git a/man/drop_label.Rd b/man/drop_label.Rd index 378525e..bda08ef 100644 --- a/man/drop_label.Rd +++ b/man/drop_label.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_instrument_auto.R +% Please edit documentation in R/drop_label.R \name{drop_label} \alias{drop_label} \title{Drop attributes/labels from variables or data frames} @@ -11,7 +11,7 @@ drop_label(x, ...) \item{...}{When \code{x} is a data frame, select variables using tidyselect helpers (e.g., \code{contains()}, \code{starts_with()}) or unquoted names. -Ignored when \code{x} is a vector.} +If empty, removes labels from ALL variables. Ignored when \code{x} is a vector.} } \value{ The modified data frame or vector with attributes removed. @@ -19,10 +19,14 @@ The modified data frame or vector with attributes removed. \description{ Some functions don't work with labelled variables. As a solution, this function can be used to drop labels (and all other attributes) from -one or more variables within a data frame, or from a vector directly. +one or more variables within a data frame, from all variables if none are +specified, or from a vector directly. } \examples{ \dontrun{ +# Dataset-level: Remove labels from ALL variables +df |> drop_label() + # Dataset-level: Remove labels from specific variables df |> drop_label(employment, starts_with("dem_")) diff --git a/tests/testthat/test-drop_label.R b/tests/testthat/test-drop_label.R new file mode 100644 index 0000000..d0f5e3f --- /dev/null +++ b/tests/testthat/test-drop_label.R @@ -0,0 +1,181 @@ +demographics <- structure( + list( + record_id = c(1, 2, 3, 4, 5), + name_first = structure( + c("Nutmeg", "Tumtum", "Marcus", "Trudy", "John Lee"), + label = "First Name", + class = c("labelled", "character") + ), + name_last = structure( + c("Nutmouse", "Nutmouse", "Wood", "DAG", "Walker"), + label = "Last Name", + class = c("labelled", "character") + ), + address = structure( + c( + "14 Rose Cottage St.\nKenning UK, 323232", + "14 Rose Cottage Blvd.\nKenning UK 34243", + "243 Hill St.\nGuthrie OK 73402", + "342 Elm\nDuncanville TX, 75116", + "Hotel Suite\nNew Orleans LA, 70115" + ), + label = "Street, City, State, ZIP", + class = c("labelled", "character") + ), + telephone = structure( + c( + "(405) 321-1111", + "(405) 321-2222", + "(405) 321-3333", + "(405) 321-4444", + "(405) 321-5555" + ), + label = "Phone number", + class = c("labelled", "character") + ), email = structure( + c( + "nutty@mouse.com", + "tummy@mouse.comm", + "mw@mwood.net", + "peroxide@blonde.com", + "left@hippocket.com" + ), + label = "E-mail", + class = c("labelled", "character") + ), + dob = structure( + c(12294, 12121, -13051, -6269, -5375), + class = c("labelled", "Date"), + label = "Date of birth" + ), + age = structure( + c(11, 11, 80, 61, 59), + label = "Age (years)", + class = c("labelled", "numeric") + ), + days = structure( + c(1, 2, 3, 4, 5), + label = "Days", + class = c("labelled", "numeric") + ), + sex = structure( + c("Female", "Male", "Male", "Female", "Male"), + label = "Gender", + class = c("labelled", "character") + ), + demographics_complete = structure( + c("Complete", "Complete", "Complete", "Complete", "Complete"), + label = "Complete?...10", + class = c("labelled", "character") + ) + ), + row.names = c(NA, -5L), + class = "data.frame" +) + +test_that("drop_label() removes all labels when no variables specified", { + result <- drop_label(demographics) + + # Check that label attribute is removed from all columns + expect_null(attr(result$name_first, "label")) + expect_null(attr(result$name_last, "label")) + + # Check that "labelled" class is removed but base class preserved + expect_false("labelled" %in% class(result$name_first)) + expect_true("character" %in% class(result$name_first)) +}) + +test_that("drop_label() removes label from single variable", { + result <- drop_label(demographics, name_first) + + # name_first should have label removed + expect_null(attr(result$name_first, "label")) + expect_false("labelled" %in% class(result$name_first)) + expect_true("character" %in% class(result$name_first)) + + # name_last should still have label + expect_equal(attr(result$name_last, "label"), "Last Name") + expect_true("labelled" %in% class(result$name_last)) +}) + +test_that("drop_label() removes labels from multiple variables", { + result <- drop_label(demographics, name_first, name_last) + + # Both should have labels removed + expect_null(attr(result$name_first, "label")) + expect_null(attr(result$name_last, "label")) + expect_false("labelled" %in% class(result$name_first)) + expect_false("labelled" %in% class(result$name_last)) + + # Other columns should still have labels (if they had them) + if ("labelled" %in% class(demographics$email)) { + expect_true("labelled" %in% class(result$email)) + } +}) + +test_that("drop_label() works with tidyselect helpers", { + result <- drop_label(demographics, contains('name')) + + # Variables containing 'name' should have labels removed + expect_null(attr(result$name_first, "label")) + expect_null(attr(result$name_last, "label")) + expect_false("labelled" %in% class(result$name_first)) + expect_false("labelled" %in% class(result$name_last)) +}) + +test_that("drop_label() works inside mutate() with single variable", { + result <- demographics |> + mutate(name_first = drop_label(name_first)) + + # name_first should have label removed + expect_null(attr(result$name_first, "label")) + expect_false("labelled" %in% class(result$name_first)) + + # name_last should still have label + expect_equal(attr(result$name_last, "label"), "Last Name") +}) + +test_that("drop_label() works inside mutate() with across()", { + result <- demographics |> + mutate(across(contains('name'), drop_label)) + + # Both name variables should have labels removed + expect_null(attr(result$name_first, "label")) + expect_null(attr(result$name_last, "label")) + expect_false("labelled" %in% class(result$name_first)) + expect_false("labelled" %in% class(result$name_last)) +}) + +test_that("drop_label() errors when variable is quoted in mutate()", { + expect_error( + demographics |> mutate(name_first = drop_label('name_first')), + "quoted your variable" + ) +}) + +test_that("drop_label() preserves data frame class", { + # Test with base data.frame + df_base <- as.data.frame(demographics) + result_base <- drop_label(df_base) + expect_s3_class(result_base, "data.frame") + expect_false(inherits(result_base, "tbl_df")) + + # Test with tibble + df_tibble <- tibble::as_tibble(demographics) + result_tibble <- drop_label(df_tibble) + expect_s3_class(result_tibble, "tbl_df") +}) + +test_that("drop_labels() shows deprecation warning", { + lifecycle::expect_deprecated( + drop_labels(demographics), + "drop_labels.*deprecated.*drop_label" + ) +}) + +test_that("drop_label() on quoted variable doesn't work at dataset level", { + # This should process the variable 'name_first' (not error) + # Quoting is only an error inside mutate() + result <- drop_label(demographics, 'name_first') + expect_null(attr(result$name_first, "label")) +}) \ No newline at end of file diff --git a/vignettes/.gitignore b/vignettes/.gitignore index 097b241..47018d6 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1,2 +1,5 @@ *.html *.R + +/.quarto/ +**/*.quarto_ipynb diff --git a/vignettes/dropLabels.Rmd b/vignettes/dropLabels.Rmd index 3647f73..860e192 100644 --- a/vignettes/dropLabels.Rmd +++ b/vignettes/dropLabels.Rmd @@ -183,9 +183,16 @@ demographics_from_mutate <- demographics |> mutate(across(starts_with('name'), drop_label)) ``` -You can drop all the labels using the `drop_labels()` function. For example: +## Drop all dataset variable labels -```{r} -demographics_without_labels <- drop_labels(demographics) +You can drop all the labels using the `drop_label()` function. For example: + +```{r drop-label-dataset} +demographics_without_labels <- drop_label(demographics) ``` +> NOTE: tidyREDCap versions prior to 1.2.0 handled dropping all variable labels from a dataset by using `drop_labels()`. This function can still be used, but we added a polite message to use `drop_label()` instead. + +```{r, warning=TRUE} +demographics_without_labels <- drop_labels(demographics) +```