diff --git a/DESCRIPTION b/DESCRIPTION index 3c23de6..5562d25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tidyREDCap Title: Helper Functions for Working with 'REDCap' Data -Version: 1.1.3 +Version: 1.2.0 Authors@R: c(person( given = "Raymond", diff --git a/NEWS.md b/NEWS.md index 9e9b6b2..69340af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# tidyREDCap 1.2.0 (CRAN release) + +* Updated `drop_label()` now accepts multiple variables and uses `tidyselect` helpers. +* Updated "Drop Labels from a Table" vignette to reflect new changes. + # tidyREDCap 1.1.3 (CRAN release) * Update vignettes to suggest `keyring`. diff --git a/R/make_instrument_auto.R b/R/make_instrument_auto.R index 43af055..a73e906 100644 --- a/R/make_instrument_auto.R +++ b/R/make_instrument_auto.R @@ -13,20 +13,24 @@ #' @export #' ## @examples -make_instrument_auto <- function(df, drop_which_when = FALSE, - record_id = "record_id") { +make_instrument_auto <- function( + df, + drop_which_when = FALSE, + record_id = "record_id" +) { if (names(df)[1] != record_id) { - stop(" + stop( + " The first variable in df must be `record_id`; - use option 'record_id=' to set the name of your custom id.", call. = FALSE) + use option 'record_id=' to set the name of your custom id.", + call. = FALSE + ) } - # Strip labels from REDCap created variables to prevent reported join (and # perhaps pivot) issues on labeled variables. df <- drop_label(df, record_id) - is_longitudinal <- any(names(df) == "redcap_event_name") if (is_longitudinal) { @@ -69,37 +73,49 @@ make_instrument_auto <- function(df, drop_which_when = FALSE, record_id_col <- which(colnames(df) == record_id) redcap_event_name_col <- which(colnames(df) == "redcap_event_name") record_repeat_inst_col <- which(colnames(df) == "redcap_repeat_instance") - + if (is_longitudinal) { - # Select rows that have data with a repeat number - if (is_repeated & !all(is.na(df[!allMissing,record_repeat_inst_col]))) { - return(df[!allMissing, c( - record_id_col, - redcap_event_name_col, - record_repeat_inst_col, - first_col:last_col - )]) + # Select rows that have data with a repeat number + if (is_repeated & !all(is.na(df[!allMissing, record_repeat_inst_col]))) { + return(df[ + !allMissing, + c( + record_id_col, + redcap_event_name_col, + record_repeat_inst_col, + first_col:last_col + ) + ]) } else { # Longitudinal not repeated instruments - return(df[!allMissing, c( - record_id_col, - redcap_event_name_col, - first_col:last_col - )]) + return(df[ + !allMissing, + c( + record_id_col, + redcap_event_name_col, + first_col:last_col + ) + ]) } } else { - # Select rows that have data with a repeat number - if (is_repeated & !all(is.na(df[!allMissing,record_repeat_inst_col]))) { - return(df[!allMissing, c( - record_id_col, - record_repeat_inst_col, - first_col:last_col - )]) + # Select rows that have data with a repeat number + if (is_repeated & !all(is.na(df[!allMissing, record_repeat_inst_col]))) { + return(df[ + !allMissing, + c( + record_id_col, + record_repeat_inst_col, + first_col:last_col + ) + ]) } else { - return(df[!allMissing, c( - record_id_col, - first_col:last_col - )]) + return(df[ + !allMissing, + c( + record_id_col, + first_col:last_col + ) + ]) } } } else { @@ -157,19 +173,49 @@ fix_class_bug <- function(df) { "fix_class_bug" -#' Drop the label from a variable -#' @description There is a reported issues with joins on data (without a reprex) -#' that seem to be caused by the labels. As a possible solution this can be -#' used to drop labels. +#' Drop attributes/labels from variables or data frames +#' +#' @description Some functions don't work with labelled variables. As a solution, +#' this function can be used to drop labels (and all other attributes) from +#' one or more variables within a data frame, or from a vector directly. #' -#' @param df the name of the data frame -#' @param x the quoted name of the variable +#' @param x A data frame or a vector/column. +#' @param ... When `x` is a data frame, select variables using tidyselect +#' helpers (e.g., `contains()`, `starts_with()`) or unquoted names. +#' Ignored when `x` is a vector. #' -#' @export +#' @return The modified data frame or vector with attributes removed. #' +#' @examples +#' \dontrun{ +#' # Dataset-level: Remove labels from specific variables +#' df |> drop_label(employment, starts_with("dem_")) #' -#' @return df -drop_label <- function(df, x) { - attributes(df[, which(names(df) == x)]) <- NULL - df +#' # Variable-level: Use inside mutate +#' df |> mutate(name_first = drop_label(name_first)) +#' +#' # Variable-level: Use with across() +#' df |> mutate(across(c(age, income), drop_label)) +#' } +#' +#' @export +drop_label <- function(x, ...) { + # 1. Dataset-level logic + if (is.data.frame(x)) { + vars_idx <- tidyselect::eval_select(rlang::expr(c(...)), x) + if (length(vars_idx) == 0) return(x) + + for (col_idx in vars_idx) { + attributes(x[[col_idx]]) <- NULL + } + return(x) + } + + # 2. Variable-level logic + if (is.character(substitute(x))) { + stop('It looks like you quoted your variable. The variable must be unquoted when used inside mutate().') + } + + attributes(x) <- NULL + return(x) } diff --git a/man/drop_label.Rd b/man/drop_label.Rd index 78d7ade..378525e 100644 --- a/man/drop_label.Rd +++ b/man/drop_label.Rd @@ -2,20 +2,35 @@ % Please edit documentation in R/make_instrument_auto.R \name{drop_label} \alias{drop_label} -\title{Drop the label from a variable} +\title{Drop attributes/labels from variables or data frames} \usage{ -drop_label(df, x) +drop_label(x, ...) } \arguments{ -\item{df}{the name of the data frame} +\item{x}{A data frame or a vector/column.} -\item{x}{the quoted name of the variable} +\item{...}{When \code{x} is a data frame, select variables using tidyselect +helpers (e.g., \code{contains()}, \code{starts_with()}) or unquoted names. +Ignored when \code{x} is a vector.} } \value{ -df +The modified data frame or vector with attributes removed. } \description{ -There is a reported issues with joins on data (without a reprex) -that seem to be caused by the labels. As a possible solution this can be -used to drop labels. +Some functions don't work with labelled variables. As a solution, +this function can be used to drop labels (and all other attributes) from +one or more variables within a data frame, or from a vector directly. +} +\examples{ +\dontrun{ +# Dataset-level: Remove labels from specific variables +df |> drop_label(employment, starts_with("dem_")) + +# Variable-level: Use inside mutate +df |> mutate(name_first = drop_label(name_first)) + +# Variable-level: Use with across() +df |> mutate(across(c(age, income), drop_label)) +} + } diff --git a/tests/testthat/test-import_instruments.R b/tests/testthat/test-import_instruments.R index ea417d5..e6470dd 100644 --- a/tests/testthat/test-import_instruments.R +++ b/tests/testthat/test-import_instruments.R @@ -16,7 +16,7 @@ target <- structure( # processing. Removed these attributes from test fixture to match actual # REDCapR output behavior in tibble format. # See: https://github.com/OuhscBbmc/REDCapR/releases/tag/v1.2.0 (#415) - label = "Study ID", + # label = "Study ID", # class = c("labelled", "numeric") class = c("numeric") ), diff --git a/vignettes/dropLabels.Rmd b/vignettes/dropLabels.Rmd index 60b4c97..3647f73 100644 --- a/vignettes/dropLabels.Rmd +++ b/vignettes/dropLabels.Rmd @@ -12,6 +12,7 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) +library(dplyr) ``` ```{r setup} @@ -85,6 +86,11 @@ demographics <- structure( label = "Age (years)", class = c("labelled", "numeric") ), + days = structure( + c(1, 2, 3, 4, 5), + label = "Days", + class = c("labelled", "numeric") + ), sex = structure( c("Female", "Male", "Male", "Female", "Male"), label = "Gender", @@ -108,28 +114,78 @@ View(demographics) ![](./view_demog_w_labels_20230217.png){width=90% alt="Demographics preview with labels"} -However, some functions do not work well with labeled variables. -```{r skim-demo, error=TRUE} -library(skimr) # for the skim() function -demographics |> skim() +However, some functions do not work well with labeled variables. For example: + +![](./show_numbers.png){width=40% alt="Show two numeric variables with labels"} + +```{r show-error, error=TRUE} +demographics |> + rowwise() |> + mutate(x = sum(c_across(c(age, days)))) ``` So you need a way to drop the label off of a variable or to drop all the labels from all the variables in a dataset. # The Solution +## Drop a single label + You can drop the label from a single variable with the `drop_label()` function. For example: -```{r} -demographics_changed <- drop_label(demographics, "name_first") +```r +new_demographics_table <- drop_label(demographics, "name_first") +# Or +new_demographics_table <- drop_label(demographics, name_first) +# Or +new_demographics_table <- demographics |> drop_label("name_first") +``` + +## Drop multiple labels + +If you need to drop labels from multiple variables, you can drop them individually or using helper methods (i.e., `across()`). + +```r +demographics |> + mutate(age = drop_label(age)) |> + mutate(days = drop_label(days)) |> + rowwise() |> + mutate(x = sum(c_across(c(age, days)))) +# Or +demographics |> + mutate(across(c(age, days), drop_label)) |> + rowwise() |> + mutate(x = sum(c_across(c(age, days)))) +``` + +![](./show_numbers_2.png){width=40% alt="Show three numeric variables without labels"} + +You can use `tidyselect` helper methods (i.e., `contains()` or `starts_with()`) to include more than one variable or list them. The following code produces the same result: + +```r +demographics_changed_2 <- drop_label(demographics, contains("name")) +# Same as: +demographics_changed_3 <- drop_label(demographics, name_first, name_last) +# Verifying: +identical(demographics_changed_2, demographics_changed_3) +``` + +> NOTE: You do not normally need to enclose the variable names in quotations outside of `tidyselect` helpers (i.e., `contains()`) though the function still operates if you choose to. + +## Use inside a `mutate` + +You can now use `drop_label()` inside a `mutate` pipe like this: + +```r +demographics_from_mutate <- demographics |> + mutate(name_first = drop_label(name_first)) +# Or +demographics_from_mutate <- demographics |> + mutate(across(starts_with('name'), drop_label)) ``` You can drop all the labels using the `drop_labels()` function. For example: ```{r} demographics_without_labels <- drop_labels(demographics) - -demographics_without_labels |> - skim() ``` diff --git a/vignettes/show_numbers.png b/vignettes/show_numbers.png new file mode 100644 index 0000000..a0e49d4 Binary files /dev/null and b/vignettes/show_numbers.png differ diff --git a/vignettes/show_numbers_2.png b/vignettes/show_numbers_2.png new file mode 100644 index 0000000..39ed54f Binary files /dev/null and b/vignettes/show_numbers_2.png differ