Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tidyREDCap
Title: Helper Functions for Working with 'REDCap' Data
Version: 1.1.3
Version: 1.2.0
Authors@R:
c(person(
given = "Raymond",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# tidyREDCap 1.2.0 (CRAN release)

* Updated `drop_label()` now accepts multiple variables and uses `tidyselect` helpers.
* Updated "Drop Labels from a Table" vignette to reflect new changes.

# tidyREDCap 1.1.3 (CRAN release)

* Update vignettes to suggest `keyring`.
Expand Down
130 changes: 88 additions & 42 deletions R/make_instrument_auto.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,24 @@
#' @export
#'
## @examples
make_instrument_auto <- function(df, drop_which_when = FALSE,
record_id = "record_id") {
make_instrument_auto <- function(
df,
drop_which_when = FALSE,
record_id = "record_id"
) {
if (names(df)[1] != record_id) {
stop("
stop(
"
The first variable in df must be `record_id`;
use option 'record_id=' to set the name of your custom id.", call. = FALSE)
use option 'record_id=' to set the name of your custom id.",
call. = FALSE
)
}


# Strip labels from REDCap created variables to prevent reported join (and
# perhaps pivot) issues on labeled variables.
df <- drop_label(df, record_id)


is_longitudinal <- any(names(df) == "redcap_event_name")

if (is_longitudinal) {
Expand Down Expand Up @@ -69,37 +73,49 @@ make_instrument_auto <- function(df, drop_which_when = FALSE,
record_id_col <- which(colnames(df) == record_id)
redcap_event_name_col <- which(colnames(df) == "redcap_event_name")
record_repeat_inst_col <- which(colnames(df) == "redcap_repeat_instance")

if (is_longitudinal) {
# Select rows that have data with a repeat number
if (is_repeated & !all(is.na(df[!allMissing,record_repeat_inst_col]))) {
return(df[!allMissing, c(
record_id_col,
redcap_event_name_col,
record_repeat_inst_col,
first_col:last_col
)])
# Select rows that have data with a repeat number
if (is_repeated & !all(is.na(df[!allMissing, record_repeat_inst_col]))) {
return(df[
!allMissing,
c(
record_id_col,
redcap_event_name_col,
record_repeat_inst_col,
first_col:last_col
)
])
} else {
# Longitudinal not repeated instruments
return(df[!allMissing, c(
record_id_col,
redcap_event_name_col,
first_col:last_col
)])
return(df[
!allMissing,
c(
record_id_col,
redcap_event_name_col,
first_col:last_col
)
])
}
} else {
# Select rows that have data with a repeat number
if (is_repeated & !all(is.na(df[!allMissing,record_repeat_inst_col]))) {
return(df[!allMissing, c(
record_id_col,
record_repeat_inst_col,
first_col:last_col
)])
# Select rows that have data with a repeat number
if (is_repeated & !all(is.na(df[!allMissing, record_repeat_inst_col]))) {
return(df[
!allMissing,
c(
record_id_col,
record_repeat_inst_col,
first_col:last_col
)
])
} else {
return(df[!allMissing, c(
record_id_col,
first_col:last_col
)])
return(df[
!allMissing,
c(
record_id_col,
first_col:last_col
)
])
}
}
} else {
Expand Down Expand Up @@ -157,19 +173,49 @@ fix_class_bug <- function(df) {
"fix_class_bug"


#' Drop the label from a variable
#' @description There is a reported issues with joins on data (without a reprex)
#' that seem to be caused by the labels. As a possible solution this can be
#' used to drop labels.
#' Drop attributes/labels from variables or data frames
#'
#' @description Some functions don't work with labelled variables. As a solution,
#' this function can be used to drop labels (and all other attributes) from
#' one or more variables within a data frame, or from a vector directly.
#'
#' @param df the name of the data frame
#' @param x the quoted name of the variable
#' @param x A data frame or a vector/column.
#' @param ... When `x` is a data frame, select variables using tidyselect
#' helpers (e.g., `contains()`, `starts_with()`) or unquoted names.
#' Ignored when `x` is a vector.
#'
#' @export
#' @return The modified data frame or vector with attributes removed.
#'
#' @examples
#' \dontrun{
#' # Dataset-level: Remove labels from specific variables
#' df |> drop_label(employment, starts_with("dem_"))
#'
#' @return df
drop_label <- function(df, x) {
attributes(df[, which(names(df) == x)]) <- NULL
df
#' # Variable-level: Use inside mutate
#' df |> mutate(name_first = drop_label(name_first))
#'
#' # Variable-level: Use with across()
#' df |> mutate(across(c(age, income), drop_label))
#' }
#'
#' @export
drop_label <- function(x, ...) {
# 1. Dataset-level logic
if (is.data.frame(x)) {
vars_idx <- tidyselect::eval_select(rlang::expr(c(...)), x)
if (length(vars_idx) == 0) return(x)

for (col_idx in vars_idx) {
attributes(x[[col_idx]]) <- NULL
}
return(x)
}

# 2. Variable-level logic
if (is.character(substitute(x))) {
stop('It looks like you quoted your variable. The variable must be unquoted when used inside mutate().')
}

attributes(x) <- NULL
return(x)
}
31 changes: 23 additions & 8 deletions man/drop_label.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-import_instruments.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ target <- structure(
# processing. Removed these attributes from test fixture to match actual
# REDCapR output behavior in tibble format.
# See: https://github.com/OuhscBbmc/REDCapR/releases/tag/v1.2.0 (#415)
label = "Study ID",
# label = "Study ID",
# class = c("labelled", "numeric")
class = c("numeric")
),
Expand Down
74 changes: 65 additions & 9 deletions vignettes/dropLabels.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library(dplyr)
```

```{r setup}
Expand Down Expand Up @@ -85,6 +86,11 @@ demographics <- structure(
label = "Age (years)",
class = c("labelled", "numeric")
),
days = structure(
c(1, 2, 3, 4, 5),
label = "Days",
class = c("labelled", "numeric")
),
sex = structure(
c("Female", "Male", "Male", "Female", "Male"),
label = "Gender",
Expand All @@ -108,28 +114,78 @@ View(demographics)

![](./view_demog_w_labels_20230217.png){width=90% alt="Demographics preview with labels"}

However, some functions do not work well with labeled variables.
```{r skim-demo, error=TRUE}
library(skimr) # for the skim() function
demographics |> skim()
However, some functions do not work well with labeled variables. For example:

![](./show_numbers.png){width=40% alt="Show two numeric variables with labels"}

```{r show-error, error=TRUE}
demographics |>
rowwise() |>
mutate(x = sum(c_across(c(age, days))))
```

So you need a way to drop the label off of a variable or to drop all the labels from all the variables in a dataset.

# The Solution

## Drop a single label

You can drop the label from a single variable with the `drop_label()` function. For example:

```{r}
demographics_changed <- drop_label(demographics, "name_first")
```r
new_demographics_table <- drop_label(demographics, "name_first")
# Or
new_demographics_table <- drop_label(demographics, name_first)
# Or
new_demographics_table <- demographics |> drop_label("name_first")
```

## Drop multiple labels

If you need to drop labels from multiple variables, you can drop them individually or using helper methods (i.e., `across()`).

```r
demographics |>
mutate(age = drop_label(age)) |>
mutate(days = drop_label(days)) |>
rowwise() |>
mutate(x = sum(c_across(c(age, days))))
# Or
demographics |>
mutate(across(c(age, days), drop_label)) |>
rowwise() |>
mutate(x = sum(c_across(c(age, days))))
```

![](./show_numbers_2.png){width=40% alt="Show three numeric variables without labels"}

You can use `tidyselect` helper methods (i.e., `contains()` or `starts_with()`) to include more than one variable or list them. The following code produces the same result:

```r
demographics_changed_2 <- drop_label(demographics, contains("name"))
# Same as:
demographics_changed_3 <- drop_label(demographics, name_first, name_last)
# Verifying:
identical(demographics_changed_2, demographics_changed_3)
```

> NOTE: You do not normally need to enclose the variable names in quotations outside of `tidyselect` helpers (i.e., `contains()`) though the function still operates if you choose to.

## Use inside a `mutate`

You can now use `drop_label()` inside a `mutate` pipe like this:

```r
demographics_from_mutate <- demographics |>
mutate(name_first = drop_label(name_first))
# Or
demographics_from_mutate <- demographics |>
mutate(across(starts_with('name'), drop_label))
```

You can drop all the labels using the `drop_labels()` function. For example:

```{r}
demographics_without_labels <- drop_labels(demographics)

demographics_without_labels |>
skim()
```

Binary file added vignettes/show_numbers.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/show_numbers_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.