Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Mixed Repeat/Nonrepeat Instrument Support #177

Merged
merged 20 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
REDCAPTIDIER_LARGE_SPARSE_API: ${{ secrets.REDCAPTIDIER_LARGE_SPARSE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }}
REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
steps:
- name: Update Ubuntu, Install cURL Headers, add Libraries
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
SUPERHEROES_REDCAP_API: ${{ secrets.SUPERHEROES_REDCAP_API }}
REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API: ${{ secrets.REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: REDCapTidieR
Type: Package
Title: Extract 'REDCap' Databases into Tidy 'Tibble's
Version: 1.0.0
Version: 1.1.0
Authors@R: c(
person("Richard", "Hanna", , "richardshanna91@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0009-0005-6496-8154")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ importFrom(rlang,try_fetch)
importFrom(rlang,zap)
importFrom(stringi,stri_split_fixed)
importFrom(stringr,str_detect)
importFrom(stringr,str_ends)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_squish)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# REDCapTidieR 1.1.0

- `read_redcap()` now supports instruments that follow a mixed repeating/non-repeating structure with the `allow_mixed_structure` parameter
- When enabled, instruments with mixed repeating/nonrepeating structure will be treated as single-instance repeating instruments

# REDCapTidieR 1.0.0

Version 1.0.0
Expand Down
2 changes: 1 addition & 1 deletion R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' is_installed new_environment quo_get_expr try_fetch zap as_label
#' @importFrom stringi stri_split_fixed
#' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc
#' str_trim
#' str_trim str_ends
#' @importFrom tibble as_tibble is_tibble tibble
#' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider
#' @importFrom tidyselect all_of any_of ends_with eval_select everything
Expand Down
49 changes: 13 additions & 36 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,48 +101,25 @@ check_user_rights <- function(db_data,


check_repeat_and_nonrepeat <- function(db_data, call = caller_env()) {
# Identify columns to check for repeat/nonrepeat behavior
safe_cols <- c(
names(db_data)[1], "redcap_event_name",
"redcap_repeat_instrument", "redcap_repeat_instance",
"redcap_data_access_group"
)

check_cols <- setdiff(names(db_data), safe_cols)

# Set up check_data function that looks for repeating and nonrepeating
# behavior in a given column and returns a boolean
check_data <- function(db_data, check_col) {
# Repeating Check
rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"]))

# Nonrepeating Check
nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"]))

rep & nonrep
}

# Create a simple dataframe, loop through check columns and append
# dataframe with column being checked and the output of check_data
out <- data.frame()
for (i in seq_along(check_cols)) {
rep_and_nonrep <- db_data %>%
check_data(check_col = check_cols[i])

field <- check_cols[i]

out <- rbind(out, data.frame(field, rep_and_nonrep))
out
}
out <- get_mixed_structure_fields(db_data = db_data)

# Filter for violations
out <- out %>%
filter(rep_and_nonrep)
filter(.data$rep_and_nonrep)

# Produce error message if violations detected
if (nrow(out) > 0) {
cli_abort(c("x" = "Instrument{?s} detected that ha{?s/ve} both repeating and
nonrepeating instances defined in the project: {out$field}"),
cli_abort(
c(
"x" = "Instrument{?s} detected that ha{?s/ve} both repeating and
nonrepeating instances defined in the project: {out$field}",
"i" = paste0(
"Set {.code allow_mixed_structure} to {.code TRUE} to override. ",
"See ",
"{.href [Mixed Structure Instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments)} ", # nolint line_length_linter
"for more information."
)
),
class = c("repeat_nonrepeat_instrument", "REDCapTidieR_cond"),
call = call
)
Expand Down
171 changes: 150 additions & 21 deletions R/clean_redcap_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#' \code{REDCapR::redcap_metadata_read()$data}
#' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap
#' events/arms
#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
#' as repeating instruments with a single instance. Applies to longitudinal projects
#' only. Default `FALSE`.
#'
#' @return
#' Returns a \code{tibble} with list elements containing tidy dataframes. Users
Expand All @@ -22,41 +26,26 @@

clean_redcap_long <- function(db_data_long,
db_metadata_long,
linked_arms) {
linked_arms,
allow_mixed_structure = FALSE) {
# Repeating Instrument Check ----
# Check if database supplied contains any repeating instruments to map onto
# `redcap_repeat_*` variables

has_repeat_forms <- "redcap_repeat_instance" %in% names(db_data_long)
has_mixed_structure_forms <- FALSE

# Apply checkmate checks
assert_data_frame(db_data_long)
assert_data_frame(db_metadata_long)

if (has_repeat_forms) {
check_repeat_and_nonrepeat(db_data_long)
}

## Repeating Instruments Logic ----
## Repeating Forms Assignment ----
# Needed first to inform nonrepeating forms logic
if (has_repeat_forms) {
repeated_forms <- db_data_long %>%
filter(!is.na(.data$redcap_repeat_instrument)) %>%
pull(.data$redcap_repeat_instrument) %>%
unique()

repeated_forms_tibble <- tibble(
redcap_form_name = repeated_forms,
redcap_data = map(
.data$redcap_form_name,
~ distill_repeat_table_long(
.x,
db_data_long,
db_metadata_long,
linked_arms
)
),
structure = "repeating"
)
}

## Nonrepeating Instruments Logic ----
Expand Down Expand Up @@ -86,6 +75,49 @@ clean_redcap_long <- function(db_data_long,
structure = "nonrepeating"
)

## Repeating Instruments Logic ----
if (has_repeat_forms) {
# If mixed structure allowed, retrieve mixed structure forms

mixed_structure_ref <- data.frame()

if (allow_mixed_structure) {
# Retrieve mixed structure fields and forms in reference df
mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>%
left_join(db_metadata_long %>% select(.data$field_name, .data$form_name),
by = "field_name"
)

# Update if project actually has mixed structure
has_mixed_structure_forms <- nrow(mixed_structure_ref) > 0
# Convert mixed instruments to mixed structure format
} else {
if (!has_mixed_structure_forms) {
check_repeat_and_nonrepeat(db_data_long)
}
rsh52 marked this conversation as resolved.
Show resolved Hide resolved
}

repeated_forms_tibble <- tibble(
redcap_form_name = repeated_forms,
redcap_data = map(
.data$redcap_form_name,
~ distill_repeat_table_long(
.x,
db_data_long,
db_metadata_long,
linked_arms,
has_mixed_structure_forms = has_mixed_structure_forms,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine but noting that one implication of this is that convert_mixed_instrument() gets run for every instrument even if it's actually repeating rather than mixed. It works because because mixed_structure_ref gets filtered to 0 rows for repeating instruments and the for loop in convert_mixed_instrument() doesn't run. We do still need to filter() mixed_structure_ref every time to find that out though which could be a performance hit. No need to optimize until it's a problem though

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it? The actual conversion is wrapped in a check on has_mixed_structure_forms so if FALSE (which it would be for the whole map() sequence) then convert_mixed_instrument() wouldn't get run unless I'm missing something.

if (has_mixed_structure_forms) {
db_data_long <- convert_mixed_instrument(db_data_long, mixed_structure_ref %>% filter(form_name == my_form))
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right but if you have mixed and repeating instruments then has_mixed_structure_forms is TRUE and it gets run for all the repeating instruments in addition to the mixed instruments.

mixed_structure_ref = mixed_structure_ref
)
),
structure = case_when(
has_mixed_structure_forms & redcap_form_name %in% mixed_structure_ref$form_name ~ "mixed",
TRUE ~ "repeating"
)
)
}

if (has_repeat_forms) {
rbind(repeated_forms_tibble, nonrepeated_forms_tibble)
} else {
Expand Down Expand Up @@ -235,13 +267,19 @@ distill_nonrepeat_table_long <- function(form_name,
#' \code{REDCapR::redcap_metadata_read()$data}
#' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap
#' events/arms
#' @param has_mixed_structure Whether the instrument under evaluation has a mixed
#' structure. Default `FALSE`.
#' @param name mixed_structure_ref A mixed structure reference dataframe supplied
#' by `get_mixed_structure_fields()`.
#'
#' @keywords internal

distill_repeat_table_long <- function(form_name,
db_data_long,
db_metadata_long,
linked_arms) {
linked_arms,
has_mixed_structure_forms = FALSE,
mixed_structure_ref = NULL) {
has_repeat_forms <- "redcap_repeat_instance" %in% names(db_data_long)

my_record_id <- names(db_data_long)[1]
Expand Down Expand Up @@ -275,6 +313,11 @@ distill_repeat_table_long <- function(form_name,
my_fields <- c(my_fields, "redcap_data_access_group")
}

# If has mixed structure, convert form
if (has_mixed_structure_forms) {
db_data_long <- convert_mixed_instrument(db_data_long, mixed_structure_ref %>% filter(form_name == my_form))
}

# Setup data for loop redcap_arm linking
db_data_long <- db_data_long %>%
add_partial_keys(var = .data$redcap_event_name) %>%
Expand Down Expand Up @@ -337,3 +380,89 @@ distill_repeat_table_long <- function(form_name,
out %>%
tibble()
}

#' @title Convert Mixed Structure Instruments to Repeating Instruments
#'
#' @description
#' For longitudinal projects where users set `allow_mixed_structure` to `TRUE`,
#' this function will handle the process of setting the nonrepeating parts of the
#' instrument to repeating ones with a single instance.
#'
#' @param db_data_long The longitudinal REDCap database output defined by
#' \code{REDCapR::redcap_read_oneshot()$data}
#' @param mixed_structure_ref Reference dataframe containing mixed structure
#' fields and forms.
#'
#' @return
#' Returns a \code{tibble} with list elements containing tidy dataframes. Users
#' can access dataframes under the \code{redcap_data} column with reference to
#' \code{form_name} and \code{structure} column details.
#'
#' @keywords internal

convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
for (i in seq_len(nrow(mixed_structure_ref))) {
field <- mixed_structure_ref$field_name[i]
form <- mixed_structure_ref$form_name[i]

# Create a logical mask for rows needing update
update_mask <- is.na(db_data_long$redcap_repeat_instance) & !is.na(db_data_long[[field]])

# Update redcap_repeat_instance
db_data_long$redcap_repeat_instance <- if_else(update_mask, 1, db_data_long$redcap_repeat_instance)

# Update redcap_repeat_instrument
db_data_long$redcap_repeat_instrument <- if_else(update_mask, form, db_data_long$redcap_repeat_instrument)
}

db_data_long
}

#' @title Get Mixed Structure Instrument List
#'
#' @description
#' Define fields in a given project that are used in both a repeating and
#' nonrepeating manner.
#'
#' @param db_data The REDCap database output generated by
#' \code{REDCapR::redcap_read_oneshot()$data}
#'
#' @returns a dataframe
#'
#' @keywords internal

get_mixed_structure_fields <- function(db_data) {
# Identify columns to check for repeat/nonrepeat behavior
safe_cols <- c(
names(db_data)[1], "redcap_event_name",
"redcap_repeat_instrument", "redcap_repeat_instance",
"redcap_data_access_group"
)

check_cols <- setdiff(names(db_data), safe_cols)

# Set up check_data function that looks for repeating and nonrepeating
# behavior in a given column and returns a boolean
check_data <- function(db_data, check_col) {
# Repeating Check
rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"]))

# Nonrepeating Check
nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"]))

rep & nonrep
}

# Create a simple dataframe, loop through check columns and append
# dataframe with column being checked and the output of check_data
out <- data.frame()
for (i in seq_along(check_cols)) {
rep_and_nonrep <- db_data %>%
check_data(check_col = check_cols[i])

field_name <- check_cols[i]

out <- rbind(out, data.frame(field_name, rep_and_nonrep))
}
out
}
11 changes: 9 additions & 2 deletions R/read_redcap.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
#' @param guess_max A positive [base::numeric] value
#' passed to [readr::read_csv()] that specifies the maximum number of records to
#' use for guessing column types. Default `.Machine$integer.max`.
#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
#' as repeating instruments with a single instance. Applies to longitudinal projects
#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = FALSE)`.
#'
#' @examples
#' \dontrun{
Expand All @@ -75,7 +79,8 @@ read_redcap <- function(redcap_uri,
export_survey_fields = NULL,
export_data_access_groups = NULL,
suppress_redcapr_messages = TRUE,
guess_max = .Machine$integer.max) {
guess_max = .Machine$integer.max,
allow_mixed_structure = getOption("redcaptidier.allow.mixed.structure", FALSE)) {
check_arg_is_character(redcap_uri, len = 1, any.missing = FALSE)
check_arg_is_character(token, len = 1, any.missing = FALSE)
check_arg_is_valid_token(token)
Expand All @@ -84,6 +89,7 @@ read_redcap <- function(redcap_uri,
check_arg_is_logical(export_survey_fields, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_logical(export_data_access_groups, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_logical(suppress_redcapr_messages, len = 1, any.missing = FALSE)
check_arg_is_logical(allow_mixed_structure, len = 1, any.missing = FALSE)

# Load REDCap Metadata ----
# Capture unexpected metadata API call errors
Expand Down Expand Up @@ -267,7 +273,8 @@ read_redcap <- function(redcap_uri,
out <- clean_redcap_long(
db_data_long = db_data,
db_metadata_long = db_metadata,
linked_arms = linked_arms
linked_arms = linked_arms,
allow_mixed_structure = allow_mixed_structure
)
} else {
out <- clean_redcap(
Expand Down
Binary file added inst/testdata/db_metadata_mixed_structure.RDS
Binary file not shown.
Binary file added inst/testdata/db_mixed_structure.RDS
Binary file not shown.
Binary file added inst/testdata/db_mixed_structure_linked_arms.RDS
Binary file not shown.
Loading
Loading