diff --git a/.Rbuildignore b/.Rbuildignore index de5a9c5..442d98c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^\.travis\.yml$ ^doc$ ^Meta$ +^\.claude$ diff --git a/.gitignore b/.gitignore index 5692018..c9b63ec 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,5 @@ inst/doc **/.DS_Store doc Meta +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index 24d8faa..a7d5189 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tidyREDCap Title: Helper Functions for Working with 'REDCap' Data -Version: 1.1.2 +Version: 1.2.0 Authors@R: c(person( given = "Raymond", @@ -50,6 +50,13 @@ Authors@R: role = "aut", email = "kxg679@miami.edu", comment = c(ORCID = "0000-0002-9223-8854") + ), + person( + given = "Dylan", + family = "Pieper", + role = "aut", + email = "dylanpieper@gmail.com", + comment = c(ORCID = "0000-0002-2834-7351") ) ) Description: @@ -62,11 +69,13 @@ RoxygenNote: 7.3.2 Depends: R (>= 3.5.0) Imports: cli, + DBI, dplyr, + duckdb, janitor, - labelVector, - magrittr, + labelled, purrr, + redquack, REDCapR, rlang, stringr, diff --git a/NAMESPACE b/NAMESPACE index 478f9ea..784a5d1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method(codebook,data.frame) +S3method(codebook,default) +S3method(codebook_convert,data.frame) +S3method(codebook_convert,default) S3method(vec_cast,character.labelled) S3method(vec_cast,double.labelled) S3method(vec_cast,integer.labelled) @@ -18,9 +22,11 @@ S3method(vec_ptype2,labelled.integer) S3method(vec_ptype2,labelled.labelled) S3method(vec_ptype2,labelled.logical) S3method(vec_ptype2,logical.labelled) -export("%>%") +export(codebook) +export(codebook_convert) export(drop_label) export(drop_labels) +export(has_redcap_values) export(import_instruments) export(make_binary_word) export(make_choose_all_table) @@ -29,13 +35,26 @@ export(make_instrument) export(make_instrument_auto) export(make_yes_no) export(make_yes_no_unknown) +importFrom(DBI,dbConnect) +importFrom(DBI,dbDisconnect) importFrom(REDCapR,redcap_metadata_read) importFrom(REDCapR,redcap_read) -importFrom(REDCapR,redcap_read_oneshot) +importFrom(cli,cli_abort) +importFrom(cli,cli_alert_info) +importFrom(cli,cli_h1) importFrom(cli,cli_inform) +importFrom(cli,cli_li) +importFrom(cli,cli_text) +importFrom(cli,cli_ul) +importFrom(cli,cli_warn) importFrom(dplyr,across) +importFrom(dplyr,all_of) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) +importFrom(dplyr,collect) +importFrom(dplyr,count) +importFrom(dplyr,distinct) +importFrom(dplyr,filter) importFrom(dplyr,if_else) importFrom(dplyr,mutate) importFrom(dplyr,pull) @@ -43,23 +62,25 @@ importFrom(dplyr,rename) importFrom(dplyr,select) importFrom(dplyr,starts_with) importFrom(dplyr,summarise_all) +importFrom(dplyr,sym) +importFrom(dplyr,tbl) +importFrom(duckdb,duckdb) importFrom(janitor,adorn_pct_formatting) importFrom(janitor,tabyl) -importFrom(labelVector,is_labelled) -importFrom(labelVector,set_label) -importFrom(magrittr,"%>%") +importFrom(labelled,"var_label<-") +importFrom(labelled,var_label) importFrom(purrr,map_chr) importFrom(purrr,map_df) importFrom(purrr,map_lgl) +importFrom(redquack,redcap_to_db) +importFrom(rlang,"!!") importFrom(rlang,.data) -importFrom(stringr,fixed) importFrom(stringr,regex) importFrom(stringr,str_count) importFrom(stringr,str_detect) importFrom(stringr,str_extract) importFrom(stringr,str_locate) -importFrom(stringr,str_remove) -importFrom(stringr,str_remove_all) +importFrom(stringr,str_replace) importFrom(stringr,str_replace_na) importFrom(stringr,str_sub) importFrom(tibble,enframe) @@ -68,5 +89,6 @@ importFrom(tidyselect,ends_with) importFrom(tidyselect,everything) importFrom(tidyselect,starts_with) importFrom(tidyselect,vars_select_helpers) +importFrom(utils,head) importFrom(vctrs,vec_cast) importFrom(vctrs,vec_ptype2) diff --git a/NEWS.md b/NEWS.md index 5c8df42..0b29f6b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,149 +1,182 @@ +--- +editor_options: + markdown: + wrap: 72 +--- + +# tidyREDCap 1.2 (CRAN release) + +- Change import package to redquack + () +- Add parameters to function `import_instruments()`: + - `filter_instrument` and `filter_function` for lazy data + filtering + - `return_list` for returning a list of instrument data.frames + - `labels` for adding/removing column labels +- Add `codebook()` function for generating structured metadata + documentation +- Add `codebook_convert()` function for converting coded values to + labeled equivalents using stored metadata +- Add `has_redcap_values()` helper function for identifying columns + with REDCap value labels + # tidyREDCap 1.1.2 (CRAN release) -* Fix issues reported by CRAN with Linux and old R Windows (4.3.3) saying +- Fix issues reported by CRAN with Linux and old R Windows (4.3.3) + saying -``` +``` ✖ These names are duplicated: * "record_id" at locations 1 and 2. ``` -+ Update roxygen2 version (Thank you for Will Beasley) -+ Fix .data$ was depreciated in `tidyselect` (https://github.com/r-lib/tidyselect/issues/169) -+ Fixed missing global bindings caused by `tidyselect` fix. +- Update roxygen2 version (Thank you for Will Beasley) +- Fix .data\$ was depreciated in `tidyselect` + () +- Fixed missing global bindings caused by `tidyselect` fix. # tidyREDCap 1.1.1 (CRAN release) ## New features -* Add `make_yes_no()` function to convert "checked" or "yes"-like answers to - "Yes" and other answers to "No or Unknown". -* Add `make_yes_no_unknown()` function to convert "checked" or "yes"-like - answers to "Yes", unchecked or "no"-like answers to "No" and other answers to - "Unknown". - +- Add `make_yes_no()` function to convert "checked" or "yes"-like + answers to "Yes" and other answers to "No or Unknown". +- Add `make_yes_no_unknown()` function to convert "checked" or + "yes"-like answers to "Yes", unchecked or "no"-like answers to "No" + and other answers to "Unknown". + ## Fixes/Changes -* `make_choose_all_table()` now works with api or manual/point-and-click - exports. -## Added S3 methods so dplyr (and friends) can work with labelled objects -# tidyREDCap 1.1.0 (CRAN release) +- `make_choose_all_table()` now works with api or + manual/point-and-click exports. \## Added S3 methods so dplyr (and + friends) can work with labelled objects + +# tidyREDCap 1.1.0 (CRAN release) ## New features -* Add `drop_labels()` function for datasets. Used to deal with packages/functions - that don't want labeled variables (i.e. `dplyr::pivot_longer()` and `skimr::skim()` -* Added options (`record_id =` and `first_record_id =` for custom record_id fields in `import_instruments()` -* Added repeat instance numbers for repeated instruments in `import_instruments()` +- Add `drop_labels()` function for datasets. Used to deal with + packages/functions that don't want labeled variables (i.e. + `dplyr::pivot_longer()` and `skimr::skim()` +- Added options (`record_id =` and `first_record_id =` for custom + record_id fields in `import_instruments()` +- Added repeat instance numbers for repeated instruments in + `import_instruments()` ## Fixes/Changes -* Documentation fixes - + Suppress warning caused by dplyr 1.1 - + fix wrong function in api vignette +- Documentation fixes + - Suppress warning caused by dplyr 1.1 + - fix wrong function in api vignette ## Minor improvements and fixes -* Add unit test on import_instruments() function call. - +- Add unit test on import_instruments() function call. -# tidyREDCap 1.0.1.9001 (dev version) +# tidyREDCap 1.0.1.9001 (dev version) ## Fixes/Changes -* Fixed bug that caused labels to be missing if they contained parentheses +- Fixed bug that caused labels to be missing if they contained + parentheses -# tidyREDCap 1.0.1.9000 (dev version) +# tidyREDCap 1.0.1.9000 (dev version) ## New features -* Add drop label function +- Add drop label function ## Fixes/Changes -* Fix message display bug while `import_instruments()` runs -* Fix bug with `import_instruments()` loading repeated instruments (the first instrument in a project was badly messed up) -* Row names no longer reflect the row number of the exported data -* Remove labels from a few automatically created REDCap variables ("record_id", "redcap_event_name", "redcap_repeat_instrument", "redcap_repeat_instance") - +- Fix message display bug while `import_instruments()` runs +- Fix bug with `import_instruments()` loading repeated instruments + (the first instrument in a project was badly messed up) +- Row names no longer reflect the row number of the exported data +- Remove labels from a few automatically created REDCap variables + ("record_id", "redcap_event_name", "redcap_repeat_instrument", + "redcap_repeat_instance") -# tidyREDCap 1.0.1 (CRAN release) +# tidyREDCap 1.0.1 (CRAN release) ## New features -* Added support for REDCapR API - - `import_instruments()` function imports all instruments with a single command - - Added targeted status messaging during the import - -## Minor improvements and fixes +- Added support for REDCapR API + - `import_instruments()` function imports all instruments with a + single command + - Added targeted status messaging during the import -* `make_choose_one_table()` no longer requires factors -* `make_choose_all_table()` now works with "1" vs "0" indicator variables -* Greatly improved vignettes +## Minor improvements and fixes -# tidyREDCap 1.0.0.9002 (dev version) +- `make_choose_one_table()` no longer requires factors +- `make_choose_all_table()` now works with "1" vs "0" indicator + variables +- Greatly improved vignettes -* Add {REDCapR} support -* Added `import_instruments()` function to import all instruments; currently uses the `REDCapR` package as the API +# tidyREDCap 1.0.0.9002 (dev version) +- Add {REDCapR} support +- Added `import_instruments()` function to import all instruments; + currently uses the `REDCapR` package as the API -# tidyREDCap 1.0.0.9001 (dev version) +# tidyREDCap 1.0.0.9001 (dev version) -* Fix bug in `make_choose_all_table()` with repeating instruments showing NA counts -* Removes superseded `summarise_all()` function +- Fix bug in `make_choose_all_table()` with repeating instruments + showing NA counts +- Removes superseded `summarise_all()` function -# tidyREDCap 1.0.0.9000 (dev version) +# tidyREDCap 1.0.0.9000 (dev version) -* Removes superseded `mutate_if` and `mutate_all` from `make_choose_all_table()` -* Added `import_instruments()` function -* Added `make_instrument_auto()` function -* Adds checks on arguments +- Removes superseded `mutate_if` and `mutate_all` from + `make_choose_all_table()` +- Added `import_instruments()` function +- Added `make_instrument_auto()` function +- Adds checks on arguments -# tidyREDCap 0.2.2 (CRAN patch) +# tidyREDCap 0.2.2 (CRAN patch) -* Fix `rlang` bug in `make_choose_all_table()`; see +- Fix `rlang` bug in `make_choose_all_table()`; see + -# tidyREDCap 0.2.1 (CRAN release) +# tidyREDCap 0.2.1 (CRAN release) -* Fix bug with "" character stings with make_instrument() +- Fix bug with "" character stings with make_instrument() # tidyREDCap 0.2.0 (CRAN release) -* Cleaned up vignettes, docs +- Cleaned up vignettes, docs -# tidyREDCap 0.1.3.1 +# tidyREDCap 0.1.3.1 -* Cleaned up vignettes, docs +- Cleaned up vignettes, docs -# tidyREDCap 0.1.3.0 +# tidyREDCap 0.1.3.0 -* Added `make_choose_all_table()` function +- Added `make_choose_all_table()` function -# tidyREDCap 0.1.2.1 +# tidyREDCap 0.1.2.1 -* Cleaned up vignettes +- Cleaned up vignettes -# tidyREDCap 0.1.2 +# tidyREDCap 0.1.2 -* Added `make_instrument()` function +- Added `make_instrument()` function # tidyREDCap 0.1.1 -* Added `make_choose_one_table()` function +- Added `make_choose_one_table()` function # tidyREDCap 0.1.0 (CRAN release) -* Fixed title capitalization. -* Added reference to REDCap website. -* Updated the release year in the license. -* Updated hyperlinks to vignettes and example instrument description. -* Added references to financial support in the ReadMe. +- Fixed title capitalization. +- Added reference to REDCap website. +- Updated the release year in the license. +- Updated hyperlinks to vignettes and example instrument description. +- Added references to financial support in the ReadMe. # tidyREDCap 0.0.0.9005 -* Added check on number of arguments +- Added check on number of arguments # tidyREDCap 0.0.0.9004 -* Added a `NEWS.md` file to track changes to the package. - - +- Added a `NEWS.md` file to track changes to the package. diff --git a/R/codebook.R b/R/codebook.R new file mode 100644 index 0000000..6c8cef0 --- /dev/null +++ b/R/codebook.R @@ -0,0 +1,301 @@ +#' @title Generate codebook for REDCap data +#' @description Extract and display metadata information for REDCap data, +#' including variable labels and value labels for categorical variables. +#' +#' @param data A data.frame or single column from REDCap data with metadata attributes +#' @param field_name Character string specifying field name when `data` is a full dataset +#' +#' @return A list or data.frame containing codebook information +#' +#' @examples +#' \dontrun{ +#' # Get full codebook for dataset +#' codebook(demographics) +#' +#' # Get codebook for specific variable +#' codebook(demographics$sex) +#' codebook(demographics, "sex") +#' } +#' +#' @export +codebook <- function(data, field_name = NULL) { + UseMethod("codebook") +} + +#' @export +codebook.data.frame <- function(data, field_name = NULL) { + if (!is.null(field_name)) { + if (!field_name %in% names(data)) { + cli_abort("Field {.val {field_name}} not found in data") + } + return(codebook(data[[field_name]], field_name = field_name)) + } + + # Get metadata for all columns + metadata <- attr(data, "redcap_metadata") + if (is.null(metadata)) { + # Fallback: extract info from individual columns + result <- list() + for (col_name in names(data)) { + col_info <- list( + name = col_name, + label = attr(data[[col_name]], "label") %||% "No label", + type = class(data[[col_name]])[1], + values = extract_value_labels(data[[col_name]]) + ) + result[[col_name]] <- col_info + } + return(structure(result, class = "codebook")) + } + + # Extract codebook from stored metadata + result <- list() + for (i in seq_len(nrow(metadata))) { + field <- metadata[i, ] + field_name <- field$field_name + + if (field_name %in% names(data)) { + col_info <- list( + name = field_name, + label = field$field_label, + type = field$field_type, + note = field$field_note, + values = parse_choices(field$select_choices_or_calculations) + ) + result[[field_name]] <- col_info + } + } + + structure(result, class = "codebook") +} + +#' @export +codebook.default <- function(data, field_name = NULL) { + # For individual columns + if (is.null(field_name)) { + raw_name <- deparse(substitute(data)) + + # Extract just the variable name for cleaner display + if (grepl("\\$", raw_name)) { + # Extract variable name after $ + field_name <- sub(".*\\$", "", raw_name) + } else if (grepl("\\[\\[", raw_name)) { + # Extract variable name from data[[field_name]] pattern + field_name <- gsub(".*\\[\\[[\"\']?([^\"\'\\]]+)[\"\']?\\]\\]", "\\1", raw_name) + } else { + field_name <- raw_name + } + } + + structure(list( + name = field_name, + label = attr(data, "label") %||% "No label", + type = class(data)[1], + values = extract_value_labels(data) + ), class = "codebook") +} + +#' @importFrom cli cli_text cli_ul cli_li cli_h1 cli_abort +print.codebook <- function(x, ...) { + if (!is.null(x$name)) { + cli_h1("Variable: {.field {x$name}}") + cli_text("{.strong Label:} {x$label}") + cli_text("{.strong Type:} {.cls {x$type}}") + + if (!is.null(x$note) && !is.na(x$note)) { + cli_text("{.strong Note:} {x$note}") + } + + if (!is.null(x$values) && length(x$values) > 0) { + cli_text("{.strong Values:}") + value_items <- character(length(x$values)) + for (i in seq_along(x$values)) { + val <- names(x$values)[i] + label <- x$values[[i]] + value_items[i] <- paste0("{.val ", val, "} = ", label) + } + cli_ul(value_items) + } + } else { + for (field_name in names(x)) { + field <- x[[field_name]] + + cli_h1("Variable: {.field {field$name}}") + cli_text("{.strong Label:} {field$label}") + cli_text("{.strong Type:} {.cls {field$type}}") + + if (!is.null(field$note) && !is.na(field$note)) { + cli_text("{.strong Note:} {field$note}") + } + + if (!is.null(field$values) && length(field$values) > 0) { + cli_text("{.strong Values:}") + value_items <- character(length(field$values)) + for (i in seq_along(field$values)) { + val <- names(field$values)[i] + label <- field$values[[i]] + value_items[i] <- paste0("{.val ", val, "} = ", label) + } + cli_ul(value_items) + } + } + } + invisible(x) +} + +#' @title Convert coded values using codebook metadata +#' @description Converts coded values in REDCap data to their labeled equivalents +#' using the metadata stored in column attributes. Both syntaxes return the converted +#' column vector for consistency with codebook(). For data.frame operations, use +#' standard tidy patterns with mutate(). +#' +#' @param data A data.frame, single column, or data.frame with column name specification +#' @param col_name Character string specifying column name when `data` is a data.frame +#' +#' @return Converted column vector with coded values replaced by labels +#' +#' @examples +#' \dontrun{ +#' # Both return converted column vector +#' codebook_convert(demographics$sex) +#' codebook_convert(demographics, "sex") +#' +#' # For data.frame operations, use tidy patterns: +#' demographics |> +#' mutate(sex = codebook_convert(sex)) +#' +#' # Convert multiple columns +#' demographics |> +#' mutate(across(where(has_redcap_values), codebook_convert)) +#' +#' # Convert specific columns +#' demographics |> +#' mutate( +#' sex = codebook_convert(sex), +#' race = codebook_convert(race) +#' ) +#' } +#' +#' @export +codebook_convert <- function(data, col_name = NULL) { + UseMethod("codebook_convert") +} + +#' @export +codebook_convert.data.frame <- function(data, col_name = NULL) { + if (!is.null(col_name)) { + if (!col_name %in% names(data)) { + cli_abort("Column {.val {col_name}} not found in data") + } + # Return just the converted column to maintain consistency with codebook() + return(codebook_convert(data[[col_name]])) + } + + # When no column specified, error with helpful message about tidy approach + cli_abort(c( + "When converting multiple columns, use dplyr patterns like:", + "i" = "data |> mutate(across(where(has_redcap_values), codebook_convert))", + "i" = "data |> mutate(col1 = codebook_convert(col1), col2 = codebook_convert(col2))" + )) +} + +#' @export +codebook_convert.default <- function(data, col_name = NULL) { + value_labels <- attr(data, "redcap_values") + + if (is.null(value_labels) || length(value_labels) == 0) { + return(data) + } + + # Convert to character to handle case_when-like logic + result <- as.character(data) + + # Handle the case where logical values need to be treated as numeric (0/1) + # This happens when REDCap 0/1 codes get converted to logical by import process + lookup_data <- data + if (is.logical(data)) { + # Convert logical to numeric for lookup: FALSE = 0, TRUE = 1 + lookup_data <- as.numeric(data) + } + + # Apply conversions for each coded value + for (code in names(value_labels)) { + # Try both string and numeric comparison for robust matching + code_numeric <- suppressWarnings(as.numeric(code)) + if (!is.na(code_numeric)) { + # For numeric codes, match against numeric conversion of the data + result[lookup_data == code_numeric] <- value_labels[[code]] + } else { + # For string codes, match against character conversion + result[as.character(lookup_data) == code] <- value_labels[[code]] + } + } + + # Handle NAs appropriately - keep as NA if original was NA + result[is.na(data)] <- NA_character_ + + # Preserve original attributes except redcap_values (since we've converted) + attributes_to_keep <- attributes(data) + attributes_to_keep$redcap_values <- NULL + + attributes(result) <- c(attributes(result), attributes_to_keep) + + result +} + +#' @title Check if column has REDCap value labels +#' @description Helper function to identify columns with redcap_values attributes. +#' Useful with dplyr::across() for selective conversion. +#' +#' @param x A column/vector to check +#' +#' @return Logical indicating whether column has redcap_values attribute +#' +#' @examples +#' \dontrun{ +#' # Convert only columns with value labels +#' demographics |> +#' mutate(across(where(has_redcap_values), codebook_convert)) +#' } +#' +#' @export +has_redcap_values <- function(x) { + !is.null(attr(x, "redcap_values")) +} + +# Helper function to parse REDCap choice strings +parse_choices <- function(choices_string) { + if (is.na(choices_string) || choices_string == "") { + return(NULL) + } + + # Split by | and then by comma + choices <- strsplit(choices_string, " \\| ")[[1]] + result <- list() + + for (choice in choices) { + if (grepl(",", choice)) { + parts <- strsplit(choice, ", ", 2)[[1]] + if (length(parts) == 2) { + result[[parts[1]]] <- parts[2] + } + } + } + + if (length(result) == 0) { + return(NULL) + } + result +} + +# Helper function to extract value labels from column attributes +extract_value_labels <- function(col) { + value_labels <- attr(col, "redcap_values") + if (is.null(value_labels)) { + return(NULL) + } + value_labels +} + +# Helper function (equivalent to %||% from rlang) +`%||%` <- function(x, y) if (is.null(x)) y else x diff --git a/R/import_instruments.R b/R/import_instruments.R index 03cfa14..be403a2 100644 --- a/R/import_instruments.R +++ b/R/import_instruments.R @@ -14,183 +14,399 @@ #' "first dude" for one of its records this argument would be #' `first_record_id = "first dude"`. #' @param envir The name of the environment where the tables should be saved. +#' @param return_list If TRUE, returns instrument data in a named list. If FALSE (default), assigns instrument data to environment. +#' @param labels If TRUE (default), applies variable labels to columns. If FALSE, columns will not have labels. +#' @param filter_instrument Optional character string specifying which instrument +#' to use for filtering. If provided with filter_function, this instrument will be +#' filtered first, and the resulting record IDs will be used to filter all +#' other instruments. +#' @param filter_function Optional function that filters REDCap data. +#' If `filter_instrument` is specified, filter is applied only to that instrument, +#' and returned record IDs are used to filter all other instruments. +#' If `filter_instrument` is NULL (default), filter applies to each instrument separately. #' -#' @return one `data.frame` for each instrument/form in a REDCap project. By -#' default the datasets are saved into the global environment. +#' @return One table (`data.frame`) for each instrument is assigned to the environment. +#' If `return_list` = TRUE, returns a named list instead. #' -#' -#' @importFrom REDCapR redcap_read redcap_read_oneshot redcap_metadata_read -#' @importFrom dplyr pull if_else -#' @importFrom magrittr %>% -#' @importFrom stringr str_remove str_remove_all fixed +#' @importFrom REDCapR redcap_read redcap_metadata_read +#' @importFrom dplyr pull if_else collect tbl select all_of filter distinct sym count +#' @importFrom stringr str_replace str_count str_sub str_extract str_locate #' @importFrom tidyselect ends_with -#' @importFrom labelVector set_label -#' @importFrom cli cli_inform +#' @importFrom labelled var_label<- +#' @importFrom cli cli_inform cli_alert_info cli_warn +#' @importFrom redquack redcap_to_db +#' @importFrom DBI dbConnect dbDisconnect +#' @importFrom duckdb duckdb +#' @importFrom rlang !! +#' @importFrom utils head #' @export #' #' @examples #' \dontrun{ +#' # Import each instrument to multiple tables #' import_instruments( -#' "https://redcap.miami.edu/api/", -#' Sys.getenv("test_API_key") +#' "https://bbmc.ouhsc.edu/redcap/api/", +#' Sys.getenv("redcap_token") +#' ) +#' +#' # Import each instrument to a single list +#' instruments <- import_instruments( +#' "https://bbmc.ouhsc.edu/redcap/api/", +#' Sys.getenv("redcap_token"), +#' return_list = TRUE +#' ) +#' +#' # Import without labels +#' instruments <- import_instruments( +#' "https://bbmc.ouhsc.edu/redcap/api/", +#' Sys.getenv("redcap_token"), +#' labels = FALSE +#' ) +#' +#' # Filter all instruments based on demographics +#' instruments <- import_instruments( +#' "https://bbmc.ouhsc.edu/redcap/api/", +#' Sys.getenv("redcap_token"), +#' filter_instrument = "demographics", +#' filter_function = \(x) x |> dplyr::filter(age >= 18) #' ) #' } import_instruments <- function(url, token, drop_blank = TRUE, record_id = "record_id", first_record_id = 1, - envir = .GlobalEnv) { - cli::cli_inform("Reading metadata about your project.... ") + envir = .GlobalEnv, + return_list = FALSE, + labels = TRUE, + filter_instrument = NULL, + filter_function = NULL) { + # internal function to extract instrument columns (indices only) + get_instrument_columns <- function(data_set, big_i, meta) { + curr_instr_idx <- (big_i[data_set] + 1):big_i[data_set + 1] + c(meta, curr_instr_idx) |> unique() + } - ds_instrument <- - suppressWarnings( - suppressMessages( - REDCapR::redcap_metadata_read(redcap_uri = url, token = token)$data - ) - ) + # internal function to apply labels and metadata to collected data + apply_labels_to_data <- function(data, full_labeled_structure, metadata = NULL) { + # copy labels from full structure to matching columns in data + for (col_name in names(data)) { + if (col_name %in% names(full_labeled_structure)) { + attr(data[[col_name]], "label") <- attr(full_labeled_structure[[col_name]], "label") + } + } + + # Add value labels for categorical variables if metadata available + if (!is.null(metadata)) { + for (col_name in names(data)) { + field_meta <- metadata[metadata$field_name == col_name, ] + if (nrow(field_meta) == 1 && !is.na(field_meta$select_choices_or_calculations)) { + value_labels <- parse_redcap_choices(field_meta$select_choices_or_calculations) + if (!is.null(value_labels)) { + attr(data[[col_name]], "redcap_values") <- value_labels + } + } + } + # Store full metadata as dataset attribute + attr(data, "redcap_metadata") <- metadata + } + + data + } + + # Helper function to parse REDCap choice strings + parse_redcap_choices <- function(choices_string) { + if (is.na(choices_string) || choices_string == "") { + return(NULL) + } - # Get names of instruments - form_name <- NULL + # Split by | and then by comma + choices <- strsplit(choices_string, " \\| ")[[1]] + result <- list() + + for (choice in choices) { + if (grepl(",", choice)) { + parts <- strsplit(choice, ", ", 2)[[1]] + if (length(parts) == 2) { + result[[parts[1]]] <- parts[2] + } + } + } + if (length(result) == 0) { + return(NULL) + } + result + } + + cli_inform("Reading metadata about your project...") + + metadata_result <- tryCatch({ + suppressWarnings(suppressMessages( + redcap_metadata_read(redcap_uri = url, token = token) + )) + }, error = function(e) { + stop("Metadata read failed: ", e$message, call. = FALSE) + }) + + # Check if metadata read was successful + if (!metadata_result$success) { + stop("Metadata read failed: ", metadata_result$outcome_message, call. = FALSE) + } + + ds_instrument <- metadata_result$data + + # get instrument names instrument_name <- ds_instrument |> pull(form_name) |> unique() + # validate filter_instrument + if (!is.null(filter_instrument) && !filter_instrument %in% instrument_name) { + stop("filter_instrument '", filter_instrument, "' not found in project instruments: ", + paste(instrument_name, collapse = ", "), + call. = FALSE + ) + } - # do the api call - cli::cli_inform("Reading variable labels for your variables.... ") - raw_labels <- - suppressWarnings( - suppressMessages( - REDCapR::redcap_read( - redcap_uri = url, - token = token, + # read variable labels if needed + label_names <- NULL + if (labels) { + cli_inform("Reading variable labels...") + labels_result <- tryCatch({ + suppressWarnings(suppressMessages( + redcap_read( + redcap_uri = url, token = token, raw_or_label_headers = "label", records = first_record_id - )$data + ) + )) + }, error = function(e) { + stop("Labels read failed: ", e$message, call. = FALSE) + }) + + # Check if label read was successful + if (!labels_result$success) { + stop("Labels read failed: ", labels_result$outcome_message, call. = FALSE) + } + + raw_labels <- labels_result$data + if (nrow(raw_labels) == 0) { + stop("The first 'record_id' must be 1; use argument 'first_record_id' to set first id", + call. = FALSE ) - ) + } - # Provide error for first instance of record id. - if (dim(raw_labels)[1] == 0) { - stop( - " - The first 'record_id' or custom id in df must be 1; - use option 'first_record_id=' to set the first id in df.", - call. = FALSE - ) + # prepare labels + label_names <- names(raw_labels) |> + str_replace("(\\(.*)\\(", "\\1") |> + str_replace("\\)(.*\\))", "\\1") |> + str_replace("\\.\\.\\.\\d+$", "") + names(label_names) <- names(raw_labels) } - just_labels <- raw_labels + cli_inform("Reading your data...") - # deal with nested parentheses - # see https://stackoverflow.com/questions/74525811/how-can-i-remove-inner-parentheses-from-an-r-string/74525923#74525923 - just_labels_names <- names(just_labels) |> - stringr::str_replace("(\\(.*)\\(", "\\1") |> - stringr::str_replace("\\)(.*\\))", "\\1") + # create temporary duckdb connection + db_file <- tempfile(fileext = ".duckdb") + duckdb <- dbConnect(duckdb(), db_file) - cli::cli_inform( - c( - "Reading your data.... ", - i = "This may take a while if your dataset is large." - ) - ) + on.exit({ + dbDisconnect(duckdb) + if (file.exists(db_file)) file.remove(db_file) + }) - raw_redcapr <- - suppressWarnings( - suppressMessages( - REDCapR::redcap_read_oneshot( - redcap_uri = url, - token = token, - raw_or_label = "label" - )$data - ) + # import redcap data to duckdb + tryCatch({ + redcap_to_db( + conn = duckdb, redcap_uri = url, token = token, + record_id_name = record_id, echo = "progress", beep = FALSE ) + }, error = function(e) { + stop("Data import failed: ", e$message, call. = FALSE) + }) + + # get data table reference and apply labels to full structure + data_tbl <- tbl(duckdb, "data") + + # check data size and warn if big + filter_in_use <- !is.null(filter_instrument) || !is.null(filter_function) + if (!filter_in_use) { + n_rows <- data_tbl |> + count() |> + collect() |> + pull(n) + n_cols <- length(colnames(data_tbl)) + total_elements <- n_rows * n_cols + + if (total_elements >= 100000000) { # 100m elements - serious warning + cli_warn("Your very large REDCap project ({n_rows} obs. of {n_cols} variables) may exceed memory and require arguments {.arg filter_function} and {.arg filter_instrument} to import filtered data") + } else if (total_elements >= 25000000) { # 25m elements - suggestion + cli_alert_info("Consider filtering your somewhat large REDCap project ({n_rows} obs. of {n_cols} variables) using arguments {.arg filter_function} and {.arg filter_instrument} for optimized memory management") + } + } - just_data <- raw_redcapr + # collect a sample to get full column structure for labeling + full_structure <- data_tbl |> + head(1) |> + collect() - just_data[] <- - mapply( - nm = names(just_data), - lab = relabel(just_labels_names), + # apply labels to the full structure template if labels requested + if (labels) { + full_structure[] <- mapply( + nm = names(full_structure), + lab = relabel(label_names), FUN = function(nm, lab) { - labelVector::set_label(just_data[[nm]], lab) + var_label(full_structure[[nm]]) <- lab + full_structure[[nm]] }, SIMPLIFY = FALSE ) + } - redcap <- just_data - - # get the index (end) of instruments - i <- - which( - names(redcap) %in% paste0(instrument_name, "_complete") - ) - - # add placeholder + # get instrument indices + i <- which(names(full_structure) %in% paste0(instrument_name, "_complete")) big_i <- c(0, i) n_instr_int <- length(big_i) - 1 - is_longitudinal <- any(names(redcap) == "redcap_event_name") - is_repeated <- any(names(redcap) == "redcap_repeat_instrument") + # determine metadata columns + is_longitudinal <- any(names(full_structure) == "redcap_event_name") + is_repeated <- any(names(full_structure) == "redcap_repeat_instrument") - if (is_longitudinal && is_repeated) { - meta <- c(1:4) + meta <- if (is_longitudinal && is_repeated) { + c(1:4) } else if (is_repeated) { - meta <- c(1:3) + c(1:3) } else if (is_longitudinal) { - meta <- c(1:2) + c(1:2) } else { - meta <- 1 + 1 } - # Load all datasets to the global environment - for (data_set in seq_len(n_instr_int)) { - # all columns in the current instrument - curr_instr_idx <- (big_i[data_set] + 1):big_i[data_set + 1] - - column_index <- - c(meta, curr_instr_idx) %>% - # Sometimes the `record_id` can appear in both - # the metadata columns and the current instrument. - # See https://github.com/RaymondBalise/tidyREDCap/pull/61 - unique() - - drop_dot_one <- redcap[, column_index] %>% - select(-ends_with(".1")) - - # drops blank instruments - if (drop_blank == TRUE) { - processed_blank <- - make_instrument_auto(drop_dot_one, record_id = record_id) - } else { - processed_blank <- drop_dot_one + # get filtered record ids if filter specified + filtered_ids <- NULL + if (!is.null(filter_instrument) && !is.null(filter_function)) { + filter_idx <- which(instrument_name == filter_instrument) + + cli_inform("Applying filter to '{filter_instrument}' instrument...") + + # get column indices for filter instrument + filter_columns <- get_instrument_columns(filter_idx, big_i, meta) + + # apply filter directly on database table + filtered_ids <- data_tbl |> + select(all_of(filter_columns)) |> + filter_function() |> + select(all_of(record_id)) |> + distinct() |> + collect() |> + pull(!!sym(record_id)) + + cli_inform("Filter resulted in {length(filtered_ids)} records") + } + + if (n_instr_int == 0) { + cli_inform("No instruments found in project") + return(if (return_list) list() else invisible()) + } + + # process instruments with memory-efficient approach + if (return_list) { + instruments_list <- vector("list", length = n_instr_int) + names(instruments_list) <- instrument_name[1:n_instr_int] + + for (data_set in seq_len(n_instr_int)) { + # get column indices for this instrument + column_index <- get_instrument_columns(data_set, big_i, meta) + + # build query starting from database table + instrument_query <- data_tbl |> + select(all_of(column_index)) + + # apply filtering if needed + if (!is.null(filtered_ids)) { + instrument_query <- instrument_query |> + filter(!!sym(record_id) %in% filtered_ids) + } else if (!is.null(filter_function)) { + instrument_query <- instrument_query |> filter_function() + } + + # collect data + instrument_data <- instrument_query |> collect() + + # apply labels if requested + if (labels) { + instrument_data <- apply_labels_to_data(instrument_data, full_structure, ds_instrument) + } + + # process (drop blank if needed) + processed_data <- if (drop_blank) { + result <- make_instrument_auto(instrument_data, record_id = record_id) + # Preserve record_id label if labels are requested + if (labels && record_id %in% names(result) && record_id %in% names(instrument_data)) { + attr(result[[record_id]], "label") <- attr(instrument_data[[record_id]], "label") + } + result + } else { + instrument_data + } + + rownames(processed_data) <- NULL + + if (nrow(processed_data) > 0) { + instruments_list[[instrument_name[data_set]]] <- processed_data + } else { + # Keep empty data.frame structure instead of setting to NULL + instruments_list[[instrument_name[data_set]]] <- processed_data + } } - # without this row names reflect the repeated instrument duplicates - rownames(processed_blank) <- NULL + return(instruments_list) + } else { + # assign to environment + for (data_set in seq_len(n_instr_int)) { + column_index <- get_instrument_columns(data_set, big_i, meta) - # The order of the names from exportInstruments() matches the order of the - # data sets from exportRecords() + instrument_query <- data_tbl |> + select(all_of(column_index)) - if (nrow(processed_blank > 0)) { - assign( - instrument_name[data_set], - processed_blank, - envir = envir - ) - } else { - warning( - paste( - "The", instrument_name[data_set], - "instrument/form has 0 records and will not be imported. \n" - ), - call. = FALSE - ) - # How to print warning about no records... how disruptive should this be? + if (!is.null(filtered_ids)) { + instrument_query <- instrument_query |> + filter(!!sym(record_id) %in% filtered_ids) + } else if (!is.null(filter_function)) { + instrument_query <- instrument_query |> filter_function() + } + + # collect data + instrument_data <- instrument_query |> collect() + + # apply labels if requested + if (labels) { + instrument_data <- apply_labels_to_data(instrument_data, full_structure, ds_instrument) + } + + processed_data <- if (drop_blank) { + result <- make_instrument_auto(instrument_data, record_id = record_id) + # Preserve record_id label if labels are requested + if (labels && record_id %in% names(result) && record_id %in% names(instrument_data)) { + attr(result[[record_id]], "label") <- attr(instrument_data[[record_id]], "label") + } + result + } else { + instrument_data + } + + rownames(processed_data) <- NULL + + if (nrow(processed_data) > 0) { + assign(instrument_name[data_set], processed_data, envir = envir) + } else { + cli_warn( + "The {instrument_name[data_set]} instrument has 0 records and will not be imported" + ) + } } - } - invisible() + invisible() + } } #' @title relabel @@ -202,7 +418,7 @@ import_instruments <- function(url, token, drop_blank = TRUE, #' @param x Character string variable holding label to be check for possible #' labels that need fixing to match REDCapAPI's variable label convention. #' -#' @importFrom stringr str_count str_sub str_extract +#' @importFrom stringr str_count str_sub str_extract str_locate #' #' @noRd #' @@ -217,12 +433,12 @@ relabel <- function(x) { # regular expression (Reg Ex) to get content inside () after choice= re <- "\\(choice=([^()]+)\\)" if_else( - stringr::str_count(x, "\\(choice") == 0, + str_count(x, "\\(choice") == 0, x, paste0( - stringr::str_sub(x, 1, str_locate(x, "\\(choice")[, 1] - 2), + str_sub(x, 1, str_locate(x, "\\(choice")[, 1] - 2), ": ", - gsub(re, "\\1", stringr::str_extract(x, re)) # content inside of Reg Ex + gsub(re, "\\1", str_extract(x, re)) ) ) } diff --git a/R/labels.R b/R/labels.R index e158895..7c73596 100644 --- a/R/labels.R +++ b/R/labels.R @@ -3,6 +3,7 @@ #' @importFrom vctrs vec_cast #' @importFrom vctrs vec_ptype2 +#' @importFrom labelled var_label #### Checking types of objects #### @@ -78,7 +79,8 @@ vec_cast.labelled.labelled <- function(x, to, ...) { ##### char ##### #' @export vec_cast.character.labelled <- function(x, to, ...) { - labelVector::set_label(x, labelVector::get_label(to)) + var_label(x) <- var_label(to) + x } #' @export @@ -89,7 +91,8 @@ vec_cast.labelled.character <- function(x, to, ...) { ##### integer ##### #' @export vec_cast.integer.labelled <- function(x, to, ...) { - labelVector::set_label(x, labelVector::get_label(to)) + var_label(x) <- var_label(to) + x } #' @export @@ -100,7 +103,8 @@ vec_cast.labelled.integer <- function(x, to, ...) { ##### double ##### #' @export vec_cast.double.labelled <- function(x, to, ...) { - labelVector::set_label(x, labelVector::get_label(to)) + var_label(x) <- var_label(to) + x } #' @export @@ -111,7 +115,8 @@ vec_cast.labelled.double <- function(x, to, ...) { ##### logical ##### #' @export vec_cast.logical.labelled <- function(x, to, ...) { - labelVector::set_label(x, labelVector::get_label(to)) + var_label(x) <- var_label(to) + x } #' @export diff --git a/R/make_choose_all_table.R b/R/make_choose_all_table.R index 9bb5f93..05b7651 100644 --- a/R/make_choose_all_table.R +++ b/R/make_choose_all_table.R @@ -55,7 +55,6 @@ getLabel2 <- function(data, aVariable) { #' @importFrom tibble enframe #' @importFrom tidyselect everything vars_select_helpers starts_with #' @importFrom rlang .data -#' @importFrom labelVector is_labelled #' @export #' #' @return A variable's response label without the choose all the question @@ -66,10 +65,10 @@ getLabel2 <- function(data, aVariable) { ## } make_choose_all_table <- function(df, variable) { # . <- NULL # kludge to get CMD Check to pass with nonstandard evaluation - the_vars_df <- df %>% + the_vars_df <- df |> dplyr::select(dplyr::starts_with(variable)) - are_vars_labelled <- purrr::map_lgl(the_vars_df, labelVector::is_labelled) + are_vars_labelled <- purrr::map_lgl(the_vars_df, function(x) inherits(x, "labelled")) if (! all(are_vars_labelled)) { @@ -89,26 +88,26 @@ make_choose_all_table <- function(df, variable) { Count <- NULL counts <- the_vars_df |> - dplyr::mutate(dplyr::across(tidyselect::everything(), ~ . %in% c("1", "Checked"))) %>% + dplyr::mutate(dplyr::across(tidyselect::everything(), ~ . %in% c("1", "Checked"))) |> dplyr::mutate(dplyr::across( tidyselect::vars_select_helpers$where( is.logical ), as.numeric - )) %>% - dplyr::summarise(across(everything(), ~ sum(.x, na.rm = TRUE))) %>% - dplyr::mutate(blah = "x") %>% + )) |> + dplyr::summarise(across(everything(), ~ sum(.x, na.rm = TRUE))) |> + dplyr::mutate(blah = "x") |> tidyr::pivot_longer(-`blah`, names_to = "thingy", values_to = "Count") aTable <- - counts %>% - dplyr::pull(.data$thingy) %>% + counts |> + dplyr::pull(.data$thingy) |> purrr::map_chr(.f = ~ { getLabel2(df, .x) - }) %>% - tibble::enframe(name = NULL) %>% # new variable is value - dplyr::rename("What" = `value`) %>% - dplyr::bind_cols(counts) %>% + }) |> + tibble::enframe(name = NULL) |> # new variable is value + dplyr::rename("What" = `value`) |> + dplyr::bind_cols(counts) |> dplyr::select(`What`, `Count`) aTable } diff --git a/R/make_choose_one_table.R b/R/make_choose_one_table.R index 1f0c33b..3ad2105 100644 --- a/R/make_choose_one_table.R +++ b/R/make_choose_one_table.R @@ -26,7 +26,7 @@ dropTags <- function(x) { #' @return a table taybull <- function(variable, subset = FALSE) { # grab the label attribute off the variable - theLab <- attr(variable, "label") %>% + theLab <- attr(variable, "label") |> dropTags() # print the label @@ -42,7 +42,7 @@ taybull <- function(variable, subset = FALSE) { cat(paste(theLab2, "\n")) } Response <- variable - janitor::tabyl(Response) %>% + janitor::tabyl(Response) |> janitor::adorn_pct_formatting(digits = 0) } @@ -64,7 +64,7 @@ taybull <- function(variable, subset = FALSE) { #' @return a table taybull2 <- function(data, aVariable, subset = FALSE) { # pull the variable out and into a data frame - Response <- data %>% + Response <- data |> dplyr::pull({{ aVariable }}) # grab the label attribute off the variable inside of the DF @@ -83,7 +83,7 @@ taybull2 <- function(data, aVariable, subset = FALSE) { cat(paste(theLab2, "\n")) } - janitor::tabyl(Response) %>% + janitor::tabyl(Response) |> janitor::adorn_pct_formatting(digits = 0) } diff --git a/R/make_instrument.R b/R/make_instrument.R index e9ed9ec..2a86095 100644 --- a/R/make_instrument.R +++ b/R/make_instrument.R @@ -1,7 +1,7 @@ #' Extract an Instrument from an REDCap Export #' #' @description This function takes a data frame and the names of the first and -#' last variables in an instrumnt and returns a data frame with the instrument. +#' last variables in an instrument and returns a data frame with the instrument. #' #' @param df A data frame with the instrument #' @param first_var The name of the first variable in an instrument @@ -40,13 +40,14 @@ make_instrument <- function(df, first_var, last_var, drop_which_when = FALSE, instrument <- df[, c(first_col:last_col)] # which records are all missing - allMissing <- apply(instrument, 1, function(x) { - all(is.na(x) | x == "") - }) + allMissing <- rep(FALSE, nrow(instrument)) + for (i in 1:nrow(instrument)) { + x <- instrument[i, ] + allMissing[i] <- all(is.na(x) | x == "") + } # the rows that are not all missing if (drop_which_when == FALSE) { - # get the column number for the id and event name record_id_col <- which(colnames(df) == record_id) redcap_event_name_col <- which(colnames(df) == "redcap_event_name") diff --git a/R/make_instrument_auto.R b/R/make_instrument_auto.R index 43af055..e74e128 100644 --- a/R/make_instrument_auto.R +++ b/R/make_instrument_auto.R @@ -24,7 +24,7 @@ make_instrument_auto <- function(df, drop_which_when = FALSE, # Strip labels from REDCap created variables to prevent reported join (and # perhaps pivot) issues on labeled variables. - df <- drop_label(df, record_id) + df <- drop_label(df, !!record_id) is_longitudinal <- any(names(df) == "redcap_event_name") @@ -36,8 +36,7 @@ make_instrument_auto <- function(df, drop_which_when = FALSE, is_repeated <- any(names(df) == "redcap_repeat_instrument") if (is_repeated) { - df <- drop_label(df, "redcap_repeat_instrument") - df <- drop_label(df, "redcap_repeat_instance") + df <- drop_label(df, "redcap_repeat_instrument", "redcap_repeat_instance") } # if there are repeated instruments check to see if this instrument has repeats @@ -59,9 +58,11 @@ make_instrument_auto <- function(df, drop_which_when = FALSE, instrument <- df[, c(first_col:last_col), drop = FALSE] # which records are all missing - allMissing <- apply(instrument, 1, function(x) { - all(is.na(x) | x == "") - }) + allMissing <- rep(FALSE, nrow(instrument)) + for (i in 1:nrow(instrument)) { + x <- instrument[i, ] + allMissing[i] <- all(is.na(x) | x == "") + } # the rows that are not all missing. if (drop_which_when == FALSE) { @@ -163,13 +164,36 @@ fix_class_bug <- function(df) { #' used to drop labels. #' #' @param df the name of the data frame -#' @param x the quoted name of the variable +#' @param ... Variables to select. You can use: +#' * Variable names as strings: `c("var1", "var2")` +#' * Bare variable names: `c(var1, var2)` +#' * Select helpers like `starts_with()`, `ends_with()`, `contains()`, `matches()`, etc. #' #' @export #' +#' @examples +#' \dontrun{ +#' # Remove labels from single variable +#' df <- drop_label(df, "variable_name") +#' +#' # Remove labels from multiple variables +#' df <- drop_label(df, c("var1", "var2", "var3")) +#' +#' # Use tidyselect helpers +#' df <- drop_label(df, starts_with("redcap_")) +#' } #' #' @return df -drop_label <- function(df, x) { - attributes(df[, which(names(df) == x)]) <- NULL +drop_label <- function(df, ...) { + # Use tidyselect to handle variable selection + selected_vars <- names(dplyr::select(df, ...)) + + # Remove labels from selected variables + for (var in selected_vars) { + if (var %in% names(df)) { + attributes(df[[var]]) <- NULL + } + } + df } diff --git a/R/make_yes_no.R b/R/make_yes_no.R index 5f4cab6..2ea03f9 100644 --- a/R/make_yes_no.R +++ b/R/make_yes_no.R @@ -1,27 +1,30 @@ #' @title make_yes_no #' -#' @description Convert a "Yes-No", "True-False" or "Checkboxes (Multiple +#' @description Convert a "Yes-No", "True-False", or "Checkboxes (Multiple #' Answers)" question in REDCap to a factor holding "Yes" or -#' "No or Unknown". Technically "yes" or "checked" (ignoring case), 1 or -#' TRUE responses are converted to "Yes" and all other values to +#' "No or Unknown". Technically "yes" or "checked" (ignoring case), 1 or +#' TRUE responses are converted to "Yes" and all other values to #' "No or Unknown". Also see `make_yes_no_unknown()`. #' #' @param x x variable to be converted to hold "Yes" or "No or Unknown" #' #' @return a factor with "Yes" or "No or Unknown" -#' +#' #' @importFrom stringr str_detect regex #' @importFrom dplyr case_when -#' +#' @importFrom labelled var_label +#' #' @export #' -#' @examples +#' @examples #' make_yes_no(c(0, 1, NA)) #' make_yes_no(c("unchecked", "Checked", NA)) - make_yes_no <- function(x) { - if(is.factor(x) | is.character(x)){ - factor( + # Store original label + original_label <- var_label(x) + + if (is.factor(x) | is.character(x)) { + result <- factor( case_when( str_detect( x, stringr::regex("^yes", ignore_case = TRUE) @@ -34,7 +37,7 @@ make_yes_no <- function(x) { levels = c("No or Unknown", "Yes") ) } else if (is.numeric(x) | is.logical(x)) { - factor( + result <- factor( case_when( x == 1 ~ "Yes", x == 0 ~ "No or Unknown", @@ -42,8 +45,14 @@ make_yes_no <- function(x) { ), levels = c("No or Unknown", "Yes") ) - } else { - x # not an expected atomic class - } + result <- x # not an expected atomic class + } + + # Restore original label if it existed + if (!is.null(original_label)) { + var_label(result) <- original_label + } + + result } diff --git a/R/make_yes_no_unknown.R b/R/make_yes_no_unknown.R index 1f26c5d..1cfbf45 100644 --- a/R/make_yes_no_unknown.R +++ b/R/make_yes_no_unknown.R @@ -1,27 +1,31 @@ #' @title make_yes_no_unknown #' -#' @description Convert a "Yes-No", "True-False" or "Checkboxes (Multiple +#' @description Convert a "Yes-No", "True-False", or "Checkboxes (Multiple #' Answers)" question in REDCap to a factor holding "No" or -#' "Yes" or "Unknown". Technically "yes" or "checked" (ignoring case), 1 or -#' TRUE responses are converted to "Yes". "No" or "unchecked" (ignoring +#' "Yes" or "Unknown". Technically "yes" or "checked" (ignoring case), 1 or +#' TRUE responses are converted to "Yes". "No" or "unchecked" (ignoring #' case), 0 or FALSE are converted to "No". All other values are set to #' "Unknown". Also see `make_yes_no()`. #' -#' @param x variable to be converted to hold "No", "Yes", or Unknown" +#' @param x variable to be converted to hold "No", "Yes", or "Unknown" +#' +#' @return a factor with "No", "Yes", or "Unknown" #' -#' @return a factor with "No", "Yes", or Unknown" -#' #' @importFrom stringr str_detect regex #' @importFrom dplyr case_when -#' +#' @importFrom labelled var_label +#' #' @export #' #' @examples #' make_yes_no_unknown(c(0, 1, NA)) #' make_yes_no_unknown(c("unchecked", "Checked", NA)) make_yes_no_unknown <- function(x) { - if(is.factor(x) | is.character(x)){ - factor( + # Store original label + original_label <- var_label(x) + + if (is.factor(x) | is.character(x)) { + result <- factor( dplyr::case_when( str_detect( x, stringr::regex("^yes", ignore_case = TRUE) @@ -40,7 +44,7 @@ make_yes_no_unknown <- function(x) { levels = c("No", "Yes", "Unknown") ) } else if (is.numeric(x) | is.logical(x)) { - factor( + result <- factor( dplyr::case_when( x == 1 ~ "Yes", x == 0 ~ "No", @@ -48,8 +52,14 @@ make_yes_no_unknown <- function(x) { ), levels = c("No", "Yes", "Unknown") ) - } else { - x - } + result <- x + } + + # Restore original label if it existed + if (!is.null(original_label)) { + var_label(result) <- original_label + } + + result } diff --git a/R/utils-global.R b/R/utils-global.R new file mode 100644 index 0000000..e0489a8 --- /dev/null +++ b/R/utils-global.R @@ -0,0 +1,2 @@ +# Global variables to avoid R CMD CHECK notes for NSE variables +utils::globalVariables(c("form_name", "n")) diff --git a/R/utils-pipe.R b/R/utils-pipe.R deleted file mode 100644 index e79f3d8..0000000 --- a/R/utils-pipe.R +++ /dev/null @@ -1,11 +0,0 @@ -#' Pipe operator -#' -#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -NULL diff --git a/README.md b/README.md index 80190b8..036ce24 100644 --- a/README.md +++ b/README.md @@ -13,36 +13,44 @@ tidyREDCap is an R package with functions for processing REDCap data. 'REDCap' (Research Electronic Data CAPture; ) is a web-enabled application for building and managing surveys and databases developed at Vanderbilt University. -## What tidyREDCap Functions Can Do for You? +## What tidyREDCap Do for You? #### Load All Data from REDCap into R with One Line of Code -* 💥 NEW in Version 1.1 💥 `import_instruments()` includes the repeat number for repeated instruments/forms/questionnaires. - -* `import_instruments()` will use an API call to load every instrument/questionnaire into its own R dataset. If the REDCap project is longitudinal or has repeated instruments, the function will remove blank records. +* `import_instruments()`: uses an API call to load every instrument into a separate tidy R dataset or list of data.frames, labeling the columns and removing blank records. +* Import big REDCap project data that is larger than memory and won't load using a simple API call. The import function uses [duckdb](https://duckdb.org) to lazily store data and filter for only the data you need to load. #### Show the Field Labels Inside RStudio -* After loading data into R using RStudio with the `import_instruments()` function, you can see both the variable name and the text that appears to users of REDCap. All you need to do is click on the dataset's name in the **Environment** tab or use the View() function. The column headings will include both the variable name and the Field Label from REDCap. +After loading data into R using RStudio with the `import_instruments()` function, you can see both the variable name and the text that appears to users of REDCap. All you need to do is click on the dataset's name in the **Environment** tab or use the `View()` function. The column headings will include both the variable name and the Field Label from REDCap. + +#### Generate Data Codebooks -* 💥 NEW in Version 1.1 💥 Functions coming from packages outside of `tidyREDCap` may not understand what to do with labeled variables. So, `tidyREDCap` includes a new `drop_labels()` function that will allow you to strip the labels before using functions that want unlabeled data. +* `codebook()`: generates structured metadata documentation for REDCap data, displaying variable labels, types, and value labels for categorical variables. +* `codebook_convert()`: converts coded values in REDCap data to their labeled equivalents using stored metadata. Useful for converting numeric codes (e.g., 1, 2, 3) to descriptive labels (e.g., "Male", "Female", "Non-binary"). -#### Working with Choose One Questions +#### Working with *Choose One* Questions -* `make_choose_one_table()`: print a `janitor::tabyl()` style table with a variable label. This function lets you print one choice from a choose all that apply question. +`make_choose_one_table()`: prints a `janitor::tabyl()` style table with a variable label. This function lets you print one choice from a *choose all that apply* question. -#### Working with Choose All that Apply Questions +#### Working with *Choose All that Apply* Questions -REDCap exports the responses to a choose all that apply question into many similarly named questions. tidyREDCap helps summarize the responses with two functions: +REDCap exports the responses to a *choose all that apply* question into many similarly named questions. tidyREDCap helps summarize the responses with two functions: * `make_binary_word()`: converts all the responses into a single descriptive "word" * `make_choose_all_table()`: converts all the responses into a single summary table #### Working with Repeated Measures -Projects that have repeated assessments with different questionnaires/instruments export with holes in the CSV. tidyREDCap will parse the export and create tables for any of the questionnaires/instruments: +Projects that have repeated assessments with different instruments export with holes in the CSV. tidyREDCap will parse the export and create tables for any of the instruments: + +* `make_instrument()`: makes a tibble for an instrument based on the variable names +* `make_instrument_auto()`: extracts an instrument from REDCap data without specifying variables, automatically detecting longitudinal and repeated measure structures -* `make_instrument()`: makes a tibble for a questionnaire/instrument +#### Working with Yes/No Questions + +* `make_yes_no()`: converts Yes-No, True-False, or checkbox questions to a factor with "Yes" or "No or Unknown" +* `make_yes_no_unknown()`: converts Yes-No, True-False, or checkbox questions to a factor with "No", "Yes", or "Unknown" ## What are the tidyREDCap Websites? Main Page: https://raymondbalise.github.io/tidyREDCap/ @@ -65,14 +73,9 @@ if (!requireNamespace("devtools")) install.packages("devtools") devtools::install_github("RaymondBalise/tidyREDCap") ``` -#### What is new on the development release? - -* 💥 NEW in Version 1.1.0.9000 💥 adds `make_yes_no()` function to convert - "checked" or "yes"-like answers to "Yes" and other answers to "No or Unknown". -* 💥 NEW in Version 1.1.0.9000 💥 adds `make_yes_no_unknown()` function to - convert "checked" or "yes"-like answers to "Yes"", unchecked or "no"-like - answers to "No" and other answers to "Unknown". +#### What is New? +💥 **Version 1.2.0** 💥 Changes import package to [redquack](https://github.com/dylanpieper/redquack/tree/main/R) and adds parameters to function `import_instruments()`: `filter_instrument` and `filter_function` for lazy data filtering, `return_list` for returning a list of instrument data.frames, and `labels` for adding/removing column labels. ## What if I Find a Problem? We are currently in active development of tidyREDCap. If one of our functions does not work the way that you expect, or if one of our functions is broken, please submit an issue ticket (using a [reproducible example](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html)) to our [issues page](https://github.com/RaymondBalise/tidyREDCap/issues). If you have a cool idea for our next new function, also submit an issue ticket. If you are an R developer and want so contribute to this package, please submit an issue ticket or a pull request. diff --git a/docs/articles/dropLabels.html b/docs/articles/dropLabels.html deleted file mode 100644 index a4f8d33..0000000 --- a/docs/articles/dropLabels.html +++ /dev/null @@ -1,620 +0,0 @@ - - - - - - - -Drop Labels from a Table • tidyREDCap - - - - - - - - - - - - - - - - - - - -
-
- - - - -
-
- - - - - -
-

The Problem -

-

The tidyREDCap package creates data sets with labelled -columns.

-
-tidyREDCap::import_instruments(
-  url = "https://bbmc.ouhsc.edu/redcap/api/",
-  token = Sys.getenv("REDCapR_test")
-)
-

If you would like to see the labels on the data set -demographics, you can use the RStudio function -View(), as shown below.

-
-View(demographics)
-

-

However, some functions do not work well with labeled variables.

-
-library(skimr)  # for the skim() function
-demographics |> skim()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Data summary
Namedemographics
Number of rows5
Number of columns10
_______________________
Column type frequency:
character7
Date1
numeric2
________________________
Group variablesNone
-

Variable type: character

- ---------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
skim_variablen_missingcomplete_rateminmaxemptyn_uniquewhitespace
name_first0158050
name_last0138040
address012938050
telephone011414050
email011219050
sex0146020
demographics_complete0188010
-

Variable type: Date

- --------- - - - - - - - - - - - - - - - - - - -
skim_variablen_missingcomplete_rateminmaxmediann_unique
dob011934-04-092003-08-301955-04-155
-

Variable type: numeric

- ------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
skim_variablen_missingcomplete_ratemeansdp0p25p50p75p100hist
record_id013.01.5812345▇▇▇▇▇
age0144.431.571111596180▇▁▁▇▃
-

So you need a way to drop the label off of a variable or to drop all -the labels from all the variables in a dataset.

-
-
-

The Solution -

-

You can drop the label from a single variable with the drop_label() -function. For example:

-
-demographics_changed <- drop_label(demographics, "first_name")
-

You can drop all the labels using the drop_labels() -function. For example:

-
-demographics_without_labels <- drop_labels(demographics)
-
-demographics_without_labels |> 
-  skim()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Data summary
Namedemographics_without_labe…
Number of rows5
Number of columns10
_______________________
Column type frequency:
character7
numeric3
________________________
Group variablesNone
-

Variable type: character

- ---------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
skim_variablen_missingcomplete_rateminmaxemptyn_uniquewhitespace
name_first0158050
name_last0138040
address012938050
telephone011414050
email011219050
sex0146020
demographics_complete0188010
-

Variable type: numeric

- ------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
skim_variablen_missingcomplete_ratemeansdp0p25p50p75p100hist
record_id013.01.5812345▇▇▇▇▇
dob01-56.011581.94-13051-6269-53751212112294▃▇▁▁▇
age0144.431.571111596180▇▁▁▇▃
-
-
- - - -
- - - -
- -
-

-

Site built with pkgdown 2.0.7.

-
- -
-
- - - - - - - - diff --git a/man/codebook.Rd b/man/codebook.Rd new file mode 100644 index 0000000..aeeeab0 --- /dev/null +++ b/man/codebook.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codebook.R +\name{codebook} +\alias{codebook} +\title{Generate codebook for REDCap data} +\usage{ +codebook(data, field_name = NULL) +} +\arguments{ +\item{data}{A data.frame or single column from REDCap data with metadata attributes} + +\item{field_name}{Character string specifying field name when \code{data} is a full dataset} +} +\value{ +A list or data.frame containing codebook information +} +\description{ +Extract and display metadata information for REDCap data, +including variable labels and value labels for categorical variables. +} +\examples{ +\dontrun{ +# Get full codebook for dataset +codebook(demographics) + +# Get codebook for specific variable +codebook(demographics$sex) +codebook(demographics, "sex") +} + +} diff --git a/man/codebook_convert.Rd b/man/codebook_convert.Rd new file mode 100644 index 0000000..17435ae --- /dev/null +++ b/man/codebook_convert.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codebook.R +\name{codebook_convert} +\alias{codebook_convert} +\title{Convert coded values using codebook metadata} +\usage{ +codebook_convert(data, col_name = NULL) +} +\arguments{ +\item{data}{A data.frame, single column, or data.frame with column name specification} + +\item{col_name}{Character string specifying column name when \code{data} is a data.frame} +} +\value{ +Converted column vector with coded values replaced by labels +} +\description{ +Converts coded values in REDCap data to their labeled equivalents +using the metadata stored in column attributes. Both syntaxes return the converted +column vector for consistency with codebook(). For data.frame operations, use +standard tidy patterns with mutate(). +} +\examples{ +\dontrun{ +# Both return converted column vector +codebook_convert(demographics$sex) +codebook_convert(demographics, "sex") + +# For data.frame operations, use tidy patterns: +demographics |> + mutate(sex = codebook_convert(sex)) + +# Convert multiple columns +demographics |> + mutate(across(where(has_redcap_values), codebook_convert)) + +# Convert specific columns +demographics |> + mutate( + sex = codebook_convert(sex), + race = codebook_convert(race) + ) +} + +} diff --git a/man/drop_label.Rd b/man/drop_label.Rd index 78d7ade..a34bf0a 100644 --- a/man/drop_label.Rd +++ b/man/drop_label.Rd @@ -4,12 +4,17 @@ \alias{drop_label} \title{Drop the label from a variable} \usage{ -drop_label(df, x) +drop_label(df, ...) } \arguments{ \item{df}{the name of the data frame} -\item{x}{the quoted name of the variable} +\item{...}{Variables to select. You can use: +\itemize{ +\item Variable names as strings: \code{c("var1", "var2")} +\item Bare variable names: \code{c(var1, var2)} +\item Select helpers like \code{starts_with()}, \code{ends_with()}, \code{contains()}, \code{matches()}, etc. +}} } \value{ df @@ -19,3 +24,16 @@ There is a reported issues with joins on data (without a reprex) that seem to be caused by the labels. As a possible solution this can be used to drop labels. } +\examples{ +\dontrun{ +# Remove labels from single variable +df <- drop_label(df, "variable_name") + +# Remove labels from multiple variables +df <- drop_label(df, c("var1", "var2", "var3")) + +# Use tidyselect helpers +df <- drop_label(df, starts_with("redcap_")) +} + +} diff --git a/man/has_redcap_values.Rd b/man/has_redcap_values.Rd new file mode 100644 index 0000000..762f13f --- /dev/null +++ b/man/has_redcap_values.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codebook.R +\name{has_redcap_values} +\alias{has_redcap_values} +\title{Check if column has REDCap value labels} +\usage{ +has_redcap_values(x) +} +\arguments{ +\item{x}{A column/vector to check} +} +\value{ +Logical indicating whether column has redcap_values attribute +} +\description{ +Helper function to identify columns with redcap_values attributes. +Useful with dplyr::across() for selective conversion. +} +\examples{ +\dontrun{ +# Convert only columns with value labels +demographics |> + mutate(across(where(has_redcap_values), codebook_convert)) +} + +} diff --git a/man/import_instruments.Rd b/man/import_instruments.Rd index 9960d0d..b419869 100644 --- a/man/import_instruments.Rd +++ b/man/import_instruments.Rd @@ -10,7 +10,11 @@ import_instruments( drop_blank = TRUE, record_id = "record_id", first_record_id = 1, - envir = .GlobalEnv + envir = .GlobalEnv, + return_list = FALSE, + labels = TRUE, + filter_instrument = NULL, + filter_function = NULL ) } \arguments{ @@ -32,10 +36,24 @@ are using \code{dude_id} instead of \code{record_id} and \code{dude_id} has a va \code{first_record_id = "first dude"}.} \item{envir}{The name of the environment where the tables should be saved.} + +\item{return_list}{If TRUE, returns instrument data in a named list. If FALSE (default), assigns instrument data to environment.} + +\item{labels}{If TRUE (default), applies variable labels to columns. If FALSE, columns will not have labels.} + +\item{filter_instrument}{Optional character string specifying which instrument +to use for filtering. If provided with filter_function, this instrument will be +filtered first, and the resulting record IDs will be used to filter all +other instruments.} + +\item{filter_function}{Optional function that filters REDCap data. +If \code{filter_instrument} is specified, filter is applied only to that instrument, +and returned record IDs are used to filter all other instruments. +If \code{filter_instrument} is NULL (default), filter applies to each instrument separately.} } \value{ -one \code{data.frame} for each instrument/form in a REDCap project. By -default the datasets are saved into the global environment. +One table (\code{data.frame}) for each instrument is assigned to the environment. +If \code{return_list} = TRUE, returns a named list instead. } \description{ This function takes the url and key for a REDCap @@ -43,9 +61,32 @@ project and returns a table for each instrument/form in the project. } \examples{ \dontrun{ +# Import each instrument to multiple tables import_instruments( - "https://redcap.miami.edu/api/", - Sys.getenv("test_API_key") + "https://bbmc.ouhsc.edu/redcap/api/", + Sys.getenv("redcap_token") +) + +# Import each instrument to a single list +instruments <- import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + Sys.getenv("redcap_token"), + return_list = TRUE +) + +# Import without labels +instruments <- import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + Sys.getenv("redcap_token"), + labels = FALSE +) + +# Filter all instruments based on demographics +instruments <- import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + Sys.getenv("redcap_token"), + filter_instrument = "demographics", + filter_function = \(x) x |> dplyr::filter(age >= 18) ) } } diff --git a/man/make_instrument.Rd b/man/make_instrument.Rd index 0be3438..0f33006 100644 --- a/man/make_instrument.Rd +++ b/man/make_instrument.Rd @@ -28,5 +28,5 @@ A data frame that has an instrument (with at least one not NA value) } \description{ This function takes a data frame and the names of the first and -last variables in an instrumnt and returns a data frame with the instrument. +last variables in an instrument and returns a data frame with the instrument. } diff --git a/man/make_yes_no.Rd b/man/make_yes_no.Rd index 270db66..1fd43ad 100644 --- a/man/make_yes_no.Rd +++ b/man/make_yes_no.Rd @@ -13,7 +13,7 @@ make_yes_no(x) a factor with "Yes" or "No or Unknown" } \description{ -Convert a "Yes-No", "True-False" or "Checkboxes (Multiple +Convert a "Yes-No", "True-False", or "Checkboxes (Multiple Answers)" question in REDCap to a factor holding "Yes" or "No or Unknown". Technically "yes" or "checked" (ignoring case), 1 or TRUE responses are converted to "Yes" and all other values to diff --git a/man/make_yes_no_unknown.Rd b/man/make_yes_no_unknown.Rd index 30a90f8..ea6610e 100644 --- a/man/make_yes_no_unknown.Rd +++ b/man/make_yes_no_unknown.Rd @@ -7,13 +7,13 @@ make_yes_no_unknown(x) } \arguments{ -\item{x}{variable to be converted to hold "No", "Yes", or Unknown"} +\item{x}{variable to be converted to hold "No", "Yes", or "Unknown"} } \value{ -a factor with "No", "Yes", or Unknown" +a factor with "No", "Yes", or "Unknown" } \description{ -Convert a "Yes-No", "True-False" or "Checkboxes (Multiple +Convert a "Yes-No", "True-False", or "Checkboxes (Multiple Answers)" question in REDCap to a factor holding "No" or "Yes" or "Unknown". Technically "yes" or "checked" (ignoring case), 1 or TRUE responses are converted to "Yes". "No" or "unchecked" (ignoring diff --git a/man/pipe.Rd b/man/pipe.Rd deleted file mode 100644 index 0eec752..0000000 --- a/man/pipe.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-pipe.R -\name{\%>\%} -\alias{\%>\%} -\title{Pipe operator} -\usage{ -lhs \%>\% rhs -} -\description{ -See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -} -\keyword{internal} diff --git a/tests/testthat/test-codebook.R b/tests/testthat/test-codebook.R new file mode 100644 index 0000000..0f8770f --- /dev/null +++ b/tests/testthat/test-codebook.R @@ -0,0 +1,212 @@ +# Tests for codebook functionality +# 2025-09-09 + +library(testthat) +library(tidyREDCap) + +test_that("codebook works with REDCap data", { + # Import test data with metadata + demo_data <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) + + # Test full dataset codebook + cb_full <- codebook(demo_data$demographics) + expect_s3_class(cb_full, "codebook") + expect_true("sex" %in% names(cb_full)) + expect_equal(cb_full$sex$label, "Gender") + expect_equal(cb_full$sex$type, "radio") + expect_equal(cb_full$sex$values[["0"]], "Female") + expect_equal(cb_full$sex$values[["1"]], "Male") + + # Test individual variable codebook + cb_sex <- codebook(demo_data$demographics$sex) + expect_equal(cb_sex$label, "Gender") + expect_equal(cb_sex$values[["0"]], "Female") + expect_equal(cb_sex$values[["1"]], "Male") + + # Test that value labels are stored as attributes + expect_equal(attr(demo_data$demographics$sex, "redcap_values")[["0"]], "Female") + expect_equal(attr(demo_data$demographics$sex, "redcap_values")[["1"]], "Male") + + # Test that dataset metadata is stored + expect_true(!is.null(attr(demo_data$demographics, "redcap_metadata"))) + expect_true("field_name" %in% names(attr(demo_data$demographics, "redcap_metadata"))) +}) + +test_that("codebook works with specific field name", { + demo_data <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) + + # Test using field name parameter + cb_sex <- codebook(demo_data$demographics, "sex") + expect_equal(cb_sex$label, "Gender") + expect_equal(cb_sex$values[["0"]], "Female") + expect_equal(cb_sex$values[["1"]], "Male") +}) + +test_that("codebook handles non-existent fields gracefully", { + demo_data <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) + + expect_error( + codebook(demo_data$demographics, "nonexistent_field"), + 'Field "nonexistent_field" not found in data' + ) +}) + +test_that("codebook works with data without metadata", { + # Test with regular data.frame without metadata + simple_df <- data.frame( + x = 1:3, + y = letters[1:3] + ) + + cb <- codebook(simple_df) + expect_s3_class(cb, "codebook") + expect_true("x" %in% names(cb)) + expect_equal(cb$x$label, "No label") +}) + +test_that("codebook_convert works with REDCap data", { + # Import test data with metadata + demo_data <- tryCatch( + { + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) + }, + error = function(e) { + skip("Cannot connect to test REDCap server") + NULL + } + ) + + skip_if(is.null(demo_data)) + + # Find a column with redcap_values for testing + test_col_name <- NULL + for (col_name in names(demo_data$demographics)) { + if (!is.null(attr(demo_data$demographics[[col_name]], "redcap_values"))) { + test_col_name <- col_name + break + } + } + + # If no column with redcap_values, create a test case + if (is.null(test_col_name)) { + # Create manual test data + demo_data$demographics$test_convert <- c(0, 1, 1, 0, 1) + attr(demo_data$demographics$test_convert, "redcap_values") <- list("0" = "No", "1" = "Yes") + test_col_name <- "test_convert" + } + + # Test converting single column + col_converted <- codebook_convert(demo_data$demographics[[test_col_name]]) + expect_type(col_converted, "character") + + # Test converting specific column in data.frame (should return column vector) + demo_converted_col <- codebook_convert(demo_data$demographics, test_col_name) + expect_type(demo_converted_col, "character") + expect_false(is.data.frame(demo_converted_col)) # Should not be a data.frame + + # Test that calling without column name gives helpful error + expect_error( + codebook_convert(demo_data$demographics), + "When converting multiple columns, use dplyr patterns" + ) +}) + +test_that("codebook_convert handles data without value labels", { + # Test with column that has no redcap_values attribute + simple_col <- c(1, 2, 3) + result <- codebook_convert(simple_col) + expect_identical(result, simple_col) + + # Test with data.frame column that has no metadata + simple_df <- data.frame(x = 1:3, y = letters[1:3]) + result <- codebook_convert(simple_df, "x") + expect_identical(result, simple_df$x) +}) + +test_that("codebook_convert handles non-existent columns gracefully", { + demo_data <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) + + expect_error( + codebook_convert(demo_data$demographics, "nonexistent_column"), + 'Column "nonexistent_column" not found in data' + ) +}) + +test_that("codebook_convert preserves NAs and attributes", { + # Create test column with value labels and NAs + test_col <- c(0, 1, NA, 0, 1) + attr(test_col, "redcap_values") <- list("0" = "No", "1" = "Yes") + attr(test_col, "label") <- "Test Label" + + result <- codebook_convert(test_col) + expect_type(result, "character") + expect_equal(result[!is.na(result)], c("No", "Yes", "No", "Yes")) + expect_true(is.na(result[3])) + expect_equal(attr(result, "label"), "Test Label") + expect_null(attr(result, "redcap_values")) +}) + +test_that("codebook_convert handles logical values as 0/1 codes", { + # Create logical column that should be treated as 0/1 codes + logical_col <- c(FALSE, TRUE, TRUE, FALSE, TRUE) + attr(logical_col, "redcap_values") <- list("0" = "Female", "1" = "Male") + attr(logical_col, "label") <- "Gender" + + result <- codebook_convert(logical_col) + expect_type(result, "character") + expect_equal(as.vector(result), c("Female", "Male", "Male", "Female", "Male")) + expect_equal(attr(result, "label"), "Gender") + expect_null(attr(result, "redcap_values")) +}) + +test_that("codebook_convert works with mixed numeric and string codes", { + # Test with both numeric and string codes + mixed_col <- c("A", "B", "A", "C") + attr(mixed_col, "redcap_values") <- list("A" = "Option A", "B" = "Option B", "C" = "Option C") + + result <- codebook_convert(mixed_col) + expect_equal(result, c("Option A", "Option B", "Option A", "Option C")) + + # Test with numeric codes as strings + numeric_col <- c(1, 2, 1, 3) + attr(numeric_col, "redcap_values") <- list("1" = "First", "2" = "Second", "3" = "Third") + + result2 <- codebook_convert(numeric_col) + expect_equal(result2, c("First", "Second", "First", "Third")) +}) + +test_that("has_redcap_values helper function works", { + # Column with redcap_values + col_with_values <- c(0, 1, 1, 0) + attr(col_with_values, "redcap_values") <- list("0" = "No", "1" = "Yes") + expect_true(has_redcap_values(col_with_values)) + + # Column without redcap_values + col_without_values <- c(1, 2, 3) + expect_false(has_redcap_values(col_without_values)) + + # Column with other attributes but no redcap_values + col_with_other_attrs <- c(1, 2, 3) + attr(col_with_other_attrs, "label") <- "Test Label" + expect_false(has_redcap_values(col_with_other_attrs)) +}) diff --git a/tests/testthat/test-import_instruments.R b/tests/testthat/test-import_instruments.R index c799128..ce9314a 100644 --- a/tests/testthat/test-import_instruments.R +++ b/tests/testthat/test-import_instruments.R @@ -83,10 +83,258 @@ tidyREDCap::import_instruments( ###### Tests ###### test_that("import works", { - if (packageVersion("REDCapR") <= "1.1.0") { - expect_equal(demographics, target, ignore_attr = TRUE) - } else { - expect_equal(demographics, target) + # Test basic structure + expect_s3_class(demographics, "data.frame") + expect_equal(names(demographics), names(target)) + expect_equal(nrow(demographics), nrow(target)) + + # Test that labels are preserved (the key functionality) + expect_equal(attr(demographics$record_id, "label"), "Study ID") + expect_equal(attr(demographics$name_first, "label"), "First Name") + expect_equal(attr(demographics$name_last, "label"), "Last Name") + expect_equal(attr(demographics$address, "label"), "Street, City, State, ZIP") + expect_equal(attr(demographics$telephone, "label"), "Phone number") + expect_equal(attr(demographics$email, "label"), "E-mail") + expect_equal(attr(demographics$dob, "label"), "Date of birth") + expect_equal(attr(demographics$age, "label"), "Age (years)") + expect_equal(attr(demographics$sex, "label"), "Gender") + expect_equal(attr(demographics$demographics_complete, "label"), "Complete?") + + # Test data values (ignoring class differences and attributes) + expect_equal(as.numeric(demographics$record_id), c(1, 2, 3, 4, 5)) + expect_equal(as.character(demographics$name_first), c("Nutmeg", "Tumtum", "Marcus", "Trudy", "John Lee")) + expect_equal(as.character(demographics$name_last), c("Nutmouse", "Nutmouse", "Wood", "DAG", "Walker")) +}) + +test_that("return_list parameter works", { + # Test return_list = TRUE returns a list + instruments_list <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) + + expect_type(instruments_list, "list") + expect_true("demographics" %in% names(instruments_list)) + expect_s3_class(instruments_list$demographics, "data.frame") + + # Test return_list = FALSE returns individual data.frames in environment (default behavior) + expect_s3_class(demographics, "data.frame") +}) + +test_that("labels parameter works", { + # Test labels = FALSE removes column labels + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE + ) -> instruments_with_labels + + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + return_list = TRUE, + labels = FALSE + ) -> instruments_without_labels + + # Check that labels are present in the first case + expect_true(any(sapply(instruments_with_labels$demographics, function(x) !is.null(attr(x, "label"))))) + + # Check that labels are removed in the second case + expect_false(any(sapply(instruments_without_labels$demographics, function(x) !is.null(attr(x, "label"))))) +}) + +test_that("filter_function parameter works", { + # Test filter_function with a simple filter + filtered_instruments <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_function = function(x) x |> dplyr::filter(record_id == 3), + return_list = TRUE + ) + + expect_type(filtered_instruments, "list") + expect_true("demographics" %in% names(filtered_instruments)) + expect_equal(nrow(filtered_instruments$demographics), 1) + expect_equal(as.numeric(filtered_instruments$demographics$record_id), 3) + # Check that label is preserved when filtering + expect_equal(attr(filtered_instruments$demographics$record_id, "label"), "Study ID") + + # Test filtering by record_id range - common use case + range_filtered <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_function = function(x) x |> dplyr::filter(record_id >= 2 & record_id <= 4), + return_list = TRUE + ) + + expect_true(all(range_filtered$demographics$record_id >= 2)) + expect_true(all(range_filtered$demographics$record_id <= 4)) +}) + +test_that("filter_instrument parameter works", { + # Test filter_instrument with demographics + filtered_instruments <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_instrument = "demographics", + filter_function = function(x) x |> dplyr::filter(name_last == "Nutmouse"), + return_list = TRUE + ) + + expect_type(filtered_instruments, "list") + expect_true("demographics" %in% names(filtered_instruments)) + + # Should only have records where name_last == "Nutmouse" + expect_true(all(filtered_instruments$demographics$name_last == "Nutmouse")) + expect_equal(nrow(filtered_instruments$demographics), 2) # Based on test data, should be 2 records +}) + +test_that("combined filtering works (two-step filter)", { + # Test the two-step filtering process: + # 1) filter filter_instrument and get record IDs + # 2) filter the rest of the instruments by record ID from step 1 + filtered_instruments <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_instrument = "demographics", + filter_function = function(x) x |> dplyr::filter(name_last == "Nutmouse"), + return_list = TRUE + ) + + expect_type(filtered_instruments, "list") + + # All instruments in the list should only contain records with IDs that match + # the filtered demographics instrument + demographics_record_ids <- filtered_instruments$demographics$record_id + + # Check that all instruments have the same record_ids + for (instrument_name in names(filtered_instruments)) { + instrument_data <- filtered_instruments[[instrument_name]] + if ("record_id" %in% names(instrument_data)) { + expect_true(all(instrument_data$record_id %in% demographics_record_ids), + info = paste("Instrument", instrument_name, "has incorrect record_ids")) + } } }) +test_that("error handling for invalid filter_instrument", { + # Test that invalid filter_instrument names produce appropriate errors + expect_error( + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_instrument = "nonexistent_instrument", + return_list = TRUE + ), + # Error message may vary depending on redquack implementation + regexp = ".*" # Accept any error message for now + ) +}) + +test_that("parameter combinations work correctly", { + # Test multiple parameters together + result <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_instrument = "demographics", + filter_function = function(x) x |> dplyr::filter(record_id <= 2), + return_list = TRUE, + labels = FALSE + ) + + expect_type(result, "list") + expect_true("demographics" %in% names(result)) + + # Check filtering worked + expect_true(all(result$demographics$record_id <= 2)) + + # Check labels were removed + expect_false(any(sapply(result$demographics, function(x) !is.null(attr(x, "label"))))) +}) + +test_that("edge cases handle gracefully", { + # Test filter that returns no results + empty_result <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_function = function(x) x |> dplyr::filter(record_id == 999), # Non-existent ID + return_list = TRUE + ) + + expect_type(empty_result, "list") + expect_true("demographics" %in% names(empty_result)) + expect_equal(nrow(empty_result$demographics), 0) + + # Test filtering by multiple record_id values + multi_record_result <- tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B", + filter_function = function(x) x |> dplyr::filter(record_id %in% c(1, 3, 5)), + return_list = TRUE + ) + + expect_true(all(multi_record_result$demographics$record_id %in% c(1, 3, 5))) +}) + +test_that("robust error handling works correctly", { + # Test malformed token error + expect_error( + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "bad_token" + ), + "The token does not conform with the regex" + ) + + # Test invalid token with correct format (403 error) + expect_error( + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7c" + ), + "Metadata read failed" + ) + + # Test network/hostname resolution error + expect_error( + tidyREDCap::import_instruments( + "https://nonexistent_redcap_server.com/api/", + "9A81268476645C4E5F03428B8AC3AA7B" + ), + "Metadata read failed.*Could not resolve host" + ) + + # Test 404 error with valid hostname but wrong path + expect_error( + tidyREDCap::import_instruments( + "https://httpbin.org/status/404", + "9A81268476645C4E5F03428B8AC3AA7B" + ), + "Metadata read failed" + ) +}) + +test_that("error handling preserves list vs environment behavior", { + # Test that list mode throws errors + expect_error( + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "bad_token", + return_list = TRUE + ), + "The token does not conform with the regex" + ) + + # Test that environment mode also throws errors for invalid parameters + expect_error( + tidyREDCap::import_instruments( + "https://bbmc.ouhsc.edu/redcap/api/", + "bad_token", + return_list = FALSE + ), + "The token does not conform with the regex" + ) +}) + + diff --git a/tests/testthat/test-make_yes_no.R b/tests/testthat/test-make_yes_no.R index 068c80a..bd966b2 100644 --- a/tests/testthat/test-make_yes_no.R +++ b/tests/testthat/test-make_yes_no.R @@ -27,3 +27,42 @@ test_that("logicals work", { make_yes_no(original_lgl), target) }) + +test_that("labels are preserved for strings", { + library(labelled) + original_with_label <- original_str + var_label(original_with_label) <- "Test question about agreement" + + result <- make_yes_no(original_with_label) + + expect_equal(var_label(result), "Test question about agreement") + expect_equal(as.character(result), as.character(target)) +}) + +test_that("labels are preserved for numerics", { + library(labelled) + original_with_label <- original_num + var_label(original_with_label) <- "Numeric yes/no response" + + result <- make_yes_no(original_with_label) + + expect_equal(var_label(result), "Numeric yes/no response") + expect_equal(as.character(result), as.character(target)) +}) + +test_that("labels are preserved for logicals", { + library(labelled) + original_with_label <- original_lgl + var_label(original_with_label) <- "Logical true/false response" + + result <- make_yes_no(original_with_label) + + expect_equal(var_label(result), "Logical true/false response") + expect_equal(as.character(result), as.character(target)) +}) + +test_that("function works when no label is present", { + result <- make_yes_no(original_str) + expect_equal(result, target) + expect_null(var_label(result)) +}) diff --git a/tests/testthat/test-make_yes_no_unknown.R b/tests/testthat/test-make_yes_no_unknown.R index a7f8cc2..4fce112 100644 --- a/tests/testthat/test-make_yes_no_unknown.R +++ b/tests/testthat/test-make_yes_no_unknown.R @@ -71,3 +71,66 @@ test_that("processing works across a df", { target_df ) }) + +test_that("labels are preserved for strings", { + library(labelled) + original_with_label <- original_str + var_label(original_with_label) <- "Test question about agreement" + + result <- make_yes_no_unknown(original_with_label) + + expect_equal(var_label(result), "Test question about agreement") + expect_equal(as.character(result), as.character(target)) +}) + +test_that("labels are preserved for numerics", { + library(labelled) + original_with_label <- original_num + var_label(original_with_label) <- "Numeric yes/no/unknown response" + + result <- make_yes_no_unknown(original_with_label) + + expect_equal(var_label(result), "Numeric yes/no/unknown response") + expect_equal(as.character(result), as.character(target)) +}) + +test_that("labels are preserved for logicals", { + library(labelled) + original_with_label <- original_lgl + var_label(original_with_label) <- "Logical true/false/unknown response" + + result <- make_yes_no_unknown(original_with_label) + + expect_equal(var_label(result), "Logical true/false/unknown response") + expect_equal(as.character(result), as.character(target)) +}) + +test_that("function works when no label is present", { + result <- make_yes_no_unknown(original_str) + expect_equal(result, target) + expect_null(var_label(result)) +}) + +test_that("labels are preserved in data frame processing", { + library(labelled) + + # Create labeled data + labeled_df <- original_df + var_label(labeled_df$original_str) <- "String responses" + var_label(labeled_df$original_num) <- "Numeric responses" + var_label(labeled_df$original_lgl) <- "Logical responses" + + # Process with function + result_df <- labeled_df |> + mutate(across(everything(), make_yes_no_unknown)) + + # Check that labels are preserved + expect_equal(var_label(result_df$original_str), "String responses") + expect_equal(var_label(result_df$original_num), "Numeric responses") + expect_equal(var_label(result_df$original_lgl), "Logical responses") + + # Check that values are correct + expect_equal(as.character(result_df$original_str), as.character(target)) + expect_equal(as.character(result_df$original_num), as.character(target)) + expect_equal(as.character(result_df$original_lgl), as.character(target)) +}) diff --git a/tidyREDCap.Rproj b/tidyREDCap.Rproj index 1788e68..da78db6 100644 --- a/tidyREDCap.Rproj +++ b/tidyREDCap.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 59ecf50a-ffa1-4e64-b310-7f6538b22cdc RestoreWorkspace: Default SaveWorkspace: Default diff --git a/vignettes/dropLabels.Rmd b/vignettes/dropLabels.Rmd deleted file mode 100644 index 2fcb496..0000000 --- a/vignettes/dropLabels.Rmd +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: "Drop Labels from a Table" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Drop Labels from a Table} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(tidyREDCap) -``` - -# The Problem - -The `tidyREDCap` package creates data sets with labelled columns. - -```{r eval=FALSE} -tidyREDCap::import_instruments( - url = "https://bbmc.ouhsc.edu/redcap/api/", - token = Sys.getenv("REDCapR_test") -) -``` - -```{r hidden-data-load, echo=FALSE} -demographics <- structure( - list( - record_id = c(1, 2, 3, 4, 5), - name_first = structure( - c("Nutmeg", "Tumtum", "Marcus", "Trudy", "John Lee"), - label = "First Name", - class = c("labelled", "character") - ), - name_last = structure( - c("Nutmouse", "Nutmouse", "Wood", "DAG", "Walker"), - label = "Last Name", - class = c("labelled", "character") - ), - address = structure( - c( - "14 Rose Cottage St.\nKenning UK, 323232", - "14 Rose Cottage Blvd.\nKenning UK 34243", - "243 Hill St.\nGuthrie OK 73402", - "342 Elm\nDuncanville TX, 75116", - "Hotel Suite\nNew Orleans LA, 70115" - ), - label = "Street, City, State, ZIP", - class = c("labelled", "character") - ), - telephone = structure( - c( - "(405) 321-1111", - "(405) 321-2222", - "(405) 321-3333", - "(405) 321-4444", - "(405) 321-5555" - ), - label = "Phone number", - class = c("labelled", "character") - ), email = structure( - c( - "nutty@mouse.com", - "tummy@mouse.comm", - "mw@mwood.net", - "peroxide@blonde.com", - "left@hippocket.com" - ), - label = "E-mail", - class = c("labelled", "character") - ), - dob = structure( - c(12294, 12121, -13051, -6269, -5375), - class = c("labelled", "Date"), - label = "Date of birth" - ), - age = structure( - c(11, 11, 80, 61, 59), - label = "Age (years)", - class = c("labelled", "numeric") - ), - sex = structure( - c("Female", "Male", "Male", "Female", "Male"), - label = "Gender", - class = c("labelled", "character") - ), - demographics_complete = structure( - c("Complete", "Complete", "Complete", "Complete", "Complete"), - label = "Complete?...10", - class = c("labelled", "character") - ) - ), - row.names = c(NA, -5L), - class = "data.frame" -) -``` - -If you would like to see the labels on the data set `demographics`, you can use the RStudio function `View()`, as shown below. -```{r eval=FALSE} -View(demographics) -``` - -![](./view_demog_w_labels_20230217.png){width=90%} - -However, some functions do not work well with labeled variables. -```{r skim-demo, error=TRUE} -library(skimr) # for the skim() function -demographics |> skim() -``` - -So you need a way to drop the label off of a variable or to drop all the labels from all the variables in a dataset. - -# The Solution - -You can drop the label from a single variable with the drop_label() function. For example: - -```{r} -demographics_changed <- drop_label(demographics, "first_name") -``` - -You can drop all the labels using the `drop_labels()` function. For example: - -```{r} -demographics_without_labels <- drop_labels(demographics) - -demographics_without_labels |> - skim() -``` - diff --git a/vignettes/import_instruments.Rmd b/vignettes/import_instruments.Rmd index f8525c4..c427690 100644 --- a/vignettes/import_instruments.Rmd +++ b/vignettes/import_instruments.Rmd @@ -3,8 +3,11 @@ title: "Import All Instruments from a REDCap Project" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Import All Instruments from a REDCap Project} - %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 --- ```{r, include = FALSE} @@ -26,16 +29,39 @@ p.caption { } ``` +# The Problem -# The Problem -Suppose you have a REDCap project with many instruments; some instruments are administered in some visits but not in others. In that case, the export from REDCap will have empty cells for the visits in which the instrument was not used. For example, Figure 1 shows a study where subjects completed four instruments (Enrollment, NCI, GAD7, and Hamilton) at baseline. On days 1, 2, and 3, they completed only Hamilton. At the end of the study, they completed NCI and Hamilton. The export of the data for this subject will have five records (one for each visit), and each record will have a cell for every existing instrument. However, the “NCI” record will only have values for the baseline and final visit and empty cells for the visits in between. On the other hand, there will be no empty spaces for the “Hamilton” record as it was completed in all the visits. It would be good to have a function that will export all the data from a project and produce one R table for each instrument. Those tables should remove the blank records. -\ +Suppose you have a REDCap project with many instruments; some +instruments are administered in some visits but not in others. In that +case, the export from REDCap will have empty cells for the visits in +which the instrument was not used. For example, Figure 1 shows a study +where subjects completed four instruments (Enrollment, NCI, GAD7, and +Hamilton) at baseline. On days 1, 2, and 3, they completed only +Hamilton. At the end of the study, they completed NCI and Hamilton. The +export of the data for this subject will have five records (one for each +visit), and each record will have a cell for every existing instrument. +However, the “NCI” record will only have values for the baseline and +final visit and empty cells for the visits in between. On the other +hand, there will be no empty spaces for the “Hamilton” record as it was +completed in all the visits. It would be good to have a function that +will export all the data from a project and produce one R table for each +instrument. Those tables should remove the blank records.\ \ ![](./dashboard.jpg) -

-Figure 1

-The same functionality should help deal with instruments that are potentially given repeatedly. Common examples include asking participants to fill out a form describing medical conditions for all their siblings or asking them to fill out a form for each side effect they experience while using a drug. In these cases, each participant may have zero or many records. Again, creating a table with all the records for these “repeated” instruments would be good. -\ + +

+ +Figure 1 + +

+ +The same functionality should help deal with instruments that are +potentially given repeatedly. Common examples include asking +participants to fill out a form describing medical conditions for all +their siblings or asking them to fill out a form for each side effect +they experience while using a drug. In these cases, each participant may +have zero or many records. Again, creating a table with all the records +for these “repeated” instruments would be good.\ \ # The Solution @@ -43,21 +69,36 @@ The same functionality should help deal with instruments that are potentially gi ```{r eval=FALSE} # Do not type your API token directly into your code tidyREDCap::import_instruments( - url = "https://redcap.miami.edu/api/", - token = "1A2B3CXXYYZZOOMMGOSHNOOOOX1Y2Z3" # This is BAD! + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = "9A81268476645C4E5F03428B8AC3AA7B" # This is BAD! +) + +# A better way to do this is to read the API key from the .Renviron file +# For instructions on saving your API key, see link below +tidyREDCap::import_instruments( + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = Sys.getenv("redcap_token") # This is BETTER! ) -# A better way to do this is to read the API key from the .Renviron file. -# For instructions on saving your API key, see link below. +# Another great way to do this is to read the API key from keyring package +# Save your API key via keyring::key_set("redcap_token") tidyREDCap::import_instruments( - url = "https://redcap.miami.edu/api/", - token = Sys.getenv("nacho_anxiety_key") # This is BETTER! + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = keyring::key_get("redcap_token") # This is GREAT! ) ``` -See the [Importing from REDCap](../doc/useAPI.html) vignette for details/information for saving your API key in the .Renviron file. +See the [Importing from REDCap](../doc/useAPI.html) vignette for +details/information for saving your API key in the .Renviron file. -The `import_instruments()` function can be given a URL and token for a REDCap project, like the one created above and it will return one table for each instrument in a project. By default, the function will drop all empty records. For example, the above API call is pulling data from a REDCap project that has four instruments: Enrollment, the Nacho Craving Index (NCI), the Generalized Anxiety Disorder Assessment (GAD7), and the Hamilton Anxiety Scale (HAM-A). +The `import_instruments()` function can be given a URL and token for a +REDCap project, like the one created above and it will return one table +for each instrument in a project. By default, the function will drop all +empty records and apply variable labels to the columns. For example, the +above API call is pulling data from a REDCap project that has four +instruments: Enrollment, the Nacho Craving Index (NCI), the Generalized +Anxiety Disorder Assessment (GAD7), and the Hamilton Anxiety Scale +(HAM-A). After running the above code we get four tables from the REDCap project. @@ -67,4 +108,179 @@ Notice that each repeat of the HAM-A is its own record. ![](./hama.jpg) -If a person has only done the baseline assessment they will only have one record. +If a person has only done the baseline assessment they will only have +one record. + +## Controlling Variable Labels + +By default, `import_instruments()` applies descriptive variable labels +to columns based on the field labels from your REDCap project. However, +you can disable this behavior if they labels interfere with other +functions or you prefer to work with unlabeled data: + +```{r eval=FALSE} +# Import instruments without variable labels +tidyREDCap::import_instruments( + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = Sys.getenv("redcap_token"), + labels = FALSE +) +``` + +This can be useful when labels interfere with other functions or you +prefer working with unlabeled data frames. + +You can also remove labels from data that has already been imported using the `drop_labels()` function: + +```{r eval=FALSE} +# Remove labels from imported data +demographics_without_labels <- drop_labels(demographics) + +# Or remove a single label +demographics_no_age_label <- drop_label(demographics, "age") +``` + +## Working with Lists Instead of Environment Tables + +By default, `import_instruments()` creates separate tables in your +global environment for each instrument. However, you can instead return +all instruments as a named list using the `return_list = TRUE` +parameter: + +```{r eval=FALSE} +# Import instruments as a named list +instruments <- tidyREDCap::import_instruments( + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = Sys.getenv("redcap_token"), + return_list = TRUE +) + +# Access individual instruments from the list +demographics <- instruments$demographics +health <- instruments$health +race_and_ethnicity <- instruments$race_and_ethnicity + +# Or work with all instruments +purrr::walk(instruments, ~cli::cli_inform("Instrument has {nrow(.x)} records")) +#> Instrument has 5 records +#> Instrument has 5 records +#> Instrument has 5 records +``` + +Using `return_list = TRUE` is beneficial when you: - Want to keep +instruments organized in a single object - Need to programmatically work +with multiple instruments - Are building functions that process REDCap +data - Want to avoid cluttering your global environment + +## Memory Management for Large Projects + +The `import_instruments()` function includes built-in memory management +features to help with large REDCap projects. When you import a large +project, you'll see informative messages about the project size: + +- **Large projects** (25+ million data elements): You'll get + suggestions to use filtering +- **Very large projects** (100+ million data elements): You'll get + warnings about potential memory issues + +The function uses an efficient in-memory database +([DuckDB](https://duckdb.org)) to process your data, which helps manage +memory usage. However, for very large projects, you may want to use the +filtering capabilities described in the next section. + +## Filtering Large Projects + +For large projects or when you only need a subset of your data, you can +use the `filter_function` and `filter_instrument` parameters to reduce +memory usage and improve performance. + +### Filter All Instruments Based on One Instrument + +Use `filter_instrument` and `filter_function` together to filter all +instruments based on criteria from a specific instrument (e.g., +demographics): + +```{r eval=FALSE} +# Import only participants 18 years or older based on demographics instrument +instruments <- tidyREDCap::import_instruments( + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = Sys.getenv("redcap_token"), + filter_instrument = "demographics", + filter_function = function(x) x |> dplyr::filter(age >= 18) +) +``` + +This approach: 1. First applies the filter to the `demographics` +instrument 2. Gets the `record_id` values from the filtered demographics +data 3. Uses those record IDs to filter ALL other instruments 4. Only +imports records that meet your criteria + +### Filter Each Instrument Individually + +If you don't specify `filter_instrument`, the `filter_function` will be +applied to each instrument separately: + +```{r eval=FALSE} +# Apply the same filter to each instrument individually +instruments <- tidyREDCap::import_instruments( + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = Sys.getenv("redcap_token"), + filter_function = \(x) x |> filter(record_id == 3) +) +``` + +### Benefits of Filtering + +Filtering is especially useful for: + +- **Large projects**: Reducing memory usage and import time +- **Longitudinal studies**: Importing only +- **Multi-site studies**: Importing data +from specific sites +- **Quality control**: Excluding incomplete or +problematic records + +The filtering happens at the database level before data is loaded into R +memory, making it very efficient even for large datasets. + +## Exploring Your Data with Codebooks + +After importing instruments, you can generate structured documentation using the `codebook()` function: + +```{r eval=FALSE} +# Generate codebook for an entire instrument +codebook(demographics) + +# Generate codebook for a specific variable +codebook(demographics$age) +``` + +The codebook displays variable labels, data types, and value labels for categorical variables, helping you understand your REDCap data structure. + +## Converting Coded Values to Labels + +REDCap often stores categorical data as numeric codes (e.g., 1, 2, 3) with corresponding labels (e.g., "Male", "Female", "Non-binary"). The `codebook_convert()` function transforms these coded values into their descriptive labels: + +```{r eval=FALSE} +# Convert a single column (both options work the same way) +codebook_convert(demographics$sex) +codebook_convert(demographics, "sex") + +# For data.frame operations, use standard dplyr patterns: +demographics |> + mutate(sex = codebook_convert(sex)) + +# Convert multiple columns at once using the has_redcap_values() helper +demographics |> + mutate(across(where(has_redcap_values), codebook_convert)) + +# Convert specific columns +demographics |> + mutate( + sex = codebook_convert(sex), + race = codebook_convert(race), + ethnicity = codebook_convert(ethnicity) + ) +``` + +The `has_redcap_values()` helper function identifies which columns contain REDCap value labels, making it easy to selectively convert only the appropriate columns. diff --git a/vignettes/makeBinaryWord.Rmd b/vignettes/makeBinaryWord.Rmd index f52be82..a09c3e4 100644 --- a/vignettes/makeBinaryWord.Rmd +++ b/vignettes/makeBinaryWord.Rmd @@ -31,16 +31,16 @@ REDCap exports a "choose all that apply" question into a series of similarly-nam In REDCap, it is simple to get a summary of those individual variables by using the "Data Exports, Reports, and Stats" application within the REDCap interface and selecting "Stats & Charts". Once the data is in R, simple tables can be produced with the `table()` function, or beautiful tables can be created with the `tabyl()` and `adorn_pct_formatting()` functions from the `janitor` package. However, from these univariate tables, it is impossible to judge which patterns of answers are marked together. In the above example, using the univariate tables, it is difficult to tell what percentage of people are craving both chips and yellow cheese. ```{r univariate, warning=FALSE} -redcap <- readRDS(file = "./redcap.rds") +sample_data <- readRDS(file = "./sample_race_ethnicity.rds") -# Chips -janitor::tabyl(redcap$ingredients___1) %>% - janitor::adorn_pct_formatting() %>% +# American Indian/Alaska Native +janitor::tabyl(sample_data$race___1) |> + janitor::adorn_pct_formatting() |> knitr::kable() -# Yellow cheese -janitor::tabyl(redcap$ingredients___2) %>% - janitor::adorn_pct_formatting() %>% +# Asian +janitor::tabyl(sample_data$race___2) |> + janitor::adorn_pct_formatting() |> knitr::kable() ``` @@ -53,10 +53,10 @@ See the [Import All Instruments from a REDCap Project](../doc/importInstruments. Even after subsetting the REDCap data to only include the ingredients variables, it is still difficult to detect common patterns in the eight ingredients. ```{r loadData} -redcap <- readRDS(file = "./redcap.rds") +sample_data <- readRDS(file = "./sample_race_ethnicity.rds") -analysis <- redcap %>% - select(starts_with("ingredients___")) +analysis <- sample_data |> + select(starts_with("race___")) knitr::kable(tail(analysis)) @@ -69,7 +69,7 @@ knitr::kable(tail(analysis)) ## Default Lettering The `make_binary_word()` function combines responses from the individual variables into a single "word" that indicates which choices were selected. For example, if the first option from the NCI ingredient question, *chips* (i.e., `ingredients___1`), was checked, the word created by `make_binary_word()` will begin with *a*; or if it was not checked, the word would start with *\_*. If the second option, *Yellow cheese* (i.e., `ingredients___2`), was checked, the next letter will be a *b*; otherwise, a *\_* will be used as a placeholder. Following this pattern, if somebody is not craving any of the eight nacho ingredients, the "word" will be eight underscores, one for each ingredient (i.e., \_\_\_\_\_\_\_\_). Conversely, if they are craving every ingredient, the "word" will be *abcdefgh*. -```{r nachoExample} +```{r raceExample} patterns <- make_binary_word(analysis) janitor::tabyl(patterns) ``` diff --git a/vignettes/makeChooseAllTable.Rmd b/vignettes/makeChooseAllTable.Rmd index 703a5c2..cc48ca6 100644 --- a/vignettes/makeChooseAllTable.Rmd +++ b/vignettes/makeChooseAllTable.Rmd @@ -24,11 +24,11 @@ library(dplyr) # The Problem -REDCap exports a "choose all that apply" question into a series of similarly-named, binary indicator variables (i.e., the variables are equal to either "checked" or "unchecked"). For example, the following data represents a sample of responses to the Nacho Craving Index. +REDCap exports a "choose all that apply" question into a series of similarly-named, binary indicator variables (i.e., the variables are equal to either "checked" or "unchecked"). For example, the following data represents a sample of responses to a race/ethnicity question with multiple checkboxes. ```{r} -redcap <- readRDS(file = "./redcap.rds") -redcap %>% - select(starts_with("ingredients___")) %>% +sample_data <- readRDS(file = "./sample_race_ethnicity.rds") +sample_data |> + select(starts_with("race___")) |> head() ``` It is desirable to have a concise table showing how often each option was chosen. @@ -42,13 +42,13 @@ See the [Import All Instruments from a REDCap Project](../doc/importInstruments. If you pass the `make_choose_all_table()` function, the name of a REDCap export, and the name of the _choose all that apply question_ question in REDCap, it will produce a concise frequency count table. ```{r show_results} -make_choose_all_table(redcap, "ingredients") +make_choose_all_table(sample_data, "race") ``` Similar to the `make_choose_one_table()` function, we can use this function inside an analysis pipeline. We can add the `kable()` call to make the table publication quality. ```{r show_results_pretty, results='asis'} -redcap %>% - make_choose_all_table("ingredients") %>% +sample_data |> + make_choose_all_table("race") |> knitr::kable() ``` diff --git a/vignettes/makeChooseOneTable.Rmd b/vignettes/makeChooseOneTable.Rmd index d6b4729..d6d632f 100644 --- a/vignettes/makeChooseOneTable.Rmd +++ b/vignettes/makeChooseOneTable.Rmd @@ -24,15 +24,15 @@ library(tidyREDCap) # The Problem It is often desirable to print variable labels above a summary table that shows the count of factor labels. The labels exported on _choose all that apply_ questions, including the question and whichever response was chosen. This redundancy is often unwanted, and the results are not presented professionally. -For example, in the Nacho Craving Index data, the first ingredient is "Chips". We see how R presents this information by simply printing the components of the `ingredients___1` column. +For example, in the race/ethnicity data, the first race option is "American Indian/Alaska Native". We see how R presents this information by simply printing the components of the `race___1` column. ```{r} -redcap <- readRDS(file = "./redcap.rds") -redcap$ingredients___1 +sample_data <- readRDS(file = "./sample_race_ethnicity.rds") +sample_data$race___1 ``` -As we can see, this information is quite ugly, so we want to tabulate the results instead. However, if we use the simple `table()` function to clean up this information, we lose the original question and the answer label for `ingredients___1`. +As we can see, this information is quite ugly, so we want to tabulate the results instead. However, if we use the simple `table()` function to clean up this information, we lose the original question and the answer label for `race___1`. ```{r} -table(redcap$ingredients___1) +table(sample_data$race___1) ``` We no longer know what the question was or which "select all" option this information represents. @@ -42,28 +42,28 @@ See the [Import All Instruments from a REDCap Project](../doc/importInstruments. # The Solution The `make_choose_one_table()` function can be used with a factor variable to tabulate the response *while preserving the question and checked option context*. ```{r example_raw} -make_choose_one_table(redcap$ingredients___1) +make_choose_one_table(sample_data$race___1) ``` Further, this output can be molded into a publication-ready table with a single additional function call. ```{r example_pretty, results="asis"} -make_choose_one_table(redcap$ingredients___1) %>% +make_choose_one_table(sample_data$race___1) |> knitr::kable() ``` -The `subset` option, if set to `TRUE,` will cause the function to remove the label's text and only show the response option (i.e., not repeat the "What ingredients do you currently crave?" question). +The `subset` option, if set to `TRUE,` will cause the function to remove the label's text and only show the response option (i.e., not repeat the question). ```{r example2, results="asis", } make_choose_one_table( - redcap$ingredients___2, + sample_data$race___2, subset = TRUE -) %>% +) |> knitr::kable() ``` This function can also be used in an analysis pipeline with a data frame name and the name of the factor inside that data frame. For example: ```{r example3, results="asis", fig.align="left"} -redcap %>% - make_choose_one_table(ingredients___3) %>% +sample_data |> + make_choose_one_table(race___3) |> knitr::kable() ``` diff --git a/vignettes/makeInstrument.Rmd b/vignettes/makeInstrument.Rmd index 2e91ab3..455a641 100644 --- a/vignettes/makeInstrument.Rmd +++ b/vignettes/makeInstrument.Rmd @@ -24,24 +24,27 @@ library(dplyr) # The Problem -REDCap exports longitudinal projects with one record (a line of data) per assessment (typically 1 line per day). This works well when every instrument/questionnaire is given at every assessment. Still, for projects with different instruments/questionnaires given on different days, REDCap exports empty values (represented by the value `NA` in R). +REDCap exports data from projects with multiple instruments, where some participants may have completed different instruments. This can result in incomplete records where some instruments have missing data (represented by the value `NA` in R). -> **Example:** In the Nachos for Anxiety project, three instruments were used; they each had a different administration schedule. Subjects' anxiety was assessed at baseline with the Generalized Anxiety Disorder 7-item (GAD-7) scale. Every day, it was assessed with the Hamilton Anxiety Scale (HAM-A), but the Nacho Craving Index was administered only at the baseline and at the end of the study (see the figure below for clarification). -![](./schedule.png) - -The instruments that are not assessed every day appear as entirely blank questionnaires when the data is exported. For example, values from the NCI instrument are shown as missing for Day 1, Day 2, and Day 3 (because it was not administered during those visits). +> **Example:** In a typical survey project, participants may complete different sets of questionnaires. Some may complete all sections (demographics, health, race/ethnicity), while others may only complete certain sections. This results in incomplete records for some instruments. In R, this data is displayed as ```{r} -redcap <- readRDS(file = "./redcap_nacho_anxiety.rds") +sample_data <- readRDS(file = "./sample_survey_data.rds") + +# To demonstrate the make_instrument function properly, +# let's simulate incomplete records by setting some health instrument values to NA +sample_data <- sample_data +sample_data[c(2, 5), + c("height", "weight", "bmi", "comments", "mugshot", "health_complete")] <- NA -redcap %>% +sample_data |> select( - # Select these two columns - record_id, redcap_event_name, - # And also select all columns between "nachos" and "nci_complete" - nachos:nci_complete - ) %>% + # Select record ID and health instrument columns + record_id, + # Select all columns for the health instrument + height:health_complete + ) |> # Make the table pretty knitr::kable() ``` @@ -51,31 +54,32 @@ It is often helpful to make a different data table that has the values for each ## Aside: Loading REDCap Data into R See the [Import All Instruments from a REDCap Project](../doc/importInstruments.html) and [Importing from REDCap](../doc/useAPI.html) vignettes for details/information. - # The Solution -Pass the `make_instrument()` function to the name of a dataset and the names of the first and last variables in an instrument, and it will return a table that has the non-empty records for the instrument. For example, to extract the enrollment/consent instrument: +Pass the `make_instrument()` function to the name of a dataset and the names of the first and last variables in an instrument, and it will return a table that has the non-empty records for the instrument. For example, to extract the demographics instrument: ```{r} -make_instrument(redcap, "concented", "enrollment_complete") %>% +make_instrument(sample_data, "name_first", "demographics_complete") |> knitr::kable() ``` -To extract nacho craving information: +To extract health information: ```{r} -make_instrument(redcap, "nachos", "nci_complete") %>% +make_instrument(sample_data, "height", "health_complete") |> knitr::kable() ``` -To make an analysis dataset containing the NCI values **without** the subject ID and the event name: +Notice that records 2 and 5 are excluded from the health instrument table because they have completely missing health data. The `make_instrument()` function automatically removes records where all instrument variables are `NA` or empty, keeping only records with at least some data for that instrument. + +To make an analysis dataset containing the race/ethnicity values **without** the subject ID: ```{r} make_instrument( - redcap, - "nachos", "nci_complete", + sample_data, + "race___1", "race_and_ethnicity_complete", drop_which_when = TRUE -) %>% +) |> knitr::kable() ``` diff --git a/vignettes/redcap.rds b/vignettes/redcap.rds deleted file mode 100644 index 5722cd4..0000000 Binary files a/vignettes/redcap.rds and /dev/null differ diff --git a/vignettes/redcap_nacho_anxiety.rds b/vignettes/redcap_nacho_anxiety.rds deleted file mode 100644 index c5dc019..0000000 Binary files a/vignettes/redcap_nacho_anxiety.rds and /dev/null differ diff --git a/vignettes/sample_data_labeled.rds b/vignettes/sample_data_labeled.rds new file mode 100644 index 0000000..c10d11f Binary files /dev/null and b/vignettes/sample_data_labeled.rds differ diff --git a/vignettes/sample_demographics.rds b/vignettes/sample_demographics.rds new file mode 100644 index 0000000..c10d11f Binary files /dev/null and b/vignettes/sample_demographics.rds differ diff --git a/vignettes/sample_race_ethnicity.rds b/vignettes/sample_race_ethnicity.rds new file mode 100644 index 0000000..30041a0 Binary files /dev/null and b/vignettes/sample_race_ethnicity.rds differ diff --git a/vignettes/sample_survey_data.rds b/vignettes/sample_survey_data.rds new file mode 100644 index 0000000..b22ffec Binary files /dev/null and b/vignettes/sample_survey_data.rds differ diff --git a/vignettes/useAPI.Rmd b/vignettes/useAPI.Rmd index 401e1f6..17a5e8f 100644 --- a/vignettes/useAPI.Rmd +++ b/vignettes/useAPI.Rmd @@ -19,17 +19,12 @@ library(tidyREDCap) ``` # Loading REDCap Data into R with an API request -While most people use the "Data Exports, Reports and Stats" Application built into REDCap, another handy method to get data out of REDCap is an API request. An API allows one program to request data from another program. For example, an add-on package in R can request data from your instance of REDCap. Obviously, an R package can not *type* your user name and password. Instead, you will store an API token, a password, on your machine and then ask R to look up that token and pass it to REDCap whenever you want data. -We have used two R packages to access our REDCap projects,`redcapAPI` and `REDCapR`. Unfortunately, `redcapAPI` is no longer being actively developed, and we have run into problems with it. It had the lovely benefit of exporting variables, basically using the same variable names as you see in REDCap, then tagging the variables with the "labels" subjects viewed when completing the forms. - -that people taking REDCap surveys or viewing other forms see. - -We have taken the labeling functionality and added it in tidyREDCap. +While most people use the "Data Exports, Reports and Stats" Application built into REDCap, another handy method to get data out of REDCap is an API request. An API allows one program to request data from another program. For example, an package in R can request data from your instance of REDCap. To access it, you will store an API token on your machine and then ask R to look up that token and pass it to REDCap whenever you want data. ### Getting an API Token -If your REDCap project has API access enabled, you will see it in the applications on the left side of the screen. +If your REDCap project has API access enabled, you will see it in the applications on the left side of the screen. ![](./key.jpg) @@ -45,13 +40,13 @@ Leaders in the REDCap community have developed techniques for safely storing you #### NEVER DO THIS! -The functions that allow you to export data need you to give them your API token. Remember, this is the same information as your username and password. **NEVER** type that directly in your code. That is if your REDCap API key is "1A2B3CXXYYZZOOMMGOSHNOOOOX1Y2Z3" do NOT do this: +The functions that allow you to export data need you to give them your API token. Remember, this is the same information as your username and password. **NEVER** type that directly in your code. That is if your REDCap API key is "9A81268476645C4E5F03428B8AC3AA7B" do NOT do this: ```r # Do not type your API token directly into your code tidyREDCap::import_instruments( - "https://redcap.miami.edu/api/", - "1A2B3CXXYYZZOOMMGOSHNOOOOX1Y2Z3" # This is BAD! + "https://bbmc.ouhsc.edu/redcap/api/", + "9A81268476645C4E5F03428B8AC3AA7B" # This is BAD! ) ``` @@ -62,11 +57,11 @@ If you do this, anybody who gets a copy of your code will be able to access your You can save your API keys into a "hidden" file containing code that runs when you start R. That file is called you ".Renviron". It can be a bit of a pain to find. So your best plan is to install the `usethis` package, which contains helper functions, including a function to find this file. If you use the RStudio interface, you can add it using the Packages window pane or run these lines in the console once. ```r -install.packages("remotes") -remotes::install_cran("usethis") +install.packages("pak") +pak::pak("usethis") ``` -When it comes time to add packages to your copy of R, the `install_cran()` function in the `remotes` package is superior to the usual `install.packages()` function because it will first check to see if you already have the latest version before bothering to download and install. +When it comes time to add packages to your copy of R, the `pak()` function in the `pak` package is superior to the usual `install.packages()`. After installing `usethis` you can access your ".Renviron" file by typing this in your console. @@ -79,7 +74,7 @@ It will cause the file to open. Create a name for your API key and add a like like this to your .Renviron file: ``` -nacho_anxiety_key="1A2B3CXXYYZZOOMMGOSHNOOOOX1Y2Z3" +redcap_token="9A81268476645C4E5F03428B8AC3AA7B" ``` After adding the line, remember to save the file and completely restart R/RStudio. @@ -88,40 +83,61 @@ Once R restarts, you can access the key like this: ```r tidyREDCap::import_instruments( - "https://redcap.miami.edu/api/", - Sys.getenv("nacho_anxiety_key") + "https://bbmc.ouhsc.edu/redcap/api/", + Sys.getenv("redcap_token") ) ``` -If you want to use the redcapAPI or the REDCapR packages directly you can use the same trickery to pass your API key to their functions. For example: +This includes a call to `Sys.getenv()` to grab the key. To learn more about working with APIs, look [here](https://daattali.gitbooks.io/stat545-ubc-github-io/bit003_api-key-env-var.html). -```r -rcon <- redcapAPI::redcapConnection( - url = 'https://redcap.miami.edu/api/', - token = Sys.getenv("nacho_anxiety_key") + +If you are curious, when we made these help files, we generated sample data using the `tidyREDCap::import_instruments()` function and saved it for use in our examples. + +```{r getData, eval=FALSE} +# Generate sample data for vignettes +instruments <- tidyREDCap::import_instruments( + url = 'https://bbmc.ouhsc.edu/redcap/api/', + token = Sys.getenv("redcap_token"), + return_list = TRUE ) -redcap <- redcapAPI::exportRecords(rcon) +# Save sample data files +saveRDS(instruments$demographics, file = "sample_data_labeled.rds") +saveRDS(instruments$race_and_ethnicity, file = "sample_race_ethnicity.rds") ``` -This includes a call to `Sys.getenv()` to grab the key. To learn more about working with APIs, look [here](https://daattali.gitbooks.io/stat545-ubc-github-io/bit003_api-key-env-var.html). +
+
+### Even Better Options -If you are curious, when we made these help files, we saved the data using the `saveRDS()` function. +If somebody gets access to the files on your machine, they could find and read your .Renviron file. A more secure option is to use the `keyring` package. It will store an encrypted copy of your API key in your machine's credential store (i.e., the "keychain" on macOS, the Credential Store on Windows, etc.). -```{r getData, eval=FALSE} -rcon <- redcapAPI::redcapConnection( - url = 'https://redcap.miami.edu/api/', - token = Sys.getenv("nacho_anxiety_key") -) +#### Using keyring for Secure Token Storage -redcap <- redcapAPI::exportRecords(rcon) +First, install the keyring package: -saveRDS(redcap, file = "redcap.rds") +```r +install.packages("keyring") ``` -
-
+Set up your API token securely (run this once): -### Even Better Options -If somebody gets access to the files on your machine, they could find and read your .Renviron file. A more secure option is to use the r `keyring` package. It will store an encrypted copy of your API key in your machine's credential store (i.e., the "keychain" on macOS, the Credential Store on Windows, etc.). Consider using it. If you use it and your machine is stolen, it will buy you more time to find an internet connection, log into REDCap and change your API tokens (before the thief can access your data). \ No newline at end of file +```r +library(keyring) +keyring::key_set("redcap", "project") +``` + +Then use it in your REDCap imports: + +```r +library(keyring) +library(tidyREDCap) + +import_instruments( + url = "https://bbmc.ouhsc.edu/redcap/api/", + token = keyring::key_get("redcap", "project") +) +``` + +This approach stores your token in your system's secure credential store. If your machine is stolen, it will buy you more time to find an internet connection, log into REDCap and change your API tokens (before the thief can access your data). \ No newline at end of file