diff --git a/DESCRIPTION b/DESCRIPTION index c637efa4..d8bb60e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,8 @@ Imports: tidyselect, formattable, pillar, - vctrs + vctrs, + readr Suggests: covr, knitr, @@ -41,7 +42,6 @@ Suggests: lintr, openxlsx2 (>= 0.8), prettyunits, - readr, rmarkdown, skimr, testthat (>= 3.0.0), diff --git a/NAMESPACE b/NAMESPACE index 37c2d663..bd72cbd5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,8 @@ importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(formattable,percent) importFrom(lobstr,obj_size) +importFrom(lubridate,is.Date) +importFrom(lubridate,is.POSIXt) importFrom(lubridate,is.difftime) importFrom(lubridate,is.period) importFrom(pillar,tbl_sum) @@ -67,6 +69,13 @@ importFrom(purrr,map_lgl) importFrom(purrr,pluck) importFrom(purrr,pmap_chr) importFrom(purrr,some) +importFrom(readr,parse_character) +importFrom(readr,parse_date) +importFrom(readr,parse_datetime) +importFrom(readr,parse_double) +importFrom(readr,parse_integer) +importFrom(readr,parse_logical) +importFrom(readr,parse_time) importFrom(rlang,"!!!") importFrom(rlang,.data) importFrom(rlang,abort) @@ -117,4 +126,5 @@ importFrom(tidyselect,eval_select) importFrom(tidyselect,everything) importFrom(tidyselect,starts_with) importFrom(tidyselect,where) +importFrom(vctrs,vec_ptype) importFrom(vctrs,vec_ptype_abbr) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index d24682de..e4b3a88e 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -8,7 +8,7 @@ #' left_join mutate pull recode relocate rename row_number select slice summarise #' @importFrom formattable percent #' @importFrom lobstr obj_size -#' @importFrom lubridate is.difftime is.period +#' @importFrom lubridate is.difftime is.period is.POSIXt is.Date #' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some #' @importFrom REDCapR redcap_arm_export redcap_event_instruments redcap_instruments #' redcap_metadata_read redcap_read_oneshot sanitize_token @@ -23,8 +23,10 @@ #' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider #' @importFrom tidyselect all_of any_of ends_with eval_select everything #' starts_with where -#' @importFrom vctrs vec_ptype_abbr +#' @importFrom vctrs vec_ptype_abbr vec_ptype #' @importFrom pillar tbl_sum +#' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time +#' parse_datetime parse_character "_PACKAGE" ## usethis namespace: start diff --git a/R/read_redcap.R b/R/read_redcap.R index 93175fbb..4c1f627a 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -36,9 +36,10 @@ #' "https://server.org/apps/redcap/api/"). Required. #' @param token The user-specific string that serves as the password for a #' project. Required. -#' @param raw_or_label A string (either 'raw' or 'label') that specifies whether +#' @param raw_or_label A string (either 'raw', 'label', or 'haven') that specifies whether #' to export the raw coded values or the labels for the options of categorical -#' fields. Default is 'label'. +#' fields. Default is 'label'. If 'haven' is supplied, categorical fields are converted +#' to `haven_labelled` vectors. #' @param forms A character vector of REDCap instrument names that specifies #' which instruments to import. Default is `NULL` which imports all instruments #' in the project. @@ -84,13 +85,17 @@ read_redcap <- function(redcap_uri, check_arg_is_character(redcap_uri, len = 1, any.missing = FALSE) check_arg_is_character(token, len = 1, any.missing = FALSE) check_arg_is_valid_token(token) - check_arg_choices(raw_or_label, choices = c("label", "raw")) + check_arg_choices(raw_or_label, choices = c("label", "raw", "haven")) check_arg_is_character(forms, min.len = 1, null.ok = TRUE, any.missing = FALSE) check_arg_is_logical(export_survey_fields, len = 1, any.missing = FALSE, null.ok = TRUE) check_arg_is_logical(export_data_access_groups, len = 1, any.missing = FALSE, null.ok = TRUE) check_arg_is_logical(suppress_redcapr_messages, len = 1, any.missing = FALSE) check_arg_is_logical(allow_mixed_structure, len = 1, any.missing = FALSE) + if (raw_or_label == "haven") { + check_installed("labelled", reason = "to use `read_redcap()` with `raw_or_label = 'haven'`") + } + # Load REDCap Metadata ---- # Capture unexpected metadata API call errors db_metadata <- try_redcapr({ @@ -251,8 +256,8 @@ read_redcap <- function(redcap_uri, filter(.data$field_name_updated %in% names(db_data)) } - if (raw_or_label == "label") { - db_data <- multi_choice_to_labels(db_data, db_metadata) + if (raw_or_label != "raw") { + db_data <- multi_choice_to_labels(db_data, db_metadata, raw_or_label) } # Longitudinal Arms Check and Cleaning Application ---- diff --git a/R/utils.R b/R/utils.R index c8c7cc23..c0334b3e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -397,10 +397,16 @@ update_data_col_names <- function(db_data, db_metadata) { #' #' @param db_data A REDCap database object #' @param db_metadata A REDCap metadata object +#' @inheritParams read_redcap #' #' @keywords internal -multi_choice_to_labels <- function(db_data, db_metadata) { +multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label") { + if (raw_or_label == "label") { + label_handler <- apply_labs_factor + } else if (raw_or_label == "haven") { + label_handler <- apply_labs_haven + } # form_status_complete Column Handling ---- # Must be done before the creation of form_status_complete # select columns that don't appear in field_name_updated and end with @@ -411,28 +417,9 @@ multi_choice_to_labels <- function(db_data, db_metadata) { db_data <- db_data %>% mutate( - # Change double output of raw data to character - across( - .cols = all_of(form_status_cols), - .fns = ~ as.character(.) - ), - # Map constant values to raw values across( .cols = all_of(form_status_cols), - .fns = ~ case_when( - . == "0" ~ "Incomplete", - . == "1" ~ "Unverified", - . == "2" ~ "Complete" - ) - ), - # Convert to factor - # Map constant values to raw values - across( - .cols = all_of(form_status_cols), - .fns = ~ factor( - ., - levels = c("Incomplete", "Unverified", "Complete") - ) + .fns = ~ label_handler(., c("0" = "Incomplete", "1" = "Unverified", "2" = "Complete"), integer(0)) ) ) @@ -485,15 +472,126 @@ multi_choice_to_labels <- function(db_data, db_metadata) { # Replace values from db_data$(field_name) with label values from # parse_labels key - db_data[[field_name]] <- db_data[[field_name]] %>% - as.character() %>% - recode(!!!parse_labels_output) %>% - factor(levels = unique(parse_labels_output)) + + db_data[[field_name]] <- label_handler( + x = db_data[[field_name]], + labels = parse_labels_output, + ptype = db_data[[field_name]] + ) } } db_data } +#' @title +#' Apply factor labels to a vector +#' +#' @details +#' Dots are needed to ignore `ptype` argument that may be passed to `apply_labs_haven` +#' +#' +#' @param x a vector to label +#' @param labels a named vector of labels in the format `c(value = label)` +#' @param \dots unused, needed to ignore extra arguments that may be passed +#' +#' @return +#' factor +#' +#' @keywords internal +apply_labs_factor <- function(x, labels, ...) { + as.character(x) %>% + recode(!!!labels) %>% + factor(levels = unique(labels)) +} + +#' @title +#' Apply haven value labels to a vector +#' +#' @details +#' Assumes a check_installed() has been run for `labelled`. Since `haven` preserves the +#' underlying data values we need to make sure the data type of the value options in the metadata matches +#' the data type of the values in the actual data. This function accepts a prototype, usually a column +#' from db_data, and uses `force_cast()` to do a best-effort casting of the value options in the metadata +#' to the same data type as `ptype`. The fallback is to convert `x` and the value labels to character. +#' +#' @param x a vector to label +#' @param labels a named vector of labels in the format `c(value = label)` +#' @param ptype vector to serve as prototype for label values +#' @param \dots unused, needed to ignore extra arguments that may be passed +#' +#' @return +#' `haven_labelled` vector +#' +#' @keywords internal +#' +apply_labs_haven <- function(x, labels, ptype, ...) { + # set_value_labels expects labels in c(label = value) format so reverse them + labels <- invert_vec(labels) + # Try to cast values to match data type in data, catching any parsing warnings + cnd <- NULL + labels_cast <- withCallingHandlers( + { + force_cast(labels, ptype) + }, + warning = function(w) { + cnd <<- w + cnd_muffle(w) + } + ) + if (!is.null(attr(labels_cast, "problems"))) { + # If there was parsing problem fall back to character + x <- as.character(x) + labels_cast <- force_cast(labels, character()) + } else if (!is.null(cnd)) { + # If there was some other warning we didn't mean to catch it, so re-raise + cli_warn(cnd) + } + + labelled::set_value_labels(x, .labels = labels_cast) +} + +#' @title +#' Swap vector names for values +#' +#' @param x a vector +#' +#' @return +#' Vector with names and values reversed +#' +#' @keywords internal +#' +invert_vec <- function(x) { + out <- names(x) + # If there were no names do nothing + if (is.null(out)) { + return(x) + } + names(out) <- x + out +} + +force_cast <- function(x, ptype) { + ptype <- vec_ptype(ptype) + if (is.logical(ptype)) { + out <- parse_logical(x) + } else if (is.integer(ptype)) { + out <- parse_integer(x) + } else if (is.numeric(ptype)) { + out <- parse_double(x) + } else if (is.Date(ptype)) { + out <- parse_date(x) + } else if (is.difftime(ptype)) { + out <- parse_time(x) + } else if (is.POSIXt(ptype)) { + out <- parse_datetime(x) + } else { + out <- parse_character(x) + } + + names(out) <- names(x) + out +} + #' @title #' Utility function to extract the name of the project identifier field for #' a tibble of REDCap data diff --git a/man/apply_labs_factor.Rd b/man/apply_labs_factor.Rd new file mode 100644 index 00000000..b5e5f015 --- /dev/null +++ b/man/apply_labs_factor.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{apply_labs_factor} +\alias{apply_labs_factor} +\title{Apply factor labels to a vector} +\usage{ +apply_labs_factor(x, labels, ...) +} +\arguments{ +\item{x}{a vector to label} + +\item{labels}{a named vector of labels in the format \code{c(value = label)}} + +\item{\dots}{unused} +} +\value{ +factor +} +\description{ +Apply factor labels to a vector +} +\keyword{internal} diff --git a/man/apply_labs_haven.Rd b/man/apply_labs_haven.Rd new file mode 100644 index 00000000..ad35755d --- /dev/null +++ b/man/apply_labs_haven.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{apply_labs_haven} +\alias{apply_labs_haven} +\title{Apply haven value labels to a vector} +\usage{ +apply_labs_haven(x, labels, ptype, ...) +} +\arguments{ +\item{x}{a vector to label} + +\item{labels}{a named vector of labels in the format \code{c(value = label)}} + +\item{ptype}{vector to serve as prototype for label values} + +\item{\dots}{unused} +} +\value{ +\code{haven_labelled} vector +} +\description{ +Apply haven value labels to a vector +} +\details{ +Assumes a check_installed() has been run for \code{labelled}. Since \code{haven} preserves the +underlying data values we need to make sure the data type of the value options in the metadata matches +the data type of the values in the actual data. This function accepts a prototype, usually a column +from db_data, and uses \code{force_cast()} to do a best-effort casting of the value options in the metadata +to the same data type as \code{ptype}. The fallback is to convert \code{x} and the value labels to character. +} +\keyword{internal} diff --git a/man/invert_vec.Rd b/man/invert_vec.Rd new file mode 100644 index 00000000..b85eb7ac --- /dev/null +++ b/man/invert_vec.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{invert_vec} +\alias{invert_vec} +\title{Swap vector names for values} +\usage{ +invert_vec(x) +} +\arguments{ +\item{x}{a vector} +} +\value{ +Vector with names and values reversed +} +\description{ +Swap vector names for values +} +\keyword{internal} diff --git a/man/multi_choice_to_labels.Rd b/man/multi_choice_to_labels.Rd index 1a0f291e..2dca6b09 100644 --- a/man/multi_choice_to_labels.Rd +++ b/man/multi_choice_to_labels.Rd @@ -4,12 +4,17 @@ \alias{multi_choice_to_labels} \title{Update multiple choice fields with label data} \usage{ -multi_choice_to_labels(db_data, db_metadata) +multi_choice_to_labels(db_data, db_metadata, raw_or_label = "label") } \arguments{ \item{db_data}{A REDCap database object} \item{db_metadata}{A REDCap metadata object} + +\item{raw_or_label}{A string (either 'raw', 'label', or 'haven') that specifies whether +to export the raw coded values or the labels for the options of categorical +fields. Default is 'label'. If 'haven' is supplied, categorical fields are converted +to \code{haven_labelled} vectors.} } \description{ Update REDCap variables with multi-choice types to standard form labels taken diff --git a/man/read_redcap.Rd b/man/read_redcap.Rd index 7cd5d2bb..469899f5 100644 --- a/man/read_redcap.Rd +++ b/man/read_redcap.Rd @@ -24,9 +24,10 @@ URI/URL of the REDCap server (e.g., \item{token}{The user-specific string that serves as the password for a project. Required.} -\item{raw_or_label}{A string (either 'raw' or 'label') that specifies whether +\item{raw_or_label}{A string (either 'raw', 'label', or 'haven') that specifies whether to export the raw coded values or the labels for the options of categorical -fields. Default is 'label'.} +fields. Default is 'label'. If 'haven' is supplied, categorical fields are converted +to \code{haven_labelled} vectors.} \item{forms}{A character vector of REDCap instrument names that specifies which instruments to import. Default is \code{NULL} which imports all instruments diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 164dcf1e..65cd1b77 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -69,6 +69,34 @@ test_that("multi_choice_to_labels works", { expect_factor(out$data_field_types_complete) expect_equal(levels(out$data_field_types_complete), c("Incomplete", "Unverified", "Complete")) + + # Haven option works + skip_if_not_installed("labelled") + + out <- multi_choice_to_labels( + db_data = db_data_classic, + db_metadata = db_metadata_classic, + raw_or_label = "haven" + ) %>% + suppressWarnings(classes = c( + "empty_parse_warning", + "field_missing_categories", + "duplicate_labels" + )) + + expect_s3_class(out$dropdown_single, "haven_labelled") + expect_equal( + labelled::val_labels(out$dropdown_single), + c("one" = "choice_1", "two" = "choice_2", "three" = "choice_3") + ) + expect_s3_class(out$radio_single, "haven_labelled") + expect_equal(labelled::val_labels(out$radio_single), c("A" = "choice_1", "B" = "choice_2", "C" = "choice_3")) + expect_s3_class(out$data_field_types_complete, "haven_labelled") + expect_equal( + labelled::val_labels(out$data_field_types_complete), c("Incomplete" = 0, "Unverified" = 1, "Complete" = 2) + ) + expect_s3_class(out$repeatsurvey_radio_v2, "haven_labelled") + expect_equal(labelled::val_labels(out$repeatsurvey_radio_v2), c("Choice 1" = 1, "Choice 2" = 2, "Choice 3" = 3)) }) test_that("parse_labels works", { @@ -399,3 +427,42 @@ test_that("make_skimr_labels works", { expect_true(is.character(skimr_labels)) expect_true(!is.null(attr(skimr_labels, "name"))) }) + +test_that("invert_vec works", { + expect_equal(invert_vec(c("a" = 1, "b" = 2)), c("1" = "a", "2" = "b")) + expect_equal(invert_vec(1:2), 1:2) +}) + +test_that("apply_labs_haven works", { + skip_if_not_installed("labelled") + + # Testing all classes where labelled has built in val_labels<- methods + out <- apply_labs_haven(1:3, c("1" = "a", "2" = "b", "3" = "c"), integer()) + expect_s3_class(out, "haven_labelled") + expect_equal(labelled::val_labels(out), c(a = 1, b = 2, c = 3)) + + out <- apply_labs_haven(c(1.1, 2, 3.3), c("1.1" = "a", "2" = "b", "3.3" = "c"), numeric()) + expect_s3_class(out, "haven_labelled") + expect_equal(labelled::val_labels(out), c(a = 1.1, b = 2.0, c = 3.3)) + + out <- apply_labs_haven(letters[1:3], c("a" = "x", "b" = "y", "c" = "z"), character()) + expect_s3_class(out, "haven_labelled") + expect_equal(labelled::val_labels(out), c(x = "a", y = "b", z = "c")) + + # Case with mismatching data types between labels from metadata and data values + out <- apply_labs_haven(1:2, c("1" = "a", "2" = "b", "3.3" = "c"), integer()) + expect_s3_class(out, "haven_labelled") + expect_equal(labelled::val_labels(out), c(a = "1", b = "2", c = "3.3")) +}) + +test_that("apply_labs_factor works", { + out <- apply_labs_factor(1:3, c("1" = "a", "2" = "b", "3" = "c")) + expect_s3_class(out, "factor") + expect_equal(out, factor(letters[1:3])) +}) + +test_that("force_cast works", { + expect_s3_class(force_cast("2023-01-01", as.Date(NA)), "Date") + expect_s3_class(force_cast("12:00", as.difftime(0, units = "secs")), "difftime") + expect_s3_class(force_cast("2023-01-01 12:00", as.POSIXct(NA)), "POSIXct") +})