diff --git a/NAMESPACE b/NAMESPACE index 056ce167..5c16f592 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,6 @@ export(fmt_strip_html) export(fmt_strip_trailing_colon) export(fmt_strip_trailing_punct) export(fmt_strip_whitespace) -export(join_data_tibbles) export(make_labelled) export(read_redcap) export(write_redcap_xlsx) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R deleted file mode 100644 index 3db4912b..00000000 --- a/R/join_data_tibbles.R +++ /dev/null @@ -1,238 +0,0 @@ -#' @title Join Two Data Tibbles from a Supertibble -#' -#' @description -#' The [join_data_tibbles()] function provides a way to intelligently join two -#' data tibbles from a REDCaTidieR supertibble. A supertibble is an output of -#' [read_redcap()]. -#' -#' [join_data_tibbles()] attempts to correctly assign the `by` when left `NULL` (the default) -#' based on detecting the data tibble structure of `x` and `y`. -#' -#' @inheritParams extract_tibbles -#' @param type A character string for the type of join to be performed borrowing from -#' dplyr. One of "left", "right", "inner", or "full". Default "left". -#' @inheritParams dplyr::inner_join -#' -#' -#' @returns A `tibble`. -#' -#' @export - -join_data_tibbles <- function(supertbl, - x, - y, - by = NULL, - type = "left", - suffix = c(".x", ".y")) { - record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) # nolint: object_usage_linter - join_fn <- get_join_fn(type) - - # Append the supertibble with the primary keys column - supertbl <- supertbl |> - mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>% - select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data, - .data$redcap_metadata, .data$structure, .data$pks, matches("redcap_events")) - - tbl_x <- extract_tibble(supertbl, x) - tbl_x_structure <- get_structure(supertbl, x) - tbl_y <- extract_tibble(supertbl, y) - tbl_y_structure <- get_structure(supertbl, y) - - # Mixed structure requires special handling - is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed") - - if (is_mixed) { - # TODO: Determine if ok to remove - required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter - tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter - tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter - - tbl_x_type <- get_type(supertbl, x) - tbl_y_type <- get_type(supertbl, y) - - # Add on .repeat_type specifier for the redcap_event column - tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event") - tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event") - } - - join_fn <- get_join_fn(type) - by <- build_by(supertbl, x, y, is_mixed) - - join_fn(tbl_x, tbl_y, by, suffix) %>% - relocate(starts_with("form_status_complete"), .after = everything()) %>% - select(-starts_with(".repeat_type")) -} - -#' @title Extract the primary keys associated with a data tibble -#' -#' @param data_tbl A data tibble from a supertibble -#' @param record_id_field The record ID field for the REDCap project, retrieved -#' as an ouput of [get_record_id_field()] -#' -#' @returns a character string -#' -#' @keywords internal -extract_keys <- function(data_tbl, record_id_field) { - redcap_keys <- c( - record_id_field, "redcap_event", "redcap_form_instance", - "redcap_event_instance", "redcap_arm" - ) - - data_tbl |> - colnames() |> - intersect(redcap_keys) |> - paste(collapse = ", ") -} - -#' @title Retrieve the structure data for a form from the supertibble -#' -#' @inheritParams join_data_tibbles -#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles` -#' -#' @keywords internal -get_structure <- function(supertbl, tbl_name) { - supertbl$structure[supertbl$redcap_form_name == tbl_name] -} - -#' @title Retrieve the repeat event type data for a form from the supertibble -#' -#' @inheritParams join_data_tibbles -#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles` -#' -#' @keywords internal -get_type <- function(supertbl, tbl_name) { - supertbl %>% - filter(.data$redcap_form_name == tbl_name) %>% - pull(.data$redcap_events) %>% - pluck(1) %>% - select(.data$redcap_event, - ".repeat_type" = .data$repeat_type) %>% - unique() -} - -#' @title Retrieve the appropriate user specified join function -#' -#' @inheritParams join_data_tibbles -#' -#' @returns a function -#' -#' @keywords internal -get_join_fn <- function(type) { - join_functions <- list( - left = dplyr::left_join, - right = dplyr::right_join, - inner = dplyr::inner_join, - full = dplyr::full_join - ) - - if (!type %in% names(join_functions)) { - cli::cli_abort("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") - } - - join_functions[[type]] -} - -#' @title Intelligently retrieve the join by cols -#' -#' @inheritParams join_data_tibbles -#' @param is_mixed TRUE/FALSE, whether or not the given tables contain a mixed structure -#' -#' @returns a character vector -#' -#' @keywords internal -build_by <- function(supertbl, x, y, is_mixed) { - x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>% - stringr::str_split(", ", simplify = TRUE) - y_pks <- supertbl$pks[supertbl$redcap_form_name == y] %>% - stringr::str_split(", ", simplify = TRUE) - - out <- intersect(x_pks, y_pks) - - if (is_mixed) { - # For mixed tables, depending on the .repeat_types present tables may not - # have event and form instance columns and must be added - out <- c(out, "redcap_event_instance", "redcap_form_instance") %>% - # TODO: Make standard, currently needed for repeat/mixed joins - unique() - } - - out -} - -#' @keywords intenral -#' @noRd -add_missing_columns <- function(tbl, columns) { - missing_cols <- setdiff(columns, names(tbl)) - tbl[missing_cols] <- NA - return(tbl) -} - -#' @title Join data tbls of various structures and types -#' -#' @description -#' [join_tbls()] either performs the `join_fun()` specified by the `type` or, in -#' the event of mixed structure data tibble joins, will seek to split data into -#' three categories before performing the joins. The key identifiers here are -#' `redcap_form_instance` and the added `.repeat_type` columns. -#' -#' @inheritParams join_data_tibbles -#' @param join_fn the user specified join function type output by [get_join_fn()] -#' @param is_mixed TRUE/FALSE mixed data structure -#' -#' @returns a dataframe -#' -#' @keywords internal - -join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) { - if (is_mixed) { - # Filter based on .repeat_type - # If repeating together events, can use redcap_form_instance (NA) and redcap_event_instance - x_together <- x %>% filter(.data$.repeat_type == "repeat_together") - y_together <- y %>% filter(.data$.repeat_type == "repeat_together") - - # repeating instruments for separately repeating events shouldn't be joined by redcap_form_instance - x_separate_repeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & !is.na(.data$redcap_form_instance)) - y_separate_repeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & !is.na(.data$redcap_form_instance)) - - # nonrepeating instruments for separately repeating events should be joined by redcap_form_instance - x_separate_nonrepeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance)) - y_separate_nonrepeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance)) - - # Join together sets - joined_together <- x_together %>% - join_fn(y_together, by = by, suffix = suffix) - - joined_separate_repeating <- x_separate_repeating %>% - join_fn(y_separate_repeating, by = by[by != "redcap_form_instance"], suffix = suffix) - - joined_separate_nonrepeating <- x_separate_nonrepeating %>% - join_fn(y_separate_nonrepeating, by = by, suffix = suffix) - - # Bind rows together, issue in arrangmenet of output - result <- bind_rows(joined_together, joined_separate_repeating) %>% - bind_rows(joined_separate_nonrepeating) %>% - drop_non_suffix_columns() - } else { - result <- join_fn(x, y, by = by, suffix = suffix) - } - result -} - -drop_non_suffix_columns <- function(data) { - # Extract column names that contain a "." - # Note: We can look for periods because REDCap will not allow variables to made - # with them. Only user tampering with column names in the output would result in this. - dot_columns <- names(data)[grepl("\\.", names(data))] - - # Extract the base column names without the suffixes (everything before the ".") - base_columns <- unique(sub("\\..*", "", dot_columns)) - - # Filter out base columns that do not exist without a suffix - columns_to_drop <- base_columns[base_columns %in% names(data)] - - # Drop only those base columns that exist both with and without suffixes - data <- data %>% - select(-all_of(columns_to_drop)) - - return(data) -} diff --git a/man/build_by.Rd b/man/build_by.Rd deleted file mode 100644 index 81696fb2..00000000 --- a/man/build_by.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{build_by} -\alias{build_by} -\title{Intelligently retrieve the join by cols} -\usage{ -build_by(supertbl, x, y, is_mixed) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - -\item{is_mixed}{TRUE/FALSE, whether or not the given tables contain a mixed structure} -} -\value{ -a character vector -} -\description{ -Intelligently retrieve the join by cols -} -\keyword{internal} diff --git a/man/extract_keys.Rd b/man/extract_keys.Rd deleted file mode 100644 index b497120b..00000000 --- a/man/extract_keys.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{extract_keys} -\alias{extract_keys} -\title{Extract the primary keys associated with a data tibble} -\usage{ -extract_keys(data_tbl, record_id_field) -} -\arguments{ -\item{data_tbl}{A data tibble from a supertibble} - -\item{record_id_field}{The record ID field for the REDCap project, retrieved -as an ouput of \code{\link[=get_record_id_field]{get_record_id_field()}}} -} -\value{ -a character string -} -\description{ -Extract the primary keys associated with a data tibble -} -\keyword{internal} diff --git a/man/get_join_fn.Rd b/man/get_join_fn.Rd deleted file mode 100644 index f99d7b01..00000000 --- a/man/get_join_fn.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{get_join_fn} -\alias{get_join_fn} -\title{Retrieve the appropriate user specified join function} -\usage{ -get_join_fn(type) -} -\arguments{ -\item{type}{A character string for the type of join to be performed borrowing from -dplyr. One of "left", "right", "inner", or "full". Default "left".} -} -\value{ -a function -} -\description{ -Retrieve the appropriate user specified join function -} -\keyword{internal} diff --git a/man/get_structure.Rd b/man/get_structure.Rd deleted file mode 100644 index b8a901eb..00000000 --- a/man/get_structure.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{get_structure} -\alias{get_structure} -\title{Retrieve the structure data for a form from the supertibble} -\usage{ -get_structure(supertbl, tbl_name) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}} -} -\description{ -Retrieve the structure data for a form from the supertibble -} -\keyword{internal} diff --git a/man/get_type.Rd b/man/get_type.Rd deleted file mode 100644 index f3088095..00000000 --- a/man/get_type.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{get_type} -\alias{get_type} -\title{Retrieve the repeat event type data for a form from the supertibble} -\usage{ -get_type(supertbl, tbl_name) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}} -} -\description{ -Retrieve the repeat event type data for a form from the supertibble -} -\keyword{internal} diff --git a/man/join_data_tibbles.Rd b/man/join_data_tibbles.Rd deleted file mode 100644 index 75dc94b1..00000000 --- a/man/join_data_tibbles.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{join_data_tibbles} -\alias{join_data_tibbles} -\title{Join Two Data Tibbles from a Supertibble} -\usage{ -join_data_tibbles( - supertbl, - x, - y, - by = NULL, - type = "left", - suffix = c(".x", ".y") -) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - -\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character -vector of variables to join by. - -If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all -variables in common across \code{x} and \code{y}. A message lists the variables so -that you can check they're correct; suppress the message by supplying \code{by} -explicitly. - -To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} -specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. - -To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with -multiple expressions. For example, \code{join_by(a == b, c == d)} will match -\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between -\code{x} and \code{y}, you can shorten this by listing only the variable names, like -\code{join_by(a, c)}. - -\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap -joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on -these types of joins. - -For simple equality joins, you can alternatively specify a character vector -of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} -to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, -use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. - -To perform a cross-join, generating all combinations of \code{x} and \code{y}, see -\code{\link[dplyr:cross_join]{cross_join()}}.} - -\item{type}{A character string for the type of join to be performed borrowing from -dplyr. One of "left", "right", "inner", or "full". Default "left".} - -\item{suffix}{If there are non-joined duplicate variables in \code{x} and -\code{y}, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.} -} -\value{ -A \code{tibble}. -} -\description{ -The \code{\link[=join_data_tibbles]{join_data_tibbles()}} function provides a way to intelligently join two -data tibbles from a REDCaTidieR supertibble. A supertibble is an output of -\code{\link[=read_redcap]{read_redcap()}}. - -\code{\link[=join_data_tibbles]{join_data_tibbles()}} attempts to correctly assign the \code{by} when left \code{NULL} (the default) -based on detecting the data tibble structure of \code{x} and \code{y}. -} diff --git a/man/join_tbls.Rd b/man/join_tbls.Rd deleted file mode 100644 index 95fd52b5..00000000 --- a/man/join_tbls.Rd +++ /dev/null @@ -1,60 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{join_tbls} -\alias{join_tbls} -\title{Join data tbls of various structures and types} -\usage{ -join_tbls(x, y, join_fn, by, suffix, is_mixed) -} -\arguments{ -\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - -\item{join_fn}{the user specified join function type output by \code{\link[=get_join_fn]{get_join_fn()}}} - -\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character -vector of variables to join by. - -If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all -variables in common across \code{x} and \code{y}. A message lists the variables so -that you can check they're correct; suppress the message by supplying \code{by} -explicitly. - -To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} -specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. - -To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with -multiple expressions. For example, \code{join_by(a == b, c == d)} will match -\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between -\code{x} and \code{y}, you can shorten this by listing only the variable names, like -\code{join_by(a, c)}. - -\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap -joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on -these types of joins. - -For simple equality joins, you can alternatively specify a character vector -of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} -to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, -use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. - -To perform a cross-join, generating all combinations of \code{x} and \code{y}, see -\code{\link[dplyr:cross_join]{cross_join()}}.} - -\item{suffix}{If there are non-joined duplicate variables in \code{x} and -\code{y}, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.} - -\item{is_mixed}{TRUE/FALSE mixed data structure} -} -\value{ -a dataframe -} -\description{ -\code{\link[=join_tbls]{join_tbls()}} either performs the \code{join_fun()} specified by the \code{type} or, in -the event of mixed structure data tibble joins, will seek to split data into -three categories before performing the joins. The key identifiers here are -\code{redcap_form_instance} and the added \code{.repeat_type} columns. -} -\keyword{internal} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9829a0fc..ede73fed 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -69,7 +69,6 @@ reference: Helpful functions for supertibble data analytics. contents: - combine_checkboxes - - join_data_tibbles - title: "Data" contents: - superheroes_supertbl