Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #2563 no_list_columns: add check to avoid list columns #2592

Merged
merged 12 commits into from
Dec 13, 2024
Merged
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ BugReports: https://github.com/pharmaverse/admiral/issues
Depends:
R (>= 4.0)
Imports:
admiraldev (>= 1.1.0),
admiraldev (>= 1.1.0.9007),
cli (>= 3.6.2),
dplyr (>= 1.0.5),
hms (>= 0.5.3),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ importFrom(rlang,call2)
importFrom(rlang,call_name)
importFrom(rlang,caller_env)
importFrom(rlang,cnd_muffle)
importFrom(rlang,cnd_signal)
importFrom(rlang,current_env)
importFrom(rlang,enexpr)
importFrom(rlang,enexprs)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ or that the queries dataset contains duplicates. (#2543)

- In `get_summary_records()`, previously deprecated formal arguments `analysis_var` and `summary_fun` now removed from function, documentation, tests etc. (#2521)

- A check was added to `derive_vars_transposed()` and `derive_vars_atc()` which
stops execution if the records in `dataset_merge` or `dataset_facm` respectively
are not unique. (#2563)

## Breaking Changes

- The following function arguments are entering the next phase of the deprecation process: (#2487)
Expand Down
10 changes: 5 additions & 5 deletions R/admiral-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
#' map_if map_lgl map2 modify_at modify_if pmap reduce transpose
#' walk
#' @importFrom rlang := abort arg_match as_data_mask as_function as_label
#' as_name as_string call2 call_name caller_env cnd_muffle current_env .data
#' enexpr enexprs eval_bare eval_tidy expr expr_interp exec expr_label exprs
#' f_lhs f_rhs inform is_call is_expression is_missing is_named list2
#' new_environment new_formula parse_expr parse_exprs set_names sym syms
#' type_of warn
#' as_name as_string call2 call_name caller_env cnd_muffle cnd_signal
#' current_env .data enexpr enexprs eval_bare eval_tidy expr expr_interp exec
#' expr_label exprs f_lhs f_rhs inform is_call is_expression is_missing
#' is_named list2 new_environment new_formula parse_expr parse_exprs set_names
#' sym syms type_of warn
#' @importFrom stats setNames
#' @importFrom stringr str_c str_count str_detect str_extract str_glue
#' str_length str_locate str_locate_all str_match str_remove
Expand Down
108 changes: 108 additions & 0 deletions R/derive_vars_atc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#' Derive ATC Class Variables
#'
#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
#'
#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars"))`
#'
#' @param dataset_facm FACM dataset
#'
#' The variables specified by the `by_vars`, `id_vars`, and `value_var`
#' arguments and `FATESTCD` are required. The variables `by_vars`, `id_vars`,
#' and `FATESTCD` must be a unique key.
#'
#' @param by_vars Grouping variables
#'
#' Keys used to merge `dataset_facm` with `dataset`.
#'
#' @param id_vars ID variables
#'
#' Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`.
#'
#' `r roxygen_param_by_vars()`
#'
#' @param value_var The variable of `dataset_facm` containing the values of the
#' transposed variables
#'
#' @return The input dataset with ATC variables added
#'
#' @seealso [derive_vars_transposed()]
#'
#' @family der_occds
#' @keywords der_occds
#'
#' @export
#'
#' @examples
#' library(tibble)
#'
#' cm <- tribble(
#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
#' )
#' facm <- tribble(
#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
#' )
#'
#' derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID))
derive_vars_atc <- function(dataset,
dataset_facm,
by_vars = exprs(
!!!get_admiral_option("subject_keys"),
CMREFID = FAREFID
),
id_vars = NULL,
value_var = FASTRESC) {
value_var <- assert_symbol(enexpr(value_var))
assert_vars(by_vars)
assert_vars(id_vars, optional = TRUE)
assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
assert_data_frame(
dataset_facm,
required_vars = exprs(!!!by_vars, !!value_var, !!!id_vars, FATESTCD)
)

tryCatch(
data_transposed <- derive_vars_transposed(
dataset,
select(dataset_facm, !!!unname(by_vars), !!value_var, !!!id_vars, FATESTCD),
by_vars = by_vars,
id_vars = id_vars,
key_var = FATESTCD,
value_var = !!value_var,
filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
),
merge_duplicates = function(cnd) {
cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm")
cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments."
cnd_signal(cnd)
}
)
data_transposed %>%
select(-starts_with("FA")) %>%
rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
142 changes: 22 additions & 120 deletions R/derive_vars_transposed.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#'
#' @param dataset_merge Dataset to transpose and merge
#'
#' The variables specified by the `by_vars`, `key_var` and `value_var` parameters
#' are expected
#' The variables specified by the `by_vars`, `id_vars`, `key_var` and
#' `value_var` arguments are expected. The variables `by_vars`, `id_vars`,
#' `key_var` have to be a unique key.
#'
#' @param by_vars Grouping variables
#'
Expand All @@ -35,17 +36,18 @@
#' <https://dplyr.tidyverse.org/reference/mutate-joins.html#arguments> for
#' more details.
#'
#' Permitted Values for `relationship`: `"one-to-one"`, `"one-to-many"`,
#' `"many-to-one"`, `"many-to-many"`, `NULL`.
#' *Permitted Values*: `"one-to-one"`, `"one-to-many"`, `"many-to-one"`,
#' `"many-to-many"`, `NULL`
#'
#' @details
#' After filtering `dataset_merge` based upon the condition provided in `filter`, this
#' dataset is transposed and subsequently merged onto `dataset` using `by_vars` as
#' keys.
#'
#'
#' @return The input dataset with transposed variables from `dataset_merge` added
#'
#' @seealso [derive_vars_atc()]
#'
#' @family der_gen
#' @keywords der_gen
#'
Expand Down Expand Up @@ -116,6 +118,21 @@ derive_vars_transposed <- function(dataset,
optional = TRUE
)

# check for duplicates in dataset_merge as these will create list columns,
# which is not acceptable for ADaM datasets
signal_duplicate_records(
dataset_merge,
by_vars = c(by_vars, id_vars, exprs(!!key_var)),
msg = c(
paste(
"Dataset {.arg dataset_merge} contains duplicate records with respect to",
"{.var {by_vars}}"
),
"Please check data and {.arg by_vars}, {.arg id_vars}, and {.arg key_var} arguments."
),
class = "merge_duplicates"
)

dataset_transposed <- dataset_merge %>%
filter_if(filter) %>%
pivot_wider(
Expand Down Expand Up @@ -164,121 +181,6 @@ derive_vars_transposed <- function(dataset,
),
call = parent.frame(n = 4)
)
},
"dplyr_error_join_relationship_one_to_many" = function(cnd) {
cli_abort(
message = c(
str_replace(
str_replace(
cnd$message, "`x`", "`dataset`"
), "`y`", "the transposed `dataset_merge`"
),
i = str_replace(
str_replace(
cnd$body, "`x`", "`dataset`"
), "`y`", "the transposed `dataset_merge`"
)
),
call = parent.frame(n = 4)
)
}
)
}

#' Derive ATC Class Variables
#'
#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
#'
#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars"))`
#'
#' @param dataset_facm FACM dataset
#'
#' The variables specified by the `by_vars` and `value_var` parameters,
#' `FAGRPID` and `FATESTCD` are required
#'
#' @param by_vars Grouping variables
#'
#' Keys used to merge `dataset_facm` with `dataset`.
#'
#' @param id_vars ID variables
#'
#' Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`.
#'
#' `r roxygen_param_by_vars()`
#'
#' @param value_var The variable of `dataset_facm` containing the values of the
#' transposed variables
#'
#' Default: `FASTRESC`
#'
#'
#' @return The input dataset with ATC variables added
#'
#' @family der_occds
#' @keywords der_occds
#'
#' @export
#'
#' @examples
#' library(tibble)
#'
#' cm <- tribble(
#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
#' )
#' facm <- tribble(
#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
#' )
#'
#' derive_vars_atc(cm, facm)
derive_vars_atc <- function(dataset,
dataset_facm,
by_vars = exprs(
!!!get_admiral_option("subject_keys"),
CMREFID = FAREFID
),
id_vars = NULL,
value_var = FASTRESC) {
value_var <- assert_symbol(enexpr(value_var))
assert_vars(by_vars)
assert_vars(id_vars, optional = TRUE)
assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
assert_data_frame(dataset_facm, required_vars = exprs(!!!by_vars, !!value_var, FAGRPID, FATESTCD))

dataset %>%
derive_vars_transposed(
select(dataset_facm, !!!unname(by_vars), !!value_var, FAGRPID, FATESTCD),
by_vars = by_vars,
id_vars = id_vars,
key_var = FATESTCD,
value_var = !!value_var,
filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
) %>%
select(-starts_with("FA")) %>%
rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
14 changes: 12 additions & 2 deletions R/duplicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ extract_duplicate_records <- function(dataset, by_vars) {
#' @param msg The condition message
#' @param cnd_type Type of condition to signal when detecting duplicate records.
#' One of `"message"`, `"warning"` or `"error"`. Default is `"error"`.
#' @param class Class of the condition
#'
#' The specified classes are added to the classes of the condition.
#' `c("duplicate_records", "assert-admiral")` is always added.
#'
#' @return No return value, called for side effects
#'
Expand All @@ -113,11 +117,13 @@ signal_duplicate_records <- function(dataset,
"with respect to",
"{.var {replace_values_by_names(by_vars)}}"
),
cnd_type = "error") {
cnd_type = "error",
class = NULL) {
assert_expr_list(by_vars)
assert_data_frame(dataset, required_vars = extract_vars(by_vars), check_is_grouped = FALSE)
assert_character_vector(msg)
assert_character_scalar(cnd_type, values = c("message", "warning", "error"))
assert_character_vector(class, optional = TRUE)

cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort)

Expand All @@ -134,7 +140,11 @@ signal_duplicate_records <- function(dataset,
msg,
i = "Run {.run admiral::get_duplicates_dataset()} to access the duplicate records"
)
cnd_funs[[cnd_type]](full_msg)
cnd_funs[[cnd_type]](
full_msg,
class = c(class, "duplicate_records", "assert-admiral"),
by_vars = by_vars
)
}
}

Expand Down
Loading
Loading