Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
catalamarti committed Apr 10, 2024
1 parent 90bb1a7 commit c9ae297
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 21 deletions.
54 changes: 33 additions & 21 deletions R/conceptCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ conceptCohortSet <- function(cdm,
name = "cohort",
verbose = TRUE) {
# initial input validation
cdm <- validateCDM(cdm)
cdm <- validateCdm(cdm)
name <- validateName(name)
verbose <- validateVerbose(verbose)
if (length(conceptSet) == 0) {
if (verbose) {
cli::cli_inform(c("i" = "Empty codelist provided, returning empty cohort"))
Expand Down Expand Up @@ -58,7 +59,7 @@ conceptCohortSet <- function(cdm,
dplyr::distinct() |>
dplyr::pull()

cohort <- list()
cohorts <- list()
for (k in seq_along(domains)) {
domain <- domains[k]
table <- domainsData$table[domainsData$domain_id == domain]
Expand All @@ -77,7 +78,7 @@ conceptCohortSet <- function(cdm,
"i" = "Subsetting table {.strong {table}} using {n} concept{?s} with domain: {.strong {domain}}."
))
}
cohort[[k]] <- cdm[[table]] |>
cohorts[[k]] <- cdm[[table]] |>
dplyr::select(
"subject_id" = "person_id",
"concept_id" = dplyr::all_of(concept),
Expand All @@ -97,7 +98,7 @@ conceptCohortSet <- function(cdm,
}
}

if (length(cohort) == 0) {
if (length(cohorts) == 0) {
if (verbose) {
cli::cli_inform(c("i" = "No table could be subsetted, returning empty cohort."))
}
Expand All @@ -114,14 +115,22 @@ conceptCohortSet <- function(cdm,
if (verbose) {
cli::cli_inform(c("i" = "Subsetting tables."))
}
cohort <- Reduce(dplyr::union_all, cohort) |>
cohort <- cohorts[[1]]
if (length(cohorts) > 1) {
for (k in 2:length(cohorts)) {
cohort <- cohort |> dplyr::union_all(cohorts[[k]])
}
}
cohort <- cohort |>
dplyr::compute(name = name, temporary = FALSE)

if (verbose) {
cli::cli_inform(c("i" = "Collapsing records."))
}
cohort <- cohort |>
collapseGap(gap = 0) |>
# assign to cdm so we keep class, to be removed when https://github.com/darwin-eu-dev/omopgenerics/issues/256
cdm[[name]] <- cohort |>
collapseGap(gap = 0)
cohort <- cdm[[name]] |>
dplyr::compute(name = name, temporary = FALSE)

if (verbose) {
Expand Down Expand Up @@ -150,13 +159,16 @@ addDomains <- function(cohortCodelist, cdm) {
cdm = cdm, name = tmpName, table = cohortCodelist
)
cdm[[tmpName]] <- cdm[[tmpName]] |> dplyr::compute()
omopgenerics::dropTable(cdm = cdm, name = tmpName)

cdm[["concept"]] |>
cohortCodelist <- cdm[["concept"]] |>
dplyr::select("concept_id", "domain_id") |>
dplyr::right_join(cdm[[tmpName]], by = "concept_id") |>
dplyr::mutate("domain_id" = tolower(.data$domain_id)) |>
dplyr::compute()

omopgenerics::dropTable(cdm = cdm, name = tmpName)

return(cohortCodelist)
}
collapseGap <- function(cohort, gap) {
start <- cohort |>
Expand All @@ -166,29 +178,29 @@ collapseGap <- function(cohort, gap) {
dplyr::mutate("date_id" = -1)
end <- cohort |>
dplyr::select(
"cohort_definition_id", "subject_id", "date" = "cohort_start_date"
"cohort_definition_id", "subject_id", "date" = "cohort_end_date"
) |>
dplyr::mutate("date_id" = 1)
start |>
dplyr::union_all(end) |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) %>%
dplyr::arrange(.data$date, .data$date_id) %>%
dplyr::mutate("cum_id" = cumsum(.data$date_id)) %>%
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$date, .data$date_id) |>
dplyr::mutate("cum_id" = cumsum(.data$date_id)) |>
dplyr::filter(
.data$cum_id == 0 || (.data$cum_id == -1 && .data$date_id == -1)
) %>%
.data$cum_id == 0 | (.data$cum_id == -1 & .data$date_id == -1)
) |>
dplyr::mutate(
"name" = dplyr::if_else(
.data$date_id == -1, "cohort_start_date", "cohort_end_date"
),
"era_id" = dplyr::if_else(.data$date_id == -1, 1, 0)
) %>%
dplyr::mutate("era_id" = cumsum(as.numeric(.data$era_id))) %>%
dplyr::ungroup() %>%
dplyr::arrange() %>%
) |>
dplyr::mutate("era_id" = cumsum(as.numeric(.data$era_id))) |>
dplyr::ungroup() |>
dplyr::arrange() |>
dplyr::select(
"cohort_definition_id", "subject_id", "era_id", "name", "date"
) %>%
tidyr::pivot_wider(names_from = "name", values_from = "date") %>%
) |>
tidyr::pivot_wider(names_from = "name", values_from = "date") |>
dplyr::select(-"era_id")
}
14 changes: 14 additions & 0 deletions R/validateFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ validateCDM <- function(cdm) {
return(invisible(cdm))
}

validateCdm <- function(cdm) {
if (!isTRUE(inherits(cdm, "cdm_reference"))) {
cli::cli_abort("cdm must be a cdm_reference object.")
}
return(invisible(cdm))
}

validateCohortTable <- function(cohort) {
if(!"cohort_table" %in% class(cohort) ||
!all(c("cohort_definition_id", "subject_id",
Expand Down Expand Up @@ -67,3 +74,10 @@ validateName <- function(name) {
validateConceptSet <- function(conceptSet) {
omopgenerics::newCodelist(conceptSet)
}

validateVerbose <- function(verbose) {
if (!is.logical(verbose) | length(verbose) != 1 | is.na(verbose)) {
cli::cli_abort("verbose must be TRUE or FALSE.")
}
return(invisible(verbose))
}
196 changes: 196 additions & 0 deletions tests/testthat/test-conceptCohortSet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
test_that("expected errors and messages", {

cdm <- omock::mockCdmReference() |>
omock::mockPerson() |>
omock::mockObservationPeriod()
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
"concept_id" = 1,
"concept_name" = "my concept",
"domain_id" = "adsf",
"vocabulary_id" = NA,
"concept_class_id" = NA,
"concept_code" = NA,
"valid_start_date" = NA,
"valid_end_date" = NA
)
)

# not a cdm reference
expect_error(conceptCohortSet(cdm = NULL))

# wrong naming
expect_error(conceptCohortSet(cdm = cdm, name = NA))
expect_error(conceptCohortSet(cdm = cdm, name = 1))
expect_error(conceptCohortSet(cdm = cdm, name = c("ass", "asdf")))
expect_error(conceptCohortSet(cdm = cdm, name = "AAAA"))
expect_no_error(x <- conceptCohortSet(cdm = cdm, name = "my_cohort"))
expect_true("my_cohort" == omopgenerics::tableName(x))

# empty cohort
expect_no_error(x <- conceptCohortSet(cdm = cdm, conceptSet = NULL))
expect_true(inherits(x, "cohort_table"))
expect_true(x |> dplyr::collect() |> nrow() == 0)

# not codelist
expect_error(x <- conceptCohortSet(cdm = cdm, conceptSet = 1))
expect_error(x <- conceptCohortSet(cdm = cdm, conceptSet = list(1)))
expect_message(expect_message(
x <- conceptCohortSet(cdm = cdm, conceptSet = list(a = 1))
))
expect_true(inherits(x, "cohort_table"))
expect_true(x |> dplyr::collect() |> nrow() == 0)
expect_true(omopgenerics::tableName(x) == "cohort")
expect_true(setdiff(names(omopgenerics::cdmReference(x)), names(cdm)) == "cohort")
expect_identical(setdiff(names(cdm), names(omopgenerics::cdmReference(x))), character())
expect_equal(
settings(x), dplyr::tibble("cohort_definition_id" = 1L, "cohort_name" = "a")
)
expect_true(nrow(attrition(x)) == 1)
# currently only standard concepts are includes in cohortCodelist see https://github.com/oxford-pharmacoepi/CohortConstructor/issues/74
expect_warning(expect_equal(
cohortCodelist(x, 1), omopgenerics::newCodelist(list())
))
expect_message(expect_message(
x <- conceptCohortSet(cdm = cdm, conceptSet = list(a = 2))
))

# verbose
expect_error(x <- conceptCohortSet(cdm = cdm, verbose = "my_cohort"))
expect_error(x <- conceptCohortSet(cdm = cdm, verbose = c(T, F)))
expect_error(x <- conceptCohortSet(cdm = cdm, verbose = as.logical(NA)))

})

test_that("simple example", {
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
"concept_id" = 1,
"concept_name" = "my concept",
"domain_id" = "drUg",
"vocabulary_id" = NA,
"concept_class_id" = NA,
"concept_code" = NA,
"valid_start_date" = NA,
"valid_end_date" = NA
)
)
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "drug_exposure", table = dplyr::tibble(
"drug_exposure_id" = 1:11,
"person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1),
"drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1),
"drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803),
"drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804),
"drug_type_concept_id" = 1
) |>
dplyr::mutate(
"drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"),
"drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01")
)
)

expect_no_error(cohort <- conceptCohortSet(cdm = cdm, conceptSet = list(a = 1)))

expect_true(cohort |> dplyr::tally() |> dplyr::pull() == 4)
expect_true(cohortCount(cohort)$number_records == 4)
expect_true(cohortCount(cohort)$number_subjects == 2)
expect_true(attrition(cohort) |> nrow() == 1)
expect_identical(
settings(cohort),
dplyr::tibble("cohort_definition_id" = 1L, "cohort_name" = "a")
)
expect_identical(cohortCodelist(cohort, 1), omopgenerics::newCodelist(list(a = 1)))
cohort <- cohort |>
dplyr::collect() |>
dplyr::as_tibble() |>
dplyr::arrange(subject_id, cohort_start_date)
attr(cohort, "cohort_attrition") <- NULL
attr(cohort, "cohort_codelist") <- NULL
attr(cohort, "cohort_set") <- NULL
expect_equal(
cohort,
dplyr::tibble(
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 1L, 1L, 2L),
"cohort_start_date" = as.Date(c(0, 1500, 1800, 10), origin = "2020-01-01"),
"cohort_end_date" = as.Date(c(800, 1600, 1804, 2000), origin = "2020-01-01")
)
)

})

test_that("simple example duckdb", {
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
"concept_id" = 1,
"concept_name" = "my concept",
"domain_id" = "drUg",
"vocabulary_id" = NA,
"concept_class_id" = NA,
"concept_code" = NA,
"valid_start_date" = NA,
"valid_end_date" = NA
)
)
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "drug_exposure", table = dplyr::tibble(
"drug_exposure_id" = 1:11,
"person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1),
"drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1),
"drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803),
"drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804),
"drug_type_concept_id" = 1
) |>
dplyr::mutate(
"drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"),
"drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01")
)
)

cdm <- CDMConnector::copyCdmTo(con = DBI::dbConnect(duckdb::duckdb()), cdm = cdm, schema = "main")

expect_no_error(cohort <- conceptCohortSet(cdm = cdm, conceptSet = list(a = 1)))

expect_true(cohort |> dplyr::tally() |> dplyr::pull() == 4)
expect_true(cohortCount(cohort)$number_records == 4)
expect_true(cohortCount(cohort)$number_subjects == 2)
expect_true(attrition(cohort) |> nrow() == 1)
expect_identical(
settings(cohort),
dplyr::tibble("cohort_definition_id" = 1L, "cohort_name" = "a")
)
expect_identical(cohortCodelist(cohort, 1), omopgenerics::newCodelist(list(a = 1)))
cohort <- cohort |>
dplyr::collect() |>
dplyr::as_tibble() |>
dplyr::arrange(subject_id, cohort_start_date)
attr(cohort, "cohort_attrition") <- NULL
attr(cohort, "cohort_codelist") <- NULL
attr(cohort, "cohort_set") <- NULL
expect_equal(
cohort,
dplyr::tibble(
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 1L, 1L, 2L),
"cohort_start_date" = as.Date(c(0, 1500, 1800, 10), origin = "2020-01-01"),
"cohort_end_date" = as.Date(c(800, 1600, 1804, 2000), origin = "2020-01-01")
)
)

CDMConnector::cdmDisconnect(cdm = cdm)
})

0 comments on commit c9ae297

Please sign in to comment.