diff --git a/.Rbuildignore b/.Rbuildignore index 70af2f10..1003fd43 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,4 @@ ^_pkgdown\.yml$ ^doc$ ^Meta$ +^data-raw$ diff --git a/DESCRIPTION b/DESCRIPTION index 14cdf60f..a9a1aef8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,9 @@ Authors@R: c( person("Marta", "Alcalde", , "marta.alcaldeherraiz@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0000-0000-0000-0000")), person("Yuchen", "Guo", email = "yuchen.guo@ndorms.ox.ac.uk", - role = c("aut"), comment = c(ORCID = "0000-0002-0847-4855")) + role = c("aut"), comment = c(ORCID = "0000-0002-0847-4855")), + person("Nuria", "Mercade-Besora", , "nuria.mercadebesora@ndorms.ox.ac.uk", + role = c("ctb"), comment = c(ORCID = "0009-0006-7948-3747")) ) Description: This package aims to provide functionalities to manipulate and evaluate cohorts in data mapped to the Observational Medical @@ -47,3 +49,5 @@ Config/testthat/parallel: true VignetteBuilder: knitr Remotes: darwin-eu-dev/PatientProfiles +Depends: + R (>= 4.1) diff --git a/NAMESPACE b/NAMESPACE index 7401eab0..49bb6898 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,13 @@ # Generated by roxygen2: do not edit by hand export(attrition) +export(cohortCodelist) export(cohortCount) +export(conceptCohort) export(generateIntersectCohortSet) -export(generateMatchedCohortSet) export(getIdentifier) export(joinOverlap) +export(matchCohort) export(requireAge) export(requireCohortIntersectFlag) export(requireDemographics) @@ -19,6 +21,7 @@ export(splitOverlap) export(trimToDateRange) importFrom(magrittr,"%>%") importFrom(omopgenerics,attrition) +importFrom(omopgenerics,cohortCodelist) importFrom(omopgenerics,cohortCount) importFrom(omopgenerics,settings) importFrom(rlang,":=") diff --git a/R/conceptCohort.R b/R/conceptCohort.R new file mode 100644 index 00000000..144ed1cf --- /dev/null +++ b/R/conceptCohort.R @@ -0,0 +1,185 @@ +#' Generate a cohort based on a concept set. The current supported domains are: +#' +#' @param cdm A cdm_reference object. +#' @param conceptSet A conceptSet, can either be a list of concepts, a codelist +#' or a conceptSetExpression (TO DO). +#' @param name Name of the cohort in the cdm object. +#' +#' @export +#' +#' @return A cohort_table object. +#' +conceptCohort <- function(cdm, + conceptSet, + name) { + # initial input validation + cdm <- validateCdm(cdm) + name <- validateName(name) + if (length(conceptSet) == 0) { + cli::cli_inform(c("i" = "Empty codelist provided, returning empty cohort")) + cdm <- omopgenerics::emptyCohortTable(cdm = cdm, name = name) + return(cdm[[name]]) + } + conceptSet <- validateConceptSet(conceptSet) + + # create concept set tibble + cohortSet <- dplyr::tibble("cohort_name" = names(conceptSet)) |> + dplyr::mutate("cohort_definition_id" = dplyr::row_number()) + cohortCodelist <- lapply(conceptSet, dplyr::as_tibble) |> + dplyr::bind_rows(.id = "cohort_name") |> + dplyr::inner_join(cohortSet, by = "cohort_name") |> + dplyr::select("cohort_definition_id", "concept_id" = "value", "codelist_name" = "cohort_name") |> + dplyr::mutate("type" = "index event") |> + addDomains(cdm) + + ud <- cohortCodelist |> + dplyr::group_by(.data$domain_id) |> + dplyr::tally() |> + dplyr::collect() |> + dplyr::filter(!.data$domain_id %in% domainsData$domain_id) + for (k in seq_len(nrow(ud))) { + cli::cli_inform(c( + "x" = "Domain {.strong {ud$domain_id[k]}} ({ud$n[k]} concept{?s}) excluded because it is not supported." + )) + } + + cohortCodelist <- cohortCodelist |> + dplyr::filter(.data$domain_id %in% !!domainsData$domain_id) |> + dplyr::compute() + + domains <- cohortCodelist |> + dplyr::select("domain_id") |> + dplyr::distinct() |> + dplyr::pull() + + cohorts <- list() + for (k in seq_along(domains)) { + domain <- domains[k] + table <- domainsData$table[domainsData$domain_id == domain] + concept <- domainsData$concept[domainsData$domain_id == domain] + start <- domainsData$start[domainsData$domain_id == domain] + end <- domainsData$end[domainsData$domain_id == domain] + n <- cohortCodelist |> + dplyr::filter(.data$domain_id %in% .env$domain) |> + dplyr::tally() |> + dplyr::pull() + if (table %in% names(cdm)) { + cli::cli_inform(c( + "i" = "Subsetting table {.strong {table}} using {n} concept{?s} with domain: {.strong {domain}}." + )) + cohorts[[k]] <- cdm[[table]] |> + dplyr::select( + "subject_id" = "person_id", + "concept_id" = dplyr::all_of(concept), + "cohort_start_date" = dplyr::all_of(start), + "cohort_end_date" = dplyr::all_of(end) + ) |> + dplyr::inner_join( + cohortCodelist |> + dplyr::filter(.data$domain_id %in% .env$domain) |> + dplyr::select("concept_id", "cohort_definition_id"), + by = "concept_id" + ) + } else { + cli::cli_inform(c( + "x" = "Domain {.strong {domain}} ({n} concept{?s}) excluded because table {table} is not present in the cdm." + )) + } + } + + if (length(cohorts) == 0) { + cli::cli_inform(c("i" = "No table could be subsetted, returning empty cohort.")) + cdm <- omopgenerics::emptyCohortTable(cdm = cdm, name = name) + cdm[[name]] <- cdm[[name]] |> + omopgenerics::newCohortTable( + cohortSetRef = cohortSet, + cohortAttritionRef = NULL, + cohortCodelistRef = NULL + ) + return(cdm[[name]]) + } + + cli::cli_inform(c("i" = "Subsetting tables.")) + cohort <- cohorts[[1]] + if (length(cohorts) > 1) { + for (k in 2:length(cohorts)) { + cohort <- cohort |> dplyr::union_all(cohorts[[k]]) + } + } + cohort <- cohort |> + dplyr::compute(name = name, temporary = FALSE) + + cli::cli_inform(c("i" = "Collapsing records.")) + # assign to cdm so we keep class, to be removed when https://github.com/darwin-eu-dev/omopgenerics/issues/256 + cdm[[name]] <- cohort |> + collapseGap(gap = 0) + cohort <- cdm[[name]] |> + dplyr::compute(name = name, temporary = FALSE) + + cli::cli_inform(c("i" = "Creating cohort attributes.")) + cdm[[name]] <- cohort |> + omopgenerics::newCohortTable( + cohortSetRef = cohortSet, + cohortAttritionRef = NULL, + cohortCodelistRef = cohortCodelist |> + dplyr::select(-"domain_id") |> + dplyr::collect() + ) + + cli::cli_inform(c("v" = "Cohort {.strong {name}} created.")) + + return(cdm[[name]]) +} + +addDomains <- function(cohortCodelist, cdm) { + # insert table as temporary + tmpName <- omopgenerics::uniqueTableName() + cdm <- omopgenerics::insertTable( + cdm = cdm, name = tmpName, table = cohortCodelist + ) + cdm[[tmpName]] <- cdm[[tmpName]] |> dplyr::compute() + + cohortCodelist <- cdm[["concept"]] |> + dplyr::select("concept_id", "domain_id") |> + dplyr::right_join(cdm[[tmpName]], by = "concept_id") |> + dplyr::mutate("domain_id" = tolower(.data$domain_id)) |> + dplyr::compute() + + omopgenerics::dropTable(cdm = cdm, name = tmpName) + + return(cohortCodelist) +} +collapseGap <- function(cohort, gap) { + start <- cohort |> + dplyr::select( + "cohort_definition_id", "subject_id", "date" = "cohort_start_date" + ) |> + dplyr::mutate("date_id" = -1) + end <- cohort |> + dplyr::select( + "cohort_definition_id", "subject_id", "date" = "cohort_end_date" + ) |> + dplyr::mutate("date_id" = 1) + start |> + dplyr::union_all(end) |> + dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |> + dplyr::arrange(.data$date, .data$date_id) |> + dplyr::mutate("cum_id" = cumsum(.data$date_id)) |> + dplyr::filter( + .data$cum_id == 0 | (.data$cum_id == -1 & .data$date_id == -1) + ) |> + dplyr::mutate( + "name" = dplyr::if_else( + .data$date_id == -1, "cohort_start_date", "cohort_end_date" + ), + "era_id" = dplyr::if_else(.data$date_id == -1, 1, 0) + ) |> + dplyr::mutate("era_id" = cumsum(as.numeric(.data$era_id))) |> + dplyr::ungroup() |> + dplyr::arrange() |> + dplyr::select( + "cohort_definition_id", "subject_id", "era_id", "name", "date" + ) |> + tidyr::pivot_wider(names_from = "name", values_from = "date") |> + dplyr::select(-"era_id") +} diff --git a/R/generateMatchedCohortSet.R b/R/matchCohort.R similarity index 97% rename from R/generateMatchedCohortSet.R rename to R/matchCohort.R index c00e44df..98a0cfb6 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/matchCohort.R @@ -22,7 +22,7 @@ #' library(dplyr) #' cdm <- mockDrugUtilisation(numberIndividuals = 100) #' cdm <- cdm %>% -#' generateMatchedCohortSet(name = "new_matched_cohort", +#' matchCohort(name = "new_matched_cohort", #' targetCohortName = "cohort1", #' targetCohortId = c(1,2), #' matchSex = TRUE, @@ -30,13 +30,13 @@ #' ratio = 2) #' cdm$new_matched_cohort #' -generateMatchedCohortSet <- function(cdm, - name, - targetCohortName, - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 1){ +matchCohort <- function(cdm, + name, + targetCohortName, + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 1){ cli::cli_inform("Starting matching") # validate initial input diff --git a/R/reexports-omopgenerics.R b/R/reexports-omopgenerics.R index 0d4f2e18..8a66bc16 100644 --- a/R/reexports-omopgenerics.R +++ b/R/reexports-omopgenerics.R @@ -9,3 +9,7 @@ omopgenerics::settings #' @importFrom omopgenerics attrition #' @export omopgenerics::attrition + +#' @importFrom omopgenerics cohortCodelist +#' @export +omopgenerics::cohortCodelist diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 00000000..32f95982 Binary files /dev/null and b/R/sysdata.rda differ diff --git a/R/validateFunctions.R b/R/validateFunctions.R index be440ad2..03d8f14a 100644 --- a/R/validateFunctions.R +++ b/R/validateFunctions.R @@ -2,6 +2,14 @@ validateCDM <- function(cdm) { if (!isTRUE(inherits(cdm, "cdm_reference"))) { cli::cli_abort("cohort must be part of a cdm reference") } + return(invisible(cdm)) +} + +validateCdm <- function(cdm) { + if (!isTRUE(inherits(cdm, "cdm_reference"))) { + cli::cli_abort("cdm must be a cdm_reference object.") + } + return(invisible(cdm)) } validateCohortTable <- function(cohort) { @@ -46,3 +54,23 @@ validateDateRange<-function(dateRange){ } return(invisible(dateRange)) } + +validateName <- function(name) { + em <- c( + "x" = "{name} it is not a valida value for name.", + "i" = "It must be:", + "*" = "lowercase character vector of length 1", + "*" = "NA or NULL values are not allowed" + ) + if (!is.character(name) | length(name) != 1 | is.na(name)) { + cli::cli_abort(em) + } + if (tolower(name) != name){ + cli::cli_abort(em) + } + return(invisible(name)) +} + +validateConceptSet <- function(conceptSet) { + omopgenerics::newCodelist(conceptSet) +} diff --git a/data-raw/domainsData.R b/data-raw/domainsData.R new file mode 100644 index 00000000..72a2e899 --- /dev/null +++ b/data-raw/domainsData.R @@ -0,0 +1,14 @@ +## code to prepare `DATASET` dataset goes here + +domainsData <- dplyr::tribble( + ~"domain_id", ~"table", ~"concept", ~"start", ~"end", + "drug", "drug_exposure", "drug_concept_id", "drug_exposure_start_date", "drug_exposure_end_date", + "condition", "condition_occurrence", "condition_concept_id", "condition_start_date", "condition_end_date", + "procedure", "procedure_occurrence", "procedure_concept_id", "procedure_date", "procedure_date", + "observation", "observation", "observation_concept_id", "observation_date", "observation_date", + "measurement", "measurement", "measurement_concept_id", "measurement_date", "measurement_date", + "visit", "visit_occurrence", "visit_concept_id", "visit_start_date", "visit_end_date", + "device", "device_exposure", "device_concept_id", "device_exposure_start_date", "device_exposure_end_date" +) + +usethis::use_data(domainsData, internal = TRUE, overwrite = TRUE) diff --git a/man/CohortConstructor-package.Rd b/man/CohortConstructor-package.Rd index bbb21399..b98a3826 100644 --- a/man/CohortConstructor-package.Rd +++ b/man/CohortConstructor-package.Rd @@ -18,5 +18,10 @@ Authors: \item Yuchen Guo \email{yuchen.guo@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-0847-4855}{ORCID}) } +Other contributors: +\itemize{ + \item Nuria Mercade-Besora \email{nuria.mercadebesora@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0006-7948-3747}{ORCID}) [contributor] +} + } \keyword{internal} diff --git a/man/conceptCohort.Rd b/man/conceptCohort.Rd new file mode 100644 index 00000000..b9867896 --- /dev/null +++ b/man/conceptCohort.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/conceptCohort.R +\name{conceptCohort} +\alias{conceptCohort} +\title{Generate a cohort based on a concept set. The current supported domains are:} +\usage{ +conceptCohort(cdm, conceptSet, name) +} +\arguments{ +\item{cdm}{A cdm_reference object.} + +\item{conceptSet}{A conceptSet, can either be a list of concepts, a codelist +or a conceptSetExpression (TO DO).} + +\item{name}{Name of the cohort in the cdm object.} +} +\value{ +A cohort_table object. +} +\description{ +Generate a cohort based on a concept set. The current supported domains are: +} diff --git a/man/generateMatchedCohortSet.Rd b/man/matchCohort.Rd similarity index 88% rename from man/generateMatchedCohortSet.Rd rename to man/matchCohort.Rd index 39a61b7a..17127efc 100644 --- a/man/generateMatchedCohortSet.Rd +++ b/man/matchCohort.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generateMatchedCohortSet.R -\name{generateMatchedCohortSet} -\alias{generateMatchedCohortSet} +% Please edit documentation in R/matchCohort.R +\name{matchCohort} +\alias{matchCohort} \title{Generate a new cohort matched cohort from a preexisting target cohort. The new cohort will contain individuals not included in the target cohort with same year of birth (matchYearOfBirth = TRUE) and same sex (matchSex = TRUE).} \usage{ -generateMatchedCohortSet( +matchCohort( cdm, name, targetCohortName, @@ -47,7 +47,7 @@ library(CohortConstructor) library(dplyr) cdm <- mockDrugUtilisation(numberIndividuals = 100) cdm <- cdm \%>\% - generateMatchedCohortSet(name = "new_matched_cohort", + matchCohort(name = "new_matched_cohort", targetCohortName = "cohort1", targetCohortId = c(1,2), matchSex = TRUE, diff --git a/man/reexports.Rd b/man/reexports.Rd index b709532d..ca3e6d4b 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -6,6 +6,7 @@ \alias{cohortCount} \alias{settings} \alias{attrition} +\alias{cohortCodelist} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -13,6 +14,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{omopgenerics}{\code{\link[omopgenerics]{attrition}}, \code{\link[omopgenerics]{cohortCount}}, \code{\link[omopgenerics]{settings}}} + \item{omopgenerics}{\code{\link[omopgenerics]{attrition}}, \code{\link[omopgenerics]{cohortCodelist}}, \code{\link[omopgenerics]{cohortCount}}, \code{\link[omopgenerics]{settings}}} }} diff --git a/tests/testthat/test-conceptCohort.R b/tests/testthat/test-conceptCohort.R new file mode 100644 index 00000000..c6844321 --- /dev/null +++ b/tests/testthat/test-conceptCohort.R @@ -0,0 +1,191 @@ +test_that("expected errors and messages", { + + cdm <- omock::mockCdmReference() |> + omock::mockPerson() |> + omock::mockObservationPeriod() + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "concept", table = dplyr::tibble( + "concept_id" = 1, + "concept_name" = "my concept", + "domain_id" = "adsf", + "vocabulary_id" = NA, + "concept_class_id" = NA, + "concept_code" = NA, + "valid_start_date" = NA, + "valid_end_date" = NA + ) + ) + + # not a cdm reference + expect_error(conceptCohort(cdm = NULL, name = "cohort", conceptSet = NULL)) + + # wrong naming + expect_error(conceptCohort(cdm = cdm, name = NA, conceptSet = NULL)) + expect_error(conceptCohort(cdm = cdm, name = 1, conceptSet = NULL)) + expect_error(conceptCohort(cdm = cdm, name = c("ass", "asdf"), conceptSet = NULL)) + expect_error(conceptCohort(cdm = cdm, name = "AAAA", conceptSet = NULL)) + expect_no_error(x <- conceptCohort(cdm = cdm, name = "my_cohort", conceptSet = NULL)) + expect_true("my_cohort" == omopgenerics::tableName(x)) + + # empty cohort + expect_no_error(x <- conceptCohort(cdm = cdm, conceptSet = NULL, name = "cohort")) + expect_true(inherits(x, "cohort_table")) + expect_true(x |> dplyr::collect() |> nrow() == 0) + + # not codelist + expect_error(x <- conceptCohort(cdm = cdm, conceptSet = 1, name = "cohort")) + expect_error(x <- conceptCohort(cdm = cdm, conceptSet = list(1), name = "cohort")) + expect_message(expect_message( + x <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort") + )) + expect_true(inherits(x, "cohort_table")) + expect_true(x |> dplyr::collect() |> nrow() == 0) + expect_true(omopgenerics::tableName(x) == "cohort") + expect_true(setdiff(names(omopgenerics::cdmReference(x)), names(cdm)) == "cohort") + expect_identical(setdiff(names(cdm), names(omopgenerics::cdmReference(x))), character()) + expect_equal( + settings(x), dplyr::tibble("cohort_definition_id" = 1L, "cohort_name" = "a") + ) + expect_true(nrow(attrition(x)) == 1) + # currently only standard concepts are includes in cohortCodelist see https://github.com/oxford-pharmacoepi/CohortConstructor/issues/74 + expect_warning(expect_equal( + cohortCodelist(x, 1), omopgenerics::newCodelist(list()) + )) + expect_message(expect_message( + x <- conceptCohort(cdm = cdm, conceptSet = list(a = 2), name = "cohort") + )) + +}) + +test_that("simple example", { + cdm <- omock::mockCdmReference() |> + omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble( + "cohort_definition_id" = 1, + "subject_id" = c(1, 2, 3), + "cohort_start_date" = as.Date("2020-01-01"), + "cohort_end_date" = as.Date("2029-12-31") + ))) + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "concept", table = dplyr::tibble( + "concept_id" = 1, + "concept_name" = "my concept", + "domain_id" = "drUg", + "vocabulary_id" = NA, + "concept_class_id" = NA, + "concept_code" = NA, + "valid_start_date" = NA, + "valid_end_date" = NA + ) + ) + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "drug_exposure", table = dplyr::tibble( + "drug_exposure_id" = 1:11, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), + "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804), + "drug_type_concept_id" = 1 + ) |> + dplyr::mutate( + "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"), + "drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01") + ) + ) + + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort")) + + expect_true(cohort |> dplyr::tally() |> dplyr::pull() == 4) + expect_true(cohortCount(cohort)$number_records == 4) + expect_true(cohortCount(cohort)$number_subjects == 2) + expect_true(attrition(cohort) |> nrow() == 1) + expect_identical( + settings(cohort), + dplyr::tibble("cohort_definition_id" = 1L, "cohort_name" = "a") + ) + expect_identical(cohortCodelist(cohort, 1), omopgenerics::newCodelist(list(a = 1))) + cohort <- cohort |> + dplyr::collect() |> + dplyr::as_tibble() |> + dplyr::arrange(subject_id, cohort_start_date) + attr(cohort, "cohort_attrition") <- NULL + attr(cohort, "cohort_codelist") <- NULL + attr(cohort, "cohort_set") <- NULL + expect_equal( + cohort, + dplyr::tibble( + "cohort_definition_id" = 1L, + "subject_id" = c(1L, 1L, 1L, 2L), + "cohort_start_date" = as.Date(c(0, 1500, 1800, 10), origin = "2020-01-01"), + "cohort_end_date" = as.Date(c(800, 1600, 1804, 2000), origin = "2020-01-01") + ) + ) + +}) + +test_that("simple example duckdb", { + cdm <- omock::mockCdmReference() |> + omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble( + "cohort_definition_id" = 1, + "subject_id" = c(1, 2, 3), + "cohort_start_date" = as.Date("2020-01-01"), + "cohort_end_date" = as.Date("2029-12-31") + ))) + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "concept", table = dplyr::tibble( + "concept_id" = 1, + "concept_name" = "my concept", + "domain_id" = "drUg", + "vocabulary_id" = NA, + "concept_class_id" = NA, + "concept_code" = NA, + "valid_start_date" = NA, + "valid_end_date" = NA + ) + ) + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "drug_exposure", table = dplyr::tibble( + "drug_exposure_id" = 1:11, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), + "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804), + "drug_type_concept_id" = 1 + ) |> + dplyr::mutate( + "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"), + "drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01") + ) + ) + + cdm <- CDMConnector::copyCdmTo(con = DBI::dbConnect(duckdb::duckdb()), cdm = cdm, schema = "main") + + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort")) + + expect_true(cohort |> dplyr::tally() |> dplyr::pull() == 4) + expect_true(cohortCount(cohort)$number_records == 4) + expect_true(cohortCount(cohort)$number_subjects == 2) + expect_true(attrition(cohort) |> nrow() == 1) + expect_identical( + settings(cohort), + dplyr::tibble("cohort_definition_id" = 1L, "cohort_name" = "a") + ) + expect_identical(cohortCodelist(cohort, 1), omopgenerics::newCodelist(list(a = 1))) + cohort <- cohort |> + dplyr::collect() |> + dplyr::as_tibble() |> + dplyr::arrange(subject_id, cohort_start_date) + attr(cohort, "cohort_attrition") <- NULL + attr(cohort, "cohort_codelist") <- NULL + attr(cohort, "cohort_set") <- NULL + expect_equal( + cohort, + dplyr::tibble( + "cohort_definition_id" = 1L, + "subject_id" = c(1L, 1L, 1L, 2L), + "cohort_start_date" = as.Date(c(0, 1500, 1800, 10), origin = "2020-01-01"), + "cohort_end_date" = as.Date(c(800, 1600, 1804, 2000), origin = "2020-01-01") + ) + ) + + CDMConnector::cdmDisconnect(cdm = cdm) +}) diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-matchCohort.R similarity index 63% rename from tests/testthat/test-generateMatchedCohortSet.R rename to tests/testthat/test-matchCohort.R index 65887193..ac03f997 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-matchCohort.R @@ -1,4 +1,4 @@ -test_that("generateMatchedCohortSet runs without errors", { +test_that("matchCohort runs without errors", { # Create cdm object cdm <- DrugUtilisation::generateConceptCohortSet( cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), @@ -8,10 +8,10 @@ test_that("generateMatchedCohortSet runs without errors", { requiredObservation = c(180, 180), overwrite = TRUE) - expect_no_error(a <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - ratio = 2)) + expect_no_error(a <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + ratio = 2)) cdm <- DrugUtilisation::generateConceptCohortSet( cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), @@ -21,50 +21,50 @@ test_that("generateMatchedCohortSet runs without errors", { requiredObservation = c(10,10), overwrite = TRUE) - expect_no_error(generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases")) - - expect_no_error(generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - ratio = 3)) - - expect_no_error(generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - ratio = Inf)) - - expect_no_error(generateMatchedCohortSet(cdm, - name = "new_cohort", - matchSex = FALSE, - matchYearOfBirth = TRUE, - targetCohortName = "cases")) - - expect_no_error(generateMatchedCohortSet(cdm, - name = "new_cohort", - matchSex = TRUE, - matchYearOfBirth = FALSE, - targetCohortName = "cases")) - - expect_no_error(b <- generateMatchedCohortSet(cdm, - name = "new_cohort", - matchSex = FALSE, - matchYearOfBirth = FALSE, - targetCohortName = "cases")) - - expect_no_error(a <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - targetCohortId = c(1,2), - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 2)) + expect_no_error(matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases")) + + expect_no_error(matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + ratio = 3)) + + expect_no_error(matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + ratio = Inf)) + + expect_no_error(matchCohort(cdm, + name = "new_cohort", + matchSex = FALSE, + matchYearOfBirth = TRUE, + targetCohortName = "cases")) + + expect_no_error(matchCohort(cdm, + name = "new_cohort", + matchSex = TRUE, + matchYearOfBirth = FALSE, + targetCohortName = "cases")) + + expect_no_error(b <- matchCohort(cdm, + name = "new_cohort", + matchSex = FALSE, + matchYearOfBirth = FALSE, + targetCohortName = "cases")) + + expect_no_error(a <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + targetCohortId = c(1,2), + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 2)) }) -test_that("generateMatchedCohortSet, no duplicated people within a cohort", { +test_that("matchCohort, no duplicated people within a cohort", { followback <- 180 cdm <- DrugUtilisation::generateConceptCohortSet( @@ -76,13 +76,13 @@ test_that("generateMatchedCohortSet, no duplicated people within a cohort", { overwrite = TRUE ) - a <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cohort", - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 1) + a <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cohort", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 1) p1 <- a$new_cohort %>% dplyr::filter(cohort_definition_id == 1) %>% @@ -92,13 +92,13 @@ test_that("generateMatchedCohortSet, no duplicated people within a cohort", { expect_true(length(p1) == length(unique(p1))) - a <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cohort", - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 5) + a <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cohort", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 5) p1 <- a$new_cohort %>% dplyr::filter(cohort_definition_id == 2) %>% dplyr::select(subject_id) %>% @@ -121,20 +121,20 @@ test_that("check that we obtain expected result when ratio is 1", { ) # Number of counts for the initial cohorts are the same as in the matched cohorts - matched_cohorts <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cohort", - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 1) + matched_cohorts <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cohort", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 1) expect_true(nrow(omopgenerics::cohortCount(matched_cohorts$new_cohort) %>% - dplyr::left_join(omopgenerics::settings(matched_cohorts$new_cohort), - by = "cohort_definition_id") %>% - dplyr::filter(stringr::str_detect(cohort_name, "c_1")) %>% - dplyr::select("number_records") %>% - dplyr::distinct()) == 1) + dplyr::left_join(omopgenerics::settings(matched_cohorts$new_cohort), + by = "cohort_definition_id") %>% + dplyr::filter(stringr::str_detect(cohort_name, "c_1")) %>% + dplyr::select("number_records") %>% + dplyr::distinct()) == 1) expect_true(nrow(omopgenerics::cohortCount(matched_cohorts$new_cohort) %>% dplyr::left_join(omopgenerics::settings(matched_cohorts$new_cohort), by = "cohort_definition_id") %>% @@ -189,7 +189,7 @@ test_that("test exactMatchingCohort works if there are no subjects", { overwrite = TRUE ) cdm$cases <- cdm$cases %>% dplyr::filter(subject_id == 0) - cdm <- generateMatchedCohortSet( + cdm <- matchCohort( cdm, name = "new_cohort", targetCohortName = "cases", @@ -210,13 +210,13 @@ test_that("test exactMatchingCohort works if one of the cohorts does not have an ) expect_no_error( - cdm <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 1) + cdm <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 1) ) }) @@ -232,13 +232,13 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", { ) expect_no_error( - a <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 5) + a <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 5) ) }) @@ -277,13 +277,13 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", { overwrite = TRUE ) - a <- generateMatchedCohortSet(cdm, - name = "new_cohort", - targetCohortName = "cases", - targetCohortId = NULL, - matchSex = TRUE, - matchYearOfBirth = TRUE, - ratio = 4) + a <- matchCohort(cdm, + name = "new_cohort", + targetCohortName = "cases", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 4) expect_true(a[["new_cohort"]] %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>%