From def9761f68b31793113e3f4ada74908114b4c570 Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:24:54 +0000 Subject: [PATCH] requireCohortIntersectFlag --- NAMESPACE | 1 + R/requireCohortIntersectFlag.R | 86 ++++++++++++++++ R/requireDemographics.R | 9 +- README.Rmd | 19 +++- README.md | 98 +++++++++++++------ man/requireCohortIntersectFlag.Rd | 40 ++++++++ .../testthat/test-generateMatchedCohortSet.R | 2 +- .../test-requireCohortIntersectFlag.R | 57 +++++++++++ 8 files changed, 272 insertions(+), 40 deletions(-) create mode 100644 R/requireCohortIntersectFlag.R create mode 100644 man/requireCohortIntersectFlag.Rd create mode 100644 tests/testthat/test-requireCohortIntersectFlag.R diff --git a/NAMESPACE b/NAMESPACE index be8a2b86..3b040513 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(generateMatchedCohortSet) export(getIdentifier) export(joinOverlap) export(requireAge) +export(requireCohortIntersectFlag) export(requireDemographics) export(requireFutureObservation) export(requirePriorObservation) diff --git a/R/requireCohortIntersectFlag.R b/R/requireCohortIntersectFlag.R new file mode 100644 index 00000000..14293fa3 --- /dev/null +++ b/R/requireCohortIntersectFlag.R @@ -0,0 +1,86 @@ + + +#' Require cohort subjects are present in another cohort +#' +#' @param x Cohort table +#' @param targetCohortTable name of the cohort that we want to check for overlap +#' @param targetCohortId vector of cohort definition ids to include +#' @param indexDate Variable in x that contains the date to compute the +#' intersection. +#' @param targetStartDate date of reference in cohort table, either for start +#' (in overlap) or on its own (for incidence) +#' @param targetEndDate date of reference in cohort table, either for end +#' (overlap) or NULL (if incidence) +#' @param window window to consider events of +#' +#' @return Cohort table with only those in the other cohort kept +#' @export +#' +#' @examples +requireCohortIntersectFlag <- function(x, + targetCohortTable, + targetCohortId = NULL, + indexDate = "cohort_start_date", + targetStartDate = "cohort_start_date", + targetEndDate = "cohort_end_date", + window = list(c(0, Inf))){ + +cols <- unique(c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date", + indexDate)) + +if(is.list(window)){ +window_start <- window[[1]][1] +window_end <- window[[1]][2] +} else { + window_start <- window[1] + window_end <- window[2] +} + +cdm <- attr(x, "cdm_reference") + +if(is.null(cdm[[targetCohortTable]])){ + cli::cli_abort("targetCohortTable not found in cdm reference") +} + +if(is.null(targetCohortId)){ +targetCohortId <- CDMConnector::cohortSet(cdm[[targetCohortTable]]) %>% + dplyr::pull("cohort_definition_id") +} + +if(length(targetCohortId) > 1){ + cli::cli_abort("Only one target cohort is currently supported") +} + +target_name <- CDMConnector::cohort_set(cdm[[targetCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) %>% + dplyr::pull("cohort_name") + +subsetCohort <- x %>% + dplyr::select(dplyr::all_of(.env$cols)) %>% + PatientProfiles::addCohortIntersectFlag( + cdm = cdm, + targetCohortTable = targetCohortTable, + targetCohortId = targetCohortId, + indexDate = indexDate, + targetStartDate = targetStartDate, + targetEndDate = targetEndDate, + window = window, + nameStyle = "intersect_cohort" + ) %>% + dplyr::filter(.data$intersect_cohort == 1) %>% + dplyr::select(!"intersect_cohort") + +x %>% + dplyr::inner_join(subsetCohort, + by = c(cols)) %>% + CDMConnector::recordCohortAttrition(reason = + glue::glue("In cohort {target_name} between ", + "{window_start} and ", + "{window_end} days relative to ", + "{indexDate}")) + +} + + + diff --git a/R/requireDemographics.R b/R/requireDemographics.R index c5ed2deb..ece6dd60 100644 --- a/R/requireDemographics.R +++ b/R/requireDemographics.R @@ -222,9 +222,9 @@ demographicsFilter <- function(cohort, # join later working_cohort <- cohort %>% - dplyr::select(c("cohort_definition_id", "subject_id", + dplyr::select(dplyr::all_of(c("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date", - indexDate)) %>% + indexDate))) %>% PatientProfiles::addDemographics(indexDate = indexDate) %>% dplyr::filter(.data$age >= .env$minAge, .data$age <= .env$maxAge, @@ -234,8 +234,9 @@ demographicsFilter <- function(cohort, cohort <- cohort %>% dplyr::inner_join(working_cohort %>% - dplyr::select(c("cohort_definition_id", "subject_id", - "cohort_start_date", "cohort_end_date")), + dplyr::select(dplyr::all_of(c("cohort_definition_id", + "subject_id", + "cohort_start_date", "cohort_end_date"))), by = c("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date")) cohort diff --git a/README.Rmd b/README.Rmd index 7e9d2824..4607590d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -51,7 +51,6 @@ cdm <- generate_concept_cohort_set(cdm = cdm, name = "medications", concept_set = list("diclofenac" = 1124300, "acetaminophen" = 1127433)) -cohortSet(cdm$combinations) cohort_count(cdm$medications) cohort_attrition(cdm$medications) ``` @@ -61,7 +60,21 @@ cohort_attrition(cdm$medications) cdm$medications %>% requireDemographics(ageRange = list(c(40, 65)), sex = "Female") -cohortSet(cdm$combinations) +cohort_count(cdm$medications) +cohort_attrition(cdm$medications) +``` + +### Require presence in another cohort +We can also require that individuals are in another cohort over some window. Here for example we require that study participants are in a GI bleed cohort any time prior up to their entry in the medications cohort. +```{r} +cdm <- generate_concept_cohort_set(cdm = cdm, + name = "gibleed", + concept_set = list("gibleed" = 192671)) + +cdm$medications <- cdm$medications %>% + requireCohortIntersectFlag(targetCohortTable = "gibleed", + window = c(-Inf, 0)) + cohort_count(cdm$medications) cohort_attrition(cdm$medications) ``` @@ -76,8 +89,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm, name = "combinations", targetCohortName = "medications") - - cohortSet(cdm$combinations) cohortCount(cdm$combinations) diff --git a/README.md b/README.md index d0ab06a5..bf1313ba 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,9 @@ devtools::install_github("oxford-pharmacoepi/CohortConstructor") ``` r library(CDMConnector) +#> Warning: package 'CDMConnector' was built under R version 4.2.3 library(PatientProfiles) +#> Warning: package 'PatientProfiles' was built under R version 4.2.3 library(CohortConstructor) con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir()) @@ -44,19 +46,19 @@ cdm <- generate_concept_cohort_set(cdm = cdm, name = "medications", concept_set = list("diclofenac" = 1124300, "acetaminophen" = 1127433)) -cohort_set(cdm$medications) -#> # A tibble: 2 × 6 -#> cohort_definition_id cohort_name limit prior_observation future_observation -#> -#> 1 1 diclofenac first 0 0 -#> 2 2 acetaminophen first 0 0 -#> # ℹ 1 more variable: end cohort_count(cdm$medications) #> # A tibble: 2 × 3 #> cohort_definition_id number_records number_subjects #> #> 1 1 830 830 #> 2 2 2580 2580 +cohort_attrition(cdm$medications) +#> # A tibble: 2 × 7 +#> cohort_definition_id number_records number_subjects reason_id reason +#> +#> 1 1 830 830 1 Qualifying init… +#> 2 2 2580 2580 1 Qualifying init… +#> # ℹ 2 more variables: excluded_records , excluded_subjects ``` ### Applying demographic requirements @@ -66,33 +68,69 @@ cdm$medications %>% requireDemographics(ageRange = list(c(40, 65)), sex = "Female") #> # Source: SQL [?? x 4] -#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\Rtmpqm0Jdd\file42405cf744a.duckdb] +#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpIngDmK\file4f3841e26e9e.duckdb] #> cohort_definition_id subject_id cohort_start_date cohort_end_date -#> -#> 1 1 730 2002-11-18 2018-12-16 -#> 2 1 1169 1975-12-23 2018-08-27 -#> 3 1 1808 2003-12-18 2019-06-05 -#> 4 1 2858 1953-05-26 2019-05-29 -#> 5 1 2909 1986-09-03 2007-07-23 -#> 6 1 2939 1997-10-31 2018-09-04 -#> 7 1 3175 1999-05-02 2018-09-04 -#> 8 1 5240 1984-05-31 2019-03-12 -#> 9 2 1338 1997-02-22 2019-06-21 -#> 10 2 2026 2009-02-11 2019-06-19 +#> +#> 1 1 18 2009-03-21 2018-11-07 +#> 2 1 893 1993-09-26 2019-05-06 +#> 3 1 2396 1961-08-30 2001-02-28 +#> 4 1 3159 2000-01-26 2018-10-18 +#> 5 1 3376 1994-05-06 2019-06-28 +#> 6 1 4071 1998-08-07 2018-12-27 +#> 7 1 4636 1986-10-26 2018-10-23 +#> 8 1 4690 2001-03-20 2018-10-14 +#> 9 1 4701 2011-07-10 2018-12-22 +#> 10 2 3951 1997-12-11 2019-04-13 #> # ℹ more rows -cohort_set(cdm$medications) -#> # A tibble: 2 × 6 -#> cohort_definition_id cohort_name limit prior_observation future_observation -#> -#> 1 1 diclofenac first 0 0 -#> 2 2 acetaminophen first 0 0 -#> # ℹ 1 more variable: end cohort_count(cdm$medications) #> # A tibble: 2 × 3 #> cohort_definition_id number_records number_subjects #> #> 1 1 156 156 #> 2 2 76 76 +cohort_attrition(cdm$medications) +#> # A tibble: 4 × 7 +#> cohort_definition_id number_records number_subjects reason_id reason +#> +#> 1 1 830 830 1 Qualifying init… +#> 2 2 2580 2580 1 Qualifying init… +#> 3 2 76 76 2 Demographic req… +#> 4 1 156 156 2 Demographic req… +#> # ℹ 2 more variables: excluded_records , excluded_subjects +``` + +### Require presence in another cohort + +We can also require that individuals are in another cohort over some +window. Here for example we require that study participants are in a GI +bleed cohort any time prior up to their entry in the medications cohort. + +``` r +cdm <- generate_concept_cohort_set(cdm = cdm, + name = "gibleed", + concept_set = list("gibleed" = 192671)) + +cdm$medications <- cdm$medications %>% + requireCohortIntersectFlag(targetCohortTable = "gibleed", + window = c(-Inf, 0)) + +cohort_count(cdm$medications) +#> # A tibble: 2 × 3 +#> cohort_definition_id number_records number_subjects +#> +#> 1 2 36 36 +#> 2 1 0 0 +cohort_attrition(cdm$medications) +#> # A tibble: 6 × 7 +#> cohort_definition_id number_records number_subjects reason_id reason +#> +#> 1 1 830 830 1 Qualifying init… +#> 2 2 2580 2580 1 Qualifying init… +#> 3 2 76 76 2 Demographic req… +#> 4 1 156 156 2 Demographic req… +#> 5 2 36 36 3 In cohort gible… +#> 6 1 0 0 3 In cohort gible… +#> # ℹ 2 more variables: excluded_records , excluded_subjects ``` ### Combining cohorts @@ -104,8 +142,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm, name = "combinations", targetCohortName = "medications") - - cohortSet(cdm$combinations) #> # A tibble: 3 × 5 #> cohort_definition_id cohort_name diclofenac acetaminophen mutually_exclusive @@ -117,9 +153,9 @@ cohortCount(cdm$combinations) #> # A tibble: 3 × 3 #> cohort_definition_id number_records number_subjects #> -#> 1 1 830 830 -#> 2 2 2580 2580 -#> 3 3 805 805 +#> 1 2 36 36 +#> 2 1 0 0 +#> 3 3 0 0 cdmDisconnect(cdm) diff --git a/man/requireCohortIntersectFlag.Rd b/man/requireCohortIntersectFlag.Rd new file mode 100644 index 00000000..df833903 --- /dev/null +++ b/man/requireCohortIntersectFlag.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireCohortIntersectFlag.R +\name{requireCohortIntersectFlag} +\alias{requireCohortIntersectFlag} +\title{Require cohort subjects are present in another cohort} +\usage{ +requireCohortIntersectFlag( + x, + targetCohortTable, + targetCohortId = NULL, + indexDate = "cohort_start_date", + targetStartDate = "cohort_start_date", + targetEndDate = "cohort_end_date", + window = list(c(0, Inf)) +) +} +\arguments{ +\item{x}{Cohort table} + +\item{targetCohortTable}{name of the cohort that we want to check for overlap} + +\item{targetCohortId}{vector of cohort definition ids to include} + +\item{indexDate}{Variable in x that contains the date to compute the +intersection.} + +\item{targetStartDate}{date of reference in cohort table, either for start +(in overlap) or on its own (for incidence)} + +\item{targetEndDate}{date of reference in cohort table, either for end +(overlap) or NULL (if incidence)} + +\item{window}{window to consider events of} +} +\value{ +Cohort table with only those in the other cohort kept +} +\description{ +Require cohort subjects are present in another cohort +} diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R index e832f2f9..87c20da4 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-generateMatchedCohortSet.R @@ -295,7 +295,7 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", { outc <- a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% - dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09") + dplyr::pull() %in% as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")) expect_true(unique(outc) == TRUE) }) diff --git a/tests/testthat/test-requireCohortIntersectFlag.R b/tests/testthat/test-requireCohortIntersectFlag.R new file mode 100644 index 00000000..1a2cc90d --- /dev/null +++ b/tests/testthat/test-requireCohortIntersectFlag.R @@ -0,0 +1,57 @@ +test_that("requiring presence in another cohort", { + cdm <- PatientProfiles::mockPatientProfiles(patient_size = 100, + drug_exposure_size = 100) + + cdm$cohort3 <- requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf)) + + expect_true(all(cdm$cohort3 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull() %in% + intersect(cdm$cohort1 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull(), + cdm$cohort2 %>% + dplyr::filter(cohort_definition_id == 1) %>% + dplyr::distinct(subject_id) %>% + dplyr::pull()))) + + cdm$cohort4 <- requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 2, + window = c(-Inf, Inf)) + expect_true(all(cdm$cohort4 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull() %in% + intersect(cdm$cohort1 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull(), + cdm$cohort2 %>% + dplyr::filter(cohort_definition_id == 2) %>% + dplyr::distinct(subject_id) %>% + dplyr::pull()))) + + + # expected errors + # only support one target id at the moment + expect_error(requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = c(1,2), + window = c(-Inf, Inf))) + + expect_error(requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort22", # does not exist + targetCohortId = 1, + window = c(-Inf, Inf))) + expect_error(requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 10, # does not exist + window = c(-Inf, Inf))) + + CDMConnector::cdm_disconnect(cdm) + + }) + +