Skip to content

Commit

Permalink
Merge pull request #21 from oxford-pharmacoepi/requireCohortIntersect…
Browse files Browse the repository at this point in the history
…Flag

requireCohortIntersectFlag
  • Loading branch information
edward-burn authored Dec 7, 2023
2 parents bd43172 + def9761 commit 8b75f98
Show file tree
Hide file tree
Showing 8 changed files with 272 additions and 40 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(generateMatchedCohortSet)
export(getIdentifier)
export(joinOverlap)
export(requireAge)
export(requireCohortIntersectFlag)
export(requireDemographics)
export(requireFutureObservation)
export(requirePriorObservation)
Expand Down
86 changes: 86 additions & 0 deletions R/requireCohortIntersectFlag.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@


#' Require cohort subjects are present in another cohort
#'
#' @param x Cohort table
#' @param targetCohortTable name of the cohort that we want to check for overlap
#' @param targetCohortId vector of cohort definition ids to include
#' @param indexDate Variable in x that contains the date to compute the
#' intersection.
#' @param targetStartDate date of reference in cohort table, either for start
#' (in overlap) or on its own (for incidence)
#' @param targetEndDate date of reference in cohort table, either for end
#' (overlap) or NULL (if incidence)
#' @param window window to consider events of
#'
#' @return Cohort table with only those in the other cohort kept
#' @export
#'
#' @examples
requireCohortIntersectFlag <- function(x,
targetCohortTable,
targetCohortId = NULL,
indexDate = "cohort_start_date",
targetStartDate = "cohort_start_date",
targetEndDate = "cohort_end_date",
window = list(c(0, Inf))){

cols <- unique(c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
indexDate))

if(is.list(window)){
window_start <- window[[1]][1]
window_end <- window[[1]][2]
} else {
window_start <- window[1]
window_end <- window[2]
}

cdm <- attr(x, "cdm_reference")

if(is.null(cdm[[targetCohortTable]])){
cli::cli_abort("targetCohortTable not found in cdm reference")
}

if(is.null(targetCohortId)){
targetCohortId <- CDMConnector::cohortSet(cdm[[targetCohortTable]]) %>%
dplyr::pull("cohort_definition_id")
}

if(length(targetCohortId) > 1){
cli::cli_abort("Only one target cohort is currently supported")
}

target_name <- CDMConnector::cohort_set(cdm[[targetCohortTable]]) %>%
dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) %>%
dplyr::pull("cohort_name")

subsetCohort <- x %>%
dplyr::select(dplyr::all_of(.env$cols)) %>%
PatientProfiles::addCohortIntersectFlag(
cdm = cdm,
targetCohortTable = targetCohortTable,
targetCohortId = targetCohortId,
indexDate = indexDate,
targetStartDate = targetStartDate,
targetEndDate = targetEndDate,
window = window,
nameStyle = "intersect_cohort"
) %>%
dplyr::filter(.data$intersect_cohort == 1) %>%
dplyr::select(!"intersect_cohort")

x %>%
dplyr::inner_join(subsetCohort,
by = c(cols)) %>%
CDMConnector::recordCohortAttrition(reason =
glue::glue("In cohort {target_name} between ",
"{window_start} and ",
"{window_end} days relative to ",
"{indexDate}"))

}



9 changes: 5 additions & 4 deletions R/requireDemographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ demographicsFilter <- function(cohort,
# join later

working_cohort <- cohort %>%
dplyr::select(c("cohort_definition_id", "subject_id",
dplyr::select(dplyr::all_of(c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
indexDate)) %>%
indexDate))) %>%
PatientProfiles::addDemographics(indexDate = indexDate) %>%
dplyr::filter(.data$age >= .env$minAge,
.data$age <= .env$maxAge,
Expand All @@ -234,8 +234,9 @@ demographicsFilter <- function(cohort,

cohort <- cohort %>%
dplyr::inner_join(working_cohort %>%
dplyr::select(c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date")),
dplyr::select(dplyr::all_of(c("cohort_definition_id",
"subject_id",
"cohort_start_date", "cohort_end_date"))),
by = c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date"))
cohort
Expand Down
19 changes: 15 additions & 4 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
name = "medications",
concept_set = list("diclofenac" = 1124300,
"acetaminophen" = 1127433))
cohortSet(cdm$combinations)
cohort_count(cdm$medications)
cohort_attrition(cdm$medications)
```
Expand All @@ -61,7 +60,21 @@ cohort_attrition(cdm$medications)
cdm$medications %>%
requireDemographics(ageRange = list(c(40, 65)),
sex = "Female")
cohortSet(cdm$combinations)
cohort_count(cdm$medications)
cohort_attrition(cdm$medications)
```

### Require presence in another cohort
We can also require that individuals are in another cohort over some window. Here for example we require that study participants are in a GI bleed cohort any time prior up to their entry in the medications cohort.
```{r}
cdm <- generate_concept_cohort_set(cdm = cdm,
name = "gibleed",
concept_set = list("gibleed" = 192671))
cdm$medications <- cdm$medications %>%
requireCohortIntersectFlag(targetCohortTable = "gibleed",
window = c(-Inf, 0))
cohort_count(cdm$medications)
cohort_attrition(cdm$medications)
```
Expand All @@ -76,8 +89,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
name = "combinations",
targetCohortName = "medications")
cohortSet(cdm$combinations)
cohortCount(cdm$combinations)
Expand Down
98 changes: 67 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ devtools::install_github("oxford-pharmacoepi/CohortConstructor")

``` r
library(CDMConnector)
#> Warning: package 'CDMConnector' was built under R version 4.2.3
library(PatientProfiles)
#> Warning: package 'PatientProfiles' was built under R version 4.2.3
library(CohortConstructor)

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
Expand All @@ -44,19 +46,19 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
name = "medications",
concept_set = list("diclofenac" = 1124300,
"acetaminophen" = 1127433))
cohort_set(cdm$medications)
#> # A tibble: 2 × 6
#> cohort_definition_id cohort_name limit prior_observation future_observation
#> <int> <chr> <chr> <dbl> <dbl>
#> 1 1 diclofenac first 0 0
#> 2 2 acetaminophen first 0 0
#> # ℹ 1 more variable: end <chr>
cohort_count(cdm$medications)
#> # A tibble: 2 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 1 830 830
#> 2 2 2580 2580
cohort_attrition(cdm$medications)
#> # A tibble: 2 × 7
#> cohort_definition_id number_records number_subjects reason_id reason
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1 830 830 1 Qualifying init…
#> 2 2 2580 2580 1 Qualifying init…
#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
```

### Applying demographic requirements
Expand All @@ -66,33 +68,69 @@ cdm$medications %>%
requireDemographics(ageRange = list(c(40, 65)),
sex = "Female")
#> # Source: SQL [?? x 4]
#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\Rtmpqm0Jdd\file42405cf744a.duckdb]
#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpIngDmK\file4f3841e26e9e.duckdb]
#> cohort_definition_id subject_id cohort_start_date cohort_end_date
#> <int> <int> <date> <date>
#> 1 1 730 2002-11-18 2018-12-16
#> 2 1 1169 1975-12-23 2018-08-27
#> 3 1 1808 2003-12-18 2019-06-05
#> 4 1 2858 1953-05-26 2019-05-29
#> 5 1 2909 1986-09-03 2007-07-23
#> 6 1 2939 1997-10-31 2018-09-04
#> 7 1 3175 1999-05-02 2018-09-04
#> 8 1 5240 1984-05-31 2019-03-12
#> 9 2 1338 1997-02-22 2019-06-21
#> 10 2 2026 2009-02-11 2019-06-19
#> <int> <dbl> <date> <date>
#> 1 1 18 2009-03-21 2018-11-07
#> 2 1 893 1993-09-26 2019-05-06
#> 3 1 2396 1961-08-30 2001-02-28
#> 4 1 3159 2000-01-26 2018-10-18
#> 5 1 3376 1994-05-06 2019-06-28
#> 6 1 4071 1998-08-07 2018-12-27
#> 7 1 4636 1986-10-26 2018-10-23
#> 8 1 4690 2001-03-20 2018-10-14
#> 9 1 4701 2011-07-10 2018-12-22
#> 10 2 3951 1997-12-11 2019-04-13
#> # ℹ more rows
cohort_set(cdm$medications)
#> # A tibble: 2 × 6
#> cohort_definition_id cohort_name limit prior_observation future_observation
#> <int> <chr> <chr> <dbl> <dbl>
#> 1 1 diclofenac first 0 0
#> 2 2 acetaminophen first 0 0
#> # ℹ 1 more variable: end <chr>
cohort_count(cdm$medications)
#> # A tibble: 2 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 1 156 156
#> 2 2 76 76
cohort_attrition(cdm$medications)
#> # A tibble: 4 × 7
#> cohort_definition_id number_records number_subjects reason_id reason
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1 830 830 1 Qualifying init…
#> 2 2 2580 2580 1 Qualifying init…
#> 3 2 76 76 2 Demographic req…
#> 4 1 156 156 2 Demographic req…
#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
```

### Require presence in another cohort

We can also require that individuals are in another cohort over some
window. Here for example we require that study participants are in a GI
bleed cohort any time prior up to their entry in the medications cohort.

``` r
cdm <- generate_concept_cohort_set(cdm = cdm,
name = "gibleed",
concept_set = list("gibleed" = 192671))

cdm$medications <- cdm$medications %>%
requireCohortIntersectFlag(targetCohortTable = "gibleed",
window = c(-Inf, 0))

cohort_count(cdm$medications)
#> # A tibble: 2 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 2 36 36
#> 2 1 0 0
cohort_attrition(cdm$medications)
#> # A tibble: 6 × 7
#> cohort_definition_id number_records number_subjects reason_id reason
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1 830 830 1 Qualifying init…
#> 2 2 2580 2580 1 Qualifying init…
#> 3 2 76 76 2 Demographic req…
#> 4 1 156 156 2 Demographic req…
#> 5 2 36 36 3 In cohort gible…
#> 6 1 0 0 3 In cohort gible…
#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
```

### Combining cohorts
Expand All @@ -104,8 +142,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
name = "combinations",
targetCohortName = "medications")



cohortSet(cdm$combinations)
#> # A tibble: 3 × 5
#> cohort_definition_id cohort_name diclofenac acetaminophen mutually_exclusive
Expand All @@ -117,9 +153,9 @@ cohortCount(cdm$combinations)
#> # A tibble: 3 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 1 830 830
#> 2 2 2580 2580
#> 3 3 805 805
#> 1 2 36 36
#> 2 1 0 0
#> 3 3 0 0


cdmDisconnect(cdm)
Expand Down
40 changes: 40 additions & 0 deletions man/requireCohortIntersectFlag.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-generateMatchedCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", {

outc <- a[["new_cohort"]] %>%
dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>%
dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")
dplyr::pull() %in% as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
expect_true(unique(outc) == TRUE)
})

Expand Down
Loading

0 comments on commit 8b75f98

Please sign in to comment.