Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cohort_overlap #42

Merged
merged 3 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@
^README\.Rmd$
^pkgdown$
^\.github$
^_pkgdown\.yml$
^_pkgdown\.yml$
^doc$
^Meta$
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
.httr-oauth
.DS_Store
inst/doc
.Rhistory
/doc/
/Meta/
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ Imports:
omopgenerics (>= 0.0.2),
PatientProfiles,
rlang,
tidyr
tidyr,
utils
Suggests:
DBI,
DrugUtilisation,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(requireSex)
export(restrictToFirstEntry)
export(settings)
export(splitOverlap)
export(summariseCohortOverlap)
export(trimToDateRange)
importFrom(magrittr,"%>%")
importFrom(omopgenerics,attrition)
Expand All @@ -26,3 +27,4 @@ importFrom(omopgenerics,settings)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(utils,data)
1 change: 1 addition & 0 deletions R/CohortConstructor-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
#' @importFrom rlang .data
#' @importFrom rlang .env
#' @importFrom rlang :=
#' @importFrom utils data
## usethis namespace: end
NULL
4 changes: 0 additions & 4 deletions R/cohortTiming.R

This file was deleted.

81 changes: 81 additions & 0 deletions R/summariseCohortOverlap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#' Summarise cohort overlap
#'
#' Pairs every cohort entry of an individual with every entry of that same
#' individual in the cohort table (joining on `subject_id`) and summarises the
#' result for each pair of cohort names. Each pair is labelled
#' `"<cohort_name> &&& <cohort_name_comparator>"`.
#'
#' @param cohort A cohort table in a cdm reference
#' @param restrictToFirstEntry If TRUE only an individual's first entry per
#' cohort will be considered. If FALSE all entries per individual will be
#' considered. Must be a single TRUE or FALSE.
#' @param timing Summary statistics for timing, given as a character vector
#' that is passed to `PatientProfiles::summariseResult()` as the functions to
#' compute on the difference in days between the cohort start date and the
#' comparator cohort start date. If NULL, timings between cohort entries will
#' not be considered and only the "Number subjects" and "Number records" rows
#' are returned.
#'
#' @return A summarised result with `result_type` set to "cohort_overlap"
#' @export
#'
#' @examples
#' \donttest{
#' cdm <- DrugUtilisation::generateConceptCohortSet(
#'   cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200),
#'   conceptSet = list(c_1 = 317009, c_2 = 432526, c_3 = 4141052),
#'   name = "cohort",
#'   end = "observation_period_end_date"
#' )
#'
#' summariseCohortOverlap(cdm$cohort)
#'
#' CDMConnector::cdm_disconnect(cdm)
#' }
summariseCohortOverlap <- function(cohort,
                                   restrictToFirstEntry = TRUE,
                                   timing = c("min", "q25",
                                              "median", "q75",
                                              "max")) {

  # validate inputs
  # (done before touching the database so bad arguments fail fast and clearly;
  # previously a non-logical restrictToFirstEntry was silently treated as FALSE)
  if (!is.logical(restrictToFirstEntry) ||
      length(restrictToFirstEntry) != 1 ||
      is.na(restrictToFirstEntry)) {
    stop("`restrictToFirstEntry` must be a single TRUE or FALSE",
         call. = FALSE)
  }
  if (!is.null(timing) && (!is.character(timing) || anyNA(timing))) {
    stop("`timing` must be NULL or a character vector of summary statistics",
         call. = FALSE)
  }

  # add cohort names
  cdm <- omopgenerics::cdmReference(cohort)
  name <- attr(cohort, "tbl_name") # change to omopgenerics::getTableName(cohort) when og is released
  if (!is.character(name) || length(name) != 1) {
    stop("`cohort` must be a cohort table in a cdm reference ",
         "(no table name attribute found)",
         call. = FALSE)
  }

  cdm[[name]] <- PatientProfiles::addCohortName(cdm[[name]])

  # NOTE: the argument `restrictToFirstEntry` shares its name with the package
  # function called below; R resolves the call to the function because the
  # argument is not a function.
  if (isTRUE(restrictToFirstEntry)) {
    cdm[[name]] <- cdm[[name]] %>%
      restrictToFirstEntry()
  }

  # should we use addCohortIntersectDate instead to avoid potentially large number of rows?
  # NOTE(review): joining on subject_id alone also pairs every cohort with
  # itself, so "a &&& a" comparisons appear in the output - confirm intended.
  cdm[[name]] <- cdm[[name]] %>%
    dplyr::inner_join(cdm[[name]],
                      by = "subject_id") %>%
    dplyr::rename("cohort_start_date" = "cohort_start_date.x",
                  "cohort_end_date" = "cohort_end_date.x",
                  "cohort_name" = "cohort_name.x",
                  "cohort_definition_id" = "cohort_definition_id.x",
                  "cohort_start_date_comparator" = "cohort_start_date.y",
                  "cohort_end_date_comparator" = "cohort_end_date.y",
                  "cohort_name_comparator" = "cohort_name.y",
                  "cohort_definition_id_comparator" = "cohort_definition_id.y") %>%
    dplyr::mutate(comparison = as.character(paste0(as.character(.data$cohort_name),
                                                   as.character(" &&& "),
                                                   as.character(.data$cohort_name_comparator))))

  name_overlap <- paste0(omopgenerics::uniqueTableName(), "_", name, "_overlap")

  # NOTE(review): this writes a permanent table that is not dropped anywhere
  # in this function - consider cleaning it up once results are collected.
  cdm[[name_overlap]] <- cdm[[name]] %>%
    dplyr::compute(temporary = FALSE,
                   name = name_overlap) %>%
    omopgenerics::newCohortTable(.softValidation = TRUE)

  # without timing statistics only subject and record counts are reported
  if (is.null(timing)) {
    cohort_timings <- cdm[[name_overlap]] %>%
      PatientProfiles::summariseCharacteristics(
        strata = list("comparison")) %>% # can we only get number subject and records?
      dplyr::filter(.data$variable_name %in% c("Number subjects",
                                               "Number records")) %>%
      dplyr::mutate(result_type = "cohort_overlap")

    return(cohort_timings)

  }

  # days between the two cohort start dates, summarised per comparison
  cohort_timings <- cdm[[name_overlap]] %>%
    dplyr::mutate(diff_days = !!CDMConnector::datediff("cohort_start_date",
                                                       "cohort_start_date_comparator",
                                                       interval = "day")) %>%
    dplyr::collect() %>%
    PatientProfiles::summariseResult(group = list("comparison"),
                                     variables = list(diff_days = "diff_days"),
                                     functions = list(diff_days = timing)) %>%
    dplyr::mutate(result_type = "cohort_overlap")

  cohort_timings

}
28 changes: 28 additions & 0 deletions man/summariseCohortOverlap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions tests/testthat/test-summariseCohortOverlap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
test_that("expected output", {
  # DrugUtilisation is only a suggested dependency (used here for the mock
  # cdm), so skip instead of erroring when it is not installed
  skip_if_not_installed("DrugUtilisation")

  cdm <- DrugUtilisation::generateConceptCohortSet(
    cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200),
    conceptSet = list(c_1 = 317009, c_2 = 432526, c_3 = 4141052),
    name = "cohort",
    end = "observation_period_end_date"
  )

  # every variant must return the standard summarised result columns
  expected_columns <- colnames(omopgenerics::emptySummarisedResult())

  # first entry only, full set of timing statistics
  overlap1 <- summariseCohortOverlap(cdm$cohort,
                                     restrictToFirstEntry = TRUE,
                                     timing = c("min", "q25",
                                                "median", "q75",
                                                "max"))
  expect_equal(expected_columns,
               colnames(overlap1))

  # all entries, full set of timing statistics
  overlap2 <- summariseCohortOverlap(cdm$cohort,
                                     restrictToFirstEntry = FALSE,
                                     timing = c("min", "q25",
                                                "median", "q75",
                                                "max"))
  expect_equal(expected_columns,
               colnames(overlap2))

  # first entry only, subset of timing statistics
  overlap3 <- summariseCohortOverlap(cdm$cohort,
                                     restrictToFirstEntry = TRUE,
                                     timing = c("min",
                                                "max"))
  expect_equal(expected_columns,
               colnames(overlap3))

  # first entry only, no timing statistics (counts only)
  overlap4 <- summariseCohortOverlap(cdm$cohort,
                                     restrictToFirstEntry = TRUE,
                                     timing = NULL)
  expect_equal(expected_columns,
               colnames(overlap4))

  CDMConnector::cdm_disconnect(cdm)

})
55 changes: 27 additions & 28 deletions vignettes/a02_applying_cohort_restrictions.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,27 @@ vignette: >
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
library(CDMConnector)
if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))
}
if (!dir.exists(file.path(tempdir(), "eunomia"))){ dir.create(file.path(tempdir(), "eunomia"))
}
if (!eunomia_is_available()) {
downloadEunomiaData(pathToData = file.path(tempdir(), "eunomia"))
}

eunomia_available <- eunomia_is_available()
```{r, include = FALSE}

knitr::opts_chunk$set(
collapse = TRUE,
message = FALSE,
warning = FALSE,
comment = "#>",
eval = eunomia_available
collapse = TRUE,
eval = TRUE,
comment = "#>"
)

library(CDMConnector)
library(dplyr, warn.conflicts = FALSE)

if (Sys.getenv("EUNOMIA_DATA_FOLDER") == ""){
Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))}
if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))){ dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER"))
downloadEunomiaData()
}
```

For this example we'll use the Eunomia synthetic data from the CDMConnector package.
```{r, eval = FALSE}
```{r}
library(CDMConnector)
library(DrugUtilisation)
library(CohortConstructor)
Expand All @@ -40,7 +37,7 @@ cdm <- cdm_from_con(con, cdm_schema = "main",
```

Let's start by creating two drug cohorts, one for users of diclofenac and another for users of acetaminophen. We'll use `generateDrugUtilisationCohortSet()` from the DrugUtilisation package so that we can specify a gap era when creating the cohort.
```{r, eval = FALSE}
```{r}
cdm <- generateDrugUtilisationCohortSet(cdm = cdm,
name = "medications",
conceptSet = list("diclofenac" = 1124300,
Expand All @@ -50,15 +47,15 @@ cohortCount(cdm$medications)
```

As well as our medication cohorts, let's also make another cohort containing individuals with a record of a GI bleed. For this cohort we can use `generateConceptCohortSet()` from the CDMConnector package. Later we'll use this cohort when specifying inclusion/exclusion criteria.
```{r, eval = FALSE}
```{r}
# Generate a cohort table named "gi_bleed" from a single concept set
# (concept id 192671); it is used later for inclusion/exclusion criteria
cdm <- generateConceptCohortSet(cdm = cdm,
name = "gi_bleed",
conceptSet = list("gi_bleed" = 192671))
```

## Keep only the first record per person
Individuals can contribute multiple records per cohort. However now we'll keep only their earliest cohort entry of the remaining records using `restrictToFirstEntry()` from CohortConstructor. We can see that after this we have one record per person for each cohort.
```{r, eval = FALSE}
```{r}
cdm$medications <- cdm$medications %>%
restrictToFirstEntry(indexDate = "cohort_start_date")

Expand All @@ -69,7 +66,7 @@ Note, applying this criteria later after applying other criteria would result in

## Applying restrictions on patient demographics
Using `requireDemographics()` we'll require that individuals in our medications cohort are female and, relative to their cohort start date, are between 18 and 85 with at least 30 days of prior observation time in the database.
```{r, eval = FALSE}
```{r}
cdm$medications <- cdm$medications %>%
requireDemographics(indexDate = "cohort_start_date",
ageRange = list(c(18, 85)),
Expand All @@ -78,7 +75,7 @@ cdm$medications <- cdm$medications %>%
```

We can then see how many people have been excluded based on these demographic requirements.
```{r, eval = FALSE}
```{r}
cohort_attrition(cdm$medications) %>%
dplyr::filter(reason == "Demographic requirements") %>%
dplyr::glimpse()
Expand All @@ -87,14 +84,14 @@ cohort_attrition(cdm$medications) %>%

## Restrictions on calendar dates
Next we can use `requireInDateRange()` to keep only those records where cohort entry was between a particular date range.
```{r, eval = FALSE}
```{r}
# Keep only records whose cohort_start_date lies in the given date range
# (here 2000-01-01 to 2015-01-01)
cdm$medications <- cdm$medications %>%
requireInDateRange(indexDate = "cohort_start_date",
dateRange = as.Date(c("2000-01-01", "2015-01-01")))
```

Again, we can track cohort attrition
```{r, eval = FALSE}
```{r}
cohort_attrition(cdm$medications) %>%
dplyr::filter(reason == "cohort_start_date between 2000-01-01 and 2015-01-01") %>%
dplyr::glimpse()
Expand All @@ -104,24 +101,26 @@ cohort_attrition(cdm$medications) %>%
## Restrictions on cohort presence
We could require that individuals in our medication cohorts have a history of GI bleed. To do this we can use the `requireCohortIntersectFlag()` function.

```{r, eval = FALSE}
```{r}
cdm$medications_gi_bleed <- cdm$medications %>%
requireCohortIntersectFlag(targetCohortTable = "gi_bleed",
targetCohortId = 1,
indexDate = "cohort_start_date",
window = c(-Inf, 0))
window = c(-Inf, 0)) %>%
dplyr::compute(temporary = FALSE, name = "medications_gi_bleed")
cohort_count(cdm$medications_gi_bleed)
```

Instead of requiring that individuals have a history of GI bleed, we could instead require that they don't have any history of it. In this case we can again use the `requireCohortIntersectFlag()` function, but this time set the `negate` argument to TRUE to require individuals' absence in this other cohort rather than their presence in it.

```{r, eval = FALSE}
```{r}
cdm$medications_no_gi_bleed <- cdm$medications %>%
requireCohortIntersectFlag(targetCohortTable = "gi_bleed",
targetCohortId = 1,
indexDate = "cohort_start_date",
window = c(-Inf, 0),
negate = TRUE)
negate = TRUE) %>%
dplyr::compute(temporary = FALSE, name = "medications_no_gi_bleed")
cohort_count(cdm$medications_no_gi_bleed)
```

Expand Down
File renamed without changes.
Loading