From e591642b52d2f88d04547152f94c75f032fce81f Mon Sep 17 00:00:00 2001
From: edward-burn <9583964+edward-burn@users.noreply.github.com>
Date: Wed, 28 Feb 2024 13:57:56 +0000
Subject: [PATCH 1/3] cohort_overlap

---
 .gitignore                                    |  1 +
 DESCRIPTION                                   |  3 +-
 NAMESPACE                                     |  2 +
 R/CohortConstructor-package.R                 |  1 +
 R/cohortTiming.R                              |  4 -
 R/summariseCohortOverlap.R                    | 81 +++++++++++++++++++
 man/summariseCohortOverlap.Rd                 | 28 +++++++
 tests/testthat/test-summariseCohortOverlap.R  | 41 ++++++++++
 .../a02_applying_cohort_restrictions.Rmd      | 18 ++---
 ...hort_timing.Rmd => a04_cohort_overlap.Rmd} |  0
 10 files changed, 165 insertions(+), 14 deletions(-)
 delete mode 100644 R/cohortTiming.R
 create mode 100644 R/summariseCohortOverlap.R
 create mode 100644 man/summariseCohortOverlap.Rd
 create mode 100644 tests/testthat/test-summariseCohortOverlap.R
 rename vignettes/{a04_cohort_timing.Rmd => a04_cohort_overlap.Rmd} (100%)

diff --git a/.gitignore b/.gitignore
index f47ffaba..bf36ab7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 .httr-oauth
 .DS_Store
 inst/doc
+.Rhistory
diff --git a/DESCRIPTION b/DESCRIPTION
index 6eda8700..230e7c8d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -29,7 +29,8 @@ Imports:
     omopgenerics (>= 0.0.2),
     PatientProfiles,
     rlang,
-    tidyr
+    tidyr,
+    utils
 Suggests: 
     DBI,
     DrugUtilisation,
diff --git a/NAMESPACE b/NAMESPACE
index c63db3d2..8421dbf4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,6 +17,7 @@ export(requireSex)
 export(restrictToFirstEntry)
 export(settings)
 export(splitOverlap)
+export(summariseCohortOverlap)
 export(trimToDateRange)
 importFrom(magrittr,"%>%")
 importFrom(omopgenerics,attrition)
@@ -26,3 +27,4 @@ importFrom(omopgenerics,settings)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,.env)
+importFrom(utils,data)
diff --git a/R/CohortConstructor-package.R b/R/CohortConstructor-package.R
index d821617f..17849ee7 100644
--- a/R/CohortConstructor-package.R
+++ b/R/CohortConstructor-package.R
@@ -6,5 +6,6 @@
 #' @importFrom rlang .data
 #' @importFrom rlang .env
 #' @importFrom rlang :=
+#' @importFrom utils data
 ## usethis namespace: end
 NULL
diff --git a/R/cohortTiming.R b/R/cohortTiming.R
deleted file mode 100644
index df0db1e0..00000000
--- a/R/cohortTiming.R
+++ /dev/null
@@ -1,4 +0,0 @@
-cohortTiming <- function(){
-
-  # output as summarised result
-}
diff --git a/R/summariseCohortOverlap.R b/R/summariseCohortOverlap.R
new file mode 100644
index 00000000..d65bc159
--- /dev/null
+++ b/R/summariseCohortOverlap.R
@@ -0,0 +1,81 @@
+#' Summarise cohort overlap
+#'
+#' @param cohort A cohort table in a cdm reference
+#' @param restrictToFirstEntry If TRUE only an individual's first entry per
+#' cohort will be considered. If FALSE all entries per individual will be
+#' considered
+#' @param timing Summary statistics for timing. If NULL, timings between cohort
+#' entries will not be considered
+#'
+#' @return A summarised result
+#' @export
+summariseCohortOverlap <- function(cohort,
+                                   restrictToFirstEntry = TRUE,
+                                   timing = c("min", "q25", "median", "q75",
+                                              "max")) {
+  # validate inputs: fail early instead of silently treating a malformed flag
+  # as FALSE or handing a non-character `timing` to the summary step
+  stopifnot(
+    is.logical(restrictToFirstEntry), length(restrictToFirstEntry) == 1L,
+    !is.na(restrictToFirstEntry), is.null(timing) || is.character(timing)
+  )
+
+  # add cohort names
+  cdm <- omopgenerics::cdmReference(cohort)
+  name <- omopgenerics:::getTableName(cohort) # change to tableName when og is released
+
+  cdm[[name]] <- PatientProfiles::addCohortName(cdm[[name]])
+
+  # calls the package function; R ignores the same-named logical argument here
+  if (isTRUE(restrictToFirstEntry)) {
+    cdm[[name]] <- cdm[[name]] %>%
+      restrictToFirstEntry()
+  }
+
+  # self join: one row per pair of records belonging to the same person
+  # should we use addCohortIntersectDate instead to avoid potentially large number of rows?
+  cdm[[name]] <- cdm[[name]] %>%
+    dplyr::inner_join(cdm[[name]], by = "subject_id") %>%
+    dplyr::rename(
+      "cohort_start_date" = "cohort_start_date.x",
+      "cohort_end_date" = "cohort_end_date.x",
+      "cohort_name" = "cohort_name.x",
+      "cohort_definition_id" = "cohort_definition_id.x",
+      "cohort_start_date_comparator" = "cohort_start_date.y",
+      "cohort_end_date_comparator" = "cohort_end_date.y",
+      "cohort_name_comparator" = "cohort_name.y",
+      "cohort_definition_id_comparator" = "cohort_definition_id.y"
+    ) %>%
+    dplyr::mutate(comparison = as.character(paste0(as.character(.data$cohort_name),
+                                                   as.character(" &&& "),
+                                                   as.character(.data$cohort_name_comparator))))
+
+  name_overlap <- paste0(omopgenerics::uniqueTableName(), "_", name, "_overlap")
+
+  cdm[[name_overlap]] <- cdm[[name]] %>%
+    dplyr::compute(temporary = FALSE, name = name_overlap) %>%
+    omopgenerics::newCohortTable(.softValidation = TRUE)
+  # the overlap table is permanent, so drop it when the function exits
+  on.exit(omopgenerics::dropTable(cdm = cdm, name = name_overlap), add = TRUE)
+
+  if (is.null(timing)) {
+    # only counts are kept; can we only get number subject and records?
+    cohort_counts <- cdm[[name_overlap]] %>%
+      PatientProfiles::summariseCharacteristics(strata = list("comparison")) %>%
+      dplyr::filter(.data$variable_name %in% c("Number subjects",
+                                               "Number records")) %>%
+      dplyr::mutate(result_type = "cohort_overlap")
+    return(cohort_counts)
+  }
+
+  # days between the two cohort start dates, summarised within each comparison
+  cdm[[name_overlap]] %>%
+    dplyr::mutate(diff_days = !!CDMConnector::datediff(
+      "cohort_start_date", "cohort_start_date_comparator", interval = "day"
+    )) %>%
+    dplyr::collect() %>%
+    PatientProfiles::summariseResult(group = list("comparison"),
+                                     variables = list(diff_days = "diff_days"),
+                                     functions = list(diff_days = timing)) %>%
+    dplyr::mutate(result_type = "cohort_overlap")
+}
diff --git a/man/summariseCohortOverlap.Rd b/man/summariseCohortOverlap.Rd
new file mode 100644
index 00000000..b9c8dd0f
--- /dev/null
+++ b/man/summariseCohortOverlap.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summariseCohortOverlap.R
+\name{summariseCohortOverlap}
+\alias{summariseCohortOverlap}
+\title{Summarise cohort overlap}
+\usage{
+summariseCohortOverlap(
+  cohort,
+  restrictToFirstEntry = TRUE,
+  timing = c("min", "q25", "median", "q75", "max")
+)
+}
+\arguments{
+\item{cohort}{A cohort table in a cdm reference}
+
+\item{restrictToFirstEntry}{If TRUE only an individual's first entry per
+cohort will be considered. If FALSE all entries per individual will be
+considered}
+
+\item{timing}{Summary statistics for timing. If NULL, timings between cohort
+entries will not be considered}
+}
+\value{
+A summarised result
+}
+\description{
+Summarise cohort overlap
+}
diff --git a/tests/testthat/test-summariseCohortOverlap.R b/tests/testthat/test-summariseCohortOverlap.R
new file mode 100644
index 00000000..29db37e9
--- /dev/null
+++ b/tests/testthat/test-summariseCohortOverlap.R
@@ -0,0 +1,41 @@
+test_that("expected output", {
+  # DrugUtilisation is only a suggested dependency, so skip when it is missing
+  skip_if_not_installed("DrugUtilisation")
+  cdm <- DrugUtilisation::generateConceptCohortSet(
+    cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200),
+    conceptSet = list(c_1 = 317009, c_2 = 432526, c_3 = 4141052),
+    name = "cohort",
+    end = "observation_period_end_date"
+  )
+
+  expected_cols <- colnames(omopgenerics::emptySummarisedResult())
+  all_timings <- c("min", "q25", "median", "q75", "max")
+
+  # first entry per person, every timing statistic
+  overlap1 <- summariseCohortOverlap(cdm$cohort,
+                                     restrictToFirstEntry = TRUE,
+                                     timing = all_timings)
+  expect_equal(colnames(overlap1), expected_cols)
+
+  # all entries per person
+  overlap2 <- summariseCohortOverlap(cdm$cohort,
+                                     restrictToFirstEntry = FALSE,
+                                     timing = all_timings)
+  expect_equal(colnames(overlap2), expected_cols)
+
+  # subset of the timing statistics
+  overlap3 <- summariseCohortOverlap(cdm$cohort,
+                                     restrictToFirstEntry = TRUE,
+                                     timing = c("min", "max"))
+  expect_equal(colnames(overlap3), expected_cols)
+
+  # no timing: only subject and record counts should remain
+  overlap4 <- summariseCohortOverlap(cdm$cohort,
+                                     restrictToFirstEntry = TRUE,
+                                     timing = NULL)
+  expect_equal(colnames(overlap4), expected_cols)
+  expect_true(all(overlap4$variable_name %in%
+                    c("Number subjects", "Number records")))
+
+  CDMConnector::cdm_disconnect(cdm)
+})
diff --git a/vignettes/a02_applying_cohort_restrictions.Rmd b/vignettes/a02_applying_cohort_restrictions.Rmd
index af92ee54..d3d1e5fe 100644
--- a/vignettes/a02_applying_cohort_restrictions.Rmd
+++ b/vignettes/a02_applying_cohort_restrictions.Rmd
@@ -9,15 +9,15 @@ vignette: >
 
 ```{r, include = FALSE}
 library(CDMConnector)
-if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
-Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))  
-} 
-if (!dir.exists(file.path(tempdir(), "eunomia"))){ dir.create(file.path(tempdir(), "eunomia"))
-}
-if (!eunomia_is_available()) {
-downloadEunomiaData(pathToData = file.path(tempdir(), "eunomia"))  
-}
-
+# if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
+# Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))  
+# } 
+# if (!dir.exists(file.path(tempdir(), "eunomia"))){ dir.create(file.path(tempdir(), "eunomia"))
+# }
+# if (!eunomia_is_available()) {
+# downloadEunomiaData(pathToData = file.path(tempdir(), "eunomia"))  
+# }
+# 
 eunomia_available <- eunomia_is_available()
 
 knitr::opts_chunk$set(
diff --git a/vignettes/a04_cohort_timing.Rmd b/vignettes/a04_cohort_overlap.Rmd
similarity index 100%
rename from vignettes/a04_cohort_timing.Rmd
rename to vignettes/a04_cohort_overlap.Rmd

From 4aa2f88dd57c9aaf9e3c18287fa6f0dd5ab0f583 Mon Sep 17 00:00:00 2001
From: edward-burn <9583964+edward-burn@users.noreply.github.com>
Date: Wed, 28 Feb 2024 15:49:33 +0000
Subject: [PATCH 2/3] update vignette

---
 .Rbuildignore                                 |  4 +-
 .gitignore                                    |  2 +
 .../a02_applying_cohort_restrictions.Rmd      | 55 +++++++++----------
 3 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index ac372eb9..70af2f10 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,4 +5,6 @@
 ^README\.Rmd$
 ^pkgdown$
 ^\.github$
-^_pkgdown\.yml$
\ No newline at end of file
+^_pkgdown\.yml$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
index bf36ab7e..cb661b1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@
 .DS_Store
 inst/doc
 .Rhistory
+/doc/
+/Meta/
diff --git a/vignettes/a02_applying_cohort_restrictions.Rmd b/vignettes/a02_applying_cohort_restrictions.Rmd
index d3d1e5fe..ad9bf4f1 100644
--- a/vignettes/a02_applying_cohort_restrictions.Rmd
+++ b/vignettes/a02_applying_cohort_restrictions.Rmd
@@ -7,30 +7,27 @@ vignette: >
   %\VignetteEncoding{UTF-8}
 ---
 
+
 ```{r, include = FALSE}
-library(CDMConnector)
-# if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
-# Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))  
-# } 
-# if (!dir.exists(file.path(tempdir(), "eunomia"))){ dir.create(file.path(tempdir(), "eunomia"))
-# }
-# if (!eunomia_is_available()) {
-# downloadEunomiaData(pathToData = file.path(tempdir(), "eunomia"))  
-# }
-# 
-eunomia_available <- eunomia_is_available()
 
 knitr::opts_chunk$set(
-  collapse = TRUE, 
-  message = FALSE,
-  warning = FALSE,
-  comment = "#>",
-  eval = eunomia_available
+  collapse = TRUE,
+  eval = TRUE,
+  comment = "#>"
 )
+
+library(CDMConnector)
+library(dplyr, warn.conflicts = FALSE)
+# use a session-local Eunomia folder unless one is configured, download if missing
+if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
+  Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))
+}
+if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))) dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER"))
+if (!eunomia_is_available()) downloadEunomiaData()
 ```
 
 For this example we'll use the Eunomia synthetic data from the CDMConnector package.
-```{r, eval = FALSE}
+```{r}
 library(CDMConnector)
 library(DrugUtilisation)
 library(CohortConstructor)
@@ -40,7 +37,7 @@ cdm <- cdm_from_con(con, cdm_schema = "main",
 ```
 
 Let's start by creating two drug cohorts, one for users of diclofenac and another for users of acetaminophen. We'll use the `generateDrugUtilisationCohortSet()` from the DrugUtilisation package so that we can specify a gap era when creating the cohort.
-```{r, eval = FALSE}
+```{r}
 cdm <- generateDrugUtilisationCohortSet(cdm = cdm,  
                                         name = "medications",
                                         conceptSet = list("diclofenac" = 1124300,
@@ -50,7 +47,7 @@ cohortCount(cdm$medications)
 ```
 
 As well as our medication cohorts, let's also make another cohort containing individuals with a record of a GI bleed. For this cohort we can use `generateConceptCohortSet()` from the CDMConnector package. Later we'll use this cohort when specifying inclusion/ exclusion criteria.
-```{r, eval = FALSE}
+```{r}
 cdm <- generateConceptCohortSet(cdm = cdm,  
                                    name = "gi_bleed", 
                                    conceptSet = list("gi_bleed" = 192671))
@@ -58,7 +55,7 @@ cdm <- generateConceptCohortSet(cdm = cdm,
 
 ## Keep only the first record per person
 Individuals can contribute multiple records per cohort. However now we'll keep only their earliest cohort entry of the remaining records using `restrictToFirstEntry()` from CohortConstructor. We can see that after this we have one record per person for each cohort.
-```{r, eval = FALSE}
+```{r}
 cdm$medications <- cdm$medications %>% 
   restrictToFirstEntry(indexDate = "cohort_start_date")
 
@@ -69,7 +66,7 @@ Note, applying this criteria later after applying other criteria would result in
 
 ## Applying restrictions on patient demographics
 Using `requireDemographics()` we'll require that individuals in our medications cohort are female and, relative to their cohort start date, are between 18 and 85 with at least 30 days of prior observation time in the database. 
-```{r, eval = FALSE}
+```{r}
 cdm$medications <- cdm$medications %>% 
   requireDemographics(indexDate = "cohort_start_date", 
                       ageRange = list(c(18, 85)),
@@ -78,7 +75,7 @@ cdm$medications <- cdm$medications %>%
 ```
 
 We can then see how many people have people have been excluded based on these demographic requirements.
-```{r, eval = FALSE}
+```{r}
 cohort_attrition(cdm$medications) %>% 
   dplyr::filter(reason == "Demographic requirements") %>% 
   dplyr::glimpse()
@@ -87,14 +84,14 @@ cohort_attrition(cdm$medications) %>%
 
 ## Restrictions on calendar dates
 Next we can use `requireInDateRange()` to keep only those records where cohort entry was between a particular date range.
-```{r, eval = FALSE}
+```{r}
 cdm$medications <- cdm$medications %>% 
   requireInDateRange(indexDate = "cohort_start_date", 
                      dateRange = as.Date(c("2000-01-01", "2015-01-01")))
 ```
 
 Again, we can track cohort attrition
-```{r, eval = FALSE}
+```{r}
 cohort_attrition(cdm$medications) %>% 
   dplyr::filter(reason == "cohort_start_date between 2000-01-01 and 2015-01-01") %>% 
   dplyr::glimpse()
@@ -104,24 +101,26 @@ cohort_attrition(cdm$medications) %>%
 ## Restrictions on cohort presence
 We could require that individuals in our medication cohorts have a history of GI bleed. To do this we can use the `requireCohortIntersectFlag()` function.
 
-```{r, eval = FALSE}
+```{r}
 cdm$medications_gi_bleed <- cdm$medications  %>%
   requireCohortIntersectFlag(targetCohortTable = "gi_bleed", 
                              targetCohortId = 1,
                              indexDate = "cohort_start_date", 
-                             window = c(-Inf, 0))
+                             window = c(-Inf, 0)) %>% 
+  dplyr::compute(temporary = FALSE, name = "medications_gi_bleed")
 cohort_count(cdm$medications_gi_bleed)
 ```
 
 Instead of requiring that individuals have history of GI bleed, we could instead require that they are don't have any history of it. In this case we can again use the `requireCohortIntersectFlag()` function, but this time set the negate argument to FALSE to require individuals' absence in this other cohort rather than their presence in it.
 
-```{r, eval = FALSE}
+```{r}
 cdm$medications_no_gi_bleed <- cdm$medications %>%
   requireCohortIntersectFlag(targetCohortTable = "gi_bleed", 
                              targetCohortId = 1,
                              indexDate = "cohort_start_date", 
                              window = c(-Inf, 0), 
-                             negate = TRUE)
+                             negate = TRUE) %>% 
+  dplyr::compute(temporary = FALSE, name = "medications_no_gi_bleed")
 cohort_count(cdm$medications_no_gi_bleed)
 ```
 

From e35efac49de15f22346bd492b3b7ce9c6e944709 Mon Sep 17 00:00:00 2001
From: edward-burn <9583964+edward-burn@users.noreply.github.com>
Date: Wed, 28 Feb 2024 16:01:50 +0000
Subject: [PATCH 3/3] tbl_name

---
 R/summariseCohortOverlap.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/summariseCohortOverlap.R b/R/summariseCohortOverlap.R
index d65bc159..beb6d856 100644
--- a/R/summariseCohortOverlap.R
+++ b/R/summariseCohortOverlap.R
@@ -22,7 +22,7 @@ summariseCohortOverlap <- function(cohort,
 
   # add cohort names
   cdm <- omopgenerics::cdmReference(cohort)
-  name <- omopgenerics:::getTableName(cohort) # change to tableName when og is released
+  name <- attr(cohort, "tbl_name") # change to omopgenerics::getTableName(cohort)  when og is released
 
   cdm[[name]] <- PatientProfiles::addCohortName(cdm[[name]])