Merge pull request #78 from oxford-pharmacoepi/dev_xc

changing 2^i stuff
OHDSI · Feb 13, 2024 · 166cb5c · 166cb5c
2 parents 3c6b4cb + 3979686
commit 166cb5c
Show file tree

Hide file tree

Showing 10 changed files with 144 additions and 120 deletions.
diff --git a/R/getCohortSequence.R b/R/getCohortSequence.R
@@ -21,12 +21,13 @@
 #' and marker cohorts per person.
 #' @param indexWashout Washout period to be applied to the index cohort event.
 #' @param markerWashout Washout period to be applied to the marker cohort event.
-#' @param timeGap The time between two initiations of index and marker. Default is 365.
-#' Change to Inf if no constrains are imposed.
-#' @param firstEver If TRUE then the first sequence is considered followed by assessing its eligibility.
-#' If false then all eligible sequences are assessed and then the first one is picked.
 #' @param blackOutPeriod The minimum time the ADR is expected to take place.
 #' Default is 0, meaning excluding the cases that see both dates on the same day.
+#' @param continuedExposureInterval The time before the start of the second episode
+#' of the drug (could be either marker or index) and the time after the end of the first
+#' episode (could be either marker or index).
+#' @param timeGap The time between two initiations of index and marker. Default is 365.
+#' Change to Inf if no constrains are imposed.
 #'
 #' @return
 #' A table in the cdm reference with subject_id, index_id, marker_id, index_date, marker_date, first_date and cdm_name.
@@ -55,19 +56,9 @@ getCohortSequence <- function(cdm,
                               daysPriorObservation = 0,
                               indexWashout = 0,
                               markerWashout = 0,
+                              continuedExposureInterval = NULL,
                               blackOutPeriod = 0,
-                              timeGap = 365,
-                              firstEver = F){
-
-  # change daysPriorObservation in the event of Inf
-
-  if(isTRUE(firstEver)){
-    cli::cli_abort("error")
-  }
-
-  if(!is.finite(timeGap)){
-    timeGap <- 9999
-  }
+                              timeGap = 365){
 
   # checks
   checkInputGetCohortSequence(cdm = cdm,
@@ -81,8 +72,16 @@ getCohortSequence <- function(cdm,
                               indexWashout = indexWashout,
                               markerWashout = markerWashout,
                               blackOutPeriod = blackOutPeriod,
-                              timeGap = timeGap,
-                              firstEver = firstEver)
+                              continuedExposureInterval = continuedExposureInterval,
+                              timeGap = timeGap)
+
+  if(!is.finite(timeGap)){
+    timeGap <- 99999999999
+  }
+
+  if(is.null(continuedExposureInterval)){
+    continuedExposureInterval <- timeGap
+  }
 
   # modify dateRange if necessary
   if(any(is.na(dateRange))){
@@ -131,6 +130,8 @@ getCohortSequence <- function(cdm,
       dplyr::select(-.data$cohort_definition_id) %>%
       dplyr::rename("cohort_definition_id" = "cohort_definition_id2") %>%
       dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date)
+  } else {
+    cli::cli_abort("combineIndex has to be either NULL or 'ALL' or a list")
   }
 
   if (is.null(combineMarker)){
@@ -155,19 +156,22 @@ getCohortSequence <- function(cdm,
       dplyr::select(-.data$cohort_definition_id) %>%
       dplyr::rename("cohort_definition_id" = "cohort_definition_id2") %>%
       dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date)
+  } else {
+    cli::cli_abort("combineMarker has to be either NULL or 'ALL' or a list")
   }
 
   for (j in (markerCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
     for (i in (indexCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
-      temp[[paste0("(", i,", ", j, ")")]] <-
+      temp[[paste0("(", i,",", j, ")")]] <-
         indexCohort %>%
         dplyr::filter(.data$cohort_definition_id == i) %>%
         dplyr::group_by(.data$subject_id) %>%
         dbplyr::window_order(.data$cohort_start_date) %>%
         dplyr::mutate(gap_to_prior = .data$cohort_start_date - dplyr::lag(.data$cohort_start_date)) %>%
-        dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$gap_to_prior) %>%
+        dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date, .data$gap_to_prior) %>%
         dplyr::rename(index_id = .data$cohort_definition_id,
                       index_date = .data$cohort_start_date,
+                      index_end_date = .data$cohort_end_date,
                       gap_to_prior_index = .data$gap_to_prior) %>%
         dplyr::filter(.data$index_date <= !!dateRange[[2]] & .data$index_date >= !!dateRange[[1]]) %>%
         dplyr::filter(dplyr::row_number()==1) %>%
@@ -181,9 +185,10 @@ getCohortSequence <- function(cdm,
                             dplyr::group_by(.data$subject_id) %>%
                             dbplyr::window_order(.data$cohort_start_date) %>%
                             dplyr::mutate(gap_to_prior = .data$cohort_start_date - dplyr::lag(.data$cohort_start_date)) %>%
-                            dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$gap_to_prior) %>%
+                            dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date, .data$gap_to_prior) %>%
                             dplyr::rename(marker_id = .data$cohort_definition_id,
                                           marker_date = .data$cohort_start_date,
+                                          marker_end_date = .data$cohort_end_date,
                                           gap_to_prior_marker = .data$gap_to_prior) %>%
                             dplyr::filter(.data$marker_date <= !!dateRange[[2]] & .data$marker_date >= !!dateRange[[1]]) %>%
                             dplyr::filter(dplyr::row_number()==1) %>%
@@ -199,9 +204,11 @@ getCohortSequence <- function(cdm,
   temp <- temp[!sapply(temp, is.null)]
   cdm[[name]] <- Reduce(dplyr::union_all, temp) %>%
     dplyr::mutate(gap = !!CDMConnector::datediff("index_date", "marker_date",
-                                                 interval = "day")) %>%
-    dplyr::filter(abs(.data$gap)>.env$blackOutPeriod & abs(.data$gap)<.env$timeGap) %>%
-    dplyr::select(-.data$gap) %>%
+                                                 interval = "day"),
+                  cei = ifelse((.data$index_date < .data$marker_date), .data$marker_date - .data$index_end_date, .data$index_date - .data$marker_end_date)) %>%
+    dplyr::filter(abs(.data$gap)>.env$blackOutPeriod & abs(.data$gap)<=.env$timeGap) %>%
+    dplyr::filter(.data$cei <= .env$continuedExposureInterval) %>%
+    dplyr::select(-.data$gap, -.data$cei) %>%
     dplyr::mutate(first_date = dplyr::if_else(.data$index_date<=.data$marker_date,
                                               .data$index_date, .data$marker_date),
                   second_date = dplyr::if_else(.data$index_date>=.data$marker_date,

diff --git a/R/getCohortSequence2.R b/R/getCohortSequence2.R
@@ -1,20 +1,3 @@
-# getCohortSequence3 <- function(cdm,
-#                                name = "joined_cohorts",
-#                                dateRange = as.Date(c(NA, NA)),
-#                                indexTable,
-#                                indexId = NULL,
-#                                markerTable,
-#                                markerId = NULL,
-#                                ageGroup = list(c(0, 150)),
-#                                sex = "both",
-#                                daysPriorObservation = 0,
-#                                indexWashout = 0,
-#                                markerWashout = 0,
-#                                timeGap = 365,
-#                                firstEver = T){
-#
-# }
-
 getCohortSequence2 <- function(cdm,
                               name = "joined_cohorts",
                               dateRange = as.Date(c(NA, NA)),
@@ -25,14 +8,7 @@ getCohortSequence2 <- function(cdm,
                               daysPriorObservation = 0,
                               indexWashout = 0,
                               markerWashout = 0,
-                              timeGap = 365,
-                              firstEver = T){
-
-  # change daysPriorObservation in the event of Inf
-
-  if(!isTRUE(firstEver)){
-    cli::cli_abort("error")
-  }
+                              timeGap = 365){
 
   if(!is.finite(timeGap)){
     timeGap <- 999999999
@@ -49,8 +25,7 @@ getCohortSequence2 <- function(cdm,
                               daysPriorObservation = daysPriorObservation,
                               indexWashout = indexWashout,
                               markerWashout = markerWashout,
-                              timeGap = timeGap,
-                              firstEver = firstEver)
+                              timeGap = timeGap)
 
   # modify dateRange if necessary
   if(any(is.na(dateRange))){
@@ -79,7 +54,7 @@ getCohortSequence2 <- function(cdm,
 
   for (j in (markerCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
     for (i in (indexCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
-      temp[[(2^i)*(3^j)]] <-
+      temp[[paste0("(",i,",",j,")")]] <-
         indexCohort %>%
         dplyr::filter(.data$cohort_definition_id == i) %>%
         dplyr::rename(index_id = .data$cohort_definition_id,

diff --git a/R/getConfidenceInterval.R b/R/getConfidenceInterval.R
@@ -1,5 +1,5 @@
 # CI
-getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025){
+getConfidenceInterval <- function(table, nsr, confidenceIntervalLevel = 0.025){
 
   counts <- tibble::tibble(
     index_first = table %>% dplyr::pull(.data$index_first) %>% sum(),
@@ -14,8 +14,8 @@ getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025)
     counts$lowerASR_CI <- counts$upperASR_CI <- NA
   } else if (counts$index_first == 0){
     counts$index_first <-  0.5
-    counts$lowerCSR_CI <- stats::qbeta(confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
-    counts$upperCSR_CI <- stats::qbeta(1-confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
+    counts$lowerCSR_CI <- stats::qbeta(confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
+    counts$upperCSR_CI <- stats::qbeta(1-confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
 
     counts$lowerCSR_CI <- counts$lowerCSR_CI/(1-counts$lowerCSR_CI)
     counts$upperCSR_CI <- counts$upperCSR_CI/(1-counts$upperCSR_CI)
@@ -25,8 +25,8 @@ getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025)
 
   } else if (counts$marker_first == 0){
     counts$marker_first <-  0.5
-    counts$lowerCSR_CI <- stats::qbeta(confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
-    counts$upperCSR_CI <- stats::qbeta(1-confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
+    counts$lowerCSR_CI <- stats::qbeta(confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
+    counts$upperCSR_CI <- stats::qbeta(1-confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
 
     counts$lowerCSR_CI <- counts$lowerCSR_CI/(1-counts$lowerCSR_CI)
     counts$upperCSR_CI <- counts$upperCSR_CI/(1-counts$upperCSR_CI)
@@ -35,8 +35,8 @@ getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025)
     counts$upperASR_CI <- counts$upperCSR_CI/nsr
 
   } else {
-    counts$lowerCSR_CI <- stats::qbeta(confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
-    counts$upperCSR_CI <- stats::qbeta(1-confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
+    counts$lowerCSR_CI <- stats::qbeta(confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
+    counts$upperCSR_CI <- stats::qbeta(1-confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
 
     counts$lowerCSR_CI <- counts$lowerCSR_CI/(1-counts$lowerCSR_CI)
     counts$upperCSR_CI <- counts$upperCSR_CI/(1-counts$upperCSR_CI)

diff --git a/R/getPSSA.R b/R/getPSSA.R
@@ -8,7 +8,7 @@ getPSSA <- function(cdm,
                     cohort_table = NULL,
                     table_name = "pssa",
                     study_time = NULL,
-                    confidence_interval_level = 0.025,
+                    confidenceIntervalLevel = 0.025,
                     prior_obs = 365,
                     start_date = NA,
                     end_date = NA # set both as NA for full
@@ -21,7 +21,7 @@ getPSSA <- function(cdm,
   table_cleaned <- tableCleaning(table = table, study_time = study_time)
   csr<-crudeSequenceRatio(table_cleaned[[2]])
   asr<-adjustedSequenceRatio(table_cleaned[[2]])
-  counts <- getConfidenceInterval(table_cleaned[[2]], confidence_interval_level = confidence_interval_level)
+  counts <- getConfidenceInterval(table_cleaned[[2]], confidenceIntervalLevel = confidenceIntervalLevel)
 
   results <- tibble::tibble(name = table_name,
                     csr = csr,

diff --git a/R/getPSSAStrata.R b/R/getPSSAStrata.R
@@ -8,7 +8,7 @@ getPSSAStrata <- function(cdm,
                           end_date = NA,
                           table_name = "pssa",
                           study_time = NULL,
-                          confidence_interval_level = 0.025){
+                          confidenceIntervalLevel = 0.025){
 
   cdm <- IncidencePrevalence::generateDenominatorCohortSet(cdm = cdm,
                                                            ageGroup = ageGroup,
@@ -28,7 +28,7 @@ getPSSAStrata <- function(cdm,
 
     cohort_groups <- CDMConnector::cohortSet(cdm$denominator) %>% dplyr::mutate(group = paste(.data$age_group, " ", sex))
 
-    strata_results[[cohort_groups %>% dplyr::filter(.data$cohort_definition_id == i) %>% dplyr::pull(.data$group)]]<-getPSSA(cohort_table = drug_cohort, study_time = study_time, confidence_interval_level = confidence_interval_level)
+    strata_results[[cohort_groups %>% dplyr::filter(.data$cohort_definition_id == i) %>% dplyr::pull(.data$group)]]<-getPSSA(cohort_table = drug_cohort, study_time = study_time, confidenceIntervalLevel = confidenceIntervalLevel)
 
   }
   return(strata_results)

diff --git a/R/getPSSASubset.R b/R/getPSSASubset.R
@@ -1,11 +1,11 @@
-getPSSASubset <- function(cdm, index, marker, subset_name, subset_id, table_name = "pssa", study_time = NULL, confidence_interval_level = 0.025){
+getPSSASubset <- function(cdm, index, marker, subset_name, subset_id, table_name = "pssa", study_time = NULL, confidenceIntervalLevel = 0.025){
   cdm[["subset"]] <- cdm[[subset_name]] %>% dplyr::filter(.data$cohort_definition_id == subset_id)
   subset_cdm <- CDMConnector::cdmSubsetCohort(cdm, "subset")
   subset_result <- getPSSA(cdm = subset_cdm,
                            index = index,
                            marker = marker,
                            table_name = table_name,
                            study_time = study_time,
-                           confidence_interval_level = confidence_interval_level)
+                           confidenceIntervalLevel = confidenceIntervalLevel)
   return(subset_result)
 }
diff --git a/R/getSequenceRatios.R b/R/getSequenceRatios.R
@@ -38,36 +38,25 @@ getSequenceRatios <- function(cdm,
                               confidenceIntervalLevel = 0.025,
                               restriction = 548){
 
-  # Check cdm objects, writing schema and index/marker tables
-  checkCdm(cdm, tables=c(outcomeTable))
-  assertWriteSchema(cdm)
-
-  errorMessage <- checkmate::makeAssertCollection()
-  # check relevant formats of the arguments
-  checkmate::assertCharacter(outcomeTable, len = 1, any.missing = FALSE, add = errorMessage)
-
-  # Check confidenceIntervalLevel
-  checkconfidenceIntervalLevel(confidenceIntervalLevel, errorMessage)
-  daysCheck <- all(confidenceIntervalLevel >= 0)
-  if (!isTRUE(daysCheck)) {
-    errorMessage$push(
-      " - confidenceIntervalLevel cannot be negative"
-    )
-  }
+  # checks
+  checkInputGetSequenceRatios(cdm = cdm,
+                              outcomeTable = outcomeTable,
+                              confidenceIntervalLevel = confidenceIntervalLevel,
+                              restriction = restriction)
 
   temp <- list()
   results <- list()
   for (i in (cdm[[outcomeTable]] %>% dplyr::distinct(.data$index_id) %>% dplyr::pull())){
     for (j in (cdm[[outcomeTable]] %>% dplyr::filter(.data$index_id == i) %>% dplyr::distinct(.data$marker_id) %>% dplyr::pull())){
-      temp[[(2^i)*(3^j)]] <-
+      temp[[paste0("(",i,",",j,")")]] <-
         cdm[[outcomeTable]] %>%
         dplyr::filter(.data$index_id == i & .data$marker_id == j) %>%
         dplyr::collect()
 
-      date_start <- min(temp[[(2^i)*(3^j)]]$index_date, temp[[(2^i)*(3^j)]]$marker_date)
+      date_start <- min(temp[[paste0("(",i,",",j,")")]]$index_date, temp[[paste0("(",i,",",j,")")]]$marker_date)
 
-      temp[[(2^i)*(3^j)]] <-
-        temp[[(2^i)*(3^j)]] %>%
+      temp[[paste0("(",i,",",j,")")]] <-
+        temp[[paste0("(",i,",",j,")")]] %>%
         dplyr::mutate(
           orderBA = .data$index_date >= .data$marker_date,
           days_first = as.integer((lubridate::interval(date_start, .data$first_date)) / lubridate::days(1)), # gap between the first drug of a person and the first drug of the whole population
@@ -78,12 +67,12 @@ getSequenceRatios <- function(cdm,
         dplyr::mutate(index_id = i, marker_id = j) %>%
         dplyr::ungroup()
 
-      csr<-crudeSequenceRatio(temp[[(2^i)*(3^j)]])
-      nsr<-nullSequenceRatio(temp[[(2^i)*(3^j)]], restriction = restriction)
-      asr<-adjustedSequenceRatio(temp[[(2^i)*(3^j)]], restriction = restriction)
-      counts <- getConfidenceInterval(temp[[(2^i)*(3^j)]], nsr, confidence_interval_level = confidenceIntervalLevel)
+      csr<-crudeSequenceRatio(temp[[paste0("(",i,",",j,")")]])
+      nsr<-nullSequenceRatio(temp[[paste0("(",i,",",j,")")]], restriction = restriction)
+      asr<-adjustedSequenceRatio(temp[[paste0("(",i,",",j,")")]], restriction = restriction)
+      counts <- getConfidenceInterval(temp[[paste0("(",i,",",j,")")]], nsr, confidenceIntervalLevel = confidenceIntervalLevel)
 
-      results[[(2^i)*(3^j)]] <- cbind(tibble::tibble(csr = csr,
+      results[[paste0("(",i,",",j,")")]] <- cbind(tibble::tibble(csr = csr,
                                       asr = asr),
                                       counts) %>%
         dplyr::mutate(index_id = i, marker_id = j)