Skip to content

Commit

Permalink
Merge pull request #78 from oxford-pharmacoepi/dev_xc
Browse files Browse the repository at this point in the history
changing 2^i stuff
  • Loading branch information
xihang-chen authored Feb 13, 2024
2 parents 3c6b4cb + 3979686 commit 166cb5c
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 120 deletions.
55 changes: 31 additions & 24 deletions R/getCohortSequence.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
#' and marker cohorts per person.
#' @param indexWashout Washout period to be applied to the index cohort event.
#' @param markerWashout Washout period to be applied to the marker cohort event.
#' @param timeGap The time between two initiations of index and marker. Default is 365.
#' Change to Inf if no constrains are imposed.
#' @param firstEver If TRUE then the first sequence is considered followed by assessing its eligibility.
#' If false then all eligible sequences are assessed and then the first one is picked.
#' @param blackOutPeriod The minimum time the ADR is expected to take place.
#' Default is 0, meaning excluding the cases that see both dates on the same day.
#' @param continuedExposureInterval The time before the start of the second episode
#' of the drug (could be either marker or index) and the time after the end of the first
#' episode (could be either marker or index).
#' @param timeGap The time between two initiations of index and marker. Default is 365.
#' Change to Inf if no constrains are imposed.
#'
#' @return
#' A table in the cdm reference with subject_id, index_id, marker_id, index_date, marker_date, first_date and cdm_name.
Expand Down Expand Up @@ -55,19 +56,9 @@ getCohortSequence <- function(cdm,
daysPriorObservation = 0,
indexWashout = 0,
markerWashout = 0,
continuedExposureInterval = NULL,
blackOutPeriod = 0,
timeGap = 365,
firstEver = F){

# change daysPriorObservation in the event of Inf

if(isTRUE(firstEver)){
cli::cli_abort("error")
}

if(!is.finite(timeGap)){
timeGap <- 9999
}
timeGap = 365){

# checks
checkInputGetCohortSequence(cdm = cdm,
Expand All @@ -81,8 +72,16 @@ getCohortSequence <- function(cdm,
indexWashout = indexWashout,
markerWashout = markerWashout,
blackOutPeriod = blackOutPeriod,
timeGap = timeGap,
firstEver = firstEver)
continuedExposureInterval = continuedExposureInterval,
timeGap = timeGap)

if(!is.finite(timeGap)){
timeGap <- 99999999999
}

if(is.null(continuedExposureInterval)){
continuedExposureInterval <- timeGap
}

# modify dateRange if necessary
if(any(is.na(dateRange))){
Expand Down Expand Up @@ -131,6 +130,8 @@ getCohortSequence <- function(cdm,
dplyr::select(-.data$cohort_definition_id) %>%
dplyr::rename("cohort_definition_id" = "cohort_definition_id2") %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date)
} else {
cli::cli_abort("combineIndex has to be either NULL or 'ALL' or a list")
}

if (is.null(combineMarker)){
Expand All @@ -155,19 +156,22 @@ getCohortSequence <- function(cdm,
dplyr::select(-.data$cohort_definition_id) %>%
dplyr::rename("cohort_definition_id" = "cohort_definition_id2") %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date)
} else {
cli::cli_abort("combineMarker has to be either NULL or 'ALL' or a list")
}

for (j in (markerCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
for (i in (indexCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
temp[[paste0("(", i,", ", j, ")")]] <-
temp[[paste0("(", i,",", j, ")")]] <-
indexCohort %>%
dplyr::filter(.data$cohort_definition_id == i) %>%
dplyr::group_by(.data$subject_id) %>%
dbplyr::window_order(.data$cohort_start_date) %>%
dplyr::mutate(gap_to_prior = .data$cohort_start_date - dplyr::lag(.data$cohort_start_date)) %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$gap_to_prior) %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date, .data$gap_to_prior) %>%
dplyr::rename(index_id = .data$cohort_definition_id,
index_date = .data$cohort_start_date,
index_end_date = .data$cohort_end_date,
gap_to_prior_index = .data$gap_to_prior) %>%
dplyr::filter(.data$index_date <= !!dateRange[[2]] & .data$index_date >= !!dateRange[[1]]) %>%
dplyr::filter(dplyr::row_number()==1) %>%
Expand All @@ -181,9 +185,10 @@ getCohortSequence <- function(cdm,
dplyr::group_by(.data$subject_id) %>%
dbplyr::window_order(.data$cohort_start_date) %>%
dplyr::mutate(gap_to_prior = .data$cohort_start_date - dplyr::lag(.data$cohort_start_date)) %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$gap_to_prior) %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date, .data$gap_to_prior) %>%
dplyr::rename(marker_id = .data$cohort_definition_id,
marker_date = .data$cohort_start_date,
marker_end_date = .data$cohort_end_date,
gap_to_prior_marker = .data$gap_to_prior) %>%
dplyr::filter(.data$marker_date <= !!dateRange[[2]] & .data$marker_date >= !!dateRange[[1]]) %>%
dplyr::filter(dplyr::row_number()==1) %>%
Expand All @@ -199,9 +204,11 @@ getCohortSequence <- function(cdm,
temp <- temp[!sapply(temp, is.null)]
cdm[[name]] <- Reduce(dplyr::union_all, temp) %>%
dplyr::mutate(gap = !!CDMConnector::datediff("index_date", "marker_date",
interval = "day")) %>%
dplyr::filter(abs(.data$gap)>.env$blackOutPeriod & abs(.data$gap)<.env$timeGap) %>%
dplyr::select(-.data$gap) %>%
interval = "day"),
cei = ifelse((.data$index_date < .data$marker_date), .data$marker_date - .data$index_end_date, .data$index_date - .data$marker_end_date)) %>%
dplyr::filter(abs(.data$gap)>.env$blackOutPeriod & abs(.data$gap)<=.env$timeGap) %>%
dplyr::filter(.data$cei <= .env$continuedExposureInterval) %>%
dplyr::select(-.data$gap, -.data$cei) %>%
dplyr::mutate(first_date = dplyr::if_else(.data$index_date<=.data$marker_date,
.data$index_date, .data$marker_date),
second_date = dplyr::if_else(.data$index_date>=.data$marker_date,
Expand Down
31 changes: 3 additions & 28 deletions R/getCohortSequence2.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
# getCohortSequence3 <- function(cdm,
# name = "joined_cohorts",
# dateRange = as.Date(c(NA, NA)),
# indexTable,
# indexId = NULL,
# markerTable,
# markerId = NULL,
# ageGroup = list(c(0, 150)),
# sex = "both",
# daysPriorObservation = 0,
# indexWashout = 0,
# markerWashout = 0,
# timeGap = 365,
# firstEver = T){
#
# }

getCohortSequence2 <- function(cdm,
name = "joined_cohorts",
dateRange = as.Date(c(NA, NA)),
Expand All @@ -25,14 +8,7 @@ getCohortSequence2 <- function(cdm,
daysPriorObservation = 0,
indexWashout = 0,
markerWashout = 0,
timeGap = 365,
firstEver = T){

# change daysPriorObservation in the event of Inf

if(!isTRUE(firstEver)){
cli::cli_abort("error")
}
timeGap = 365){

if(!is.finite(timeGap)){
timeGap <- 999999999
Expand All @@ -49,8 +25,7 @@ getCohortSequence2 <- function(cdm,
daysPriorObservation = daysPriorObservation,
indexWashout = indexWashout,
markerWashout = markerWashout,
timeGap = timeGap,
firstEver = firstEver)
timeGap = timeGap)

# modify dateRange if necessary
if(any(is.na(dateRange))){
Expand Down Expand Up @@ -79,7 +54,7 @@ getCohortSequence2 <- function(cdm,

for (j in (markerCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
for (i in (indexCohort %>% dplyr::select(.data$cohort_definition_id) %>% dplyr::distinct() %>% dplyr::pull())){
temp[[(2^i)*(3^j)]] <-
temp[[paste0("(",i,",",j,")")]] <-
indexCohort %>%
dplyr::filter(.data$cohort_definition_id == i) %>%
dplyr::rename(index_id = .data$cohort_definition_id,
Expand Down
14 changes: 7 additions & 7 deletions R/getConfidenceInterval.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CI
getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025){
getConfidenceInterval <- function(table, nsr, confidenceIntervalLevel = 0.025){

counts <- tibble::tibble(
index_first = table %>% dplyr::pull(.data$index_first) %>% sum(),
Expand All @@ -14,8 +14,8 @@ getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025)
counts$lowerASR_CI <- counts$upperASR_CI <- NA
} else if (counts$index_first == 0){
counts$index_first <- 0.5
counts$lowerCSR_CI <- stats::qbeta(confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$upperCSR_CI <- stats::qbeta(1-confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$lowerCSR_CI <- stats::qbeta(confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$upperCSR_CI <- stats::qbeta(1-confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)

counts$lowerCSR_CI <- counts$lowerCSR_CI/(1-counts$lowerCSR_CI)
counts$upperCSR_CI <- counts$upperCSR_CI/(1-counts$upperCSR_CI)
Expand All @@ -25,8 +25,8 @@ getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025)

} else if (counts$marker_first == 0){
counts$marker_first <- 0.5
counts$lowerCSR_CI <- stats::qbeta(confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$upperCSR_CI <- stats::qbeta(1-confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$lowerCSR_CI <- stats::qbeta(confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$upperCSR_CI <- stats::qbeta(1-confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)

counts$lowerCSR_CI <- counts$lowerCSR_CI/(1-counts$lowerCSR_CI)
counts$upperCSR_CI <- counts$upperCSR_CI/(1-counts$upperCSR_CI)
Expand All @@ -35,8 +35,8 @@ getConfidenceInterval <- function(table, nsr, confidence_interval_level = 0.025)
counts$upperASR_CI <- counts$upperCSR_CI/nsr

} else {
counts$lowerCSR_CI <- stats::qbeta(confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$upperCSR_CI <- stats::qbeta(1-confidence_interval_level, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$lowerCSR_CI <- stats::qbeta(confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)
counts$upperCSR_CI <- stats::qbeta(1-confidenceIntervalLevel, counts$index_first + 0.5, counts$marker_first + 0.5)

counts$lowerCSR_CI <- counts$lowerCSR_CI/(1-counts$lowerCSR_CI)
counts$upperCSR_CI <- counts$upperCSR_CI/(1-counts$upperCSR_CI)
Expand Down
4 changes: 2 additions & 2 deletions R/getPSSA.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ getPSSA <- function(cdm,
cohort_table = NULL,
table_name = "pssa",
study_time = NULL,
confidence_interval_level = 0.025,
confidenceIntervalLevel = 0.025,
prior_obs = 365,
start_date = NA,
end_date = NA # set both as NA for full
Expand All @@ -21,7 +21,7 @@ getPSSA <- function(cdm,
table_cleaned <- tableCleaning(table = table, study_time = study_time)
csr<-crudeSequenceRatio(table_cleaned[[2]])
asr<-adjustedSequenceRatio(table_cleaned[[2]])
counts <- getConfidenceInterval(table_cleaned[[2]], confidence_interval_level = confidence_interval_level)
counts <- getConfidenceInterval(table_cleaned[[2]], confidenceIntervalLevel = confidenceIntervalLevel)

results <- tibble::tibble(name = table_name,
csr = csr,
Expand Down
4 changes: 2 additions & 2 deletions R/getPSSAStrata.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ getPSSAStrata <- function(cdm,
end_date = NA,
table_name = "pssa",
study_time = NULL,
confidence_interval_level = 0.025){
confidenceIntervalLevel = 0.025){

cdm <- IncidencePrevalence::generateDenominatorCohortSet(cdm = cdm,
ageGroup = ageGroup,
Expand All @@ -28,7 +28,7 @@ getPSSAStrata <- function(cdm,

cohort_groups <- CDMConnector::cohortSet(cdm$denominator) %>% dplyr::mutate(group = paste(.data$age_group, " ", sex))

strata_results[[cohort_groups %>% dplyr::filter(.data$cohort_definition_id == i) %>% dplyr::pull(.data$group)]]<-getPSSA(cohort_table = drug_cohort, study_time = study_time, confidence_interval_level = confidence_interval_level)
strata_results[[cohort_groups %>% dplyr::filter(.data$cohort_definition_id == i) %>% dplyr::pull(.data$group)]]<-getPSSA(cohort_table = drug_cohort, study_time = study_time, confidenceIntervalLevel = confidenceIntervalLevel)

}
return(strata_results)
Expand Down
4 changes: 2 additions & 2 deletions R/getPSSASubset.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
getPSSASubset <- function(cdm, index, marker, subset_name, subset_id, table_name = "pssa", study_time = NULL, confidence_interval_level = 0.025){
getPSSASubset <- function(cdm, index, marker, subset_name, subset_id, table_name = "pssa", study_time = NULL, confidenceIntervalLevel = 0.025){
cdm[["subset"]] <- cdm[[subset_name]] %>% dplyr::filter(.data$cohort_definition_id == subset_id)
subset_cdm <- CDMConnector::cdmSubsetCohort(cdm, "subset")
subset_result <- getPSSA(cdm = subset_cdm,
index = index,
marker = marker,
table_name = table_name,
study_time = study_time,
confidence_interval_level = confidence_interval_level)
confidenceIntervalLevel = confidenceIntervalLevel)
return(subset_result)
}
39 changes: 14 additions & 25 deletions R/getSequenceRatios.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,36 +38,25 @@ getSequenceRatios <- function(cdm,
confidenceIntervalLevel = 0.025,
restriction = 548){

# Check cdm objects, writing schema and index/marker tables
checkCdm(cdm, tables=c(outcomeTable))
assertWriteSchema(cdm)

errorMessage <- checkmate::makeAssertCollection()
# check relevant formats of the arguments
checkmate::assertCharacter(outcomeTable, len = 1, any.missing = FALSE, add = errorMessage)

# Check confidenceIntervalLevel
checkconfidenceIntervalLevel(confidenceIntervalLevel, errorMessage)
daysCheck <- all(confidenceIntervalLevel >= 0)
if (!isTRUE(daysCheck)) {
errorMessage$push(
" - confidenceIntervalLevel cannot be negative"
)
}
# checks
checkInputGetSequenceRatios(cdm = cdm,
outcomeTable = outcomeTable,
confidenceIntervalLevel = confidenceIntervalLevel,
restriction = restriction)

temp <- list()
results <- list()
for (i in (cdm[[outcomeTable]] %>% dplyr::distinct(.data$index_id) %>% dplyr::pull())){
for (j in (cdm[[outcomeTable]] %>% dplyr::filter(.data$index_id == i) %>% dplyr::distinct(.data$marker_id) %>% dplyr::pull())){
temp[[(2^i)*(3^j)]] <-
temp[[paste0("(",i,",",j,")")]] <-
cdm[[outcomeTable]] %>%
dplyr::filter(.data$index_id == i & .data$marker_id == j) %>%
dplyr::collect()

date_start <- min(temp[[(2^i)*(3^j)]]$index_date, temp[[(2^i)*(3^j)]]$marker_date)
date_start <- min(temp[[paste0("(",i,",",j,")")]]$index_date, temp[[paste0("(",i,",",j,")")]]$marker_date)

temp[[(2^i)*(3^j)]] <-
temp[[(2^i)*(3^j)]] %>%
temp[[paste0("(",i,",",j,")")]] <-
temp[[paste0("(",i,",",j,")")]] %>%
dplyr::mutate(
orderBA = .data$index_date >= .data$marker_date,
days_first = as.integer((lubridate::interval(date_start, .data$first_date)) / lubridate::days(1)), # gap between the first drug of a person and the first drug of the whole population
Expand All @@ -78,12 +67,12 @@ getSequenceRatios <- function(cdm,
dplyr::mutate(index_id = i, marker_id = j) %>%
dplyr::ungroup()

csr<-crudeSequenceRatio(temp[[(2^i)*(3^j)]])
nsr<-nullSequenceRatio(temp[[(2^i)*(3^j)]], restriction = restriction)
asr<-adjustedSequenceRatio(temp[[(2^i)*(3^j)]], restriction = restriction)
counts <- getConfidenceInterval(temp[[(2^i)*(3^j)]], nsr, confidence_interval_level = confidenceIntervalLevel)
csr<-crudeSequenceRatio(temp[[paste0("(",i,",",j,")")]])
nsr<-nullSequenceRatio(temp[[paste0("(",i,",",j,")")]], restriction = restriction)
asr<-adjustedSequenceRatio(temp[[paste0("(",i,",",j,")")]], restriction = restriction)
counts <- getConfidenceInterval(temp[[paste0("(",i,",",j,")")]], nsr, confidenceIntervalLevel = confidenceIntervalLevel)

results[[(2^i)*(3^j)]] <- cbind(tibble::tibble(csr = csr,
results[[paste0("(",i,",",j,")")]] <- cbind(tibble::tibble(csr = csr,
asr = asr),
counts) %>%
dplyr::mutate(index_id = i, marker_id = j)
Expand Down
Loading

0 comments on commit 166cb5c

Please sign in to comment.