Skip to content

Commit

Permalink
Merge pull request #90 from oxford-pharmacoepi/dev_nmb
Browse files Browse the repository at this point in the history
restrict to last entry
  • Loading branch information
edward-burn authored Apr 12, 2024
2 parents 2664705 + f82ce96 commit 3c3b5f4
Show file tree
Hide file tree
Showing 15 changed files with 402 additions and 51 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export(requirePriorObservation)
export(requireSex)
export(requireTableIntersectFlag)
export(restrictToFirstEntry)
export(restrictToLastEntry)
export(settings)
export(splitOverlap)
export(startDateColumn)
Expand Down
17 changes: 14 additions & 3 deletions R/conceptCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,23 @@ conceptCohort <- function(cdm,
cli::cli_inform(c("i" = "Collapsing records."))
# assign to cdm so we keep class, to be removed when https://github.com/darwin-eu-dev/omopgenerics/issues/256
cdm[[name]] <- cohort |>
collapseGap(gap = 0)
cohort <- cdm[[name]] |>
dplyr::inner_join(cdm$observation_period |>
dplyr::select("subject_id" = "person_id",
"observation_period_start_date",
"observation_period_end_date"),
by = "subject_id") |>
dplyr::filter(
.data$observation_period_start_date <= .data$cohort_start_date,
.data$observation_period_end_date >= .data$cohort_end_date,
.data$cohort_start_date <= .data$cohort_end_date
) |>
dplyr::select(-"observation_period_start_date", -"observation_period_end_date") |>
collapseGap(gap = 0) |>
dplyr::compute(name = name, temporary = FALSE)

cli::cli_inform(c("i" = "Creating cohort attributes."))
cdm[[name]] <- cohort |>

cdm[[name]] <- cdm[[name]] |>
omopgenerics::newCohortTable(
cohortSetRef = cohortSet,
cohortAttritionRef = NULL,
Expand Down
2 changes: 1 addition & 1 deletion R/intersectCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ intersectCohort <- function(cohort,
name = omopgenerics::tableName(cohort)) {

# checks
assertCharacter(name)
name <- validateName(name)
validateCohortTable(cohort)
cdm <- omopgenerics::cdmReference(cohort)
validateCDM(cdm)
Expand Down
2 changes: 1 addition & 1 deletion R/requireCohortIntersectFlag.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ requireCohortIntersectFlag <- function(x,
negate = FALSE,
name = omopgenerics::tableName(x)){
# checks
assertCharacter(name, length = 1)
name <- validateName(name)
assertLogical(negate, length = 1)
validateCohortTable(x)
cdm <- omopgenerics::cdmReference(x)
Expand Down
76 changes: 38 additions & 38 deletions R/requireConceptIntersectFlag.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ requireConceptIntersectFlag <- function(x,
negate = FALSE,
name = omopgenerics::tableName(x)){
# checks
assertCharacter(name, length = 1)
name <- validateName(name)
assertLogical(negate, length = 1)
validateCohortTable(x)
cdm <- omopgenerics::cdmReference(x)
Expand All @@ -75,45 +75,45 @@ requireConceptIntersectFlag <- function(x,
}

cdm <- omopgenerics::cdmReference(x)

subsetCohort <- x %>%
dplyr::select(dplyr::all_of(.env$cols)) %>%
PatientProfiles::addConceptIntersectFlag(
conceptSet = conceptSet,
indexDate = indexDate,
targetStartDate = targetStartDate,
targetEndDate = targetEndDate,
window = window,
censorDate = censorDate,
nameStyle = "intersect_concept"
)

if(isFALSE(negate)){
subsetCohort <- subsetCohort %>%
dplyr::filter(.data$intersect_concept == 1) %>%
dplyr::select(!"intersect_concept")
# attrition reason
reason <- glue::glue("Concept {names(conceptSet)} between {window_start} & ",
"{window_end} days relative to {indexDate}")
if (length(conceptSet) == 0) {
cli::cli_inform(c("i" = "Empty codelist provided, returning input cohort"))
} else {
# ie require absence instead of presence
subsetCohort <- subsetCohort %>%
dplyr::filter(.data$intersect_concept != 1) %>%
dplyr::select(!"intersect_concept")
# attrition reason
reason <- glue::glue("Not in concept {names(conceptSet)} between {window_start} & ",
"{window_end} days relative to {indexDate}")
subsetCohort <- x %>%
dplyr::select(dplyr::all_of(.env$cols)) %>%
PatientProfiles::addConceptIntersectFlag(
conceptSet = conceptSet,
indexDate = indexDate,
targetStartDate = targetStartDate,
targetEndDate = targetEndDate,
window = window,
censorDate = censorDate,
nameStyle = "intersect_concept"
)
if(isFALSE(negate)){
subsetCohort <- subsetCohort %>%
dplyr::filter(.data$intersect_concept == 1) %>%
dplyr::select(!"intersect_concept")
# attrition reason
reason <- glue::glue("Concept {names(conceptSet)} between {window_start} & ",
"{window_end} days relative to {indexDate}")
} else {
# ie require absence instead of presence
subsetCohort <- subsetCohort %>%
dplyr::filter(.data$intersect_concept != 1) %>%
dplyr::select(!"intersect_concept")
# attrition reason
reason <- glue::glue("Not in concept {names(conceptSet)} between {window_start} & ",
"{window_end} days relative to {indexDate}")
}
if (!is.null(censorDate)) {
reason <- glue::glue("{reason}, censoring at {censorDate}")
}
x <- x %>%
dplyr::inner_join(subsetCohort,
by = c(cols)) %>%
dplyr::compute(name = name, temporary = FALSE) %>%
CDMConnector::recordCohortAttrition(reason = reason)
}

if (!is.null(censorDate)) {
reason <- glue::glue("{reason}, censoring at {censorDate}")
}

x <- x %>%
dplyr::inner_join(subsetCohort,
by = c(cols)) %>%
dplyr::compute(name = name, temporary = FALSE) %>%
CDMConnector::recordCohortAttrition(reason = reason)

return(x)
}
4 changes: 2 additions & 2 deletions R/requireDateRange.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ requireInDateRange <- function(cohort,
name = omopgenerics::tableName(cohort)) {

# checks
assertCharacter(name)
name <- validateName(name)
validateCohortTable(cohort)
cdm <- omopgenerics::cdmReference(cohort)
validateCDM(cdm)
Expand Down Expand Up @@ -94,7 +94,7 @@ trimToDateRange <- function(cohort,
name = omopgenerics::tableName(cohort)) {

# checks
assertCharacter(name)
name <- validateName(name)
validateCohortTable(cohort)
cdm <- omopgenerics::cdmReference(cohort)
validateCDM(cdm)
Expand Down
2 changes: 1 addition & 1 deletion R/requireDeathFlag.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ requireDeathFlag <- function(x,
negate = FALSE,
name = omopgenerics::tableName(x)) {
# checks
assertCharacter(name, length = 1)
name <- validateName(name)
assertLogical(negate, length = 1)
validateCohortTable(x)
cdm <- omopgenerics::cdmReference(x)
Expand Down
2 changes: 1 addition & 1 deletion R/requireDemographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ demographicsFilter <- function(cohort,
attritionFutureObservation
) {
# checks
assertCharacter(name)
name <- validateName(name)
assertChoice(sex, choices = c("Both", "Male", "Female"), length = 1)
validateCohortTable(cohort)
cdm <- omopgenerics::cdmReference(cohort)
Expand Down
2 changes: 1 addition & 1 deletion R/requireTableIntersectFlag.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ requireTableIntersectFlag <- function(x,
negate = FALSE,
name = omopgenerics::tableName(x)){
# checks
assertCharacter(name, length = 1)
name <- validateName(name)
assertLogical(negate, length = 1)
assertCharacter(tableName)
validateCohortTable(x)
Expand Down
63 changes: 62 additions & 1 deletion R/restrictToFirstEntry.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ restrictToFirstEntry <- function(cohort,
name = omopgenerics::tableName(cohort)){

# checks
assertCharacter(name)
name <- validateName(name)
validateCohortTable(cohort)
cdm <- omopgenerics::cdmReference(cohort)
validateCDM(cdm)
Expand Down Expand Up @@ -58,3 +58,64 @@ restrictToFirstEntry <- function(cohort,

return(cohort)
}

#' Restrict cohort to last entry by index date
#'
#' @param cohort A cohort table in a cdm reference.
#' @param cohortId Vector of cohort definition ids to include. If NULL, all
#' cohort definition ids will be used.
#' @param indexDate Column name in cohort that contains the date to restrict on.
#' @param name Name of the new cohort with the restriction.
#' @return A cohort table in a cdm reference.
#' @export
#'
#' @examples
#' \donttest{
#' library(CohortConstructor)
#' library(PatientProfiles)
#' cdm <- mockPatientProfiles()
#' cdm$cohort1 <- restrictToLastEntry(cdm$cohort1)
#' }
#'
restrictToLastEntry <- function(cohort,
cohortId = NULL,
indexDate = "cohort_start_date",
name = omopgenerics::tableName(cohort)){

# checks
name <- validateName(name)
validateCohortTable(cohort)
cdm <- omopgenerics::cdmReference(cohort)
validateCDM(cdm)
validateIndexDate(indexDate, cohort)
ids <- omopgenerics::settings(cohort)$cohort_definition_id
cohortId <- validateCohortId(cohortId, ids)

# restrict to first entry
indexDateSym <- rlang::sym(indexDate)

if (all(ids %in% cohortId)) {
cohort <- cohort |>
dplyr::group_by(.data$subject_id,.data$cohort_definition_id) |>
dplyr::filter(!!indexDateSym == max(!!indexDateSym, na.rm = TRUE)) |>
dplyr::ungroup() |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable() |>
CDMConnector::recordCohortAttrition("Restricted to last entry")
} else {
cohort <- cohort |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) |>
dplyr::group_by(.data$subject_id,.data$cohort_definition_id) |>
dplyr::filter(!!indexDateSym == max(!!indexDateSym, na.rm = TRUE)) |>
dplyr::ungroup() |>
dplyr::union_all(
cohort |>
dplyr::filter(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable() |>
CDMConnector::recordCohortAttrition("Restricted to last entry", cohortId = cohortId)
}

return(cohort)
}
38 changes: 38 additions & 0 deletions man/restrictToLastEntry.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions tests/testthat/test-collapseCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,87 @@ test_that("simple example", {

})

test_that("out of observation", {
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
"concept_id" = 1,
"concept_name" = "my concept",
"domain_id" = "drUg",
"vocabulary_id" = NA,
"concept_class_id" = NA,
"concept_code" = NA,
"valid_start_date" = NA,
"valid_end_date" = NA
)
)
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "drug_exposure", table = dplyr::tibble(
"drug_exposure_id" = 1:11,
"person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1),
"drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1),
"drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803),
"drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1800, 1801, 1802, 1803),
"drug_type_concept_id" = 1
) |>
dplyr::mutate(
"drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"),
"drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01")
)
)

cdm <- CDMConnector::copyCdmTo(con = DBI::dbConnect(duckdb::duckdb()), cdm = cdm, schema = "main")

expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort"))

expect_no_error(sameCohort <- cohort |> collapseCohort(gap = 0, name = "new_cohort"))
expect_identical(settings(sameCohort), settings(cohort))
expect_identical(cohortCount(sameCohort), cohortCount(cohort))
expect_identical(
attrition(sameCohort),
attrition(cohort) |>
dplyr::union_all(dplyr::tibble(
"cohort_definition_id" = 1L,
"number_records" = 7L,
"number_subjects" = 2L,
"reason_id" = 2L,
"reason" = "Collapse cohort with gap = 0 days.",
"excluded_records" = 0L,
"excluded_subjects" = 0L
))
)
expect_true(tableName(sameCohort) == "new_cohort")
expect_identical(
omopgenerics::tableSource(sameCohort), omopgenerics::tableSource(cohort)
)

expect_no_error(newCohort <- cohort |> collapseCohort(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
))
expect_identical(
attrition(newCohort),
attrition(cohort) |>
dplyr::union_all(dplyr::tibble(
"cohort_definition_id" = 1L,
"number_records" = 4L,
"number_subjects" = 2L,
"reason_id" = 2L,
"reason" = "Collapse cohort with gap = 1 days.",
"excluded_records" = 3L,
"excluded_subjects" = 0L
))
)
expect_true(tableName(newCohort) == "my_cohort")
expect_identical(
omopgenerics::tableSource(newCohort), omopgenerics::tableSource(cohort)
)

})
Loading

0 comments on commit 3c3b5f4

Please sign in to comment.