Skip to content

Commit

Permalink
Merge pull request #343 from OHDSI/padCohortEnd
Browse files Browse the repository at this point in the history
padCohortEnd
  • Loading branch information
edward-burn authored Oct 11, 2024
2 parents 582917b + 177c466 commit fef2a26
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: CohortConstructor
Title: Build and Manipulate Study Cohorts Using a Common Data Model
Version: 0.3.1
Version: 0.3.1.900
Authors@R: c(
person("Edward", "Burn", , "edward.burn@ndorms.ox.ac.uk",
role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9286-1128")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(intersectCohorts)
export(matchCohorts)
export(measurementCohort)
export(mockCohortConstructor)
export(padCohortEnd)
export(padCohortStart)
export(requireAge)
export(requireCohortIntersect)
Expand Down
114 changes: 114 additions & 0 deletions R/padCohortEnd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#' Add days to cohort end
#'
#' @description
#' `padCohortStart()` Adds (or subtracts) a certain number of days to the cohort
#' end date. Note:
#' * If the days added means that cohort end would be after observation
#' period end date, then observation period end date will be used for cohort
#' exit.
#' * If the days added means that cohort exit would be after the next cohort
#' start then these overlapping cohort entries will be collapsed.
#' * If days subtracted means that cohort end would be before cohort start then
#' the cohort entry will be dropped.
#'
#' @inheritParams cohortDoc
#' @inheritParams cohortIdModifyDoc
#' @inheritParams nameDoc
#' @param days Number of day to add to the cohort end date.
#'
#' @return Cohort table
#' @export
#'
#' @examples
#' \donttest{
#' library(CohortConstructor)
#' cdm <- mockCohortConstructor()
#' # add 10 days to each cohort exit
#' cdm$cohort1 |>
#' padCohortEnd(days = 10)
#' }
padCohortEnd <- function(cohort,
days,
cohortId = NULL,
name = tableName(cohort)) {
# validate input
name <- omopgenerics::validateNameArgument(name, validation = "warning")
cohort <- omopgenerics::validateCohortArgument(cohort)
cdm <- omopgenerics::validateCdmArgument(omopgenerics::cdmReference(cohort))
days <- omopgenerics::assertNumeric(days, length = 1)
days <- as.integer(days)
cohortId <- validateCohortId(cohortId, settings(cohort))

ids <- omopgenerics::settings(cohort)$cohort_definition_id
# temp variable names
futureObsCol <- omopgenerics::uniqueId()
newEndCol <- omopgenerics::uniqueId()
diffCol <- omopgenerics::uniqueId()

if (days > 0) {
# if days is more than zero then updating end could take the date
# out of observation
cohort <- cohort |>
PatientProfiles::addFutureObservationQuery(
futureObservationType = "date",
futureObservationName = futureObsCol)
}

if(length(cohortId) < length(ids)) {
# if only a subset of ids are provided then only update these
cohort <- cohort %>%
dplyr::mutate(
!!newEndCol :=
dplyr::if_else(
.data$cohort_definition_id %in% .env$cohortId,
as.Date(
!!CDMConnector::dateadd(
"cohort_end_date",
number = days,
interval = "day"
)
),
.data$cohort_end_date
)
)
} else {
# if all ids are provided then simpler query - update all
cohort <- cohort %>%
dplyr::mutate(
!!newEndCol := as.Date(
!!CDMConnector::dateadd(
"cohort_end_date",
number = days,
interval = "day")))
}
if (days > 0) {
cohort <- cohort %>%
dplyr::mutate(!!diffCol := !!CDMConnector::datediff(newEndCol, futureObsCol)) |>
dplyr::mutate(cohort_end_date = dplyr::if_else(!!rlang::ensym(diffCol) >= 0,
!!rlang::ensym(newEndCol),
!!rlang::ensym(futureObsCol)))
} else {
cohort <- cohort |>
dplyr::mutate(cohort_end_date = !!rlang::ensym(newEndCol))
}

# drop anyone with end before start
cohort <- cohort %>%
dplyr::filter(.data$cohort_start_date <= .data$cohort_end_date) |>
dplyr::select(!dplyr::any_of(c(futureObsCol, diffCol, newEndCol)))

# collapse any overlapping cohort entries
cohort <- cohort |>
joinOverlap(name = name,
by = c("cohort_definition_id", "subject_id"),
gap = 0)

cdm[[name]] <- cohort |>
dplyr::compute(temporary = FALSE, name = name) |>
omopgenerics::newCohortTable() |>
omopgenerics::recordCohortAttrition(
reason = "Pad cohort start date by {days} day{?s}")

return(cdm[[name]])

}
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ reference:
- matches("requireDeathFlag")
- subtitle: Update cohort start and end dates
- contents:
- matches("^trim|exit|entry|padCohortStart")
- matches("^trim|exit|entry|padCohortStart|padCohortEnd")
- subtitle: Concatanate cohort entries
- contents:
- matches("collapseCohorts")
Expand Down
45 changes: 45 additions & 0 deletions man/padCohortEnd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

127 changes: 127 additions & 0 deletions tests/testthat/test-padCohortEnd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
test_that("simple example", {

cdm <- omock::mockCdmFromTables(tables = list(
cohort =
data.frame(
cohort_definition_id = 1L,
subject_id = c(1L, 2L),
cohort_start_date = as.Date(c("2020-01-03","2020-01-03")),
cohort_end_date = as.Date(c("2020-01-04", "2020-01-04"))
)
))
cdm <- cdm |> copyCdm()
cdm <- omopgenerics::insertTable(
cdm = cdm,
name = "observation_period",
table = data.frame(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = as.Date(c("2020-01-01", "2020-01-01")),
observation_period_end_date = as.Date(c("2020-01-10", "2020-01-30")),
period_type_concept_id = 1L
)
)

cdm$cohort_1 <- padCohortEnd(cdm$cohort,
days = 10,
name = "cohort_1")

expect_true(nrow(cdm$cohort_1 |>
dplyr::collect()) == 2)
# same cohort start
expect_true(all(cdm$cohort_1 |>
dplyr::pull("cohort_start_date") ==
as.Date("2020-01-03")))
# new cohort end
expect_true(all(cdm$cohort_1 |>
dplyr::filter(subject_id == 1) |>
dplyr::pull("cohort_end_date") ==
as.Date("2020-01-10"))) # end of obs
expect_true(all(cdm$cohort_1 |>
dplyr::filter(subject_id == 2) |>
dplyr::pull("cohort_end_date") ==
as.Date("2020-01-14"))) # cohort end plus 10 days

# before cohort start - should be dropped if before cohort start
cdm$cohort_2 <- padCohortEnd(cdm$cohort,
days = -2,
name = "cohort_2")
expect_true(nrow(cdm$cohort_2 |>
dplyr::collect()) == 0)


})

test_that("overlapping entries", {

cdm <- omock::mockCdmFromTables(tables = list(
cohort =
data.frame(
cohort_definition_id = c(1L, 1L, 2L, 2L),
subject_id = c(1L, 1L, 1L, 2L),
cohort_start_date = as.Date(c("2020-01-03",
"2020-01-08",
"2020-01-08",
"2020-01-03")),
cohort_end_date = as.Date(c("2020-01-04",
"2020-01-10",
"2020-01-10",
"2020-01-04"))
)
))
cdm <- cdm |> copyCdm()
cdm <- omopgenerics::insertTable(
cdm = cdm,
name = "observation_period",
table = data.frame(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = as.Date(c("2020-01-01", "2020-01-01")),
observation_period_end_date = as.Date(c("2020-01-10", "2020-01-30")),
period_type_concept_id = 1L
)
)

# by adding 10 days, person one will have overlapping entries
# which should be collapsed
# there entry in the other cohort should be unchanged
cdm$cohort_1 <- padCohortEnd(cdm$cohort,
days = 10,
name = "cohort_1")
expect_true(nrow(cdm$cohort_1 |>
dplyr::collect()) == 3)

# new cohort end
expect_true(all(cdm$cohort_1 |>
dplyr::filter(subject_id == 1,
cohort_definition_id == 1) |>
dplyr::pull("cohort_end_date") ==
as.Date("2020-01-10"))) # end of obs
expect_true(all(cdm$cohort_1 |>
dplyr::filter(subject_id == 2) |>
dplyr::pull("cohort_end_date") ==
as.Date("2020-01-14"))) # cohort end plus 10 days

# leave one cohort unchanged
cdm$cohort_2 <- padCohortEnd(cdm$cohort,
days = 10,
name = "cohort_2",
cohortId = 2)
expect_true(nrow(cdm$cohort_2 |>
dplyr::collect()) == 4)


expect_identical(cdm$cohort_2 |>
dplyr::filter(subject_id == 2) |>
dplyr::filter(cohort_definition_id == 2) |>
dplyr::pull("cohort_end_date"), as.Date("2020-01-14"))

expect_equal(
sort(cdm$cohort |>
dplyr::filter(cohort_definition_id == 1) |>
dplyr::pull("cohort_end_date")),
sort(cdm$cohort_2 |>
dplyr::filter(cohort_definition_id == 1) |>
dplyr::pull("cohort_end_date")))

})

0 comments on commit fef2a26

Please sign in to comment.