Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trim demographics #94

Merged
merged 12 commits into from
Apr 16, 2024
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(cohortCodelist)
export(cohortCount)
export(collapseCohort)
export(conceptCohort)
export(demographicsCohort)
export(endDateColumn)
export(getIdentifier)
export(intersectCohort)
Expand All @@ -26,6 +27,7 @@ export(settings)
export(splitOverlap)
export(startDateColumn)
export(tableName)
export(trimDemographics)
export(trimToDateRange)
importFrom(PatientProfiles,endDateColumn)
importFrom(PatientProfiles,startDateColumn)
Expand Down
58 changes: 58 additions & 0 deletions R/demographicsCohort.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Create the observation period cohort
#'
#' @param cdm A cdm_reference.
#' @param name Name of the new cohort_table object.
#' @param ageRange A list of minimum and maximum age.
#' @param sex Can be "Both", "Male" or "Female". If one of the latter, only
#' those with that sex will be included.
#' @param minPriorObservation A minimum number of prior observation days in
#' the database.
#' @param minFutureObservation A minimum number of future observation days in
#' the database.
#'
#' @return The cohort with the observation period
#'
#' @export
#'
demographicsCohort <- function(cdm,
name,
ageRange = NULL,
sex = NULL,
minPriorObservation = NULL,
minFutureObservation = NULL) {
# initial checks
cdm <- validateCdm(cdm)
name <- validateName(name)

cdm[[name]] <- cdm$observation_period |>
dplyr::inner_join(
cdm$person |> dplyr::select("person_id") |> dplyr::distinct(),
by = "person_id"
) |>
dplyr::select(
"subject_id" = "person_id",
"cohort_start_date" = "observation_period_start_date",
"cohort_end_date" = "observation_period_end_date"
) |>
dplyr::mutate("cohort_definition_id" = 1) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(
cohortSetRef = dplyr::tibble(
"cohort_definition_id" = 1, "cohort_name" = "demographics"
),
cohortAttritionRef = NULL,
cohortCodelistRef = NULL
)

cdm[[name]] <- trimDemographics(
cohort = cdm[[name]],
cohortId = NULL,
ageRange = ageRange,
sex = sex,
minPriorObservation = minPriorObservation,
minFutureObservation = minFutureObservation,
name = name
)

return(cdm[[name]])
}
310 changes: 310 additions & 0 deletions R/trimDemographics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
#' Restrict cohort on patient demographics
#'
#' @param cohort A cohort table in a cdm reference.
#' @param cohortId Vector of cohort definition ids to include. If NULL, all
#' cohort definition ids will be used.
#' @param ageRange A list of minimum and maximum age.
#' @param sex Can be "Both", "Male" or "Female". If one of the latter, only
#' those with that sex will be included.
#' @param minPriorObservation A minimum number of prior observation days in
#' the database.
#' @param minFutureObservation A minimum number of future observation days in
#' the database.
#' @param name Name of the new cohort with the demographic requirements.
#'
#' @return The cohort table with only records for individuals satisfying the
#' demographic requirements
#'
#' @export
#'
trimDemographics <- function(cohort,
cohortId = NULL,
ageRange = NULL,
sex = NULL,
minPriorObservation = NULL,
minFutureObservation = NULL,
name = tableName(cohort)) {
# initial validation
cohort <- validateCohortTable(cohort, TRUE)
cohortId <- validateCohortId(cohortId, settings(cohort)$cohort_definition_id)
ageRange <- validateAgeRange(ageRange)
sex <- validateSex(sex)
minPriorObservation <- validateMinPriorObservation(minPriorObservation)
minFutureObservation <- validateMinFutureObservation(minFutureObservation)
name <- validateName(name)

cdm <- omopgenerics::cdmReference(cohort)
tablePrefix <- omopgenerics::tmpPrefix()

cli::cli_inform(c("i" = "Building new trimmed cohort"))

cohort <- cohort |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId)

if (!is.null(ageRange)) {
cli::cli_inform(c("Adding birth date"))
cohort <- cohort |>
PatientProfiles::addDateOfBirth(name = "date_0") %>%
dplyr::mutate(!!!datesAgeRange(ageRange))
}
if (!is.null(minPriorObservation) |
!is.null(minFutureObservation) |
!is.null(sex)) {
cli::cli_inform(c("Adding demographics information"))
cohort <- cohort |>
PatientProfiles::addDemographics(
age = FALSE,
sex = !is.null(sex),
priorObservation = !is.null(minPriorObservation),
priorObservationType = "date",
futureObservation = !is.null(minFutureObservation),
futureObservationType = "date"
)
}

newSettings <- settings(cohort) |>
getNewSettings(
cohortId, ageRange, sex, minPriorObservation, minFutureObservation
)

# insert settings
nm <- omopgenerics::uniqueTableName(tablePrefix)
cdm <- omopgenerics::insertTable(
cdm = cdm,
name = nm,
table = newSettings |> dplyr::select(dplyr::any_of(c(
"cohort_definition_id", "require_min_age", "require_max_age",
"require_sex", "require_min_prior_observation",
"require_min_future_observation", "new_cohort_definition_id"
)))
)

cli::cli_inform(c("Creating initial cohort"))
cohort <- cohort |>
dplyr::inner_join(cdm[[nm]], by = "cohort_definition_id") |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id") |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(
cohortSetRef = newSettings |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id"),
cohortAttritionRef = attrition(cohort) |>
dplyr::inner_join(
newSettings |>
dplyr::select("cohort_definition_id", "new_cohort_definition_id"),
by = "cohort_definition_id"
) |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id"),
cohortCodelistRef = attr(cohort, "cohort_codelist") |>
dplyr::collect() |>
dplyr::inner_join(
newSettings |>
dplyr::select("cohort_definition_id", "new_cohort_definition_id"),
by = "cohort_definition_id"
) |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id")
)

if (!is.null(sex)) {
cli::cli_inform(c("Trim sex"))
cohort <- cohort |>
dplyr::filter(
tolower(.data$sex) == .data$require_sex |
tolower(.data$require_sex) == "both"
) |>
dplyr::select(-c("sex", "require_sex")) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Restrict sex")
}
if (!is.null(ageRange)) {
cli::cli_inform(c("Trim age"))
cohort <- cohort %>%
dplyr::mutate(
!!!caseAge(ageRange),
"cohort_start_date" = dplyr::if_else(
.data$cohort_start_date <= .data$new_cohort_start_date,
.data$new_cohort_start_date,
.data$cohort_start_date
),
"cohort_end_date" = dplyr::if_else(
.data$cohort_end_date <= .data$new_cohort_end_date,
.data$cohort_end_date,
.data$new_cohort_end_date
)
) |>
dplyr::select(-c(
dplyr::starts_with("date_"), "require_min_age", "require_max_age",
"new_cohort_start_date", "new_cohort_end_date"
)) |>
dplyr::filter(.data$cohort_start_date <= .data$cohort_end_date) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Trim age_group")
}
if (!is.null(minPriorObservation)) {
cli::cli_inform(c("Trim prior observation"))
cohort <- cohort %>%
dplyr::mutate(
"new_cohort_start_date" = as.Date(!!CDMConnector::dateadd(
date = "prior_observation",
number = "require_min_prior_observation",
interval = "day"
)),
"cohort_start_date" = dplyr::if_else(
.data$new_cohort_start_date >= .data$cohort_start_date,
.data$new_cohort_start_date,
.data$cohort_start_date
)
) |>
dplyr::filter(.data$cohort_start_date <= .data$cohort_end_date) |>
dplyr::select(-c("require_min_prior_observation", "prior_observation", "new_cohort_start_date")) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Trim prior_observation")
}
if (!is.null(minFutureObservation)) {
cli::cli_inform(c("Trim future observation"))
cohort <- cohort %>%
dplyr::filter(
!!CDMConnector::datediff(
start = "cohort_start_date",
end = "future_observation",
interval = "day"
) >=
.data$require_min_future_observation
) |>
dplyr::select(-c("require_min_future_observation", "future_observation")) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Require future_observation")
}

# TODO update attrition names to be more coherent with the age groups, sex and so

cli::cli_inform(c("v" = "Cohort trimmed"))
return(cohort)
}

datesAgeRange <- function(ageRange) {
qA <- list()
values <- lapply(ageRange, function(x) {
x[2] <- x[2] + 1
return(x)
}) |>
unlist() |>
unique()
values <- values[!is.infinite(values)]
values <- values[values != 0]
glue::glue("as.Date(local(CDMConnector::dateadd('date_0', {values}, interval = 'year')))") |>
rlang::parse_exprs() |>
rlang::set_names(glue::glue("date_{values}"))
}
getNewSettings <- function(set, cohortId, age, sex, prior, future) {
if (length(age) == 0) {
ageId <- NULL
} else {
ageId <- seq_along(age)
}
if (length(prior) == 0) {
prior <- NULL
} else {
prior <- as.integer(prior)
}
if (length(future) == 0) {
future <- NULL
} else {
future <- as.integer(future)
}
sets <- tidyr::expand_grid(
"cohort_definition_id" = cohortId,
"require_age" = ageId,
"require_sex" = sex,
"require_min_prior_observation" = prior,
"require_min_future_observation" = future
)
if (!is.null(ageId)) {
ageMin <- lapply(age, function(x) {x[1]}) |> unlist()
ageMax <- lapply(age, function(x) {x[2]}) |> unlist()
sets <- sets |>
dplyr::inner_join(
dplyr::tibble(
"require_age" = ageId,
"require_min_age" = ageMin,
"require_max_age" = ageMax
),
by = "require_age"
) |>
dplyr::select(-"require_age")
}
sets <- sets |>
dplyr::select(dplyr::any_of(c(
"cohort_definition_id", "require_min_age", "require_max_age",
"require_sex", "require_min_prior_observation",
"require_min_future_observation"
)))
sets <- set |>
dplyr::inner_join(
sets, by = "cohort_definition_id", suffix = c(".original", "")
) |>
dplyr::mutate("new_cohort_definition_id" = dplyr::row_number())
if (!is.null(age)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_min_age, "_",
.data$require_max_age
))
}
if (!is.null(sex)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_sex
))
}
if (!is.null(prior)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_min_prior_observation
))
}
if (!is.null(future)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_min_future_observation
))
}
sets <- sets |>
# we will need a new release of omopgenerics so we can change ::: -> ::
dplyr::mutate("cohort_name" = omopgenerics:::toSnakeCase(.data$cohort_name))
return(sets)
}
caseAge <- function(age) {
prepareColStart <- function(x, col) {
num <- x |> unlist() |> unique() |> as.character() |> tolower()
x <- paste0("date_", num)
x <- paste0(".data$", col, " == ", num, " ~ .data$", x) |>
paste0(collapse = ",")
x <- paste0("dplyr::case_when(", x, ")") |>
rlang::parse_exprs() |>
rlang::set_names(c("new_cohort_start_date"))
return(x)
}
prepareColEnd <- function(x, col) {
num <- unique(unlist(x))
infFlag <- any(is.infinite(num))
num <- num[!is.infinite(num)]
x <- paste0(".data$", col, " == ", as.character(num), " ~ as.Date(local(CDMConnector::dateadd(date = 'date_", as.character(num+1) ,"', number = -1, interval = 'day')))")
if (infFlag) {
x <- c(x, paste0("is.infinite(.data$", col, ") ~ .data$cohort_end_date"))
}
x <- paste0(x, collapse = ", ")
x <- paste0("dplyr::case_when(", x, ")") |>
rlang::parse_exprs() |>
rlang::set_names("new_cohort_end_date")
return(x)
}
ageMin <- lapply(age, function(x){x[1]}) |>
prepareColStart("require_min_age")
ageMax <- lapply(age, function(x){x[2]}) |>
prepareColEnd("require_max_age")
c(ageMin, ageMax)
}
Loading
Loading