Skip to content

Commit

Permalink
Merge pull request #75 from oxford-pharmacoepi/concept_cohort_set
Browse files Browse the repository at this point in the history
concept function
  • Loading branch information
edward-burn authored Apr 11, 2024
2 parents 8f05e80 + 834e456 commit bcf69bf
Show file tree
Hide file tree
Showing 15 changed files with 567 additions and 109 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
^_pkgdown\.yml$
^doc$
^Meta$
^data-raw$
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ Authors@R: c(
person("Marta", "Alcalde", , "marta.alcaldeherraiz@ndorms.ox.ac.uk",
role = c("aut"), comment = c(ORCID = "0000-0000-0000-0000")),
person("Yuchen", "Guo", email = "yuchen.guo@ndorms.ox.ac.uk",
role = c("aut"), comment = c(ORCID = "0000-0002-0847-4855"))
role = c("aut"), comment = c(ORCID = "0000-0002-0847-4855")),
person("Nuria", "Mercade-Besora", , "nuria.mercadebesora@ndorms.ox.ac.uk",
role = c("ctb"), comment = c(ORCID = "0009-0006-7948-3747"))
)
Description: This package aims to provide functionalities to manipulate
and evaluate cohorts in data mapped to the Observational Medical
Expand Down Expand Up @@ -47,3 +49,5 @@ Config/testthat/parallel: true
VignetteBuilder: knitr
Remotes:
darwin-eu-dev/PatientProfiles
Depends:
R (>= 4.1)
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Generated by roxygen2: do not edit by hand

export(attrition)
export(cohortCodelist)
export(cohortCount)
export(conceptCohort)
export(generateIntersectCohortSet)
export(generateMatchedCohortSet)
export(getIdentifier)
export(joinOverlap)
export(matchCohort)
export(requireAge)
export(requireCohortIntersectFlag)
export(requireDemographics)
Expand All @@ -19,6 +21,7 @@ export(splitOverlap)
export(trimToDateRange)
importFrom(magrittr,"%>%")
importFrom(omopgenerics,attrition)
importFrom(omopgenerics,cohortCodelist)
importFrom(omopgenerics,cohortCount)
importFrom(omopgenerics,settings)
importFrom(rlang,":=")
Expand Down
185 changes: 185 additions & 0 deletions R/conceptCohort.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#' Generate a cohort based on a concept set. The current supported domains are:
#'
#' @param cdm A cdm_reference object.
#' @param conceptSet A conceptSet, can either be a list of concepts, a codelist
#' or a conceptSetExpression (TO DO).
#' @param name Name of the cohort in the cdm object.
#'
#' @export
#'
#' @return A cohort_table object.
#'
conceptCohort <- function(cdm,
conceptSet,
name) {
# initial input validation
cdm <- validateCdm(cdm)
name <- validateName(name)
if (length(conceptSet) == 0) {
cli::cli_inform(c("i" = "Empty codelist provided, returning empty cohort"))
cdm <- omopgenerics::emptyCohortTable(cdm = cdm, name = name)
return(cdm[[name]])
}
conceptSet <- validateConceptSet(conceptSet)

# create concept set tibble
cohortSet <- dplyr::tibble("cohort_name" = names(conceptSet)) |>
dplyr::mutate("cohort_definition_id" = dplyr::row_number())
cohortCodelist <- lapply(conceptSet, dplyr::as_tibble) |>
dplyr::bind_rows(.id = "cohort_name") |>
dplyr::inner_join(cohortSet, by = "cohort_name") |>
dplyr::select("cohort_definition_id", "concept_id" = "value", "codelist_name" = "cohort_name") |>
dplyr::mutate("type" = "index event") |>
addDomains(cdm)

ud <- cohortCodelist |>
dplyr::group_by(.data$domain_id) |>
dplyr::tally() |>
dplyr::collect() |>
dplyr::filter(!.data$domain_id %in% domainsData$domain_id)
for (k in seq_len(nrow(ud))) {
cli::cli_inform(c(
"x" = "Domain {.strong {ud$domain_id[k]}} ({ud$n[k]} concept{?s}) excluded because it is not supported."
))
}

cohortCodelist <- cohortCodelist |>
dplyr::filter(.data$domain_id %in% !!domainsData$domain_id) |>
dplyr::compute()

domains <- cohortCodelist |>
dplyr::select("domain_id") |>
dplyr::distinct() |>
dplyr::pull()

cohorts <- list()
for (k in seq_along(domains)) {
domain <- domains[k]
table <- domainsData$table[domainsData$domain_id == domain]
concept <- domainsData$concept[domainsData$domain_id == domain]
start <- domainsData$start[domainsData$domain_id == domain]
end <- domainsData$end[domainsData$domain_id == domain]
n <- cohortCodelist |>
dplyr::filter(.data$domain_id %in% .env$domain) |>
dplyr::tally() |>
dplyr::pull()
if (table %in% names(cdm)) {
cli::cli_inform(c(
"i" = "Subsetting table {.strong {table}} using {n} concept{?s} with domain: {.strong {domain}}."
))
cohorts[[k]] <- cdm[[table]] |>
dplyr::select(
"subject_id" = "person_id",
"concept_id" = dplyr::all_of(concept),
"cohort_start_date" = dplyr::all_of(start),
"cohort_end_date" = dplyr::all_of(end)
) |>
dplyr::inner_join(
cohortCodelist |>
dplyr::filter(.data$domain_id %in% .env$domain) |>
dplyr::select("concept_id", "cohort_definition_id"),
by = "concept_id"
)
} else {
cli::cli_inform(c(
"x" = "Domain {.strong {domain}} ({n} concept{?s}) excluded because table {table} is not present in the cdm."
))
}
}

if (length(cohorts) == 0) {
cli::cli_inform(c("i" = "No table could be subsetted, returning empty cohort."))
cdm <- omopgenerics::emptyCohortTable(cdm = cdm, name = name)
cdm[[name]] <- cdm[[name]] |>
omopgenerics::newCohortTable(
cohortSetRef = cohortSet,
cohortAttritionRef = NULL,
cohortCodelistRef = NULL
)
return(cdm[[name]])
}

cli::cli_inform(c("i" = "Subsetting tables."))
cohort <- cohorts[[1]]
if (length(cohorts) > 1) {
for (k in 2:length(cohorts)) {
cohort <- cohort |> dplyr::union_all(cohorts[[k]])
}
}
cohort <- cohort |>
dplyr::compute(name = name, temporary = FALSE)

cli::cli_inform(c("i" = "Collapsing records."))
# assign to cdm so we keep class, to be removed when https://github.com/darwin-eu-dev/omopgenerics/issues/256
cdm[[name]] <- cohort |>
collapseGap(gap = 0)
cohort <- cdm[[name]] |>
dplyr::compute(name = name, temporary = FALSE)

cli::cli_inform(c("i" = "Creating cohort attributes."))
cdm[[name]] <- cohort |>
omopgenerics::newCohortTable(
cohortSetRef = cohortSet,
cohortAttritionRef = NULL,
cohortCodelistRef = cohortCodelist |>
dplyr::select(-"domain_id") |>
dplyr::collect()
)

cli::cli_inform(c("v" = "Cohort {.strong {name}} created."))

return(cdm[[name]])
}

addDomains <- function(cohortCodelist, cdm) {
# insert table as temporary
tmpName <- omopgenerics::uniqueTableName()
cdm <- omopgenerics::insertTable(
cdm = cdm, name = tmpName, table = cohortCodelist
)
cdm[[tmpName]] <- cdm[[tmpName]] |> dplyr::compute()

cohortCodelist <- cdm[["concept"]] |>
dplyr::select("concept_id", "domain_id") |>
dplyr::right_join(cdm[[tmpName]], by = "concept_id") |>
dplyr::mutate("domain_id" = tolower(.data$domain_id)) |>
dplyr::compute()

omopgenerics::dropTable(cdm = cdm, name = tmpName)

return(cohortCodelist)
}
collapseGap <- function(cohort, gap) {
start <- cohort |>
dplyr::select(
"cohort_definition_id", "subject_id", "date" = "cohort_start_date"
) |>
dplyr::mutate("date_id" = -1)
end <- cohort |>
dplyr::select(
"cohort_definition_id", "subject_id", "date" = "cohort_end_date"
) |>
dplyr::mutate("date_id" = 1)
start |>
dplyr::union_all(end) |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$date, .data$date_id) |>
dplyr::mutate("cum_id" = cumsum(.data$date_id)) |>
dplyr::filter(
.data$cum_id == 0 | (.data$cum_id == -1 & .data$date_id == -1)
) |>
dplyr::mutate(
"name" = dplyr::if_else(
.data$date_id == -1, "cohort_start_date", "cohort_end_date"
),
"era_id" = dplyr::if_else(.data$date_id == -1, 1, 0)
) |>
dplyr::mutate("era_id" = cumsum(as.numeric(.data$era_id))) |>
dplyr::ungroup() |>
dplyr::arrange() |>
dplyr::select(
"cohort_definition_id", "subject_id", "era_id", "name", "date"
) |>
tidyr::pivot_wider(names_from = "name", values_from = "date") |>
dplyr::select(-"era_id")
}
16 changes: 8 additions & 8 deletions R/generateMatchedCohortSet.R → R/matchCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,21 @@
#' library(dplyr)
#' cdm <- mockDrugUtilisation(numberIndividuals = 100)
#' cdm <- cdm %>%
#' generateMatchedCohortSet(name = "new_matched_cohort",
#' matchCohort(name = "new_matched_cohort",
#' targetCohortName = "cohort1",
#' targetCohortId = c(1,2),
#' matchSex = TRUE,
#' matchYearOfBirth = TRUE,
#' ratio = 2)
#' cdm$new_matched_cohort
#'
generateMatchedCohortSet <- function(cdm,
name,
targetCohortName,
targetCohortId = NULL,
matchSex = TRUE,
matchYearOfBirth = TRUE,
ratio = 1){
matchCohort <- function(cdm,
name,
targetCohortName,
targetCohortId = NULL,
matchSex = TRUE,
matchYearOfBirth = TRUE,
ratio = 1){
cli::cli_inform("Starting matching")

# validate initial input
Expand Down
4 changes: 4 additions & 0 deletions R/reexports-omopgenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ omopgenerics::settings
#' @importFrom omopgenerics attrition
#' @export
omopgenerics::attrition

#' @importFrom omopgenerics cohortCodelist
#' @export
omopgenerics::cohortCodelist
Binary file added R/sysdata.rda
Binary file not shown.
28 changes: 28 additions & 0 deletions R/validateFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ validateCDM <- function(cdm) {
if (!isTRUE(inherits(cdm, "cdm_reference"))) {
cli::cli_abort("cohort must be part of a cdm reference")
}
return(invisible(cdm))
}

validateCdm <- function(cdm) {
if (!isTRUE(inherits(cdm, "cdm_reference"))) {
cli::cli_abort("cdm must be a cdm_reference object.")
}
return(invisible(cdm))
}

validateCohortTable <- function(cohort) {
Expand Down Expand Up @@ -46,3 +54,23 @@ validateDateRange<-function(dateRange){
}
return(invisible(dateRange))
}

validateName <- function(name) {
em <- c(
"x" = "{name} it is not a valida value for name.",
"i" = "It must be:",
"*" = "lowercase character vector of length 1",
"*" = "NA or NULL values are not allowed"
)
if (!is.character(name) | length(name) != 1 | is.na(name)) {
cli::cli_abort(em)
}
if (tolower(name) != name){
cli::cli_abort(em)
}
return(invisible(name))
}

validateConceptSet <- function(conceptSet) {
omopgenerics::newCodelist(conceptSet)
}
14 changes: 14 additions & 0 deletions data-raw/domainsData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## code to prepare `DATASET` dataset goes here

domainsData <- dplyr::tribble(
~"domain_id", ~"table", ~"concept", ~"start", ~"end",
"drug", "drug_exposure", "drug_concept_id", "drug_exposure_start_date", "drug_exposure_end_date",
"condition", "condition_occurrence", "condition_concept_id", "condition_start_date", "condition_end_date",
"procedure", "procedure_occurrence", "procedure_concept_id", "procedure_date", "procedure_date",
"observation", "observation", "observation_concept_id", "observation_date", "observation_date",
"measurement", "measurement", "measurement_concept_id", "measurement_date", "measurement_date",
"visit", "visit_occurrence", "visit_concept_id", "visit_start_date", "visit_end_date",
"device", "device_exposure", "device_concept_id", "device_exposure_start_date", "device_exposure_end_date"
)

usethis::use_data(domainsData, internal = TRUE, overwrite = TRUE)
5 changes: 5 additions & 0 deletions man/CohortConstructor-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/conceptCohort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/generateMatchedCohortSet.Rd → man/matchCohort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bcf69bf

Please sign in to comment.