Skip to content

Commit

Permalink
strata by sex in summariseInOBservation
Browse files Browse the repository at this point in the history
  • Loading branch information
martaalcalde committed Jul 24, 2024
1 parent 8c5370c commit a0fd274
Show file tree
Hide file tree
Showing 9 changed files with 371 additions and 269 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export(plotObservationPeriod)
export(plotRecordCount)
export(summariseClinicalRecords)
export(summariseEntryCharacteristics)
export(summariseObservationPeriod)
export(summariseInObservation)
export(summarisePersonDays)
export(summariseRecordCount)
export(suppress)
Expand Down
173 changes: 173 additions & 0 deletions R/summariseInObservation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#' Create a summarised result with the number of people in observation during a specific interval of time
#'
#' @param observationPeriod observation_period omop table.
#' @param unit Whether to stratify by "year" or by "month".
#' @param unitInterval Number of years or months to stratify with.
#' @param sex Whether to stratify by sex (TRUE) or not (FALSE).
#'
#' @return A summarised_result object with the summarised data.
#'
#' @export
#'
summariseInObservation <- function(observationPeriod, unit = "year", unitInterval = 1, sex = FALSE){

# Initial checks ----
assertClass(observationPeriod, "omop_table")

x <- omopgenerics::tableName(observationPeriod)
if (x != "observation_period") {
cli::cli_abort(
"Table name ({x}) is not observation_period, please provide a valid
observation_period table"
)
}

if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){
cli::cli_warn("observation_period table is empty. Returning an empty summarised result.")
return(omopgenerics::emptySummarisedResult())
}

if(missing(unit)){unit <- "year"}
if(missing(unitInterval)){unitInterval <- 1}

checkUnit(unit)
checkUnitInterval(unitInterval)
assertLogical(sex, length = 1)

# Create initial variables ----
observationPeriod <- observationPeriod |>
dplyr::ungroup()

cdm <- omopgenerics::cdmReference(observationPeriod)

# Add strata variables ----
strata <- c("age_group", "sex")
observationPeriod <- addDemographicsToOmopTable(observationPeriod, date = "observation_period_start_date", ageGroup = NULL, sex)

# Observation period ----
name <- "observation_period"
start_date_name <- startDate(name)
end_date_name <- endDate(name)

interval <- getIntervalTibbleForObservation(observationPeriod, start_date_name, end_date_name, unit, unitInterval)

# Insert interval table to the cdm ----
cdm <- cdm |>
omopgenerics::insertTable(name = "interval", table = interval)

# Calculate denominator ----
denominator <- cdm |> getDenominator()

# Count records ----
result <- observationPeriod |> countRecords(cdm, start_date_name, end_date_name, unit)

# Create summarisedResult
result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, unit, unitInterval)

omopgenerics::dropTable(cdm = cdm, name = "interval")
return(result)
}

getDenominator <- function(cdm){
cdm[["person"]] |>
dplyr::ungroup() |>
dplyr::select("person_id") |>
dplyr::summarise("n" = dplyr::n()) |>
dplyr::pull("n")
}

getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date_name, unit, unitInterval){
startDate <- getOmopTableStartDate(omopTable, start_date_name)
endDate <- getOmopTableEndDate(omopTable, end_date_name)

tibble::tibble(
"group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit)
) |>
dplyr::rowwise() |>
dplyr::mutate("interval" = max(which(
.data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit))
),
na.rm = TRUE)) |>
dplyr::ungroup() |>
dplyr::group_by(.data$interval) |>
dplyr::mutate(
"interval_start_date" = min(.data$group),
"interval_end_date" = dplyr::if_else(.env$unit == "year",
min(.data$group)+lubridate::years(.env$unitInterval)-1,
min(.data$group)+months(.env$unitInterval)-1)
) |>
dplyr::mutate(
"interval_start_date" = as.Date(.data$interval_start_date),
"interval_end_date" = as.Date(.data$interval_end_date)
) |>
dplyr::mutate(
"interval_group" = paste(.data$interval_start_date,"to",.data$interval_end_date)
) |>
dplyr::ungroup() |>
dplyr::select("interval_start_date", "interval_end_date", "interval_group") |>
dplyr::distinct()
}

countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, unit){
tablePrefix <- omopgenerics::tmpPrefix()

x <- observationPeriod %>%
dplyr::mutate("start" = lubridate::floor_date(.data[[start_date_name]], unit = "month")) |>
dplyr::mutate("end" = lubridate::floor_date(.data[[end_date_name]], unit = "month")) |>
dplyr::group_by(.data$start, .data$end, .data$sex) |>
dplyr::summarise(n = dplyr::n()) |>
dplyr::compute(
name = omopgenerics::uniqueTableName(tablePrefix), temporary = FALSE
)

x <- cdm[["interval"]] |>
dplyr::cross_join(x) |>
dplyr::filter((.data$start < .data$interval_start_date & .data$end >= .data$interval_start_date) |
(.data$start >= .data$interval_start_date & .data$start <= .data$interval_end_date)) |>
dplyr::group_by(.data$interval_group, .data$sex) |>
dplyr::summarise(n = sum(.data$n, na.rm = TRUE), .groups = "drop") |>
dplyr::select("estimate_value" = "n", "sex", "time_interval" = "interval_group") |>
dplyr::collect() |>
dplyr::arrange(.data$time_interval)

omopgenerics::dropTable(cdm = cdm, name = c(dplyr::starts_with(tablePrefix)))

return(x)
}

createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, unit, unitInterval){
result <- result |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"variable_name" = "overlap_records"
) |>
dplyr::rename("variable_level" = "time_interval") |>
visOmopResults::uniteStrata(cols = c("sex")) |>
dplyr::mutate(
"result_id" = as.integer(1),
"cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)),
"group_name" = "omop_table",
"group_level" = name,
"estimate_name" = "count",
"estimate_type" = "integer",
"additional_name" = "overall",
"additional_level" = "overall"
)

result <- result |>
rbind(result) |>
dplyr::group_by(.data$variable_level, .data$strata_level) |>
dplyr::mutate(estimate_type = dplyr::if_else(dplyr::row_number() == 2, "percentage", .data$estimate_type)) |>
dplyr::mutate(estimate_value = dplyr::if_else(.data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value)) |>
dplyr::mutate(estimate_name = dplyr::if_else(.data$estimate_type == "percentage", "percentage", .data$estimate_name)) |>
omopgenerics::newSummarisedResult(settings = dplyr::tibble(
"result_id" = 1L,
"result_type" = "summarised_observation_period",
"package_name" = "OmopSketch",
"package_version" = as.character(utils::packageVersion("OmopSketch")),
"unit" = .env$unit,
"unitInterval" = .env$unitInterval
))

return(result)
}
128 changes: 0 additions & 128 deletions R/summariseObservationPeriod.R

This file was deleted.

43 changes: 24 additions & 19 deletions R/summariseRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, age

if(missing(unit)){unit <- "year"}
if(missing(unitInterval)){unitInterval <- 1}
if(missing(ageGroup) | is.null(ageGroup)){ageGroup <- list("overall" = c(0, Inf))}
if(missing(ageGroup) | is.null(ageGroup)){ageGroup <- NULL}

checkUnit(unit)
checkUnitInterval(unitInterval)
checkAgeGroup(ageGroup)

assertLogical(sex, length = 1)

if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){
cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised result."))

Expand All @@ -46,21 +48,7 @@ summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, age
dplyr::select(dplyr::all_of(date), "person_id")

# Use add demographic query -> when both are true (age = FALSE)
if(FALSE %in% c(names(ageGroup) == "overall")){
omopTable <- omopTable |>
PatientProfiles::addAgeQuery(indexDate = date, ageGroup = ageGroup, missingAgeGroupValue = "unknown") |>
dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |> # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
dplyr::select(-dplyr::any_of(c("age")))
}else{
omopTable <- omopTable |> dplyr::mutate(age_group = "overall")
}

if(sex){
omopTable <- omopTable |> PatientProfiles::addSexQuery(missingSexValue = "unknown") |>
dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex)) # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
}else{
omopTable <- omopTable |> dplyr::mutate(sex = "overall")
}
omopTable <- addDemographicsToOmopTable(omopTable, date, ageGroup, sex)

if(name != "observation_period") {
omopTable <- omopTable |>
Expand All @@ -85,12 +73,29 @@ summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, age
result <- createOverallGroup(result, ageGroup, sex, strata)

# Create summarised result ----
result <- createSummarisedResult(result, omopTable, name, unit, unitInterval)
result <- createSummarisedResultRecordCount(result, omopTable, name, unit, unitInterval)
omopgenerics::dropTable(cdm = cdm, name = "interval")

return(result)
}

addDemographicsToOmopTable <- function(omopTable, date, ageGroup, sex){
suppressWarnings(omopTable |>
dplyr::mutate(sex = "overall") |>
dplyr::mutate(age_group = "overall") |>
PatientProfiles::addDemographicsQuery(indexDate = date,
age = FALSE,
ageGroup = ageGroup,
missingAgeGroupValue = "unknown",
sex = sex,
missingSexValue = "unknown",
priorObservation = FALSE,
futureObservation = FALSE,
dateOfBirth = FALSE) |>
dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |> # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex))) # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677

}

filterInObservation <- function(x, indexDate){
cdm <- omopgenerics::cdmReference(x)
Expand Down Expand Up @@ -158,7 +163,7 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u
) |>
dplyr::ungroup() |>
dplyr::mutate("my" = paste0(lubridate::month(.data$group),"-",lubridate::year(.data$group))) |>
dplyr::select("interval_group", "my") |>
dplyr::select("interval_group", "my", "interval_start_date","interval_end_date") |>
dplyr::distinct()
}

Expand Down Expand Up @@ -230,7 +235,7 @@ createOverallGroup <- function(result, ageGroup, sex, strata){
return(result)
}

createSummarisedResult <- function(result, omopTable, name, unit, unitInterval){
createSummarisedResultRecordCount <- function(result, omopTable, name, unit, unitInterval){
result <- result |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
Expand Down
Loading

0 comments on commit a0fd274

Please sign in to comment.