Skip to content

Commit

Permalink
Merge pull request #15 from oxford-pharmacoepi/mah_summariseTableCoun…
Browse files Browse the repository at this point in the history
…ts()

SummariseTableCounts()
  • Loading branch information
catalamarti authored Jun 15, 2024
2 parents 5ec88b5 + c02b700 commit 01033db
Show file tree
Hide file tree
Showing 11 changed files with 384 additions and 44 deletions.
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ Imports:
cli,
CohortCharacteristics,
dplyr,
ggplot2,
glue,
gt,
IncidencePrevalence (>= 0.7.0),
lubridate,
magrittr,
omopgenerics (>= 0.0.3),
PatientProfiles,
rlang,
tibble,
tidyr,
visOmopResults
Depends:
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(plotTableCounts)
export(summariseEntryCharacteristics)
export(summariseOmopTable)
export(summarisePersonDays)
export(summariseTableCounts)
export(suppress)
export(tableOmopTable)
importFrom(magrittr,"%>%")
importFrom(omopgenerics,suppress)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
34 changes: 34 additions & 0 deletions R/plotTableCounts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' Create a gt table from a summarised omop_table.
#'
#' @param summarisedTableCounts A summarised_result object with the output from summariseTableCounts().
#'
#' @return A ggplot showing the table counts
#'
#' @export
#'
plotTableCounts <- function(summarisedTableCounts) {
# Initial checks ----
assertClass(summarisedTableCounts, "summarised_result")

if(summarisedTableCounts |> dplyr::tally() |> dplyr::pull("n") == 0){
cli::cli_warn("summarisedOmopTable is empty.")
return(
summarisedTableCounts |>
ggplot2::ggplot()
)
}

# Plot ----
summarisedTableCounts |>
dplyr::mutate(count = as.numeric(.data$estimate_value),
time = .data$strata_level) |>
visOmopResults::splitGroup() |>
ggplot2::ggplot(ggplot2::aes(x = .data$time, y = .data$count, group = .data$omop_table, color = .data$omop_table)) +
ggplot2::geom_point() +
ggplot2::geom_line() +
ggplot2::facet_wrap(facets = "cdm_name") +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggplot2::xlab("Time") +
ggplot2::ylab("Counts") +
ggplot2::labs(color = "Omop table")
}
2 changes: 1 addition & 1 deletion R/summariseOmopTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ summariseOmopTable <- function(omopTable,
"package_version" = as.character(utils::packageVersion("OmopSketch"))
))

return(result)
return(result)
}

# Functions -----
Expand Down
213 changes: 213 additions & 0 deletions R/summariseTableCounts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
#' Create a gt table from a summarised omop_table.
#'
#' @param omopTable A summarised_result object with the output from summariseOmopTable().
#' @param unit Whether to stratify by "year" or by "month"
#' @param unitInterval Number of years or months to be used
#'
#' @return A gt object with the summarised data.
#'
#' @importFrom rlang :=
#' @export
#'
summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) {

# Initial checks ----
omopTableChecks(omopTable)
unitChecks(unit)
unitIntervalChecks(unitInterval)

if(missing(unit)){
unit <- "year"
}

if(missing(unitInterval)){
unitInterval <- 1
}

cdm <- omopgenerics::cdmReference(omopTable)
omopTable <- omopTable |> dplyr::ungroup()

name <- omopgenerics::tableName(omopTable)
people <- getNumberPeopleInCdm(cdm)
result <- omopgenerics::emptySummarisedResult()
date <- startDate(name)

if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){
cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised result."))
return(result)
}

# Incidence counts ----
omopTable <- omopTable |>
dplyr::select(dplyr::all_of(date), "person_id")

if (name != "observation_period") {
omopTable <- omopTable |>
filterInObservation(indexDate = date)
}

# insert table and then left join
interval <- getIntervalTibble(omopTable, date, unit, unitInterval)

# Insert interval table to the cdm ----
cdm <- cdm |>
omopgenerics::insertTable(name = "interval", table = interval)

# Create summarised result
if(unit == "year"){
result <- omopTable |>
dplyr::rename("incidence_date" = dplyr::all_of(date)) %>%
dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) |>
dplyr::left_join(
cdm$interval, by = c("group")
) |>
dplyr::select(-c("interval")) |>
dplyr::group_by(.data$incidence_group) |>
dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |>
dplyr::collect() |>
dplyr::ungroup()

}else if(unit == "month"){
result <- omopTable |>
dplyr::rename("incidence_date" = dplyr::all_of(date)) %>%
dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) %>%
dplyr::mutate("month" = !!CDMConnector::datepart("incidence_date", "month")) |>
dplyr::mutate("group" = as.Date(paste0(.data$group,"-",.data$month,"-01"))) |>
dplyr::left_join(
cdm$interval, by = c("group")
) |>
dplyr::select(-c("month","interval")) |>
dplyr::group_by(.data$incidence_group) |>
dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |>
dplyr::collect() |>
dplyr::ungroup()
}

result <- result |>
dplyr::mutate(incidence_group = dplyr::if_else(rep(unitInterval, nrow(result)) == 1,
gsub(" to.*", "", .data$incidence_group),
.data$incidence_group)) |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"variable_name" = "incidence_records"
) |>
visOmopResults::uniteStrata(cols = "incidence_group") |>
dplyr::mutate("strata_name" = dplyr::if_else(.data$strata_name == "incidence_group",
glue::glue("{unitInterval}_{unit}{if (unitInterval > 1) 's' else ''}"),
.data$strata_name)) |>
dplyr::mutate(
"result_id" = as.integer(1),
"cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)),
"group_name" = "omop_table",
"group_level" = name,
"variable_level" = NA_character_,
"estimate_name" = "count",
"estimate_type" = "integer",
"additional_name" = "overall",
"additional_level" = "overall"
) |>
omopgenerics::newSummarisedResult()

return(result)
}

omopTableChecks <- function(omopTable){
assertClass(omopTable, "omop_table")
omopTable |>
omopgenerics::tableName() |>
assertChoice(choices = tables$table_name)
}

unitChecks <- function(unit){
inherits(unit, "character")
assertLength(unit, 1)
if(!unit %in% c("year","month")){
cli::cli_abort("units value is not valid. Valid options are year or month.")
}
}

unitIntervalChecks <- function(unitInterval){
inherits(unitInterval, c("numeric", "integer"))
assertLength(unitInterval, 1)
if(unitInterval < 1){
cli::cli_abort("unitInterval input has to be equal or greater than 1.")
}
if(!(unitInterval%%1 == 0)){
cli::cli_abort("unitInterval has to be an integer.")
}
}

filterInObservation <- function(x, indexDate) {
cdm <- omopgenerics::cdmReference(x)
id <- c("person_id", "subject_id")
id <- id[id %in% colnames(x)]

x |>
dplyr::inner_join(
cdm$observation_period |>
dplyr::select(
!!id := "person_id",
"start" = "observation_period_start_date",
"end" = "observation_period_end_date"
),
by = id
) |>
dplyr::filter(
.data[[indexDate]] >= .data$start & .data[[indexDate]] <= .data$end
)
}

getOmopTableStartDate <- function(omopTable, date){
omopTable |>
dplyr::summarise("startDate" = min(.data[[date]], na.rm = TRUE)) |>
dplyr::collect() |>
dplyr::mutate("startDate" = as.Date(paste0(lubridate::year(startDate),"-01-01"))) |>
dplyr::pull("startDate")
}

getOmopTableEndDate <- function(omopTable, date){
omopTable |>
dplyr::summarise("endDate" = max(.data[[date]], na.rm = TRUE)) |>
dplyr::collect() |>
dplyr::mutate("endDate" = as.Date(paste0(lubridate::year(endDate),"-12-31"))) |>
dplyr::pull("endDate")
}

getIntervalTibble <- function(omopTable, date, unit, unitInterval){
startDate <- getOmopTableStartDate(omopTable, date)
endDate <- getOmopTableEndDate(omopTable, date)

if(unit == "year"){
interval <- tibble::tibble(
"group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit)
)
}else if(unit == "month"){
interval <- tibble::tibble(
"group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit)
)
}

interval <- interval |>
dplyr::rowwise() |>
dplyr::mutate("interval" = max(which(
.data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit))
),
na.rm = TRUE)) |>
dplyr::ungroup() |>
dplyr::group_by(.data$interval) |>
dplyr::mutate(
"incidence_group" = paste0(min(.data$group)," to ",max(.data$group))
) |>
dplyr::ungroup() |>
dplyr::mutate("unit" = .env$unit) |>
dplyr::mutate("incidence_group" = dplyr::if_else(
.data$unit == "year",
gsub("-01","",as.character(.data$incidence_group)),
gsub("-01$","",gsub("-01 "," ",as.character(.data$incidence_group))))
) |>
dplyr::mutate("group" = dplyr::if_else(
.data$unit == "year",
gsub("-01","",as.character(.data$group)),
as.character(.data$group)
))
}
5 changes: 1 addition & 4 deletions R/tableOmopTable.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
#' Summarise an omop_table from a cdm_reference object. You will obtain
#' information related to the number of records, number of subjects, whether the
#' records are in observation, number of present domains and number of present
#' concepts.
#' Create a gt table from a summarised omop_table.
#'
#' @param summarisedOmopTable A summarised_result object with the output from summariseOmopTable().
#'
Expand Down
17 changes: 17 additions & 0 deletions man/plotTableCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/summariseTableCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions man/tableOmopTable.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 01033db

Please sign in to comment.