diff --git a/DESCRIPTION b/DESCRIPTION index 4e95e43..b98439c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,6 +23,8 @@ Imports: cli, CohortCharacteristics, dplyr, + ggplot2, + glue, gt, IncidencePrevalence (>= 0.7.0), lubridate, @@ -30,6 +32,7 @@ Imports: omopgenerics (>= 0.0.3), PatientProfiles, rlang, + tibble, tidyr, visOmopResults Depends: diff --git a/NAMESPACE b/NAMESPACE index fc2f79d..b76242b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,14 @@ # Generated by roxygen2: do not edit by hand +export(plotTableCounts) export(summariseEntryCharacteristics) export(summariseOmopTable) export(summarisePersonDays) +export(summariseTableCounts) export(suppress) export(tableOmopTable) importFrom(magrittr,"%>%") importFrom(omopgenerics,suppress) +importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,.env) diff --git a/R/plotTableCounts.R b/R/plotTableCounts.R new file mode 100644 index 0000000..837f834 --- /dev/null +++ b/R/plotTableCounts.R @@ -0,0 +1,34 @@ +#' Create a gt table from a summarised omop_table. +#' +#' @param summarisedTableCounts A summarised_result object with the output from summariseTableCounts(). +#' +#' @return A ggplot showing the table counts +#' +#' @export +#' +plotTableCounts <- function(summarisedTableCounts) { + # Initial checks ---- + assertClass(summarisedTableCounts, "summarised_result") + + if(summarisedTableCounts |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_warn("summarisedOmopTable is empty.") + return( + summarisedTableCounts |> + ggplot2::ggplot() + ) + } + + # Plot ---- + summarisedTableCounts |> + dplyr::mutate(count = as.numeric(.data$estimate_value), + time = .data$strata_level) |> + visOmopResults::splitGroup() |> + ggplot2::ggplot(ggplot2::aes(x = .data$time, y = .data$count, group = .data$omop_table, color = .data$omop_table)) + + ggplot2::geom_point() + + ggplot2::geom_line() + + ggplot2::facet_wrap(facets = "cdm_name") + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + + ggplot2::xlab("Time") + + ggplot2::ylab("Counts") + + ggplot2::labs(color = "Omop table") +} diff --git a/R/summariseOmopTable.R b/R/summariseOmopTable.R index 034b739..5ddb3ba 100644 --- a/R/summariseOmopTable.R +++ b/R/summariseOmopTable.R @@ -140,7 +140,7 @@ summariseOmopTable <- function(omopTable, "package_version" = as.character(utils::packageVersion("OmopSketch")) )) - return(result) +return(result) } # Functions ----- diff --git a/R/summariseTableCounts.R b/R/summariseTableCounts.R new file mode 100644 index 0000000..52e2482 --- /dev/null +++ b/R/summariseTableCounts.R @@ -0,0 +1,213 @@ +#' Create a gt table from a summarised omop_table. +#' +#' @param omopTable A summarised_result object with the output from summariseOmopTable(). +#' @param unit Whether to stratify by "year" or by "month" +#' @param unitInterval Number of years or months to be used +#' +#' @return A gt object with the summarised data. +#' +#' @importFrom rlang := +#' @export +#' +summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { + + # Initial checks ---- + omopTableChecks(omopTable) + unitChecks(unit) + unitIntervalChecks(unitInterval) + + if(missing(unit)){ + unit <- "year" + } + + if(missing(unitInterval)){ + unitInterval <- 1 + } + + cdm <- omopgenerics::cdmReference(omopTable) + omopTable <- omopTable |> dplyr::ungroup() + + name <- omopgenerics::tableName(omopTable) + people <- getNumberPeopleInCdm(cdm) + result <- omopgenerics::emptySummarisedResult() + date <- startDate(name) + + if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised result.")) + return(result) + } + + # Incidence counts ---- + omopTable <- omopTable |> + dplyr::select(dplyr::all_of(date), "person_id") + + if (name != "observation_period") { + omopTable <- omopTable |> + filterInObservation(indexDate = date) + } + + # insert table and then left join + interval <- getIntervalTibble(omopTable, date, unit, unitInterval) + + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = "interval", table = interval) + + # Create summarised result + if(unit == "year"){ + result <- omopTable |> + dplyr::rename("incidence_date" = dplyr::all_of(date)) %>% + dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) |> + dplyr::left_join( + cdm$interval, by = c("group") + ) |> + dplyr::select(-c("interval")) |> + dplyr::group_by(.data$incidence_group) |> + dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> + dplyr::collect() |> + dplyr::ungroup() + + }else if(unit == "month"){ + result <- omopTable |> + dplyr::rename("incidence_date" = dplyr::all_of(date)) %>% + dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) %>% + dplyr::mutate("month" = !!CDMConnector::datepart("incidence_date", "month")) |> + dplyr::mutate("group" = as.Date(paste0(.data$group,"-",.data$month,"-01"))) |> + dplyr::left_join( + cdm$interval, by = c("group") + ) |> + dplyr::select(-c("month","interval")) |> + dplyr::group_by(.data$incidence_group) |> + dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> + dplyr::collect() |> + dplyr::ungroup() + } + + result <- result |> + dplyr::mutate(incidence_group = dplyr::if_else(rep(unitInterval, nrow(result)) == 1, + gsub(" to.*", "", .data$incidence_group), + .data$incidence_group)) |> + dplyr::mutate( + "estimate_value" = as.character(.data$estimate_value), + "variable_name" = "incidence_records" + ) |> + visOmopResults::uniteStrata(cols = "incidence_group") |> + dplyr::mutate("strata_name" = dplyr::if_else(.data$strata_name == "incidence_group", + glue::glue("{unitInterval}_{unit}{if (unitInterval > 1) 's' else ''}"), + .data$strata_name)) |> + dplyr::mutate( + "result_id" = as.integer(1), + "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), + "group_name" = "omop_table", + "group_level" = name, + "variable_level" = NA_character_, + "estimate_name" = "count", + "estimate_type" = "integer", + "additional_name" = "overall", + "additional_level" = "overall" + ) |> + omopgenerics::newSummarisedResult() + + return(result) +} + +omopTableChecks <- function(omopTable){ + assertClass(omopTable, "omop_table") + omopTable |> + omopgenerics::tableName() |> + assertChoice(choices = tables$table_name) +} + +unitChecks <- function(unit){ + inherits(unit, "character") + assertLength(unit, 1) + if(!unit %in% c("year","month")){ + cli::cli_abort("units value is not valid. Valid options are year or month.") + } +} + +unitIntervalChecks <- function(unitInterval){ + inherits(unitInterval, c("numeric", "integer")) + assertLength(unitInterval, 1) + if(unitInterval < 1){ + cli::cli_abort("unitInterval input has to be equal or greater than 1.") + } + if(!(unitInterval%%1 == 0)){ + cli::cli_abort("unitInterval has to be an integer.") + } +} + +filterInObservation <- function(x, indexDate) { + cdm <- omopgenerics::cdmReference(x) + id <- c("person_id", "subject_id") + id <- id[id %in% colnames(x)] + + x |> + dplyr::inner_join( + cdm$observation_period |> + dplyr::select( + !!id := "person_id", + "start" = "observation_period_start_date", + "end" = "observation_period_end_date" + ), + by = id + ) |> + dplyr::filter( + .data[[indexDate]] >= .data$start & .data[[indexDate]] <= .data$end + ) +} + +getOmopTableStartDate <- function(omopTable, date){ + omopTable |> + dplyr::summarise("startDate" = min(.data[[date]], na.rm = TRUE)) |> + dplyr::collect() |> + dplyr::mutate("startDate" = as.Date(paste0(lubridate::year(startDate),"-01-01"))) |> + dplyr::pull("startDate") +} + +getOmopTableEndDate <- function(omopTable, date){ + omopTable |> + dplyr::summarise("endDate" = max(.data[[date]], na.rm = TRUE)) |> + dplyr::collect() |> + dplyr::mutate("endDate" = as.Date(paste0(lubridate::year(endDate),"-12-31"))) |> + dplyr::pull("endDate") +} + +getIntervalTibble <- function(omopTable, date, unit, unitInterval){ + startDate <- getOmopTableStartDate(omopTable, date) + endDate <- getOmopTableEndDate(omopTable, date) + + if(unit == "year"){ + interval <- tibble::tibble( + "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit) + ) + }else if(unit == "month"){ + interval <- tibble::tibble( + "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit) + ) + } + + interval <- interval |> + dplyr::rowwise() |> + dplyr::mutate("interval" = max(which( + .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit)) + ), + na.rm = TRUE)) |> + dplyr::ungroup() |> + dplyr::group_by(.data$interval) |> + dplyr::mutate( + "incidence_group" = paste0(min(.data$group)," to ",max(.data$group)) + ) |> + dplyr::ungroup() |> + dplyr::mutate("unit" = .env$unit) |> + dplyr::mutate("incidence_group" = dplyr::if_else( + .data$unit == "year", + gsub("-01","",as.character(.data$incidence_group)), + gsub("-01$","",gsub("-01 "," ",as.character(.data$incidence_group)))) + ) |> + dplyr::mutate("group" = dplyr::if_else( + .data$unit == "year", + gsub("-01","",as.character(.data$group)), + as.character(.data$group) + )) +} diff --git a/R/tableOmopTable.R b/R/tableOmopTable.R index 55517c8..7e3b408 100644 --- a/R/tableOmopTable.R +++ b/R/tableOmopTable.R @@ -1,7 +1,4 @@ -#' Summarise an omop_table from a cdm_reference object. You will obtain -#' information related to the number of records, number of subjects, whether the -#' records are in observation, number of present domains and number of present -#' concepts. +#' Create a gt table from a summarised omop_table. #' #' @param summarisedOmopTable A summarised_result object with the output from summariseOmopTable(). #' diff --git a/man/plotTableCounts.Rd b/man/plotTableCounts.Rd new file mode 100644 index 0000000..ed00784 --- /dev/null +++ b/man/plotTableCounts.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotTableCounts.R +\name{plotTableCounts} +\alias{plotTableCounts} +\title{Create a gt table from a summarised omop_table.} +\usage{ +plotTableCounts(summarisedTableCounts) +} +\arguments{ +\item{summarisedTableCounts}{A summarised_result object with the output from summariseTableCounts().} +} +\value{ +A ggplot showing the table counts +} +\description{ +Create a gt table from a summarised omop_table. +} diff --git a/man/summariseTableCounts.Rd b/man/summariseTableCounts.Rd new file mode 100644 index 0000000..f581578 --- /dev/null +++ b/man/summariseTableCounts.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summariseTableCounts.R +\name{summariseTableCounts} +\alias{summariseTableCounts} +\title{Create a gt table from a summarised omop_table.} +\usage{ +summariseTableCounts(omopTable, unit = "year", unitInterval = 1) +} +\arguments{ +\item{omopTable}{A summarised_result object with the output from summariseOmopTable().} + +\item{unit}{Whether to stratify by "year" or by "month"} + +\item{unitInterval}{Number of years or months to be used} +} +\value{ +A gt object with the summarised data. +} +\description{ +Create a gt table from a summarised omop_table. +} diff --git a/man/tableOmopTable.Rd b/man/tableOmopTable.Rd index 3256670..5d71084 100644 --- a/man/tableOmopTable.Rd +++ b/man/tableOmopTable.Rd @@ -2,10 +2,7 @@ % Please edit documentation in R/tableOmopTable.R \name{tableOmopTable} \alias{tableOmopTable} -\title{Summarise an omop_table from a cdm_reference object. You will obtain -information related to the number of records, number of subjects, whether the -records are in observation, number of present domains and number of present -concepts.} +\title{Create a gt table from a summarised omop_table.} \usage{ tableOmopTable(summarisedOmopTable) } @@ -16,8 +13,5 @@ tableOmopTable(summarisedOmopTable) A gt object with the summarised data. } \description{ -Summarise an omop_table from a cdm_reference object. You will obtain -information related to the number of records, number of subjects, whether the -records are in observation, number of present domains and number of present -concepts. +Create a gt table from a summarised omop_table. } diff --git a/tests/testthat/test-summariseOmopTable.R b/tests/testthat/test-summariseOmopTable.R index 14e034d..3e7ce30 100644 --- a/tests/testthat/test-summariseOmopTable.R +++ b/tests/testthat/test-summariseOmopTable.R @@ -1,21 +1,9 @@ test_that("summariseOmopTable() works", { # Load mock database ---- - dbName <- "GiBleed" - pathEunomia <- here::here("Eunomia") - if (!dir.exists(pathEunomia)) { - dir.create(pathEunomia) - } - CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) - Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) - - db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) - + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) cdm <- CDMConnector::cdmFromCon( - con = db, - cdmSchema = "main", - writeSchema = "main", - cdmName = dbName + con = con, cdmSchema = "main", writeSchema = "main" ) # Check all tables work ---- @@ -70,29 +58,14 @@ test_that("summariseOmopTable() works", { domainId = FALSE, typeConcept = FALSE) |> dplyr::tally() |> dplyr::pull() == 3) - - - DBI::dbDisconnect(db) }) test_that("tableOmopTable() works", { # Load mock database ---- - dbName <- "GiBleed" - pathEunomia <- here::here("Eunomia") - if (!dir.exists(pathEunomia)) { - dir.create(pathEunomia) - } - CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) - Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) - - db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) - + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) cdm <- CDMConnector::cdmFromCon( - con = db, - cdmSchema = "main", - writeSchema = "main", - cdmName = dbName + con = con, cdmSchema = "main", writeSchema = "main" ) # Check that works ---- @@ -100,5 +73,7 @@ test_that("tableOmopTable() works", { expect_true(inherits(x,"gt_tbl")) expect_warning(tableOmopTable(summariseOmopTable(cdm$death))) expect_true(inherits(tableOmopTable(summariseOmopTable(cdm$death)),"gt_tbl")) + }) + diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R new file mode 100644 index 0000000..6ad77c4 --- /dev/null +++ b/tests/testthat/test-summariseTableCounts.R @@ -0,0 +1,83 @@ +test_that("summariseTableCounts() works", { + + # Load mock database ---- + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + cdm <- CDMConnector::cdmFromCon( + con = con, cdmSchema = "main", writeSchema = "main" + ) + + # Check inputs ---- + expect_true(inherits(summariseTableCounts(omopTable = cdm$observation_period, unit = "month"),"summarised_result")) + expect_true(inherits(summariseTableCounts(omopTable = cdm$observation_period, unitInterval = 5),"summarised_result")) + + expect_no_error(summariseTableCounts(cdm$observation_period)) + expect_no_error(summariseTableCounts(cdm$visit_occurrence)) + expect_no_error(summariseTableCounts(cdm$condition_occurrence)) + expect_no_error(summariseTableCounts(cdm$drug_exposure)) + expect_no_error(summariseTableCounts(cdm$procedure_occurrence)) + expect_warning(summariseTableCounts(cdm$device_exposure)) + expect_no_error(summariseTableCounts(cdm$measurement)) + expect_no_error(summariseTableCounts(cdm$observation)) + expect_warning(summariseTableCounts(cdm$death)) + + # Check inputs ---- + expect_true( + (summariseTableCounts(cdm$observation_period) |> + dplyr::filter(strata_level == 1963) |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$observation_period |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(observation_period_start_date)) |> + dplyr::filter(year == 1963) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + summariseTableCounts(cdm$condition_occurrence, unit = "month") |> + dplyr::filter(strata_level == "1961-02") |> + dplyr::pull("estimate_value") |> + as.numeric() == + (cdm$condition_occurrence |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::mutate(month = lubridate::month(condition_start_date)) |> + dplyr::filter(year == 1961, month == 2) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + (summariseTableCounts(cdm$condition_occurrence, unit = "month", unitInterval = 3) |> + dplyr::filter(strata_level %in% c("1984-01 to 1984-03")) |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$condition_occurrence |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::mutate(month = lubridate::month(condition_start_date)) |> + dplyr::filter(year == 1984, month %in% c(1:3)) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + (summariseTableCounts(cdm$drug_exposure, unitInterval = 8) |> + dplyr::filter(strata_level == "1981 to 1988") |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$drug_exposure |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(drug_exposure_start_date)) |> + dplyr::filter(year %in% c(1981:1988)) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + # summariseTableCounts plot + expect_true(inherits(plotTableCounts(summariseTableCounts(cdm$drug_exposure, unitInterval = 8)),"ggplot")) + expect_warning(inherits(plotTableCounts(summariseTableCounts(cdm$death, unitInterval = 8)),"ggplot")) + expect_true(inherits(plotTableCounts(summariseTableCounts(cdm$death, unitInterval = 8)),"ggplot")) + +})