From 12a6b3ebea0257e899370fecfe1ea8efea0fa9c2 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Mon, 10 Jun 2024 13:20:15 +0100 Subject: [PATCH 1/8] summariseOmopTable() release --- .gitignore | 3 + R/summariseOmopTable.R | 383 +++++++++-------------- tests/testthat/test-summariseOmopTable.R | 31 ++ 3 files changed, 187 insertions(+), 230 deletions(-) create mode 100644 tests/testthat/test-summariseOmopTable.R diff --git a/.gitignore b/.gitignore index 9168bf8..3521ad2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ .httr-oauth .DS_Store docs + +extras/* +Eunomia/* diff --git a/R/summariseOmopTable.R b/R/summariseOmopTable.R index 135d12c..8df4dec 100644 --- a/R/summariseOmopTable.R +++ b/R/summariseOmopTable.R @@ -10,7 +10,7 @@ #' @param inObservation Whether to include the percentage of records in #' observation. #' @param standardConcept Whether to summarise standard concept. -#' @param sourceConcept Whether to summarise source concept. +#' @param sourceVocabulary Whether to summarise source concept. #' @param domainId Whether to summarise domain id of standard concept id. #' @param typeConcept Whether to summarise type concept id field. #' @@ -22,133 +22,97 @@ summariseOmopTable <- function(omopTable, recordsPerPerson = c("mean", "sd", "median", "q25", "q75", "min", "max"), inObservation = TRUE, standardConcept = TRUE, - sourceConcept = FALSE, + sourceVocabulary = FALSE, domainId = TRUE, typeConcept = TRUE) { - # initial checks + + # Initial checks ---- assertClass(omopTable, "omop_table") + omopTable |> omopgenerics::tableName() |> assertChoice(choices = tables$table_name) + estimates <- PatientProfiles::availableEstimates( - variableType = "numeric", fullQuantiles = TRUE - ) |> + variableType = "numeric", fullQuantiles = TRUE) |> dplyr::pull("estimate_name") assertChoice(recordsPerPerson, choices = estimates, null = TRUE) + recordsPerPerson <- unique(recordsPerPerson) + assertLogical(inObservation, length = 1) assertLogical(standardConcept, length = 1) - assertLogical(sourceConcept, length = 1) + assertLogical(sourceVocabulary, length = 1) assertLogical(domainId, length = 1) assertLogical(typeConcept, length = 1) if ("observation_period" == omopgenerics::tableName(omopTable)) { standardConcept <- FALSE - sourceConcept <- FALSE + sourceVocabulary <- FALSE domainId <- FALSE } cdm <- omopgenerics::cdmReference(omopTable) omopTable <- omopTable |> dplyr::ungroup() - # counts summary - cli::cli_inform("Summarising counts") - persons <- cdm[["person"]] |> - dplyr::ungroup() |> - dplyr::summarise("n" = as.integer(dplyr::n())) |> - dplyr::pull("n") - result <- omopTable |> - dplyr::summarise( - "number_records" = dplyr::n(), - "number_subjects" = dplyr::n_distinct(.data$person_id) - ) |> - dplyr::collect() |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.integer)) |> - dplyr::mutate( - "subjects_percentage" = 100 * .data$number_subjects / .env$persons - ) |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - tidyr::pivot_longer( - cols = dplyr::everything(), - names_to = "variable_name", - values_to = "estimate_value" - ) |> - dplyr::mutate( - "variable_level" = NA_character_, - "estimate_name" = dplyr::if_else( - grepl("number", .data$variable_name), "count", "percentage" - ), - "estimate_type" = dplyr::if_else( - grepl("number", .data$variable_name), "integer", "percentage" - ) - ) - den <- result |> - dplyr::filter(.data$variable_name == "number_records") |> - dplyr::pull("estimate_value") |> - as.integer() - - # records per person - if (length(recordsPerPerson) > 0) { - cli::cli_inform("Summarising records per person") - suppressMessages( - result <- result |> - dplyr::union_all( - cdm[["person"]] |> - dplyr::left_join( - omopTable |> - dplyr::group_by(.data$person_id) |> - dplyr::summarise( - "records_per_person" = as.integer(dplyr::n()), - .groups = "drop" - ), - by = "person_id", - ) |> - dplyr::mutate("records_per_person" = dplyr::if_else( - is.na(.data$records_per_person), - 0L, - .data$records_per_person - )) |> - PatientProfiles::summariseResult( - variables = "records_per_person", - estimates = recordsPerPerson, - counts = FALSE - ) |> - dplyr::select( - "variable_name", "variable_level", "estimate_name", - "estimate_type", "estimate_value" - ) - ) - ) + people <- getNumberPeopleInCdm(cdm) + result <- omopgenerics::emptySummarisedResult() + + if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_abort(paste0(omopgenerics::tableName(omopTable), " omop table is empty.")) + } + + # Counts summary ---- + cli::cli_inform(c("i" = "Summarising counts")) + result <- result |> + addNumberSubjects(omopTable) |> + addNumberRecords(omopTable) |> + addSubjectsPercentage(omopTable, people) + + # Records per person summary ---- + if(!is.null(recordsPerPerson)){ + cli::cli_inform(c("i" = "Summarising records per person")) + result <- result |> + addRecordsPerPerson(omopTable, recordsPerPerson, cdm) } - # concept - if (inObservation | standardConcept | sourceConcept | domainId | typeConcept) { - cli::cli_inform("Summarising concepts") - # add variables + # Summary concepts ---- + if (inObservation | standardConcept | sourceVocabulary | domainId | typeConcept) { + cli::cli_inform(c("i" = "Summarising concepts")) + variables <- columnsVariables( - inObservation, standardConcept, sourceConcept, domainId, typeConcept + inObservation, standardConcept, sourceVocabulary, domainId, typeConcept ) + + denominator <- result |> + dplyr::filter(.data$variable_name == "number_records") |> + dplyr::pull("estimate_value") |> + as.integer() + result <- result |> - dplyr::union_all( + dplyr::full_join( omopTable |> addVariables(variables) |> dplyr::group_by(dplyr::across(dplyr::all_of(variables))) |> dplyr::tally() |> dplyr::collect() |> dplyr::mutate("n" = as.integer(.data$n)) |> - summaryData(variables, cdm, den) + summaryData(variables, cdm, denominator, result) ) } + # Format output as a summarised result result <- result |> dplyr::mutate( "result_id" = 1L, "cdm_name" = omopgenerics::cdmName(cdm), - "table_name" = omopgenerics::tableName(omopTable) + "group_name" = "overall", + "group_level" = "overall", + "strata_name" = "overall", + "strata_level" = "overall", + "additional_name" = "overall", + "additional_level" = "overall" ) |> - visOmopResults::uniteGroup("table_name") |> - visOmopResults::uniteStrata() |> - visOmopResults::uniteAdditional() |> omopgenerics::newSummarisedResult(settings = dplyr::tibble( "result_id" = 1L, "result_type" = "summarised_omop_table", @@ -156,26 +120,96 @@ summariseOmopTable <- function(omopTable, "package_version" = as.character(utils::packageVersion("OmopSketch")) )) - return(result) +return(result) +} + +# Functions ----- +getNumberPeopleInCdm <- function(cdm){ + cdm[["person"]] |> + dplyr::pull("person_id") |> + dplyr::n_distinct() +} + +addNumberSubjects <- function(result, omopTable){ + result |> + dplyr::add_row( + "variable_name" = "number_subjects", + "estimate_name" = "count", + "estimate_type" = "integer", + "estimate_value" = as.character(omopTable |> dplyr::pull(.data$person_id) |> dplyr::n_distinct()) + ) +} +addNumberRecords <- function(result, omopTable){ + result |> + dplyr::add_row( + "variable_name" = "number_records", + "estimate_name" = "count", + "estimate_type" = "integer", + "estimate_value" = as.character(omopTable |> dplyr::tally() |> dplyr::pull(.data$n)) + ) +} +addSubjectsPercentage <- function(result, omopTable, people){ + result |> + dplyr::add_row( + "variable_name" = "subjects_percentage", + "estimate_name" = "percentage", + "estimate_type" = "percentage", + "estimate_value" = as.character( + 100* (omopTable |> dplyr::pull(.data$person_id) |> dplyr::n_distinct()) / .env$people + ) + ) +} + +addRecordsPerPerson <- function(result, omopTable, recordsPerPerson, cdm){ + suppressMessages( + result |> + dplyr::union_all( + cdm[["person"]] |> + dplyr::select("person_id") |> + dplyr::left_join( + omopTable |> + dplyr::group_by(.data$person_id) |> + dplyr::summarise( + "records_per_person" = as.integer(dplyr::n()), + .groups = "drop" + ), + by = "person_id" + ) |> + dplyr::mutate("records_per_person" = dplyr::if_else( + is.na(.data$records_per_person), + 0L, + .data$records_per_person + )) |> + PatientProfiles::summariseResult( + variables = "records_per_person", + estimates = recordsPerPerson, + counts = FALSE + ) + ) + ) } -addVariables <- function(x, - variables) { +addVariables <- function(x, variables) { + name <- omopgenerics::tableName(x) + newNames <- c( "person_id", "id" = tableId(name), - "date" = startDate(name), + "start_date" = startDate(name), + "end_date" = endDate(name), "standard" = standardConcept(name), "source" = sourceConcept(name), "type" = typeConcept(name) ) + newNames <- newNames[!is.na(newNames)] cdm <- omopgenerics::cdmReference(x) x <- x |> dplyr::select(dplyr::all_of(newNames)) + # Domain and standard ---- if (any(c("domain_id", "standard") %in% variables)) { x <- x |> dplyr::left_join( @@ -195,29 +229,27 @@ addVariables <- function(x, )) } } - + # Source ---- if ("source" %in% variables) { x <- x |> dplyr::left_join( cdm$concept |> dplyr::select( - "source" = "concept_id", "source_concept" = "standard_concept" + "source" = "concept_id", "vocabulary" = "vocabulary_id" ), by = "source" ) |> - dplyr::mutate("source" = dplyr::case_when( - .data$source == 0 ~ "No matching concept", - .data$source_concept == "S" ~ "Standard", - .data$source_concept == "C" ~ "Classification", - .default = "Source" - )) + dplyr::mutate( + vocabulary = dplyr::if_else(is.na(vocabulary), "No matching concept", vocabulary) + ) |> + dplyr::rename("source_concept" = "source", "source" = "vocabulary") } - + # In observation ---- if ("in_observation" %in% variables) { x <- x |> dplyr::left_join( x |> - dplyr::select("id", "person_id", "date") |> + dplyr::select("id", "person_id", "start_date", "end_date") |> dplyr::inner_join( cdm[["observation_period"]] |> dplyr::select( @@ -228,7 +260,10 @@ addVariables <- function(x, by = "person_id" ) |> dplyr::filter( - .data$date >= .data$obs_start & .data$date <= .data$obs_end + .data$start_date >= .data$obs_start & + .data$start_date <= .data$obs_end & + .data$end_date >= .data$obs_start & + .data$end_date <= .data$obs_end ) |> dplyr::mutate("in_observation" = 1L) |> dplyr::select("id", "in_observation"), @@ -240,47 +275,43 @@ addVariables <- function(x, return(x) } -columnsVariables <- function(inObservation, - standardConcept, - sourceConcept, - domainId, - typeConcept) { +columnsVariables <- function(inObservation, standardConcept, sourceVocabulary, domainId, typeConcept) { c("in_observation", "standard", "domain_id", "source", "type" )[c( - inObservation, standardConcept, domainId, sourceConcept, typeConcept + inObservation, standardConcept, domainId, sourceVocabulary, typeConcept )] } -summaryData <- function(x, variables, cdm, den) { +summaryData <- function(x, variables, cdm, denominator, result) { results <- list() - # in observation + # in observation ---- if ("in_observation" %in% variables) { results[["obs"]] <- x |> dplyr::mutate("in_observation" = dplyr::if_else( !is.na(.data$in_observation), "Yes", "No" )) |> - formatResults("In observation", "in_observation", den) + formatResults("In observation", "in_observation", denominator, result) } - # standard + # standard ----- if ("standard" %in% variables) { results[["standard"]] <- x |> - formatResults("Standard concept", "standard", den) + formatResults("Standard concept", "standard", denominator, result) } - # source + # source ---- if ("source" %in% variables) { - results[["source"]] <- x |> formatResults("Source concept", "source", den) + results[["source"]] <- x |> formatResults("Source concept", "source", denominator, result) } - # domain + # domain ---- if ("domain_id" %in% variables) { - results[["domain"]] <- x |> formatResults("Domain", "domain_id", den) + results[["domain"]] <- x |> formatResults("Domain", "domain_id", denominator, result) } - # type + # type ---- if ("type" %in% variables) { xx <- x |> - formatResults("Type concept id", "type", den) |> + formatResults("Type concept id", "type", denominator, result) |> dplyr::left_join( conceptTypes |> dplyr::select( @@ -324,11 +355,11 @@ summaryData <- function(x, variables, cdm, den) { return(results) } -formatResults <- function(x, variableName, variableLevel, den) { +formatResults <- function(x, variableName, variableLevel, denominator, result) { x |> dplyr::group_by(dplyr::across(dplyr::all_of(variableLevel))) |> dplyr::summarise("count" = sum(.data$n), .groups = "drop") |> - dplyr::mutate("percentage" = 100 * .data$count / .env$den) |> + dplyr::mutate("percentage" = 100 * .data$count / .env$denominator) |> dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> tidyr::pivot_longer( cols = c("count", "percentage"), @@ -345,114 +376,6 @@ formatResults <- function(x, variableName, variableLevel, den) { dplyr::select( "variable_name", "variable_level", "estimate_name", "estimate_type", "estimate_value" - ) + ) |> + dplyr::ungroup() } - -# getFunctions <- function(date, concept) { -# functions <- c( -# rlang::parse_exprs("dplyr::n()") |> -# rlang::set_names("count_number_records"), -# rlang::parse_exprs("dplyr::n_distinct(.data$person_id)") |> -# rlang::set_names("count_number_subjects"), -# rlang::parse_exprs("dplyr::n_distinct(.data$concept_id)") |> -# rlang::set_names("count_distinct_concept_id"), -# rlang::parse_exprs("sum(.data$in_observation, na.rm = TRUE)") |> -# rlang::set_names("count_records_in_observation") -# ) -# functions <- functions[c( -# TRUE, TRUE, date != "cohort_start_date", concept != "cohort_definition_id" -# )] -# return(functions) -# } -# prepareTable <- function(omopTable, date, concept) { -# cdm <- omopgenerics::cdmReference(omopTable) -# -# # domain_id -# if (concept != "cohort_definition_id") { -# omopTable <- omopTable |> -# dplyr::rename("concept_id" = dplyr::all_of(concept)) |> -# dplyr::left_join( -# cdm$concept |> dplyr::select("concept_id", "domain_id"), -# by = "concept_id" -# ) -# } -# -# # year and in_observation -# if (date != "cohort_start_date") { -# omopTable <- omopTable |> -# PatientProfiles::addInObservation(indexDate = date) %>% -# dplyr::mutate( -# "year" = !!CDMConnector::datepart(date = date, interval = "year") -# ) -# } -# -# return(omopTable) -# } -# summaryData <- function(omopTable, functions, byYear){ -# result <- omopTable |> -# dplyr::summarise(!!!functions) |> -# dplyr::collect() -# if ("domain_id" %in% colnames(omopTable)) { -# result <- result |> -# dplyr::bind_rows( -# omopTable |> -# dplyr::group_by(.data$domain_id) |> -# dplyr::summarise(!!!functions, .groups = "drop") |> -# dplyr::collect() -# ) -# } else { -# result <- result |> dplyr::mutate("domain_id" = NA_character_) -# } -# -# if (byYear & "year" %in% colnames(omopTable)) { -# result <- result |> -# dplyr::bind_rows( -# omopTable |> -# dplyr::group_by(.data$year) |> -# dplyr::summarise(!!!functions, .groups = "drop") |> -# dplyr::collect() -# ) -# if ("domain_id" %in% colnames(omopTable)) { -# result <- result |> -# dplyr::bind_rows( -# omopTable |> -# dplyr::group_by(.data$domain_id, .data$year) |> -# dplyr::summarise(!!!functions, .groups = "drop") |> -# dplyr::collect() -# ) -# } -# } else { -# result <- result |> dplyr::mutate("year" = NA_character_) -# } -# return(result) -# } -# formatResult <- function(result, cdm, name) { -# result |> -# tidyr::pivot_longer( -# cols = !c("year", "domain_id"), -# names_to = "name", -# values_to = "estimate_value" -# ) |> -# tidyr::separate_wider_delim( -# cols = "name", -# delim = "_", -# names = c("estimate_name", "variable_name"), -# too_many = "merge" -# ) |> -# dplyr::mutate( -# "estimate_value" = as.character(.data$estimate_value), -# "cdm_name" = omopgenerics::cdmName(cdm = cdm), -# "estimate_type" = "integer", -# "variable_level" = NA_character_, -# "package_name" = "OmopSketch", -# "package_version" = as.character(utils::packageVersion("OmopSketch")), -# "group_name" = "omop_table", -# "group_level" = name, -# "result_type" = "summarised_omop_table", -# "additional_name" = "overall", -# "additional_level" = "overlal" -# ) |> -# dplyr::rename("domain" = "domain_id") |> -# visOmopResults::uniteStrata(cols = c("year", "domain")) |> -# omopgenerics::newSummarisedResult() -# } diff --git a/tests/testthat/test-summariseOmopTable.R b/tests/testthat/test-summariseOmopTable.R new file mode 100644 index 0000000..b319d09 --- /dev/null +++ b/tests/testthat/test-summariseOmopTable.R @@ -0,0 +1,31 @@ +test_that("summariseOmopTable() works", { + + # Load mock database ---- + dbName <- "GiBleed" + pathEunomia <- here::here("Eunomia") + if (!dir.exists(pathEunomia)) { + dir.create(pathEunomia) + } + CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) + Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) + + db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + + cdm <- CDMConnector::cdmFromCon( + con = db, + cdmSchema = "main", + writeSchema = "main", + cdmName = dbName + ) + +# Check all tables work + expect_no_error(summariseOmopTable(cdm$observation_period)) + expect_no_error(summariseOmopTable(cdm$visit_occurrence)) + expect_no_error(summariseOmopTable(cdm$condition_occurrence)) + expect_no_error(summariseOmopTable(cdm$drug_exposure)) + expect_no_error(summariseOmopTable(cdm$procedure_occurrence)) + expect_no_error(summariseOmopTable(cdm$device_exposure)) + expect_no_error(summariseOmopTable(cdm$measurement)) + expect_no_error(summariseOmopTable(cdm$observation)) + expect_error(summariseOmopTable(cdm$death)) +}) From ec18ed7f07dc08184b9da2d3cd167898996a8696 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Tue, 11 Jun 2024 13:53:01 +0100 Subject: [PATCH 2/8] tableOmopTable() description update --- R/summariseTableCounts.R | 0 R/tableOmopTable.R | 5 +---- 2 files changed, 1 insertion(+), 4 deletions(-) create mode 100644 R/summariseTableCounts.R diff --git a/R/summariseTableCounts.R b/R/summariseTableCounts.R new file mode 100644 index 0000000..e69de29 diff --git a/R/tableOmopTable.R b/R/tableOmopTable.R index 55517c8..7e3b408 100644 --- a/R/tableOmopTable.R +++ b/R/tableOmopTable.R @@ -1,7 +1,4 @@ -#' Summarise an omop_table from a cdm_reference object. You will obtain -#' information related to the number of records, number of subjects, whether the -#' records are in observation, number of present domains and number of present -#' concepts. +#' Create a gt table from a summarised omop_table. #' #' @param summarisedOmopTable A summarised_result object with the output from summariseOmopTable(). #' From c645511435130c75662f0d4117761e370f3e1f4e Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 12 Jun 2024 12:49:23 +0100 Subject: [PATCH 3/8] summariseTableCounts() finished --- DESCRIPTION | 1 + NAMESPACE | 2 + R/summariseTableCounts.R | 181 +++++++++++++++++++++ man/summariseTableCounts.Rd | 21 +++ man/tableOmopTable.Rd | 10 +- tests/testthat/test-summariseOmopTable.R | 2 + tests/testthat/test-summariseTableCounts.R | 3 + 7 files changed, 212 insertions(+), 8 deletions(-) create mode 100644 man/summariseTableCounts.Rd create mode 100644 tests/testthat/test-summariseTableCounts.R diff --git a/DESCRIPTION b/DESCRIPTION index 291bb6a..0902487 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,6 +29,7 @@ Imports: omopgenerics (>= 0.0.3), PatientProfiles, rlang, + tibble, tidyr, visOmopResults Depends: diff --git a/NAMESPACE b/NAMESPACE index fc2f79d..6d589aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,9 +3,11 @@ export(summariseEntryCharacteristics) export(summariseOmopTable) export(summarisePersonDays) +export(summariseTableCounts) export(suppress) export(tableOmopTable) importFrom(magrittr,"%>%") importFrom(omopgenerics,suppress) +importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,.env) diff --git a/R/summariseTableCounts.R b/R/summariseTableCounts.R index e69de29..6d5f3ee 100644 --- a/R/summariseTableCounts.R +++ b/R/summariseTableCounts.R @@ -0,0 +1,181 @@ +#' Create a gt table from a summarised omop_table. +#' +#' @param omopTable A summarised_result object with the output from summariseOmopTable(). +#' @param unit Whether to stratify by "year" or by "month" +#' @param unitInterval Number of years or months to be used +#' +#' @return A gt object with the summarised data. +#' +#' @importFrom rlang := +#' @export +#' +summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { + + # Initial checks ---- + omopTableChecks(omopTable) + unitChecks(unit) + unitIntervalChecks(unitInterval) + + cdm <- omopgenerics::cdmReference(omopTable) + omopTable <- omopTable |> dplyr::ungroup() + + name <- omopgenerics::tableName(omopTable) + people <- getNumberPeopleInCdm(cdm) + result <- omopgenerics::emptySummarisedResult() + date <- startDate(name) + + if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised omop table.")) + return(result) + } + + # Incidence counts ---- + omopTable <- omopTable |> + dplyr::select(dplyr::all_of(date), "person_id") + + if (name != "observation_period") { + omopTable <- omopTable |> + filterInObservation(indexDate = date) + } + + startDate <- getOmopTableStartDate(omopTable, date) + endDate <- getOmopTableEndDate(omopTable, date) + + # insert table and then left join + interval <- getIntervalTibble(omopTable, date, unit) + + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = "interval", table = interval) + + # Create summarised result + result <- omopTable |> + dplyr::rename("incidence_date" = dplyr::all_of(date)) %>% + dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) %>% + dplyr::mutate("month" = !!CDMConnector::datepart("incidence_date", "month")) |> + dplyr::mutate("group" = as.Date(paste0(.data$group,"-",.data$month,"-01"))) |> + dplyr::left_join( + cdm$interval, by = c("group") + ) |> + dplyr::select(-c("month","interval")) |> + dplyr::group_by(.data$incidence_group) |> + dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> + dplyr::collect() |> + dplyr::mutate( + "estimate_value" = as.character(.data$estimate_value), + "variable_name" = "incidence_records" + ) |> + visOmopResults::uniteStrata(cols = "incidence_group") |> + dplyr::mutate( + "result_id" = as.integer(1), + "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), + "group_name" = "omop_table", + "group_level" = name, + "variable_level" = NA_character_, + "estimate_name" = "count", + "estimate_type" = "integer", + "additional_name" = "overall", + "additional_level" = "overall" + ) |> + omopgenerics::newSummarisedResult() + + return(result) +} + +omopTableChecks <- function(omopTable){ + assertClass(omopTable, "omop_table") + omopTable |> + omopgenerics::tableName() |> + assertChoice(choices = tables$table_name) +} + +unitChecks <- function(units){ + inherits(.env$unit, "character") + assertLength(.env$unit, 1) + if(!.env$unit %in% c("year","month")){ + cli::cli_abort("units value is not valid. Valid options are year or month.") + } +} + +unitIntervalChecks <- function(unitInterval){ + inherits(unitInterval, c("numeric", "integer")) + assertLength(unitInterval, 1) + if(unitInterval < 1){ + cli::cli_abort("unitInterval input has to be equal or greater than 1.") + } + if(!(unitInterval%%1 == 0)){ + cli::cli_abort("unitInterval has to be an integer.") + } +} + +filterInObservation <- function(x, indexDate) { + cdm <- omopgenerics::cdmReference(x) + id <- c("person_id", "subject_id") + id <- id[id %in% colnames(x)] + + x |> + dplyr::inner_join( + cdm$observation_period |> + dplyr::select( + !!id := "person_id", + "start" = "observation_period_start_date", + "end" = "observation_period_end_date" + ), + by = id + ) |> + dplyr::filter( + .data[[indexDate]] >= .data$start & .data[[indexDate]] <= .data$end + ) +} + +getOmopTableStartDate <- function(omopTable, date){ + omopTable |> + dplyr::summarise("startDate" = min(.data[[date]], na.rm = TRUE)) |> + dplyr::collect() |> + dplyr::mutate("startDate" = as.Date(paste0(lubridate::year(startDate),"-01-01"))) |> + dplyr::pull("startDate") +} + +getOmopTableEndDate <- function(omopTable, date){ + omopTable |> + dplyr::summarise("endDate" = max(.data[[date]], na.rm = TRUE)) |> + dplyr::collect() |> + dplyr::mutate("endDate" = as.Date(paste0(lubridate::year(endDate),"-12-31"))) |> + dplyr::pull("endDate") +} + +getIntervalTibble <- function(omopTable, date, unit){ + if(unit == "year"){ + interval <- tibble::tibble( + "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit) + ) + }else if(unit == "month"){ + interval <- tibble::tibble( + "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$month) + ) + } + + interval <- interval |> + dplyr::rowwise() |> + dplyr::mutate("interval" = max(which( + .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit)) + ), + na.rm = TRUE)) |> + dplyr::ungroup() |> + dplyr::group_by(.data$interval) |> + dplyr::mutate( + "incidence_group" = paste0(min(.data$group)," to ",max(.data$group)) + ) |> + dplyr::ungroup() |> + dplyr::mutate("unit" = .env$unit) |> + dplyr::mutate("incidence_group" = dplyr::if_else( + .data$unit == "year", + gsub("-01","",as.character(.data$incidence_group)), + gsub("-01$","",gsub("-01 "," ",as.character(.data$incidence_group)))) + ) |> + dplyr::mutate("group" = dplyr::if_else( + .data$unit == "year", + gsub("-01","",as.character(.data$group)), + as.character(.data$group) + )) +} diff --git a/man/summariseTableCounts.Rd b/man/summariseTableCounts.Rd new file mode 100644 index 0000000..f581578 --- /dev/null +++ b/man/summariseTableCounts.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summariseTableCounts.R +\name{summariseTableCounts} +\alias{summariseTableCounts} +\title{Create a gt table from a summarised omop_table.} +\usage{ +summariseTableCounts(omopTable, unit = "year", unitInterval = 1) +} +\arguments{ +\item{omopTable}{A summarised_result object with the output from summariseOmopTable().} + +\item{unit}{Whether to stratify by "year" or by "month"} + +\item{unitInterval}{Number of years or months to be used} +} +\value{ +A gt object with the summarised data. +} +\description{ +Create a gt table from a summarised omop_table. +} diff --git a/man/tableOmopTable.Rd b/man/tableOmopTable.Rd index 3256670..5d71084 100644 --- a/man/tableOmopTable.Rd +++ b/man/tableOmopTable.Rd @@ -2,10 +2,7 @@ % Please edit documentation in R/tableOmopTable.R \name{tableOmopTable} \alias{tableOmopTable} -\title{Summarise an omop_table from a cdm_reference object. You will obtain -information related to the number of records, number of subjects, whether the -records are in observation, number of present domains and number of present -concepts.} +\title{Create a gt table from a summarised omop_table.} \usage{ tableOmopTable(summarisedOmopTable) } @@ -16,8 +13,5 @@ tableOmopTable(summarisedOmopTable) A gt object with the summarised data. } \description{ -Summarise an omop_table from a cdm_reference object. You will obtain -information related to the number of records, number of subjects, whether the -records are in observation, number of present domains and number of present -concepts. +Create a gt table from a summarised omop_table. } diff --git a/tests/testthat/test-summariseOmopTable.R b/tests/testthat/test-summariseOmopTable.R index 14e034d..0877909 100644 --- a/tests/testthat/test-summariseOmopTable.R +++ b/tests/testthat/test-summariseOmopTable.R @@ -100,5 +100,7 @@ test_that("tableOmopTable() works", { expect_true(inherits(x,"gt_tbl")) expect_warning(tableOmopTable(summariseOmopTable(cdm$death))) expect_true(inherits(tableOmopTable(summariseOmopTable(cdm$death)),"gt_tbl")) + + DBI::dbDisconnect(db) }) diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R new file mode 100644 index 0000000..2856413 --- /dev/null +++ b/tests/testthat/test-summariseTableCounts.R @@ -0,0 +1,3 @@ +test_that("summariseTableCounts() works", { + +}) From ff3c94a107b1c2f911f4c317c98bd9fe674f8d00 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 12 Jun 2024 21:42:24 +0100 Subject: [PATCH 4/8] summariseTableCounts() tests --- R/summariseTableCounts.R | 67 ++++++++++++++-------- tests/testthat/test-summariseOmopTable.R | 2 +- tests/testthat/test-summariseTableCounts.R | 65 +++++++++++++++++++++ 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/R/summariseTableCounts.R b/R/summariseTableCounts.R index 6d5f3ee..a375a50 100644 --- a/R/summariseTableCounts.R +++ b/R/summariseTableCounts.R @@ -25,7 +25,7 @@ summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { date <- startDate(name) if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){ - cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised omop table.")) + cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised result.")) return(result) } @@ -38,29 +38,47 @@ summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { filterInObservation(indexDate = date) } - startDate <- getOmopTableStartDate(omopTable, date) - endDate <- getOmopTableEndDate(omopTable, date) - # insert table and then left join - interval <- getIntervalTibble(omopTable, date, unit) + interval <- getIntervalTibble(omopTable, date, unit, unitInterval) # Insert interval table to the cdm ---- cdm <- cdm |> omopgenerics::insertTable(name = "interval", table = interval) # Create summarised result - result <- omopTable |> - dplyr::rename("incidence_date" = dplyr::all_of(date)) %>% - dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) %>% - dplyr::mutate("month" = !!CDMConnector::datepart("incidence_date", "month")) |> - dplyr::mutate("group" = as.Date(paste0(.data$group,"-",.data$month,"-01"))) |> - dplyr::left_join( - cdm$interval, by = c("group") - ) |> - dplyr::select(-c("month","interval")) |> - dplyr::group_by(.data$incidence_group) |> - dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> - dplyr::collect() |> + if(unit == "year"){ + result <- omopTable |> + dplyr::rename("incidence_date" = dplyr::all_of(date)) %>% + dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) |> + dplyr::left_join( + cdm$interval, by = c("group") + ) |> + dplyr::select(-c("interval")) |> + dplyr::group_by(.data$incidence_group) |> + dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> + dplyr::collect() |> + dplyr::ungroup() + + }else if(unit == "month"){ + result <- omopTable |> + dplyr::rename("incidence_date" = dplyr::all_of(date)) %>% + dplyr::mutate("group" = !!CDMConnector::datepart("incidence_date", "year")) %>% + dplyr::mutate("month" = !!CDMConnector::datepart("incidence_date", "month")) |> + dplyr::mutate("group" = as.Date(paste0(.data$group,"-",.data$month,"-01"))) |> + dplyr::left_join( + cdm$interval, by = c("group") + ) |> + dplyr::select(-c("month","interval")) |> + dplyr::group_by(.data$incidence_group) |> + dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> + dplyr::collect() |> + dplyr::ungroup() + } + + result <- result |> + dplyr::mutate(incidence_group = dplyr::if_else(rep(unitInterval, nrow(result)) == 1, + gsub(" to.*", "", .data$incidence_group), + .data$incidence_group)) |> dplyr::mutate( "estimate_value" = as.character(.data$estimate_value), "variable_name" = "incidence_records" @@ -89,10 +107,10 @@ omopTableChecks <- function(omopTable){ assertChoice(choices = tables$table_name) } -unitChecks <- function(units){ - inherits(.env$unit, "character") - assertLength(.env$unit, 1) - if(!.env$unit %in% c("year","month")){ +unitChecks <- function(unit){ + inherits(unit, "character") + assertLength(unit, 1) + if(!unit %in% c("year","month")){ cli::cli_abort("units value is not valid. Valid options are year or month.") } } @@ -144,14 +162,17 @@ getOmopTableEndDate <- function(omopTable, date){ dplyr::pull("endDate") } -getIntervalTibble <- function(omopTable, date, unit){ +getIntervalTibble <- function(omopTable, date, unit, unitInterval){ + startDate <- getOmopTableStartDate(omopTable, date) + endDate <- getOmopTableEndDate(omopTable, date) + if(unit == "year"){ interval <- tibble::tibble( "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit) ) }else if(unit == "month"){ interval <- tibble::tibble( - "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$month) + "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit) ) } diff --git a/tests/testthat/test-summariseOmopTable.R b/tests/testthat/test-summariseOmopTable.R index 0877909..60d8f70 100644 --- a/tests/testthat/test-summariseOmopTable.R +++ b/tests/testthat/test-summariseOmopTable.R @@ -101,6 +101,6 @@ test_that("tableOmopTable() works", { expect_warning(tableOmopTable(summariseOmopTable(cdm$death))) expect_true(inherits(tableOmopTable(summariseOmopTable(cdm$death)),"gt_tbl")) - DBI::dbDisconnect(db) + # DBI::dbDisconnect(db) }) diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R index 2856413..225b213 100644 --- a/tests/testthat/test-summariseTableCounts.R +++ b/tests/testthat/test-summariseTableCounts.R @@ -1,3 +1,68 @@ test_that("summariseTableCounts() works", { + # Load mock database ---- + dbName <- "GiBleed" + pathEunomia <- here::here("Eunomia") + if (!dir.exists(pathEunomia)) { + dir.create(pathEunomia) + CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) + Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) + } + + db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + + cdm <- CDMConnector::cdmFromCon( + con = db, + cdmSchema = "main", + writeSchema = "main", + cdmName = dbName + ) + + # Check inputs ---- + summariseTableCounts(omopTable = cdm$observation_period, + unit = "month", + unitInterval = 1) + expect_true(inherits(summariseTableCounts(omopTable = cdm$observation_period, unit = "month"),"summarised_result")) + expect_true(inherits(summariseTableCounts(omopTable = cdm$observation_period, unitInterval = 5),"summarised_result")) + + expect_no_error(summariseTableCounts(cdm$observation_period)) + expect_no_error(summariseTableCounts(cdm$visit_occurrence)) + expect_no_error(summariseTableCounts(cdm$condition_occurrence)) + expect_no_error(summariseTableCounts(cdm$drug_exposure)) + expect_no_error(summariseTableCounts(cdm$procedure_occurrence)) + expect_warning(summariseTableCounts(cdm$device_exposure)) + expect_no_error(summariseTableCounts(cdm$measurement)) + expect_no_error(summariseTableCounts(cdm$observation)) + expect_warning(summariseTableCounts(cdm$death)) + + # Check inputs ---- + + expect_true( + (summariseTableCounts(cdm$observation_period) |> + dplyr::filter(strata_level == 1963) |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$observation_period |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(observation_period_start_date)) |> + dplyr::filter(year == 1963) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + summariseTableCounts(cdm$condition_occurrence, unit = "month") |> + dplyr::filter(strata_level == "1961-02") |> + dplyr::pull("estimate_value") |> + as.numeric() == + (cdm$condition_occurrence |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::mutate(month = lubridate::month(condition_start_date)) |> + dplyr::filter(year == 1961, month == 2) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + DBI::dbDisconnect(db) }) From 1c8dda079bf536411a34236c52286a944105516c Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 12 Jun 2024 21:47:59 +0100 Subject: [PATCH 5/8] Update tests files --- tests/testthat/test-summariseOmopTable.R | 3 ++- tests/testthat/test-summariseTableCounts.R | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-summariseOmopTable.R b/tests/testthat/test-summariseOmopTable.R index 60d8f70..6faa5db 100644 --- a/tests/testthat/test-summariseOmopTable.R +++ b/tests/testthat/test-summariseOmopTable.R @@ -101,6 +101,7 @@ test_that("tableOmopTable() works", { expect_warning(tableOmopTable(summariseOmopTable(cdm$death))) expect_true(inherits(tableOmopTable(summariseOmopTable(cdm$death)),"gt_tbl")) - # DBI::dbDisconnect(db) + DBI::dbDisconnect(db) + unlink(here::here("Eunomia"), recursive = TRUE) }) diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R index 225b213..e34b1e8 100644 --- a/tests/testthat/test-summariseTableCounts.R +++ b/tests/testthat/test-summariseTableCounts.R @@ -5,9 +5,9 @@ test_that("summariseTableCounts() works", { pathEunomia <- here::here("Eunomia") if (!dir.exists(pathEunomia)) { dir.create(pathEunomia) - CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) - Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) } + CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) + Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) @@ -65,4 +65,6 @@ test_that("summariseTableCounts() works", { ) DBI::dbDisconnect(db) + + unlink(here::here("Eunomia"), recursive = TRUE) }) From 8a57a800f3ae758f08adc87e0d3af318edd3c4f3 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 12 Jun 2024 22:22:07 +0100 Subject: [PATCH 6/8] create plotTableCounts() --- DESCRIPTION | 2 ++ NAMESPACE | 1 + R/plotTableCounts.R | 24 ++++++++++++++++ R/summariseTableCounts.R | 5 +++- man/plotTableCounts.Rd | 21 ++++++++++++++ tests/testthat/test-summariseTableCounts.R | 32 ++++++++++++++++++---- 6 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 R/plotTableCounts.R create mode 100644 man/plotTableCounts.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0902487..db271cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,8 @@ Imports: cli, CohortCharacteristics, dplyr, + ggplot2, + glue, gt, IncidencePrevalence (>= 0.7.0), lubridate, diff --git a/NAMESPACE b/NAMESPACE index 6d589aa..b76242b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(plotTableCounts) export(summariseEntryCharacteristics) export(summariseOmopTable) export(summarisePersonDays) diff --git a/R/plotTableCounts.R b/R/plotTableCounts.R new file mode 100644 index 0000000..63a10b7 --- /dev/null +++ b/R/plotTableCounts.R @@ -0,0 +1,24 @@ +#' Create a gt table from a summarised omop_table. +#' +#' @param omopTable A summarised_result object with the output from summariseOmopTable(). +#' @param unit Whether to stratify by "year" or by "month" +#' @param unitInterval Number of years or months to be used +#' +#' @return A gt object with the summarised data. +#' +#' @importFrom rlang := +#' @export +#' +plotTableCounts <- function(summarisedTableCounts) { + # Initial checks ---- + assertClass(summarisedTableCounts, "summarised_result") + + if(summarisedTableCounts |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_warn("summarisedOmopTable is empty.") + return( + summarisedTableCounts |> + ggplot2::ggplot() + ) + } + +} diff --git a/R/summariseTableCounts.R b/R/summariseTableCounts.R index a375a50..5290eef 100644 --- a/R/summariseTableCounts.R +++ b/R/summariseTableCounts.R @@ -75,7 +75,7 @@ summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { dplyr::ungroup() } - result <- result |> + result <- result |> dplyr::mutate(incidence_group = dplyr::if_else(rep(unitInterval, nrow(result)) == 1, gsub(" to.*", "", .data$incidence_group), .data$incidence_group)) |> @@ -84,6 +84,9 @@ summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { "variable_name" = "incidence_records" ) |> visOmopResults::uniteStrata(cols = "incidence_group") |> + dplyr::mutate("strata_name" = dplyr::if_else(.data$strata_name == "incidence_group", + glue::glue("{unitInterval}_{unit}{if (unitInterval > 1) 's' else ''}"), + .data$strata_name)) |> dplyr::mutate( "result_id" = as.integer(1), "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), diff --git a/man/plotTableCounts.Rd b/man/plotTableCounts.Rd new file mode 100644 index 0000000..fbfefa2 --- /dev/null +++ b/man/plotTableCounts.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotTableCounts.R +\name{plotTableCounts} +\alias{plotTableCounts} +\title{Create a gt table from a summarised omop_table.} +\usage{ +plotTableCounts(summarisedTableCounts) +} +\arguments{ +\item{omopTable}{A summarised_result object with the output from summariseOmopTable().} + +\item{unit}{Whether to stratify by "year" or by "month"} + +\item{unitInterval}{Number of years or months to be used} +} +\value{ +A gt object with the summarised data. +} +\description{ +Create a gt table from a summarised omop_table. +} diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R index e34b1e8..66f467f 100644 --- a/tests/testthat/test-summariseTableCounts.R +++ b/tests/testthat/test-summariseTableCounts.R @@ -19,9 +19,6 @@ test_that("summariseTableCounts() works", { ) # Check inputs ---- - summariseTableCounts(omopTable = cdm$observation_period, - unit = "month", - unitInterval = 1) expect_true(inherits(summariseTableCounts(omopTable = cdm$observation_period, unit = "month"),"summarised_result")) expect_true(inherits(summariseTableCounts(omopTable = cdm$observation_period, unitInterval = 5),"summarised_result")) @@ -36,7 +33,6 @@ test_that("summariseTableCounts() works", { expect_warning(summariseTableCounts(cdm$death)) # Check inputs ---- - expect_true( (summariseTableCounts(cdm$observation_period) |> dplyr::filter(strata_level == 1963) |> @@ -64,7 +60,33 @@ test_that("summariseTableCounts() works", { dplyr::pull("n")) ) - DBI::dbDisconnect(db) + expect_true( + (summariseTableCounts(cdm$condition_occurrence, unit = "month", unitInterval = 3) |> + dplyr::filter(strata_level %in% c("1984-01 to 1984-03")) |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$condition_occurrence |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::mutate(month = lubridate::month(condition_start_date)) |> + dplyr::filter(year == 1984, month %in% c(1:3)) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + (summariseTableCounts(cdm$drug_exposure, unitInterval = 8) |> + dplyr::filter(strata_level == "1981 to 1988") |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$drug_exposure |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(drug_exposure_start_date)) |> + dplyr::filter(year %in% c(1981:1988)) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + DBI::dbDisconnect(db) unlink(here::here("Eunomia"), recursive = TRUE) }) From 8b0fef8e08bb6304a5120436c56147760809ee3a Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 12 Jun 2024 22:37:20 +0100 Subject: [PATCH 7/8] Update cdm connection in tests --- tests/testthat/test-summariseOmopTable.R | 37 +++------------------- tests/testthat/test-summariseTableCounts.R | 18 ++--------- 2 files changed, 6 insertions(+), 49 deletions(-) diff --git a/tests/testthat/test-summariseOmopTable.R b/tests/testthat/test-summariseOmopTable.R index 5350cda..3e7ce30 100644 --- a/tests/testthat/test-summariseOmopTable.R +++ b/tests/testthat/test-summariseOmopTable.R @@ -1,21 +1,9 @@ test_that("summariseOmopTable() works", { # Load mock database ---- - dbName <- "GiBleed" - pathEunomia <- here::here("Eunomia") - if (!dir.exists(pathEunomia)) { - dir.create(pathEunomia) - } - CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) - Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) - - db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) - + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) cdm <- CDMConnector::cdmFromCon( - con = db, - cdmSchema = "main", - writeSchema = "main", - cdmName = dbName + con = con, cdmSchema = "main", writeSchema = "main" ) # Check all tables work ---- @@ -70,29 +58,14 @@ test_that("summariseOmopTable() works", { domainId = FALSE, typeConcept = FALSE) |> dplyr::tally() |> dplyr::pull() == 3) - - - DBI::dbDisconnect(db) }) test_that("tableOmopTable() works", { # Load mock database ---- - dbName <- "GiBleed" - pathEunomia <- here::here("Eunomia") - if (!dir.exists(pathEunomia)) { - dir.create(pathEunomia) - } - CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) - Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) - - db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) - + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) cdm <- CDMConnector::cdmFromCon( - con = db, - cdmSchema = "main", - writeSchema = "main", - cdmName = dbName + con = con, cdmSchema = "main", writeSchema = "main" ) # Check that works ---- @@ -101,8 +74,6 @@ test_that("tableOmopTable() works", { expect_warning(tableOmopTable(summariseOmopTable(cdm$death))) expect_true(inherits(tableOmopTable(summariseOmopTable(cdm$death)),"gt_tbl")) - DBI::dbDisconnect(db) - unlink(here::here("Eunomia"), recursive = TRUE) }) diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R index 66f467f..5e58039 100644 --- a/tests/testthat/test-summariseTableCounts.R +++ b/tests/testthat/test-summariseTableCounts.R @@ -1,21 +1,9 @@ test_that("summariseTableCounts() works", { # Load mock database ---- - dbName <- "GiBleed" - pathEunomia <- here::here("Eunomia") - if (!dir.exists(pathEunomia)) { - dir.create(pathEunomia) - } - CDMConnector::downloadEunomiaData(datasetName = dbName, pathToData = pathEunomia) - Sys.setenv("EUNOMIA_DATA_FOLDER" = pathEunomia) - - db <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) - + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) cdm <- CDMConnector::cdmFromCon( - con = db, - cdmSchema = "main", - writeSchema = "main", - cdmName = dbName + con = con, cdmSchema = "main", writeSchema = "main" ) # Check inputs ---- @@ -87,6 +75,4 @@ test_that("summariseTableCounts() works", { dplyr::pull("n")) ) - DBI::dbDisconnect(db) - unlink(here::here("Eunomia"), recursive = TRUE) }) From c02b70067be84839696acca760337df279e82558 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 12 Jun 2024 22:56:19 +0100 Subject: [PATCH 8/8] plotTableCounts() release --- R/plotTableCounts.R | 20 +++++++++++++++----- R/summariseTableCounts.R | 8 ++++++++ man/plotTableCounts.Rd | 8 ++------ tests/testthat/test-summariseTableCounts.R | 5 +++++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/R/plotTableCounts.R b/R/plotTableCounts.R index 63a10b7..837f834 100644 --- a/R/plotTableCounts.R +++ b/R/plotTableCounts.R @@ -1,12 +1,9 @@ #' Create a gt table from a summarised omop_table. #' -#' @param omopTable A summarised_result object with the output from summariseOmopTable(). -#' @param unit Whether to stratify by "year" or by "month" -#' @param unitInterval Number of years or months to be used +#' @param summarisedTableCounts A summarised_result object with the output from summariseTableCounts(). #' -#' @return A gt object with the summarised data. +#' @return A ggplot showing the table counts #' -#' @importFrom rlang := #' @export #' plotTableCounts <- function(summarisedTableCounts) { @@ -21,4 +18,17 @@ plotTableCounts <- function(summarisedTableCounts) { ) } + # Plot ---- + summarisedTableCounts |> + dplyr::mutate(count = as.numeric(.data$estimate_value), + time = .data$strata_level) |> + visOmopResults::splitGroup() |> + ggplot2::ggplot(ggplot2::aes(x = .data$time, y = .data$count, group = .data$omop_table, color = .data$omop_table)) + + ggplot2::geom_point() + + ggplot2::geom_line() + + ggplot2::facet_wrap(facets = "cdm_name") + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + + ggplot2::xlab("Time") + + ggplot2::ylab("Counts") + + ggplot2::labs(color = "Omop table") } diff --git a/R/summariseTableCounts.R b/R/summariseTableCounts.R index 5290eef..52e2482 100644 --- a/R/summariseTableCounts.R +++ b/R/summariseTableCounts.R @@ -16,6 +16,14 @@ summariseTableCounts<- function(omopTable, unit = "year", unitInterval = 1) { unitChecks(unit) unitIntervalChecks(unitInterval) + if(missing(unit)){ + unit <- "year" + } + + if(missing(unitInterval)){ + unitInterval <- 1 + } + cdm <- omopgenerics::cdmReference(omopTable) omopTable <- omopTable |> dplyr::ungroup() diff --git a/man/plotTableCounts.Rd b/man/plotTableCounts.Rd index fbfefa2..ed00784 100644 --- a/man/plotTableCounts.Rd +++ b/man/plotTableCounts.Rd @@ -7,14 +7,10 @@ plotTableCounts(summarisedTableCounts) } \arguments{ -\item{omopTable}{A summarised_result object with the output from summariseOmopTable().} - -\item{unit}{Whether to stratify by "year" or by "month"} - -\item{unitInterval}{Number of years or months to be used} +\item{summarisedTableCounts}{A summarised_result object with the output from summariseTableCounts().} } \value{ -A gt object with the summarised data. +A ggplot showing the table counts } \description{ Create a gt table from a summarised omop_table. diff --git a/tests/testthat/test-summariseTableCounts.R b/tests/testthat/test-summariseTableCounts.R index 5e58039..6ad77c4 100644 --- a/tests/testthat/test-summariseTableCounts.R +++ b/tests/testthat/test-summariseTableCounts.R @@ -75,4 +75,9 @@ test_that("summariseTableCounts() works", { dplyr::pull("n")) ) + # summariseTableCounts plot + expect_true(inherits(plotTableCounts(summariseTableCounts(cdm$drug_exposure, unitInterval = 8)),"ggplot")) + expect_warning(inherits(plotTableCounts(summariseTableCounts(cdm$death, unitInterval = 8)),"ggplot")) + expect_true(inherits(plotTableCounts(summariseTableCounts(cdm$death, unitInterval = 8)),"ggplot")) + })