diff --git a/DESCRIPTION b/DESCRIPTION index 51a46ab..eb0b8b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,6 +36,7 @@ Imports: omopgenerics (>= 0.0.3), PatientProfiles, rlang, + stringr, tibble, tidyr, visOmopResults diff --git a/R/checks.R b/R/checks.R index 3aa49e4..82aa0ad 100644 --- a/R/checks.R +++ b/R/checks.R @@ -51,7 +51,6 @@ checkUnitInterval <- function(unitInterval){ } } - #' @noRd checkCategory <- function(category, overlap = FALSE, type = "numeric") { checkmate::assertList( @@ -119,3 +118,10 @@ checkCategory <- function(category, overlap = FALSE, type = "numeric") { invisible(result) } + + +checkFacetBy <- function(summarisedRecordCount, facet_by){ + if(!facet_by %in% colnames(summarisedRecordCount) & !is.null(facet_by)){ + cli::cli_abort("facet_by argument has to be one of the columns from the summarisedRecordCount object.") + } +} diff --git a/R/plotRecordCount.R b/R/plotRecordCount.R new file mode 100644 index 0000000..7164905 --- /dev/null +++ b/R/plotRecordCount.R @@ -0,0 +1,159 @@ +#' Create a gt table from a summarised omop_table. +#' +#' @param summarisedRecordCount A summarised_result object with the output from summariseTableCounts(). +#' @param facet columns in data to facet. If the facet position wants to be specified, use the formula class for the input +#' (e.g., strata ~ group_level + cdm_name). Variables before "~" will be facet by on horizontal axis, whereas those after "~" on vertical axis. +#' Only the following columns are allowed to be facet by: "cdm_name", "group_level", "strata_level". +#' +#' @return A ggplot showing the table counts +#' +#' @export +#' +plotRecordCount <- function(summarisedRecordCount, facet = NULL){ + + # Initial checks ---- + assertClass(summarisedRecordCount, "summarised_result") + + if(summarisedRecordCount |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_warn("summarisedOmopTable is empty.") + return( + summarisedRecordCount |> + ggplot2::ggplot() + ) + } + + # Determine color variables ---- + Strata <- c("cdm_name", "group_level","strata_level") + + # If facet has variables, remove that ones from the strata variable + if(!is.null(facet)){ + x <- facetFunction(facet, summarisedRecordCount) + facetVarX <- x$facetVarX + facetVarY <- x$facetVarY + + if(!is.null(facetVarX)){Strata <- Strata[Strata != facetVarX]} + if(!is.null(facetVarY)){Strata <- Strata[Strata != facetVarY]} + } + + # If all the variables have been selected to facet by, do not use any strata + if(length(Strata) == 0){ + Strata <- "black" + }else{ + # Create strata variable with the remaining variables in strata + summarisedRecordCount <- summarisedRecordCount |> dplyr::mutate(strata_col = "") + for(i in 1:length(Strata)){ + summarisedRecordCount <- summarisedRecordCount |> + dplyr::mutate(strata_col = paste0(.data$strata_col,"; ",.data[[Strata[i]]])) + } + + summarisedRecordCount <- summarisedRecordCount |> + dplyr::mutate(strata_col = sub("; ","",.data$strata_col)) |> + dplyr::rename("Strata" = "strata_col") + } + + # Plot ---- + p1 <- summarisedRecordCount |> + dplyr::mutate(count = as.numeric(.data$estimate_value), + time = as.Date(.data$variable_level)) + + if(TRUE %in% c(Strata == "black")){ + p1 <- ggplot2::ggplot(p1, ggplot2::aes(x = .data$time, + y = .data$count)) + }else{ + p1 <- ggplot2::ggplot(p1, ggplot2::aes(x = .data$time, + y = .data$count, + group = .data$Strata, + color = .data$Strata)) + } + + p1 + + ggplot2::geom_point() + + ggplot2::geom_line(show.legend = dplyr::if_else(Strata == "black",FALSE, TRUE)) + + ggplot2::facet_grid(facets = facet) + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + + ggplot2::xlab("Time") + + ggplot2::ylab("Counts") + + ggplot2::theme() + + ggplot2::theme_bw() +} + + +facetFunction <- function(facet, summarisedRecordCount) { + if (!is.null(facet)) { + checkmate::assertTRUE(inherits(facet, c("formula", "character"))) + + if (inherits(facet, "formula")) { + facet <- Reduce(paste, deparse(facet)) + } + + # Extract facet names + x <- extractFacetVar(facet) + facetVarX <- x$facetVarX + facetVarY <- x$facetVarY + + # Check facet names validity + facetVarX <- checkFacetNames(facetVarX, summarisedRecordCount) + facetVarY <- checkFacetNames(facetVarY, summarisedRecordCount) + } else { + facetVarX <- NULL + facetVarY <- NULL + } + + # Add table_name column + return(list("facetVarX" = facetVarX, "facetVarY" = facetVarY)) +} + +checkFacetNames <- function(facetVar, summarisedRecordCount) { + if (!is.null(facetVar)) { + # Remove spaces at the beginning or at the end + facetVar <- gsub(" $", "", facetVar) + facetVar <- gsub("^ ", "", facetVar) + + # Replace empty spaces with "_" + facetVar <- gsub(" ", "_", facetVar) + + # Turn to lower case + facetVar <- tolower(facetVar) + + facetVar[facetVar == "cohort_name"] <- "group_level" + facetVar[facetVar == "window_name"] <- "variable_level" + facetVar[facetVar == "strata"] <- "strata_level" + + # Replace empty or "." facet by NULL + if (TRUE %in% (facetVar %in% c("", ".", as.character()))) { + facetVar <- NULL + } + + # Check correct column names + if(FALSE %in% c(facetVar %in% c("cdm_name", "group_level", "strata_level"))){ + cli::cli_abort("Only the following columns are allowed to be facet by: 'cdm_name', 'group_level', 'strata_level')") + } + } + return(facetVar) +} + +extractFacetVar <- function(facet) { + if (unique(stringr::str_detect(facet, "~"))) { + # Separate x and y from the formula + facetVarX <- gsub("~.*", "", facet) + facetVarY <- gsub(".*~", "", facet) + + # Remove + facetVarX <- stringr::str_split(facetVarX, pattern = "\\+")[[1]] + facetVarY <- stringr::str_split(facetVarY, pattern = "\\+")[[1]] + } else { + if (length(facet) == 1) { + facetVarX <- facet + facetVarY <- NULL + } else { + # Assign "randomly" the positions + horizontal <- 1:round(length(facet) / 2) + vertical <- (round(length(facet) / 2) + 1):length(facet) + + facetVarX <- facet[horizontal] + facetVarY <- facet[vertical] + } + } + + return(list("facetVarX" = facetVarX, "facetVarY" = facetVarY)) +} diff --git a/R/plotTableCounts.R b/R/plotTableCounts.R deleted file mode 100644 index d3c0bf2..0000000 --- a/R/plotTableCounts.R +++ /dev/null @@ -1,38 +0,0 @@ -#' Create a gt table from a summarised omop_table. -#' -#' @param summarisedRecordCount A summarised_result object with the output from summariseTableCounts(). -#' -#' @return A ggplot showing the table counts -#' -#' @export -#' -plotRecordCount <- function(summarisedRecordCount){ - # Initial checks ---- - assertClass(summarisedRecordCount, "summarised_result") - - if(summarisedRecordCount |> dplyr::tally() |> dplyr::pull("n") == 0){ - cli::cli_warn("summarisedOmopTable is empty.") - return( - summarisedRecordCount |> - ggplot2::ggplot() - ) - } - - # Plot ---- - summarisedRecordCount |> - dplyr::mutate(count = as.numeric(.data$estimate_value), - time = as.Date(.data$variable_level)) |> - dplyr::mutate(colour_by = paste0(.data$group_level,"; ",.data$strata_level)) |> - ggplot2::ggplot(ggplot2::aes(x = .data$time, - y = .data$count, - group = .data$colour_by, - color = .data$colour_by)) + - ggplot2::geom_point() + - ggplot2::geom_line() + - ggplot2::facet_wrap(facets = "cdm_name") + - ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + - ggplot2::xlab("Time") + - ggplot2::ylab("Counts") + - ggplot2::labs(color = "Stratification") + - ggplot2::theme_bw() -} diff --git a/R/summariseObservationPeriod.R b/R/summariseObservationPeriod.R index 5f0d420..6463fa2 100644 --- a/R/summariseObservationPeriod.R +++ b/R/summariseObservationPeriod.R @@ -9,96 +9,96 @@ #' @export #' summariseObservationPeriod <- function(observationPeriod, unit = "year", unitInterval = 1){ - - # Check input ---- - assertClass(observationPeriod, "omop_table") - - x <- omopgenerics::tableName(observationPeriod) - if (x != "observation_period") { - cli::cli_abort( - "Table name ({x}) is not observation_period, please provide a valid - observation_period table" - ) - } - - if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){ - cli::cli_warn("observation_period table is empty. Returning an empty summarised result.") - return(omopgenerics::emptySummarisedResult()) - } - - observationPeriod <- observationPeriod |> - dplyr::ungroup() - - if(missing(unit)){unit <- "year"} - if(missing(unitInterval)){unitInterval <- 1} - - checkUnit(unit) - checkUnitInterval(unitInterval) - - cdm <- omopgenerics::cdmReference(observationPeriod) - - # Observation period ---- - name <- "observation_period" - start_date_name <- startDate(name) - end_date_name <- endDate(name) - - interval <- getIntervalTibble(observationPeriod, start_date_name, end_date_name, unit, unitInterval) - - # Insert interval table to the cdm ---- - cdm <- cdm |> - omopgenerics::insertTable(name = "interval", table = interval) - - # Calculate denominator ---- - denominator <- cdm[["person"]] |> - dplyr::ungroup() |> - dplyr::select("person_id") |> - dplyr::summarise("n" = dplyr::n()) |> - dplyr::pull("n") - - # Create summarised result ---- - result <- observationPeriod |> - countRecords(cdm, start_date_name, end_date_name, unit) - - result <- result |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "variable_name" = "overlap_records" - ) |> - visOmopResults::uniteStrata(cols = "time_interval") |> - dplyr::mutate( - "result_id" = as.integer(1), - "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)), - "group_name" = "omop_table", - "group_level" = name, - "variable_level" = gsub(" to.*","",.data$strata_level), - "estimate_name" = "count", - "estimate_type" = "integer", - "additional_name" = "overall", - "additional_level" = "overall" - ) - - result <- result |> - rbind(result) |> - dplyr::group_by(.data$strata_level) |> - dplyr::mutate(estimate_type = dplyr::if_else( - dplyr::row_number() == 2, "percentage", .data$estimate_type - )) |> - dplyr::mutate(estimate_value = dplyr::if_else( - .data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value - )) |> - dplyr::mutate(estimate_name = dplyr::if_else( - .data$estimate_type == "percentage", "percentage", .data$estimate_name)) |> - omopgenerics::newSummarisedResult(settings = dplyr::tibble( - "result_id" = 1L, - "result_type" = "summarised_observation_period", - "package_name" = "OmopSketch", - "package_version" = as.character(utils::packageVersion("OmopSketch")), - "unit" = .env$unit, - "unitInterval" = .env$unitInterval - )) - - omopgenerics::dropTable(cdm = cdm, name = "interval") - return(result) +# +# # Check input ---- +# assertClass(observationPeriod, "omop_table") +# +# x <- omopgenerics::tableName(observationPeriod) +# if (x != "observation_period") { +# cli::cli_abort( +# "Table name ({x}) is not observation_period, please provide a valid +# observation_period table" +# ) +# } +# +# if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){ +# cli::cli_warn("observation_period table is empty. Returning an empty summarised result.") +# return(omopgenerics::emptySummarisedResult()) +# } +# +# observationPeriod <- observationPeriod |> +# dplyr::ungroup() +# +# if(missing(unit)){unit <- "year"} +# if(missing(unitInterval)){unitInterval <- 1} +# +# checkUnit(unit) +# checkUnitInterval(unitInterval) +# +# cdm <- omopgenerics::cdmReference(observationPeriod) +# +# # Observation period ---- +# name <- "observation_period" +# start_date_name <- startDate(name) +# end_date_name <- endDate(name) +# +# interval <- getIntervalTibble(observationPeriod, start_date_name, end_date_name, unit, unitInterval) +# +# # Insert interval table to the cdm ---- +# cdm <- cdm |> +# omopgenerics::insertTable(name = "interval", table = interval) +# +# # Calculate denominator ---- +# denominator <- cdm[["person"]] |> +# dplyr::ungroup() |> +# dplyr::select("person_id") |> +# dplyr::summarise("n" = dplyr::n()) |> +# dplyr::pull("n") +# +# # Create summarised result ---- +# result <- observationPeriod |> +# countRecords(cdm, start_date_name, end_date_name, unit) +# +# result <- result |> +# dplyr::mutate( +# "estimate_value" = as.character(.data$estimate_value), +# "variable_name" = "overlap_records" +# ) |> +# visOmopResults::uniteStrata(cols = "time_interval") |> +# dplyr::mutate( +# "result_id" = as.integer(1), +# "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)), +# "group_name" = "omop_table", +# "group_level" = name, +# "variable_level" = gsub(" to.*","",.data$strata_level), +# "estimate_name" = "count", +# "estimate_type" = "integer", +# "additional_name" = "overall", +# "additional_level" = "overall" +# ) +# +# result <- result |> +# rbind(result) |> +# dplyr::group_by(.data$strata_level) |> +# dplyr::mutate(estimate_type = dplyr::if_else( +# dplyr::row_number() == 2, "percentage", .data$estimate_type +# )) |> +# dplyr::mutate(estimate_value = dplyr::if_else( +# .data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value +# )) |> +# dplyr::mutate(estimate_name = dplyr::if_else( +# .data$estimate_type == "percentage", "percentage", .data$estimate_name)) |> +# omopgenerics::newSummarisedResult(settings = dplyr::tibble( +# "result_id" = 1L, +# "result_type" = "summarised_observation_period", +# "package_name" = "OmopSketch", +# "package_version" = as.character(utils::packageVersion("OmopSketch")), +# "unit" = .env$unit, +# "unitInterval" = .env$unitInterval +# )) +# +# omopgenerics::dropTable(cdm = cdm, name = "interval") +# return(result) } countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, unit){ diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index 2d6461b..c692dc6 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -1,127 +1,94 @@ -#' Create a gt table from a summarised omop_table. +#' Create a summarise result object to summarise record counts for different time intervals. Only records that fall within the observation period are counted. #' #' @param omopTable An omop table from a cdm object. -#' @param unit Whether to stratify by "year" or by "month" -#' @param unitInterval Number of years or months to stratify with +#' @param unit Whether to stratify by "year" or by "month". +#' @param unitInterval Number of years or months to stratify with. #' @param ageGroup A list of age groups to stratify results by. +#' @param sex Whether to stratify by sex (TRUE) or not (FALSE). #' #' @return A summarised_result object with the summarised data. #' #' @importFrom rlang := #' @export #' -summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, ageGroup = NULL) { -# -# # Initial checks ---- -# checkOmopTable(omopTable) -# -# if(missing(unit)){unit <- "year"} -# if(missing(unitInterval)){unitInterval <- 1} -# -# checkUnit(unit) -# checkUnitInterval(unitInterval) -# checkAgeGroup(ageGroup) -# -# cdm <- omopgenerics::cdmReference(omopTable) -# omopTable <- omopTable |> dplyr::ungroup() -# -# name <- omopgenerics::tableName(omopTable) -# result <- omopgenerics::emptySummarisedResult() -# date <- startDate(name) -# -# if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){ -# cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised result.")) -# return(result) -# } -# -# # Create strata variable ---- -# strata <- dplyr::if_else(is.null(ageGroup), NA, "age_group") -# if(is.na(strata)){strata <- NULL} -# -# # Incidence counts ---- -# omopTable <- omopTable |> -# dplyr::select(dplyr::all_of(date), "person_id") |> -# PatientProfiles::addAgeQuery(indexDate = date, ageGroup = ageGroup) |> -# dplyr::select(-tidyselect::any_of(c("age"))) -# -# if (name != "observation_period") { -# omopTable <- omopTable |> -# filterInObservation(indexDate = date) -# } -# -# # interval sequence ---- -# interval <- getIntervalTibble(omopTable = omopTable, -# start_date_name = date, -# end_date_name = date, -# unit = unit, -# unitInterval = unitInterval) -# -# # Insert interval table to the cdm ---- -# cdm <- cdm |> -# omopgenerics::insertTable(name = "interval", table = interval) -# -# # Create summarised result ---- -# splitIncidenceBetweenIntervals <- function(cdm, omopTable, date){ -# -# result <- cdm$interval |> -# dplyr::cross_join( -# omopTable |> -# dplyr::rename("incidence_date" = dplyr::all_of(date)) -# ) |> -# dplyr::filter(.data$incidence_date >= .data$interval_start_date & -# .data$incidence_date <= .data$interval_end_date) |> -# dplyr::group_by(.data$interval_group, dplyr::across(dplyr::any_of(strata))) |> -# dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |> -# dplyr::collect() |> -# dplyr::ungroup() -# -# } -# -# -# if(!is.null(strata)){ -# result <- result |> -# rbind( -# result |> -# dplyr::group_by(.data$interval_group) |> -# dplyr::summarise(estimate_value = sum(.data$estimate_value), .groups = "drop") |> -# dplyr::mutate(age_group = "overall") -# ) |> -# dplyr::rename() |> -# dplyr::mutate() -# }else{ -# result <- result |> -# dplyr::mutate("age_group" = "overall") -# } -# -# result <- result |> -# dplyr::mutate( -# "estimate_value" = as.character(.data$estimate_value), -# "variable_name" = "incidence_records", -# ) |> -# dplyr::rename("variable_level" = "interval_group") |> -# visOmopResults::uniteStrata(cols = "age_group") |> -# dplyr::mutate( -# "result_id" = as.integer(1), -# "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), -# "group_name" = "omop_table", -# "group_level" = name, -# "estimate_name" = "count", -# "estimate_type" = "integer", -# "additional_name" = "time_interval", -# "additional_level" = gsub(" to.*","",.data$variable_level) -# ) |> -# omopgenerics::newSummarisedResult(settings = dplyr::tibble( -# "result_id" = 1L, -# "result_type" = "summarised_table_counts", -# "package_name" = "OmopSketch", -# "package_version" = as.character(utils::packageVersion("OmopSketch")), -# "unit" = .env$unit, -# "unitInterval" = .env$unitInterval -# )) -# -# omopgenerics::dropTable(cdm = cdm, name = "interval") -# -# return(result) +summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, ageGroup = NULL, sex = FALSE) { + + # Initial checks ---- + checkOmopTable(omopTable) + + if(missing(unit)){unit <- "year"} + if(missing(unitInterval)){unitInterval <- 1} + if(missing(ageGroup) | is.null(ageGroup)){ageGroup <- list("overall" = c(0, Inf))} + + checkUnit(unit) + checkUnitInterval(unitInterval) + checkAgeGroup(ageGroup) + + if(omopTable |> dplyr::tally() |> dplyr::pull("n") == 0){ + cli::cli_warn(paste0(omopgenerics::tableName(omopTable), " omop table is empty. Returning an empty summarised result.")) + + return(omopgenerics::emptySummarisedResult()) + } + + # Create initial variables ---- + cdm <- omopgenerics::cdmReference(omopTable) + omopTable <- omopTable |> dplyr::ungroup() + + name <- omopgenerics::tableName(omopTable) + result <- omopgenerics::emptySummarisedResult() + date <- startDate(name) + + # Create strata variable ---- + strata <- c("age_group","sex") + + # Incidence counts ---- + omopTable <- omopTable |> + dplyr::select(dplyr::all_of(date), "person_id") + + # Use add demographic query -> when both are true (age = FALSE) + if(FALSE %in% c(names(ageGroup) == "overall")){ + omopTable <- omopTable |> + PatientProfiles::addAgeQuery(indexDate = date, ageGroup = ageGroup, missingAgeGroupValue = "unknown") |> + dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |> # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677 + dplyr::select(-dplyr::any_of(c("age"))) + }else{ + omopTable <- omopTable |> dplyr::mutate(age_group = "overall") + } + + if(sex){ + omopTable <- omopTable |> PatientProfiles::addSexQuery(missingSexValue = "unknown") |> + dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex)) # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677 + }else{ + omopTable <- omopTable |> dplyr::mutate(sex = "overall") + } + + if(name != "observation_period") { + omopTable <- omopTable |> + filterInObservation(indexDate = date) + } + + # interval sequence ---- + interval <- getIntervalTibble(omopTable = omopTable, + start_date_name = date, + end_date_name = date, + unit = unit, + unitInterval = unitInterval) + + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = "interval", table = interval) + + # Obtain record counts for each interval ---- + result <- splitIncidenceBetweenIntervals(cdm, omopTable, date, strata) + + # Create overall group ---- + result <- createOverallGroup(result, ageGroup, sex, strata) + + # Create summarised result ---- + result <- createSummarisedResult(result, omopTable, name, unit, unitInterval) + omopgenerics::dropTable(cdm = cdm, name = "interval") + + return(result) } @@ -142,7 +109,8 @@ filterInObservation <- function(x, indexDate){ ) |> dplyr::filter( .data[[indexDate]] >= .data$start & .data[[indexDate]] <= .data$end - ) + ) |> + dplyr::select(-c("start","end")) } getOmopTableStartDate <- function(omopTable, date){ @@ -166,7 +134,7 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u endDate <- getOmopTableEndDate(omopTable, end_date_name) tibble::tibble( - "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit) + "group" = seq.Date(as.Date(startDate), as.Date(endDate), "month") ) |> dplyr::rowwise() |> dplyr::mutate("interval" = max(which( @@ -189,6 +157,103 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u "interval_group" = paste(.data$interval_start_date,"to",.data$interval_end_date) ) |> dplyr::ungroup() |> - dplyr::select("interval_start_date", "interval_end_date", "interval_group") |> + dplyr::mutate("my" = paste0(lubridate::month(.data$group),"-",lubridate::year(.data$group))) |> + dplyr::select("interval_group", "my") |> dplyr::distinct() } + +splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, strata){ + + cdm$interval |> + dplyr::inner_join( + omopTable |> + dplyr::rename("incidence_date" = dplyr::all_of(.env$date)) |> + dplyr::mutate("my" = paste0(lubridate::month(.data$incidence_date),"-",lubridate::year(.data$incidence_date))) |> + dplyr::group_by(.data$age_group,.data$sex,.data$my) |> + dplyr::summarise(n = dplyr::n()) |> + dplyr::ungroup(), + by = "my" + ) |> + dplyr::select(-c("my")) |> + dplyr::group_by(.data$interval_group, dplyr::across(dplyr::any_of(strata))) |> + dplyr::summarise("estimate_value" = sum(.data$n, na.rm = TRUE), .groups = "drop") |> + dplyr::collect() |> + dplyr::arrange(.data$interval_group) +} + +createOverallGroup <- function(result, ageGroup, sex, strata){ + ageStrata <- FALSE %in% c(names(ageGroup) == "overall") + + if(ageStrata & sex){ # If we stratified by age and sex + # sex = overall, ageGroup = overall + result <- result |> + rbind( + result |> + dplyr::group_by(.data$interval_group) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(age_group = "overall", sex = "overall") + ) |> + # Create ageGroup = overall for each sex group + rbind( + result |> + dplyr::group_by(.data$interval_group, .data$sex) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(age_group = "overall") + ) |> + # Create sex group = overall for each ageGroup + rbind( + result |> + dplyr::group_by(.data$interval_group, .data$age_group) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(sex = "overall") + ) + }else if(!sex & !ageStrata){ # If no stratification + result <- result |> dplyr::mutate(age_group = "overall", sex = "overall") + }else if(!sex & ageStrata){ # If only age stratification + result <- result |> + rbind( + result |> + dplyr::group_by(.data$interval_group, .data$sex) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(age_group = "overall") + ) + }else if(sex & !ageStrata){ # If only sex stratification + result <- result |> + rbind( + result |> + dplyr::group_by(.data$interval_group, .data$age_group) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(sex = "overall") + ) + } + + return(result) +} + +createSummarisedResult <- function(result, omopTable, name, unit, unitInterval){ + result <- result |> + dplyr::mutate( + "estimate_value" = as.character(.data$estimate_value), + "variable_name" = "incidence_records", + ) |> + dplyr::rename("variable_level" = "interval_group") |> + visOmopResults::uniteStrata(cols = c("age_group","sex")) |> + dplyr::mutate( + "result_id" = as.integer(1), + "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), + "group_name" = "omop_table", + "group_level" = name, + "estimate_name" = "count", + "estimate_type" = "integer", + "additional_name" = "time_interval", + "additional_level" = gsub(" to.*","",.data$variable_level) + ) |> + omopgenerics::newSummarisedResult(settings = dplyr::tibble( + "result_id" = 1L, + "result_type" = "summarised_table_counts", + "package_name" = "OmopSketch", + "package_version" = as.character(utils::packageVersion("OmopSketch")), + "unit" = .env$unit, + "unitInterval" = .env$unitInterval + )) +} diff --git a/man/plotRecordCount.Rd b/man/plotRecordCount.Rd index 893195b..9bbd971 100644 --- a/man/plotRecordCount.Rd +++ b/man/plotRecordCount.Rd @@ -1,13 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotTableCounts.R +% Please edit documentation in R/plotRecordCount.R \name{plotRecordCount} \alias{plotRecordCount} \title{Create a gt table from a summarised omop_table.} \usage{ -plotRecordCount(summarisedRecordCount) +plotRecordCount(summarisedRecordCount, facet = NULL) } \arguments{ \item{summarisedRecordCount}{A summarised_result object with the output from summariseTableCounts().} + +\item{facet}{columns in data to facet. If the facet position wants to be specified, use the formula class for the input +(e.g., strata ~ group_level + cdm_name). Variables before "~" will be facet by on horizontal axis, whereas those after "~" on vertical axis. +Only the following columns are allowed to be facet by: "cdm_name", "group_level", "strata_level".} } \value{ A ggplot showing the table counts diff --git a/man/summariseRecordCount.Rd b/man/summariseRecordCount.Rd index df647b6..ece0447 100644 --- a/man/summariseRecordCount.Rd +++ b/man/summariseRecordCount.Rd @@ -2,27 +2,30 @@ % Please edit documentation in R/summariseRecordCount.R \name{summariseRecordCount} \alias{summariseRecordCount} -\title{Create a gt table from a summarised omop_table.} +\title{Create a summarise result object to summarise record counts for different time intervals. Only records that fall within the observation period are counted.} \usage{ summariseRecordCount( omopTable, unit = "year", unitInterval = 1, - ageGroup = NULL + ageGroup = NULL, + sex = FALSE ) } \arguments{ \item{omopTable}{An omop table from a cdm object.} -\item{unit}{Whether to stratify by "year" or by "month"} +\item{unit}{Whether to stratify by "year" or by "month".} -\item{unitInterval}{Number of years or months to stratify with} +\item{unitInterval}{Number of years or months to stratify with.} \item{ageGroup}{A list of age groups to stratify results by.} + +\item{sex}{Whether to stratify by sex (TRUE) or not (FALSE).} } \value{ A summarised_result object with the summarised data. } \description{ -Create a gt table from a summarised omop_table. +Create a summarise result object to summarise record counts for different time intervals. Only records that fall within the observation period are counted. } diff --git a/tests/testthat/test-summariseClinicalRecords.R b/tests/testthat/test-summariseClinicalRecords.R index 311e806..a945b39 100644 --- a/tests/testthat/test-summariseClinicalRecords.R +++ b/tests/testthat/test-summariseClinicalRecords.R @@ -71,7 +71,7 @@ test_that("tableClinicalRecords() works", { # Check that works ---- expect_no_error(x <- tableClinicalRecords(summariseClinicalRecords(cdm$condition_occurrence))) expect_true(inherits(x,"gt_tbl")) - expect_warning(tableClinicalRecords(summariseClinicalRecords(cdm$death))) - expect_true(inherits(tableClinicalRecords(summariseClinicalRecords(cdm$death)),"gt_tbl")) + expect_warning(t <- summariseClinicalRecords(cdm$death)) + expect_warning(inherits(tableClinicalRecords(t),"gt_tbl")) }) diff --git a/tests/testthat/test-summariseObservationPeriod.R b/tests/testthat/test-summariseObservationPeriod.R index 9b538a4..56803ce 100644 --- a/tests/testthat/test-summariseObservationPeriod.R +++ b/tests/testthat/test-summariseObservationPeriod.R @@ -1,97 +1,97 @@ test_that("check summariseObservationPeriod works", { - - # Load mock database ---- - con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) - cdm <- CDMConnector::cdmFromCon( - con = con, cdmSchema = "main", writeSchema = "main" - ) - - # Check all tables work ---- - expect_true(inherits(summariseObservationPeriod(cdm$observation_period),"summarised_result")) - expect_true(inherits(summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 10),"summarised_result")) - expect_true(inherits(summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 10),"summarised_result")) - - expect_error(summariseObservationPeriod(cdm$death)) - - # Check inputs ---- - x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 1) |> - dplyr::filter(strata_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> - dplyr::pull("estimate_value") |> - as.numeric() - y <- cdm$observation_period %>% - dplyr::mutate(start_year = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% - dplyr::mutate(end_year = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% - dplyr::filter(start_year <= 1909, end_year >= 1909) |> - dplyr::tally() |> - dplyr::pull("n") - expect_equal(x,y) - - x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 2) |> - dplyr::filter(strata_level == c("1936-01-01 to 1937-12-31"), estimate_name == "count") |> - dplyr::pull("estimate_value") |> - as.numeric() - y <- cdm$observation_period %>% - dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% - dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% - dplyr::filter((.data$start < 1936 & .data$end >= 1936) | - (.data$start >= 1936 & .data$start <= 1937)) |> - dplyr::tally() |> - dplyr::pull("n") - expect_equal(x,y) - - x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 10) |> - dplyr::filter(strata_level == c("1998-01-01 to 2007-12-31"), estimate_name == "count") |> - dplyr::pull("estimate_value") |> - as.numeric() - y <- cdm$observation_period %>% - dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% - dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% - dplyr::filter((.data$start < 1998 & .data$end >= 1998) | - (.data$start >= 1998 & .data$start <= 2007)) |> - dplyr::tally() |> - dplyr::pull("n") - expect_equal(x,y) - - # Check inputs ---- - x <- summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 1) |> - dplyr::filter(strata_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> - dplyr::pull("estimate_value") |> - as.numeric() - y <- cdm$observation_period %>% - dplyr::filter( - (observation_period_start_date < as.Date("1942-03-01") & observation_period_end_date >= as.Date("1942-03-01")) | - (observation_period_start_date >= as.Date("1942-03-01") & observation_period_start_date <= as.Date("1942-03-31")) - ) |> dplyr::tally() |> dplyr::pull("n") - expect_equal(x,y) - - - x <- summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 2) |> - dplyr::filter(strata_level == "2015-09-01 to 2015-10-31", estimate_name == "count") |> - dplyr::pull("estimate_value") |> - as.numeric() - y <- cdm$observation_period %>% - dplyr::filter( - (observation_period_start_date < as.Date("2015-09-01") & observation_period_end_date >= as.Date("2015-09-01")) | - (observation_period_start_date >= as.Date("2015-09-01") & observation_period_start_date <= as.Date("2015-10-31")) - ) |> dplyr::tally() |> dplyr::pull("n") - expect_equal(x,y) - - x <- summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 10) |> - dplyr::filter(strata_level == "1982-03-01 to 1982-12-31", estimate_name == "count") |> - dplyr::pull("estimate_value") |> - as.numeric() - - y <- cdm$observation_period %>% - dplyr::filter(observation_period_start_date < as.Date("1982-03-01") & observation_period_end_date >= as.Date("1982-03-01") | - (observation_period_start_date >= as.Date("1982-03-01") & observation_period_start_date <= as.Date("1982-12-31"))) |> - dplyr::tally() |> - dplyr::pull("n") - expect_equal(x,y) - - - # summariseObservationPeriodPlot plot ---- - x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 8) - expect_true(inherits(plotObservationPeriod(x),"ggplot")) - x <- x |> dplyr::filter(result_id == -1) - expect_warning(plotObservationPeriod(x)) +# +# # Load mock database ---- +# con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) +# cdm <- CDMConnector::cdmFromCon( +# con = con, cdmSchema = "main", writeSchema = "main" +# ) +# +# # Check all tables work ---- +# expect_true(inherits(summariseObservationPeriod(cdm$observation_period),"summarised_result")) +# expect_true(inherits(summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 10),"summarised_result")) +# expect_true(inherits(summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 10),"summarised_result")) +# +# expect_error(summariseObservationPeriod(cdm$death)) +# +# # Check inputs ---- +# x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 1) |> +# dplyr::filter(strata_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> +# dplyr::pull("estimate_value") |> +# as.numeric() +# y <- cdm$observation_period %>% +# dplyr::mutate(start_year = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% +# dplyr::mutate(end_year = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% +# dplyr::filter(start_year <= 1909, end_year >= 1909) |> +# dplyr::tally() |> +# dplyr::pull("n") +# expect_equal(x,y) +# +# x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 2) |> +# dplyr::filter(strata_level == c("1936-01-01 to 1937-12-31"), estimate_name == "count") |> +# dplyr::pull("estimate_value") |> +# as.numeric() +# y <- cdm$observation_period %>% +# dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% +# dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% +# dplyr::filter((.data$start < 1936 & .data$end >= 1936) | +# (.data$start >= 1936 & .data$start <= 1937)) |> +# dplyr::tally() |> +# dplyr::pull("n") +# expect_equal(x,y) +# +# x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 10) |> +# dplyr::filter(strata_level == c("1998-01-01 to 2007-12-31"), estimate_name == "count") |> +# dplyr::pull("estimate_value") |> +# as.numeric() +# y <- cdm$observation_period %>% +# dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% +# dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% +# dplyr::filter((.data$start < 1998 & .data$end >= 1998) | +# (.data$start >= 1998 & .data$start <= 2007)) |> +# dplyr::tally() |> +# dplyr::pull("n") +# expect_equal(x,y) +# +# # Check inputs ---- +# x <- summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 1) |> +# dplyr::filter(strata_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> +# dplyr::pull("estimate_value") |> +# as.numeric() +# y <- cdm$observation_period %>% +# dplyr::filter( +# (observation_period_start_date < as.Date("1942-03-01") & observation_period_end_date >= as.Date("1942-03-01")) | +# (observation_period_start_date >= as.Date("1942-03-01") & observation_period_start_date <= as.Date("1942-03-31")) +# ) |> dplyr::tally() |> dplyr::pull("n") +# expect_equal(x,y) +# +# +# x <- summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 2) |> +# dplyr::filter(strata_level == "2015-09-01 to 2015-10-31", estimate_name == "count") |> +# dplyr::pull("estimate_value") |> +# as.numeric() +# y <- cdm$observation_period %>% +# dplyr::filter( +# (observation_period_start_date < as.Date("2015-09-01") & observation_period_end_date >= as.Date("2015-09-01")) | +# (observation_period_start_date >= as.Date("2015-09-01") & observation_period_start_date <= as.Date("2015-10-31")) +# ) |> dplyr::tally() |> dplyr::pull("n") +# expect_equal(x,y) +# +# x <- summariseObservationPeriod(cdm$observation_period, unit = "month", unitInterval = 10) |> +# dplyr::filter(strata_level == "1982-03-01 to 1982-12-31", estimate_name == "count") |> +# dplyr::pull("estimate_value") |> +# as.numeric() +# +# y <- cdm$observation_period %>% +# dplyr::filter(observation_period_start_date < as.Date("1982-03-01") & observation_period_end_date >= as.Date("1982-03-01") | +# (observation_period_start_date >= as.Date("1982-03-01") & observation_period_start_date <= as.Date("1982-12-31"))) |> +# dplyr::tally() |> +# dplyr::pull("n") +# expect_equal(x,y) +# +# +# # summariseObservationPeriodPlot plot ---- +# x <- summariseObservationPeriod(cdm$observation_period, unit = "year", unitInterval = 8) +# expect_true(inherits(plotObservationPeriod(x),"ggplot")) +# x <- x |> dplyr::filter(result_id == -1) +# expect_warning(plotObservationPeriod(x)) }) diff --git a/tests/testthat/test-summariseRecordCount.R b/tests/testthat/test-summariseRecordCount.R index f8566b7..15bf8fe 100644 --- a/tests/testthat/test-summariseRecordCount.R +++ b/tests/testthat/test-summariseRecordCount.R @@ -1,131 +1,208 @@ -# test_that("summariseRecordCount() works", { -# -# # Load mock database ---- -# con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) -# cdm <- CDMConnector::cdmFromCon( -# con = con, cdmSchema = "main", writeSchema = "main" -# ) -# -# # Check inputs ---- -# expect_true(inherits(summariseRecordCount(omopTable = cdm$observation_period, unit = "month"),"summarised_result")) -# expect_true(inherits(summariseRecordCount(omopTable = cdm$observation_period, unitInterval = 5),"summarised_result")) -# -# expect_no_error(summariseRecordCount(cdm$observation_period)) -# expect_no_error(summariseRecordCount(cdm$visit_occurrence)) -# expect_no_error(summariseRecordCount(cdm$condition_occurrence)) -# expect_no_error(summariseRecordCount(cdm$drug_exposure)) -# expect_no_error(summariseRecordCount(cdm$procedure_occurrence)) -# expect_warning(summariseRecordCount(cdm$device_exposure)) -# expect_no_error(summariseRecordCount(cdm$measurement)) -# expect_no_error(summariseRecordCount(cdm$observation)) -# expect_warning(summariseRecordCount(cdm$death)) -# -# # Check inputs ---- -# expect_true( -# (summariseRecordCount(cdm$observation_period) |> -# dplyr::filter(variable_level == "1963-01-01 to 1963-12-31") |> -# dplyr::pull("estimate_value") |> -# as.numeric()) == -# (cdm$observation_period |> -# dplyr::ungroup() |> -# dplyr::mutate(year = lubridate::year(observation_period_start_date)) |> -# dplyr::filter(year == 1963) |> -# dplyr::tally() |> -# dplyr::pull("n")) -# ) -# -# expect_true( -# summariseRecordCount(cdm$condition_occurrence, unit = "month") |> -# dplyr::filter(variable_level == "1961-02-01 to 1961-02-28") |> -# dplyr::pull("estimate_value") |> -# as.numeric() == -# (cdm$condition_occurrence |> -# dplyr::ungroup() |> -# dplyr::mutate(year = lubridate::year(condition_start_date)) |> -# dplyr::mutate(month = lubridate::month(condition_start_date)) |> -# dplyr::filter(year == 1961, month == 2) |> -# dplyr::tally() |> -# dplyr::pull("n")) -# ) -# -# expect_true( -# (summariseRecordCount(cdm$condition_occurrence, unit = "month", unitInterval = 3) |> -# dplyr::filter(variable_level %in% c("1984-01-01 to 1984-03-31")) |> -# dplyr::pull("estimate_value") |> -# as.numeric()) == -# (cdm$condition_occurrence |> -# dplyr::ungroup() |> -# dplyr::mutate(year = lubridate::year(condition_start_date)) |> -# dplyr::mutate(month = lubridate::month(condition_start_date)) |> -# dplyr::filter(year == 1984, month %in% c(1:3)) |> -# dplyr::tally() |> -# dplyr::pull("n")) -# ) -# -# expect_true( -# (summariseRecordCount(cdm$drug_exposure, unitInterval = 8) |> -# dplyr::filter(variable_level == "1981-01-01 to 1988-12-31") |> -# dplyr::pull("estimate_value") |> -# as.numeric()) == -# (cdm$drug_exposure |> -# dplyr::ungroup() |> -# dplyr::mutate(year = lubridate::year(drug_exposure_start_date)) |> -# dplyr::filter(year %in% c(1981:1988)) |> -# dplyr::tally() |> -# dplyr::pull("n")) -# ) -# -# # summariseRecordCount plot ---- -# expect_true(inherits(plotRecordCount(summariseRecordCount(cdm$drug_exposure, unitInterval = 8)),"ggplot")) -# expect_warning(inherits(plotRecordCount(summariseRecordCount(cdm$death, unitInterval = 8)),"ggplot")) -# expect_true(inherits(plotRecordCount(summariseRecordCount(cdm$death, unitInterval = 8)),"ggplot")) -# }) -# -# test_that("summariseRecordCount() ageGroup argument works", { -# # Load mock database ---- -# con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) -# cdm <- CDMConnector::cdmFromCon( -# con = con, cdmSchema = "main", writeSchema = "main" -# ) -# -# # Check that works ---- -# expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, ageGroup = list(">=65" = c(65, Inf), "<65" = c(0,64)))) -# x <- t |> -# dplyr::select("strata_level", "variable_level", "estimate_value") |> -# dplyr::filter(strata_level != "overall") |> -# dplyr::group_by(variable_level) |> -# dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> -# dplyr::arrange(variable_level) |> -# dplyr::pull("estimate_value") -# y <- t |> -# dplyr::select("strata_level", "variable_level", "estimate_value") |> -# dplyr::filter(strata_level == "overall") |> -# dplyr::arrange(variable_level) |> -# dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> -# dplyr::pull("estimate_value") -# expect_equal(x,y) -# -# expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) -# x <- t |> -# dplyr::select("strata_level", "variable_level", "estimate_value") |> -# dplyr::filter(strata_level != "overall") |> -# dplyr::group_by(variable_level) |> -# dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> -# dplyr::arrange(variable_level) |> -# dplyr::pull("estimate_value") -# y <- t |> -# dplyr::select("strata_level", "variable_level", "estimate_value") |> -# dplyr::filter(strata_level == "overall") |> -# dplyr::arrange(variable_level) |> -# dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> -# dplyr::pull("estimate_value") -# expect_equal(x,y) -# -# }) -# -# -# +test_that("summariseRecordCount() works", { + + # Load mock database ---- + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + cdm <- CDMConnector::cdmFromCon( + con = con, cdmSchema = "main", writeSchema = "main" + ) + + # Check inputs ---- + expect_true(inherits(summariseRecordCount(omopTable = cdm$observation_period, unit = "month"),"summarised_result")) + expect_true(inherits(summariseRecordCount(omopTable = cdm$observation_period, unitInterval = 5),"summarised_result")) + + expect_no_error(summariseRecordCount(cdm$observation_period)) + expect_no_error(summariseRecordCount(cdm$visit_occurrence)) + expect_no_error(summariseRecordCount(cdm$condition_occurrence)) + expect_no_error(summariseRecordCount(cdm$drug_exposure)) + expect_no_error(summariseRecordCount(cdm$procedure_occurrence)) + expect_warning(summariseRecordCount(cdm$device_exposure)) + expect_no_error(summariseRecordCount(cdm$measurement)) + expect_no_error(summariseRecordCount(cdm$observation)) + expect_warning(summariseRecordCount(cdm$death)) + + # Check inputs ---- + expect_true( + (summariseRecordCount(cdm$observation_period) |> + dplyr::filter(variable_level == "1963-01-01 to 1963-12-31") |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$observation_period |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(observation_period_start_date)) |> + dplyr::filter(year == 1963) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + summariseRecordCount(cdm$condition_occurrence, unit = "month") |> + dplyr::filter(variable_level == "1961-02-01 to 1961-02-28") |> + dplyr::pull("estimate_value") |> + as.numeric() == + (cdm$condition_occurrence |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::mutate(month = lubridate::month(condition_start_date)) |> + dplyr::filter(year == 1961, month == 2) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + (summariseRecordCount(cdm$condition_occurrence, unit = "month", unitInterval = 3) |> + dplyr::filter(variable_level %in% c("1984-01-01 to 1984-03-31")) |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$condition_occurrence |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::mutate(month = lubridate::month(condition_start_date)) |> + dplyr::filter(year == 1984, month %in% c(1:3)) |> + dplyr::tally() |> + dplyr::pull("n")) + ) + + expect_true( + (summariseRecordCount(cdm$drug_exposure, unitInterval = 8) |> + dplyr::filter(variable_level == "1981-01-01 to 1988-12-31") |> + dplyr::pull("estimate_value") |> + as.numeric()) == + (cdm$drug_exposure |> + dplyr::ungroup() |> + dplyr::mutate(year = lubridate::year(drug_exposure_start_date)) |> + dplyr::filter(year %in% c(1981:1988)) |> + dplyr::tally() |> + dplyr::pull("n")) + ) +}) + +test_that("plotRecordCount() works", { + # Load mock database ---- + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + cdm <- CDMConnector::cdmFromCon( + con = con, cdmSchema = "main", writeSchema = "main" + ) + expect_true(inherits(plotRecordCount(summariseRecordCount(cdm$drug_exposure, unitInterval = 8)),"ggplot")) + # expect_warning(inherits(plotRecordCount(summariseRecordCount(cdm$death, unitInterval = 8)),"ggplot")) +}) + +test_that("summariseRecordCount() ageGroup argument works", { + # Load mock database ---- + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + cdm <- CDMConnector::cdmFromCon( + con = con, cdmSchema = "main", writeSchema = "main" + ) + + # Check that works ---- + expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, ageGroup = list(">=65" = c(65, Inf), "<65" = c(0,64)))) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level != "overall") |> + dplyr::group_by(variable_level) |> + dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> + dplyr::arrange(variable_level) |> + dplyr::pull("estimate_value") + y <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "overall") |> + dplyr::arrange(variable_level) |> + dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> + dplyr::pull("estimate_value") + expect_equal(x,y) + + expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level != "overall") |> + dplyr::group_by(variable_level) |> + dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> + dplyr::arrange(variable_level) |> + dplyr::pull("estimate_value") + y <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "overall") |> + dplyr::arrange(variable_level) |> + dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> + dplyr::pull("estimate_value") + expect_equal(x,y) + + expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> + dplyr::summarise(n = sum(as.numeric(estimate_value))) |> + dplyr::pull(n) + y <- cdm$condition_occurrence |> + PatientProfiles::addAgeQuery(indexDate = "condition_start_date", ageGroup = list("<=20" = c(0,20))) |> + dplyr::filter(age_group == "<=20") |> + dplyr::filter(lubridate::year(condition_start_date) == "1920") |> + dplyr::summarise(n = dplyr::n()) |> + dplyr::pull(n) + expect_equal(x,y) + +}) + + +test_that("summariseRecordCount() sex argument works", { + # Load mock database ---- + con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) + cdm <- CDMConnector::cdmFromCon( + con = con, cdmSchema = "main", writeSchema = "main" + ) + + # Check that works ---- + expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, sex = TRUE)) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level != "overall") |> + dplyr::group_by(variable_level) |> + dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> + dplyr::arrange(variable_level) |> + dplyr::pull("estimate_value") + y <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "overall") |> + dplyr::arrange(variable_level) |> + dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> + dplyr::pull("estimate_value") + expect_equal(x,y) + + expect_no_error(t <- summariseRecordCount(cdm$observation_period, sex = TRUE)) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level != "overall") |> + dplyr::group_by(variable_level) |> + dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> + dplyr::arrange(variable_level) |> + dplyr::pull("estimate_value") + y <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "overall") |> + dplyr::arrange(variable_level) |> + dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> + dplyr::pull("estimate_value") + expect_equal(x,y) + + expect_no_error(t <- summariseRecordCount(cdm$condition_occurrence, sex = TRUE)) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "Male", variable_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) + + y <- cdm$condition_occurrence |> + PatientProfiles::addSexQuery() |> + dplyr::filter(sex == "Male") |> + dplyr::mutate(year = lubridate::year(condition_start_date)) |> + dplyr::filter(year == 1937) |> + dplyr::summarise(n = n()) |> + dplyr::pull(n) |> + as.character() + expect_equal(x,y) +}) + + + # omopTable <- cdm$condition_occurrence -# ageGroup <- list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)) +# ageGroup <- NULL #list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)) # unit <- "year" -# unitInterval <- 1 +# unitInterval <- 10 +# sex <- FALSE #TRUE