remove cross_joint()

OHDSI · Jul 24, 2024 · 5e28df8 · 5e28df8
1 parent 1c282c1
commit 5e28df8
Show file tree

Hide file tree

Showing 6 changed files with 217 additions and 217 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -38,7 +38,6 @@ Imports:
     rlang,
     tibble,
     tidyr,
-    tidyselect,
     visOmopResults
 Depends: 
     R (>= 2.10)

diff --git a/R/summariseObservationPeriod.R b/R/summariseObservationPeriod.R
@@ -9,96 +9,96 @@
 #' @export
 #'
 summariseObservationPeriod <- function(observationPeriod, unit = "year", unitInterval = 1){
-
-  # Check input ----
-  assertClass(observationPeriod, "omop_table")
-
-  x <- omopgenerics::tableName(observationPeriod)
-  if (x != "observation_period") {
-    cli::cli_abort(
-      "Table name ({x}) is not observation_period, please provide a valid
-      observation_period table"
-    )
-  }
-
-  if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){
-    cli::cli_warn("observation_period table is empty. Returning an empty summarised result.")
-    return(omopgenerics::emptySummarisedResult())
-  }
-
-  observationPeriod <- observationPeriod |>
-    dplyr::ungroup()
-
-  if(missing(unit)){unit <- "year"}
-  if(missing(unitInterval)){unitInterval <- 1}
-
-  checkUnit(unit)
-  checkUnitInterval(unitInterval)
-
-  cdm <- omopgenerics::cdmReference(observationPeriod)
-
-  # Observation period ----
-  name <- "observation_period"
-  start_date_name <- startDate(name)
-  end_date_name   <- endDate(name)
-
-  interval <- getIntervalTibble(observationPeriod, start_date_name, end_date_name, unit, unitInterval)
-
-  # Insert interval table to the cdm ----
-  cdm <- cdm |>
-    omopgenerics::insertTable(name = "interval", table = interval)
-
-  # Calculate denominator ----
-  denominator <- cdm[["person"]] |>
-    dplyr::ungroup() |>
-    dplyr::select("person_id") |>
-    dplyr::summarise("n" = dplyr::n()) |>
-    dplyr::pull("n")
-
-  # Create summarised result ----
-  result <- observationPeriod |>
-    countRecords(cdm, start_date_name, end_date_name, unit)
-
-  result <- result |>
-    dplyr::mutate(
-      "estimate_value" = as.character(.data$estimate_value),
-      "variable_name" = "overlap_records"
-    ) |>
-    visOmopResults::uniteStrata(cols = "time_interval") |>
-    dplyr::mutate(
-      "result_id" = as.integer(1),
-      "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)),
-      "group_name"  = "omop_table",
-      "group_level" = name,
-      "variable_level" = gsub(" to.*","",.data$strata_level),
-      "estimate_name" = "count",
-      "estimate_type" = "integer",
-      "additional_name" = "overall",
-      "additional_level" = "overall"
-    )
-
-  result <- result |>
-    rbind(result) |>
-    dplyr::group_by(.data$strata_level) |>
-    dplyr::mutate(estimate_type = dplyr::if_else(
-      dplyr::row_number() == 2, "percentage", .data$estimate_type
-    )) |>
-    dplyr::mutate(estimate_value = dplyr::if_else(
-      .data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value
-    )) |>
-    dplyr::mutate(estimate_name = dplyr::if_else(
-      .data$estimate_type == "percentage", "percentage", .data$estimate_name)) |>
-    omopgenerics::newSummarisedResult(settings = dplyr::tibble(
-      "result_id" = 1L,
-      "result_type" = "summarised_observation_period",
-      "package_name" = "OmopSketch",
-      "package_version" = as.character(utils::packageVersion("OmopSketch")),
-      "unit" = .env$unit,
-      "unitInterval" = .env$unitInterval
-    ))
-
-  omopgenerics::dropTable(cdm = cdm, name = "interval")
-  return(result)
+#
+#   # Check input ----
+#   assertClass(observationPeriod, "omop_table")
+#
+#   x <- omopgenerics::tableName(observationPeriod)
+#   if (x != "observation_period") {
+#     cli::cli_abort(
+#       "Table name ({x}) is not observation_period, please provide a valid
+#       observation_period table"
+#     )
+#   }
+#
+#   if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){
+#     cli::cli_warn("observation_period table is empty. Returning an empty summarised result.")
+#     return(omopgenerics::emptySummarisedResult())
+#   }
+#
+#   observationPeriod <- observationPeriod |>
+#     dplyr::ungroup()
+#
+#   if(missing(unit)){unit <- "year"}
+#   if(missing(unitInterval)){unitInterval <- 1}
+#
+#   checkUnit(unit)
+#   checkUnitInterval(unitInterval)
+#
+#   cdm <- omopgenerics::cdmReference(observationPeriod)
+#
+#   # Observation period ----
+#   name <- "observation_period"
+#   start_date_name <- startDate(name)
+#   end_date_name   <- endDate(name)
+#
+#   interval <- getIntervalTibble(observationPeriod, start_date_name, end_date_name, unit, unitInterval)
+#
+#   # Insert interval table to the cdm ----
+#   cdm <- cdm |>
+#     omopgenerics::insertTable(name = "interval", table = interval)
+#
+#   # Calculate denominator ----
+#   denominator <- cdm[["person"]] |>
+#     dplyr::ungroup() |>
+#     dplyr::select("person_id") |>
+#     dplyr::summarise("n" = dplyr::n()) |>
+#     dplyr::pull("n")
+#
+#   # Create summarised result ----
+#   result <- observationPeriod |>
+#     countRecords(cdm, start_date_name, end_date_name, unit)
+#
+#   result <- result |>
+#     dplyr::mutate(
+#       "estimate_value" = as.character(.data$estimate_value),
+#       "variable_name" = "overlap_records"
+#     ) |>
+#     visOmopResults::uniteStrata(cols = "time_interval") |>
+#     dplyr::mutate(
+#       "result_id" = as.integer(1),
+#       "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)),
+#       "group_name"  = "omop_table",
+#       "group_level" = name,
+#       "variable_level" = gsub(" to.*","",.data$strata_level),
+#       "estimate_name" = "count",
+#       "estimate_type" = "integer",
+#       "additional_name" = "overall",
+#       "additional_level" = "overall"
+#     )
+#
+#   result <- result |>
+#     rbind(result) |>
+#     dplyr::group_by(.data$strata_level) |>
+#     dplyr::mutate(estimate_type = dplyr::if_else(
+#       dplyr::row_number() == 2, "percentage", .data$estimate_type
+#     )) |>
+#     dplyr::mutate(estimate_value = dplyr::if_else(
+#       .data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value
+#     )) |>
+#     dplyr::mutate(estimate_name = dplyr::if_else(
+#       .data$estimate_type == "percentage", "percentage", .data$estimate_name)) |>
+#     omopgenerics::newSummarisedResult(settings = dplyr::tibble(
+#       "result_id" = 1L,
+#       "result_type" = "summarised_observation_period",
+#       "package_name" = "OmopSketch",
+#       "package_version" = as.character(utils::packageVersion("OmopSketch")),
+#       "unit" = .env$unit,
+#       "unitInterval" = .env$unitInterval
+#     ))
+#
+#   omopgenerics::dropTable(cdm = cdm, name = "interval")
+#   return(result)
 }
 
 countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, unit){

diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R
@@ -1,4 +1,4 @@
-#' Create a summarise result object to summarise record counts for different time intervals.
+#' Create a summarise result object to summarise record counts for different time intervals. Only records that fall within the observation period are counted.
 #'
 #' @param omopTable An omop table from a cdm object.
 #' @param unit Whether to stratify by "year" or by "month".
@@ -45,18 +45,19 @@ summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, age
   omopTable <- omopTable |>
     dplyr::select(dplyr::all_of(date), "person_id")
 
+  # Use add demographic query -> when both are true (age = FALSE)
   if(FALSE %in% c(names(ageGroup) == "overall")){
     omopTable <- omopTable |>
       PatientProfiles::addAgeQuery(indexDate = date, ageGroup = ageGroup, missingAgeGroupValue = "unknown") |>
-      dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |>
-      dplyr::select(-tidyselect::any_of(c("age")))
+      dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |> # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
+      dplyr::select(-dplyr::any_of(c("age")))
   }else{
     omopTable <- omopTable |> dplyr::mutate(age_group = "overall")
   }
 
   if(sex){
-    omopTable <- omopTable |> PatientProfiles::addSexQuery() |>
-      dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex))
+    omopTable <- omopTable |> PatientProfiles::addSexQuery(missingSexValue = "unknown") |>
+      dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex)) # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
   }else{
     omopTable <- omopTable |> dplyr::mutate(sex = "overall")
   }
@@ -108,7 +109,8 @@ filterInObservation <- function(x, indexDate){
     ) |>
     dplyr::filter(
       .data[[indexDate]] >= .data$start & .data[[indexDate]] <= .data$end
-    )
+    ) |>
+    dplyr::select(-c("start","end"))
 }
 
 getOmopTableStartDate <- function(omopTable, date){
@@ -132,7 +134,7 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u
   endDate   <- getOmopTableEndDate(omopTable, end_date_name)
 
   tibble::tibble(
-    "group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit)
+    "group" = seq.Date(as.Date(startDate), as.Date(endDate), "month")
   ) |>
     dplyr::rowwise() |>
     dplyr::mutate("interval" = max(which(
@@ -155,24 +157,28 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u
       "interval_group" = paste(.data$interval_start_date,"to",.data$interval_end_date)
     ) |>
     dplyr::ungroup() |>
-    dplyr::select("interval_start_date", "interval_end_date", "interval_group") |>
+    dplyr::mutate("my" = paste0(lubridate::month(.data$group),"-",lubridate::year(.data$group))) |>
+    dplyr::select("interval_group", "my") |>
     dplyr::distinct()
 }
 
 splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, strata){
-  result <- cdm$interval |>
-    dplyr::cross_join(
+
+  cdm$interval |>
+    dplyr::inner_join(
       omopTable |>
-        dplyr::rename("incidence_date" = dplyr::all_of(date))
+        dplyr::rename("incidence_date" = dplyr::all_of(.env$date)) |>
+        dplyr::mutate("my" = paste0(lubridate::month(.data$incidence_date),"-",lubridate::year(.data$incidence_date))) |>
+        dplyr::group_by(.data$age_group,.data$sex,.data$my) |>
+        dplyr::summarise(n = dplyr::n()) |>
+        dplyr::ungroup(),
+      by = "my"
     ) |>
-    dplyr::filter(.data$incidence_date >= .data$interval_start_date &
-                    .data$incidence_date <= .data$interval_end_date) |>
+    dplyr::select(-c("my")) |>
     dplyr::group_by(.data$interval_group, dplyr::across(dplyr::any_of(strata))) |>
-    dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |>
+    dplyr::summarise("estimate_value" = sum(.data$n, na.rm = TRUE), .groups = "drop") |>
     dplyr::collect() |>
-    dplyr::ungroup()
-
-  return(result)
+    dplyr::arrange(.data$interval_group)
 }
 
 createOverallGroup <- function(result, ageGroup, sex, strata){
@@ -187,19 +193,14 @@ createOverallGroup <- function(result, ageGroup, sex, strata){
           dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
           dplyr::mutate(age_group = "overall", sex = "overall")
       ) |>
-      dplyr::rename()
-
     # Create ageGroup = overall for each sex group
-    result <- result |>
       rbind(
         result |>
           dplyr::group_by(.data$interval_group, .data$sex) |>
           dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
           dplyr::mutate(age_group = "overall")
-      )
-
+      ) |>
     # Create sex group = overall for each ageGroup
-    result <- result |>
       rbind(
         result |>
           dplyr::group_by(.data$interval_group, .data$age_group) |>

diff --git a/man/summariseRecordCount.Rd b/man/summariseRecordCount.Rd