Skip to content

Commit

Permalink
remove cross_joint()
Browse files Browse the repository at this point in the history
  • Loading branch information
martaalcalde committed Jul 24, 2024
1 parent 1c282c1 commit 5e28df8
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 217 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ Imports:
rlang,
tibble,
tidyr,
tidyselect,
visOmopResults
Depends:
R (>= 2.10)
Expand Down
180 changes: 90 additions & 90 deletions R/summariseObservationPeriod.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,96 +9,96 @@
#' @export
#'
summariseObservationPeriod <- function(observationPeriod, unit = "year", unitInterval = 1){

# Check input ----
assertClass(observationPeriod, "omop_table")

x <- omopgenerics::tableName(observationPeriod)
if (x != "observation_period") {
cli::cli_abort(
"Table name ({x}) is not observation_period, please provide a valid
observation_period table"
)
}

if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){
cli::cli_warn("observation_period table is empty. Returning an empty summarised result.")
return(omopgenerics::emptySummarisedResult())
}

observationPeriod <- observationPeriod |>
dplyr::ungroup()

if(missing(unit)){unit <- "year"}
if(missing(unitInterval)){unitInterval <- 1}

checkUnit(unit)
checkUnitInterval(unitInterval)

cdm <- omopgenerics::cdmReference(observationPeriod)

# Observation period ----
name <- "observation_period"
start_date_name <- startDate(name)
end_date_name <- endDate(name)

interval <- getIntervalTibble(observationPeriod, start_date_name, end_date_name, unit, unitInterval)

# Insert interval table to the cdm ----
cdm <- cdm |>
omopgenerics::insertTable(name = "interval", table = interval)

# Calculate denominator ----
denominator <- cdm[["person"]] |>
dplyr::ungroup() |>
dplyr::select("person_id") |>
dplyr::summarise("n" = dplyr::n()) |>
dplyr::pull("n")

# Create summarised result ----
result <- observationPeriod |>
countRecords(cdm, start_date_name, end_date_name, unit)

result <- result |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"variable_name" = "overlap_records"
) |>
visOmopResults::uniteStrata(cols = "time_interval") |>
dplyr::mutate(
"result_id" = as.integer(1),
"cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)),
"group_name" = "omop_table",
"group_level" = name,
"variable_level" = gsub(" to.*","",.data$strata_level),
"estimate_name" = "count",
"estimate_type" = "integer",
"additional_name" = "overall",
"additional_level" = "overall"
)

result <- result |>
rbind(result) |>
dplyr::group_by(.data$strata_level) |>
dplyr::mutate(estimate_type = dplyr::if_else(
dplyr::row_number() == 2, "percentage", .data$estimate_type
)) |>
dplyr::mutate(estimate_value = dplyr::if_else(
.data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value
)) |>
dplyr::mutate(estimate_name = dplyr::if_else(
.data$estimate_type == "percentage", "percentage", .data$estimate_name)) |>
omopgenerics::newSummarisedResult(settings = dplyr::tibble(
"result_id" = 1L,
"result_type" = "summarised_observation_period",
"package_name" = "OmopSketch",
"package_version" = as.character(utils::packageVersion("OmopSketch")),
"unit" = .env$unit,
"unitInterval" = .env$unitInterval
))

omopgenerics::dropTable(cdm = cdm, name = "interval")
return(result)
#
# # Check input ----
# assertClass(observationPeriod, "omop_table")
#
# x <- omopgenerics::tableName(observationPeriod)
# if (x != "observation_period") {
# cli::cli_abort(
# "Table name ({x}) is not observation_period, please provide a valid
# observation_period table"
# )
# }
#
# if(observationPeriod |> dplyr::tally() |> dplyr::pull("n") == 0){
# cli::cli_warn("observation_period table is empty. Returning an empty summarised result.")
# return(omopgenerics::emptySummarisedResult())
# }
#
# observationPeriod <- observationPeriod |>
# dplyr::ungroup()
#
# if(missing(unit)){unit <- "year"}
# if(missing(unitInterval)){unitInterval <- 1}
#
# checkUnit(unit)
# checkUnitInterval(unitInterval)
#
# cdm <- omopgenerics::cdmReference(observationPeriod)
#
# # Observation period ----
# name <- "observation_period"
# start_date_name <- startDate(name)
# end_date_name <- endDate(name)
#
# interval <- getIntervalTibble(observationPeriod, start_date_name, end_date_name, unit, unitInterval)
#
# # Insert interval table to the cdm ----
# cdm <- cdm |>
# omopgenerics::insertTable(name = "interval", table = interval)
#
# # Calculate denominator ----
# denominator <- cdm[["person"]] |>
# dplyr::ungroup() |>
# dplyr::select("person_id") |>
# dplyr::summarise("n" = dplyr::n()) |>
# dplyr::pull("n")
#
# # Create summarised result ----
# result <- observationPeriod |>
# countRecords(cdm, start_date_name, end_date_name, unit)
#
# result <- result |>
# dplyr::mutate(
# "estimate_value" = as.character(.data$estimate_value),
# "variable_name" = "overlap_records"
# ) |>
# visOmopResults::uniteStrata(cols = "time_interval") |>
# dplyr::mutate(
# "result_id" = as.integer(1),
# "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)),
# "group_name" = "omop_table",
# "group_level" = name,
# "variable_level" = gsub(" to.*","",.data$strata_level),
# "estimate_name" = "count",
# "estimate_type" = "integer",
# "additional_name" = "overall",
# "additional_level" = "overall"
# )
#
# result <- result |>
# rbind(result) |>
# dplyr::group_by(.data$strata_level) |>
# dplyr::mutate(estimate_type = dplyr::if_else(
# dplyr::row_number() == 2, "percentage", .data$estimate_type
# )) |>
# dplyr::mutate(estimate_value = dplyr::if_else(
# .data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value
# )) |>
# dplyr::mutate(estimate_name = dplyr::if_else(
# .data$estimate_type == "percentage", "percentage", .data$estimate_name)) |>
# omopgenerics::newSummarisedResult(settings = dplyr::tibble(
# "result_id" = 1L,
# "result_type" = "summarised_observation_period",
# "package_name" = "OmopSketch",
# "package_version" = as.character(utils::packageVersion("OmopSketch")),
# "unit" = .env$unit,
# "unitInterval" = .env$unitInterval
# ))
#
# omopgenerics::dropTable(cdm = cdm, name = "interval")
# return(result)
}

countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, unit){
Expand Down
47 changes: 24 additions & 23 deletions R/summariseRecordCount.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Create a summarise result object to summarise record counts for different time intervals.
#' Create a summarise result object to summarise record counts for different time intervals. Only records that fall within the observation period are counted.
#'
#' @param omopTable An omop table from a cdm object.
#' @param unit Whether to stratify by "year" or by "month".
Expand Down Expand Up @@ -45,18 +45,19 @@ summariseRecordCount <- function(omopTable, unit = "year", unitInterval = 1, age
omopTable <- omopTable |>
dplyr::select(dplyr::all_of(date), "person_id")

# Use add demographic query -> when both are true (age = FALSE)
if(FALSE %in% c(names(ageGroup) == "overall")){
omopTable <- omopTable |>
PatientProfiles::addAgeQuery(indexDate = date, ageGroup = ageGroup, missingAgeGroupValue = "unknown") |>
dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |>
dplyr::select(-tidyselect::any_of(c("age")))
dplyr::mutate(age_group = dplyr::if_else(is.na(.data$age_group), "unknown", .data$age_group)) |> # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
dplyr::select(-dplyr::any_of(c("age")))
}else{
omopTable <- omopTable |> dplyr::mutate(age_group = "overall")
}

if(sex){
omopTable <- omopTable |> PatientProfiles::addSexQuery() |>
dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex))
omopTable <- omopTable |> PatientProfiles::addSexQuery(missingSexValue = "unknown") |>
dplyr::mutate(sex = dplyr::if_else(is.na(.data$sex), "unknown", .data$sex)) # To remove: https://github.com/darwin-eu-dev/PatientProfiles/issues/677
}else{
omopTable <- omopTable |> dplyr::mutate(sex = "overall")
}
Expand Down Expand Up @@ -108,7 +109,8 @@ filterInObservation <- function(x, indexDate){
) |>
dplyr::filter(
.data[[indexDate]] >= .data$start & .data[[indexDate]] <= .data$end
)
) |>
dplyr::select(-c("start","end"))
}

getOmopTableStartDate <- function(omopTable, date){
Expand All @@ -132,7 +134,7 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u
endDate <- getOmopTableEndDate(omopTable, end_date_name)

tibble::tibble(
"group" = seq.Date(as.Date(startDate), as.Date(endDate), .env$unit)
"group" = seq.Date(as.Date(startDate), as.Date(endDate), "month")
) |>
dplyr::rowwise() |>
dplyr::mutate("interval" = max(which(
Expand All @@ -155,24 +157,28 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, u
"interval_group" = paste(.data$interval_start_date,"to",.data$interval_end_date)
) |>
dplyr::ungroup() |>
dplyr::select("interval_start_date", "interval_end_date", "interval_group") |>
dplyr::mutate("my" = paste0(lubridate::month(.data$group),"-",lubridate::year(.data$group))) |>
dplyr::select("interval_group", "my") |>
dplyr::distinct()
}

splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, strata){
result <- cdm$interval |>
dplyr::cross_join(

cdm$interval |>
dplyr::inner_join(
omopTable |>
dplyr::rename("incidence_date" = dplyr::all_of(date))
dplyr::rename("incidence_date" = dplyr::all_of(.env$date)) |>
dplyr::mutate("my" = paste0(lubridate::month(.data$incidence_date),"-",lubridate::year(.data$incidence_date))) |>
dplyr::group_by(.data$age_group,.data$sex,.data$my) |>
dplyr::summarise(n = dplyr::n()) |>
dplyr::ungroup(),
by = "my"
) |>
dplyr::filter(.data$incidence_date >= .data$interval_start_date &
.data$incidence_date <= .data$interval_end_date) |>
dplyr::select(-c("my")) |>
dplyr::group_by(.data$interval_group, dplyr::across(dplyr::any_of(strata))) |>
dplyr::summarise("estimate_value" = dplyr::n(), .groups = "drop") |>
dplyr::summarise("estimate_value" = sum(.data$n, na.rm = TRUE), .groups = "drop") |>
dplyr::collect() |>
dplyr::ungroup()

return(result)
dplyr::arrange(.data$interval_group)
}

createOverallGroup <- function(result, ageGroup, sex, strata){
Expand All @@ -187,19 +193,14 @@ createOverallGroup <- function(result, ageGroup, sex, strata){
dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(age_group = "overall", sex = "overall")
) |>
dplyr::rename()

# Create ageGroup = overall for each sex group
result <- result |>
rbind(
result |>
dplyr::group_by(.data$interval_group, .data$sex) |>
dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(age_group = "overall")
)

) |>
# Create sex group = overall for each ageGroup
result <- result |>
rbind(
result |>
dplyr::group_by(.data$interval_group, .data$age_group) |>
Expand Down
4 changes: 2 additions & 2 deletions man/summariseRecordCount.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5e28df8

Please sign in to comment.