Skip to content

Commit

Permalink
add reference date as command line arg (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
sbidari authored Nov 19, 2024
1 parent ad74064 commit 9118e76
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 25 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/create_baseline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ jobs:

- name: generate baseline
run: |
Rscript src/code/get_baseline.r
TODAY=$(date +'%Y-%m-%d')
REF_DATE=$(date -d "$TODAY + 3 days" +'%Y-%m-%d')
Rscript src/code/get_baseline.r --reference-date "$REF_DATE"
- name: Commit changes
uses: EndBug/add-and-commit@v9
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/create_ensemble.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ jobs:
shell: Rscript {0}

- name: generate ensemble
run: Rscript src/code/get_ensemble.r
run: |
TODAY=$(date +'%Y-%m-%d')
REF_DATE=$(date -d "$TODAY + 2 days" +'%Y-%m-%d')
Rscript src/code/get_ensemble.r --reference-date "$REF_DATE"
- name: Commit changes
uses: EndBug/add-and-commit@v9
Expand Down
43 changes: 26 additions & 17 deletions src/code/get_baseline.r
Original file line number Diff line number Diff line change
@@ -1,23 +1,37 @@
library(epipredict)

#' Return `date` if it has the desired weekday, else the next date that does
#' @param date `Date` vector
#' @param ltwday integerish vector; of weekday code(s), following POSIXlt
#' encoding but allowing either 0 or 7 to represent Sunday.
#' @return `Date` object
curr_or_next_date_with_ltwday <- function(date, ltwday) {
checkmate::assert_class(date, "Date")
checkmate::assert_integerish(ltwday, lower = 0L, upper = 7L)
date + (ltwday - as.POSIXlt(date)$wday) %% 7L
parser <- argparser::arg_parser(
"Create a flat baseline model for covid-19 hospital admissions"
)
parser <- argparser::add_argument(
parser, "--reference-date",
help = "reference date in YYYY-MM-DD format"
)

args <- argparser::parse_args(parser)
reference_date <- as.Date(args$reference_date)

dow_supplied <- lubridate::wday(reference_date,
week_start = 7,
label = FALSE
)
if (dow_supplied != 7) {
cli::cli_abort(message = paste0(
"Expected `reference_date` to be a Saturday, day number 7 ",
"of the week, given the `week_start` value of Sunday. ",
"Got {reference_date}, which is day number ",
"{dow_supplied} of the week."
))
}

# Prepare data, use tentative file-name/location, might need to be changed
desired_max_time_value <- reference_date - 7L

target_tbl <- readr::read_csv(
"target-data/covid-hospital-admissions.csv",
col_types = readr::cols_only(
date = readr::col_date(format = ""),
location = readr::col_character(),
location_name = readr::col_character(),
state = readr::col_character(),
value = readr::col_double()
)
)
Expand All @@ -26,17 +40,12 @@ loc_df <- read.csv("target-data/locations.csv")

target_epi_df <- target_tbl |>
dplyr::transmute(
geo_value = loc_df$abbreviation[match(location_name, loc_df$location_name)],
geo_value = state,
time_value = .data$date,
weekly_count = .data$value
) |>
epiprocess::as_epi_df()

# date settings
forecast_as_of_date <- Sys.Date()
reference_date <- curr_or_next_date_with_ltwday(forecast_as_of_date, 6L)
desired_max_time_value <- reference_date - 7L

# Validation:
excess_latency_tbl <- target_epi_df |>
tidyr::drop_na(weekly_count) |>
Expand Down
36 changes: 30 additions & 6 deletions src/code/get_ensemble.r
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
# R script to create ensemble forecats using models submitted to the CovidHub

ref_date <- lubridate::ceiling_date(Sys.Date(), "week") - lubridate::days(1)
parser <- argparser::arg_parser(
"Create a hub ensemble model for covid-19 hospital admissions"
)
parser <- argparser::add_argument(
parser, "--reference-date",
help = "reference date in YYYY-MM-DD format"
)

args <- argparser::parse_args(parser)
reference_date <- as.Date(args$reference_date)

dow_supplied <- lubridate::wday(reference_date,
week_start = 7,
label = FALSE
)
if (dow_supplied != 7) {
cli::cli_abort(message = paste0(
"Expected `reference_date` to be a Saturday, day number 7 ",
"of the week, given the `week_start` value of Sunday. ",
"Got {reference_date}, which is day number ",
"{dow_supplied} of the week."
))
}

hub_path <- "."
task_id_cols <- c(
"reference_date", "location", "horizon",
Expand All @@ -15,7 +38,7 @@ if (!dir.exists(output_dirpath)) {
hub_content <- hubData::connect_hub(hub_path)
current_forecasts <- hub_content |>
dplyr::filter(
reference_date == ref_date,
reference_date == reference_date,
!str_detect(model_id, "CovidHub")
) |>
hubData::collect_hub()
Expand All @@ -41,9 +64,10 @@ eligible_models <- purrr::map(yml_files, is_model_designated) |>

write.csv(
eligible_models,
file.path(
output_dirpath,
paste0(as.character(ref_date), "-", "models-to-include-in-ensemble.csv")
file.path(output_dirpath,
paste0(
as.character(reference_date), "-", "models-to-include-in-ensemble.csv"
)
),
row.names = FALSE
)
Expand Down Expand Up @@ -74,7 +98,7 @@ write.csv(
median_ensemble_outputs,
file.path(
output_dirpath,
paste0(as.character(ref_date), "-", "CovidHub-ensemble.csv")
paste0(as.character(reference_date), "-", "CovidHub-ensemble.csv")
),
row.names = FALSE
)

0 comments on commit 9118e76

Please sign in to comment.