diff --git a/.github/workflows/create_baseline.yaml b/.github/workflows/create_baseline.yaml index 5005310..93eb21e 100644 --- a/.github/workflows/create_baseline.yaml +++ b/.github/workflows/create_baseline.yaml @@ -29,7 +29,9 @@ jobs: - name: generate baseline run: | - Rscript src/code/get_baseline.r + TODAY=$(date +'%Y-%m-%d') + REF_DATE=$(date -d "$TODAY + 3 days" +'%Y-%m-%d') + Rscript src/code/get_baseline.r --reference-date "$REF_DATE" - name: Commit changes uses: EndBug/add-and-commit@v9 diff --git a/.github/workflows/create_ensemble.yaml b/.github/workflows/create_ensemble.yaml index 02ef1d7..81569a0 100644 --- a/.github/workflows/create_ensemble.yaml +++ b/.github/workflows/create_ensemble.yaml @@ -29,7 +29,10 @@ jobs: shell: Rscript {0} - name: generate ensemble - run: Rscript src/code/get_ensemble.r + run: | + TODAY=$(date +'%Y-%m-%d') + REF_DATE=$(date -d "$TODAY + 2 days" +'%Y-%m-%d') + Rscript src/code/get_ensemble.r --reference-date "$REF_DATE" - name: Commit changes uses: EndBug/add-and-commit@v9 diff --git a/src/code/get_baseline.r b/src/code/get_baseline.r index 6bfb36d..7390864 100644 --- a/src/code/get_baseline.r +++ b/src/code/get_baseline.r @@ -1,23 +1,37 @@ library(epipredict) -#' Return `date` if it has the desired weekday, else the next date that does -#' @param date `Date` vector -#' @param ltwday integerish vector; of weekday code(s), following POSIXlt -#' encoding but allowing either 0 or 7 to represent Sunday. -#' @return `Date` object -curr_or_next_date_with_ltwday <- function(date, ltwday) { - checkmate::assert_class(date, "Date") - checkmate::assert_integerish(ltwday, lower = 0L, upper = 7L) - date + (ltwday - as.POSIXlt(date)$wday) %% 7L +parser <- argparser::arg_parser( + "Create a flat baseline model for covid-19 hospital admissions" +) +parser <- argparser::add_argument( + parser, "--reference-date", + help = "reference date in YYYY-MM-DD format" +) + +args <- argparser::parse_args(parser) +reference_date <- as.Date(args$reference_date) + +dow_supplied <- lubridate::wday(reference_date, + week_start = 7, + label = FALSE +) +if (dow_supplied != 7) { + cli::cli_abort(message = paste0( + "Expected `reference_date` to be a Saturday, day number 7 ", + "of the week, given the `week_start` value of Sunday. ", + "Got {reference_date}, which is day number ", + "{dow_supplied} of the week." + )) } -# Prepare data, use tentative file-name/location, might need to be changed +desired_max_time_value <- reference_date - 7L + target_tbl <- readr::read_csv( "target-data/covid-hospital-admissions.csv", col_types = readr::cols_only( date = readr::col_date(format = ""), location = readr::col_character(), - location_name = readr::col_character(), + state = readr::col_character(), value = readr::col_double() ) ) @@ -26,17 +40,12 @@ loc_df <- read.csv("target-data/locations.csv") target_epi_df <- target_tbl |> dplyr::transmute( - geo_value = loc_df$abbreviation[match(location_name, loc_df$location_name)], + geo_value = state, time_value = .data$date, weekly_count = .data$value ) |> epiprocess::as_epi_df() -# date settings -forecast_as_of_date <- Sys.Date() -reference_date <- curr_or_next_date_with_ltwday(forecast_as_of_date, 6L) -desired_max_time_value <- reference_date - 7L - # Validation: excess_latency_tbl <- target_epi_df |> tidyr::drop_na(weekly_count) |> diff --git a/src/code/get_ensemble.r b/src/code/get_ensemble.r index 4479d83..73057bb 100644 --- a/src/code/get_ensemble.r +++ b/src/code/get_ensemble.r @@ -1,6 +1,29 @@ # R script to create ensemble forecats using models submitted to the CovidHub -ref_date <- lubridate::ceiling_date(Sys.Date(), "week") - lubridate::days(1) +parser <- argparser::arg_parser( + "Create a hub ensemble model for covid-19 hospital admissions" +) +parser <- argparser::add_argument( + parser, "--reference-date", + help = "reference date in YYYY-MM-DD format" +) + +args <- argparser::parse_args(parser) +reference_date <- as.Date(args$reference_date) + +dow_supplied <- lubridate::wday(reference_date, + week_start = 7, + label = FALSE +) +if (dow_supplied != 7) { + cli::cli_abort(message = paste0( + "Expected `reference_date` to be a Saturday, day number 7 ", + "of the week, given the `week_start` value of Sunday. ", + "Got {reference_date}, which is day number ", + "{dow_supplied} of the week." + )) +} + hub_path <- "." task_id_cols <- c( "reference_date", "location", "horizon", @@ -15,7 +38,7 @@ if (!dir.exists(output_dirpath)) { hub_content <- hubData::connect_hub(hub_path) current_forecasts <- hub_content |> dplyr::filter( - reference_date == ref_date, + reference_date == reference_date, !str_detect(model_id, "CovidHub") ) |> hubData::collect_hub() @@ -41,9 +64,10 @@ eligible_models <- purrr::map(yml_files, is_model_designated) |> write.csv( eligible_models, - file.path( - output_dirpath, - paste0(as.character(ref_date), "-", "models-to-include-in-ensemble.csv") + file.path(output_dirpath, + paste0( + as.character(reference_date), "-", "models-to-include-in-ensemble.csv" + ) ), row.names = FALSE ) @@ -74,7 +98,7 @@ write.csv( median_ensemble_outputs, file.path( output_dirpath, - paste0(as.character(ref_date), "-", "CovidHub-ensemble.csv") + paste0(as.character(reference_date), "-", "CovidHub-ensemble.csv") ), row.names = FALSE )