Skip to content

Commit 1944396

Browse files
Add maximum date to dataset and extra dataset functions (#54)
- Maximum date is 2023-12-31 (ccodwg/CovidTimelineCanada#117)
1 parent d1b52c6 commit 1944396

File tree

2 files changed

+33
-3
lines changed

2 files changed

+33
-3
lines changed

R/extra_datasets.R

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,24 @@ extra_datasets <- function() {
88
# announce start
99
cat("Assembling extra datasets...", fill = TRUE)
1010

11+
# define maximum date for dataset
12+
dataset_max_date <- as.Date("2023-12-31")
13+
14+
# function: filter for maximum date
# Keeps only the rows of `d` whose date is on or before `dataset_max_date`.
#   d:        data frame (or tibble) to filter
#   date_col: name, as a string, of the column holding the date to test
#             (defaults to "date"; callers pass "date_end" for weekly data)
# Returns `d` with `date_col` converted by as.Date() and later rows removed.
# `dataset_max_date` is not an argument; it is looked up from the enclosing
# scope, so it must be defined before this helper is called.
15+
filter_max_date <- function(d, date_col = "date") {
16+
# ensure date col is correct type
# (coerce to Date so the comparison below is Date vs. Date; the converted
# column is also what gets returned)
17+
d[[date_col]] <- as.Date(d[[date_col]])
18+
# filter for maximum date
# (inclusive upper bound; dplyr::filter() keeps only rows where the condition
# is TRUE, so rows whose date is NA are dropped as well)
19+
dplyr::filter(d, .data[[date_col]] <= dataset_max_date)
20+
}
21+
1122
# PHAC wastewater dataset
1223
tryCatch(
1324
{
1425
# load data
1526
d <- read_d("raw_data/active_ts/can/can_wastewater_copies_per_ml_subhr_ts.csv", val_numeric = TRUE)
27+
# max date
28+
d <- filter_max_date(d)
1629
# write dataset
1730
utils::write.csv(d, file.path("extra_data", "phac_wastewater", "phac_wastewater.csv"), row.names = FALSE, quote = 1:7, na = "")
1831
},
@@ -40,6 +53,9 @@ extra_datasets <- function() {
4053
cases_weekly = round(.data$percent_positivity_weekly * .data$tests_completed_weekly / 100),
4154
.data$update
4255
)
56+
# max date
57+
ter <- filter_max_date(ter, "date_end")
58+
# write dataset
4359
utils::write.csv(ter, file.path("extra_data", "territories_rvdss_since_2022-09-03", "territories_rvdss_since_2022-09-03.csv"), row.names = FALSE, quote = 1:3, na = "")
4460
},
4561
error = function(e) {
@@ -51,13 +67,14 @@ extra_datasets <- function() {
5167
# sk biweekly HR-level case snapshots
5268
tryCatch(
5369
{
54-
## process
70+
# process
5571
sk <- read_d("raw_data/reports/sk/sk_crisp_report.csv") |>
5672
dplyr::transmute(.data$date_start, .data$date_end, .data$region, .data$sub_region_1, cases_weekly = .data$cases) |>
5773
dplyr::filter(.data$date_start >= as.Date("2022-12-25") & !is.na(.data$sub_region_1) & !is.na(.data$cases_weekly)) |>
5874
convert_hr_names()
59-
60-
## write file
75+
# max date
76+
sk <- filter_max_date(sk, "date_end")
77+
# write file
6178
utils::write.csv(sk, file.path("extra_data", "sk_biweekly_cases_hr", "sk_biweekly_cases_hr.csv"), row.names = FALSE, quote = 1:4)
6279
rm(sk) # clean up
6380
},
@@ -299,6 +316,8 @@ extra_datasets <- function() {
299316
tidyr::pivot_wider(names_from = .data$characteristics, values_from = .data$value) |>
300317
# sort by region (CAN first) and date
301318
dplyr::arrange(dplyr::if_else(.data$region == "CAN", 0, 1), .data$region, .data$date)
319+
# max date
320+
statcan <- filter_max_date(statcan)
302321
# write dataset
303322
utils::write.csv(statcan, file.path("extra_data", "statcan_excess_mortality", "statcan_excess_mortality.csv"), row.names = FALSE, quote = 1:2)
304323
# write new release date
@@ -314,10 +333,14 @@ extra_datasets <- function() {
314333
## hosp/ICU extra data report
315334
tryCatch(
316335
{
336+
# process
317337
d <- googlesheets4::read_sheet(
318338
ss = "1ZTUb3fVzi6CLZAbU3lj6T6FTzl5Aq-arBNL49ru3VLo",
319339
sheet = "hospital_icu_extra",
320340
)
341+
# max date
342+
d <- filter_max_date(d)
343+
# write dataset
321344
utils::write.csv(
322345
d,
323346
file.path("extra_data", "hospital_icu_extra", "hospital_icu_extra.csv"),
@@ -345,6 +368,9 @@ extra_datasets <- function() {
345368
.data$hosp_admissions,
346369
.data$icu_admissions
347370
)
371+
# max date
372+
d <- filter_max_date(d, "date_end")
373+
# write dataset
348374
utils::write.csv(
349375
d,
350376
file.path("extra_data", "ns_extra_respiratory_watch", "ns_extra_respiratory_watch.csv"),

R/write_funs.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ write_dataset <- function(d, geo, name, ext = "csv") {
201201
match.arg(ext, c("csv", "json"), several.ok = FALSE)
202202
# construct path
203203
out_path <- file.path("data", geo, paste(name, ext, sep = "."))
204+
# set dataset max date (2023-12-31) for CSV datasets
205+
if (ext == "csv") {
206+
d <- dplyr::filter(d, .data$date <= as.Date("2023-12-31"))
207+
}
204208
# check maximum date: stop with error if max date is greater than current date
205209
if (ext == "csv") {
206210
date_max <- max(d$date)

0 commit comments

Comments
 (0)