Skip to content

Commit

Permalink
Update MB report script (#95)
Browse files Browse the repository at this point in the history
- Update MB report script to reflect new "season" and removal of health region data table
- Health region data must now be manually estimated from figure 3 of the report
  • Loading branch information
jeanpaulrsoucy committed Aug 14, 2023
1 parent 666c483 commit 87f4ef0
Showing 1 changed file with 15 additions and 22 deletions.
37 changes: 15 additions & 22 deletions utils/report_mb.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ ds <- rvest::read_html(url)
tb_cases <- ds %>% rvest::html_elements(".tgN") %>% {.[grep("tb_cases", .)]} %>% rvest::html_table() %>% dplyr::nth(1)
tb_severity <- ds %>% rvest::html_elements(".tgN") %>% {.[grep("tb_severity", .)]} %>% rvest::html_table() %>% dplyr::nth(1)
tb_testing <- ds %>% rvest::html_elements(".tgN") %>% {.[grep("tb_testing", .)]} %>% rvest::html_table() %>% dplyr::nth(1)
tb_hr <- ds %>% rvest::html_table() %>% {.[grep("Health Region|Helath region", .)]} %>% dplyr::nth(1)

# function: extract number from specific substring in specific row of an HTML table
extract_from_tab <- function(tab, row_n, string, parse_num = TRUE) {
Expand All @@ -54,28 +53,26 @@ mb <- dplyr::bind_rows(
sub_region_1 = NA,
cases = NA, # calculated by formula
cases_weekly = extract_from_tab(tb_cases, 2, "(?<=Cases this week: )\\d*", parse_num = FALSE),
`cumulative_cases_since_2022-07-03` = extract_from_tab(tb_cases, 2, "(?<=Total cases: )\\d*", parse_num = FALSE),
`cumulative_cases_since_2022-07-03_weekly_diff` = NA, # calculated by formula
`cumulative_cases_since_2022-07-03` = NA,
`cumulative_cases_since_2022-07-03_weekly_diff` = NA,
`cumulative_cases_since_2023-07-02` = extract_from_tab(tb_cases, 2, "(?<=Total cases: )\\d*", parse_num = FALSE),
`cumulative_cases_since_2023-07-02_weekly_diff` = NA, # calculated by formula
deaths_weekly = extract_from_tab(tb_severity, 2, "(?<=Severe outcomes this week.{0,1000}Deaths: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_deaths_since_2022-07-03` = extract_from_tab(tb_severity, 2, "(?<=Total severe outcomes.{0,1000}Deaths: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_hospitalizations_since_2022-07-03` = extract_from_tab(tb_severity, 2, "(?<=Total severe outcomes.{0,1000}Hospital admissions: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_deaths_since_2022-07-03` = NA,
`cumulative_deaths_since_2023-07-02` = extract_from_tab(tb_severity, 2, "(?<=Total severe outcomes.{0,1000}Deaths: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_hospitalizations_since_2022-07-03` = NA,
`cumulative_hospitalizations_since_2023-07-02` = extract_from_tab(tb_severity, 2, "(?<=Total severe outcomes.{0,1000}Hospital admissions: ).{1,5}(?!\r)", parse_num = FALSE),
new_hospitalizations = extract_from_tab(tb_severity, 2, "(?<=Severe outcomes this week.{0,1000}Hospital admissions: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_icu_since_2022-07-03` = extract_from_tab(tb_severity, 2, "(?<=Total severe outcomes.{0,1000}ICU admissions: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_icu_since_2022-07-03` = NA,
`cumulative_icu_since_2023-07-02` = extract_from_tab(tb_severity, 2, "(?<=Total severe outcomes.{0,1000}ICU admissions: ).{1,5}(?!\r)", parse_num = FALSE),
new_icu = extract_from_tab(tb_severity, 2, "(?<=Severe outcomes this week.{0,1000}ICU admissions[\\d]{0,1}: ).{1,5}(?!\r)", parse_num = FALSE),
`cumulative_tests_completed_2022-07-03` = extract_from_tab(tb_testing, 2, "Specimens tested: \\d*"),
`cumulative_tests_completed_2022-07-03` = NA,
`cumulative_tests_completed_2023-07-02` = extract_from_tab(tb_testing, 2, "Specimens tested: \\d*"),
new_tests_completed_daily = extract_from_tab(tb_testing, 2, "Average daily specimens: \\d*"),
positivity_rate_weekly = extract_from_tab(tb_testing, 2, "Weekly positivity rate: \\d*\\.\\d*(?=%)"),
`cumulative_people_tested_2022-07-03` = extract_from_tab(tb_testing, 2, "Tested people: \\d*")
),
dplyr::tibble(
date = date_local,
source = url,
date_start = date_start,
date_end = date_end,
region = "MB",
sub_region_1 = tb_hr %>% dplyr::pull(1),
`cumulative_cases_since_2022-07-03` = tb_hr %>% dplyr::pull("Total cases") %>% as.character(),
cases_weekly = tb_hr %>% dplyr::pull("Cases this week") %>% as.character()
`cumulative_people_tested_2022-07-03` = NA,
`cumulative_people_tested_2023-07-02` = extract_from_tab(tb_testing, 2, "Tested people: \\d*"),
notes = NA
)
)

Expand All @@ -87,9 +84,5 @@ for (i in 1:ncol(mb)) {
}
}

# check column sums
mb[["cumulative_cases_since_2022-07-03"]][1] == sum(mb[["cumulative_cases_since_2022-07-03"]][2:6])
mb[["cases_weekly"]][1] == sum(mb[["cases_weekly"]][2:6])

# append data
googlesheets4::sheet_append(data = mb, ss = "1ZTUb3fVzi6CLZAbU3lj6T6FTzl5Aq-arBNL49ru3VLo", sheet = "mb_weekly_report_2")

0 comments on commit 87f4ef0

Please sign in to comment.