Commit 9955982

Merge branch 'main' into example_files_team5

LucieContamin committed Apr 30, 2024
2 parents b488cb4 + e607b57
Showing 6 changed files with 337 additions and 5 deletions.
25 changes: 25 additions & 0 deletions .github/requirements.txt
@@ -0,0 +1,25 @@
# Packages required for the validation (requirements cache updated on 2024-04-24)
https://github.com/midas-network/SMHvalidation/archive/refs/heads/main.zip # v0.1.0
https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip
arrow
covidcast
cowplot
data.table
dplyr
ggplot2
ggpubr
gh
glue
grDevices
grid
gridExtra
gtable
lubridate
purrr
readr
remotes
scales
stringr
tibble
tidyr
tidyselect
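
A minimal sketch (not part of the commit) of installing these dependencies by hand in an R session. The package names and the two GitHub archive URLs are taken from the file above; it assumes every listed CRAN package is still available from the configured repositories (grDevices and grid ship with base R and need no installation).

# Hypothetical manual install mirroring .github/requirements.txt
cran_pkgs <- c("arrow", "covidcast", "cowplot", "data.table", "dplyr",
               "ggplot2", "ggpubr", "gh", "glue", "gridExtra", "gtable",
               "lubridate", "purrr", "readr", "remotes", "scales",
               "stringr", "tibble", "tidyr", "tidyselect")
install.packages(cran_pkgs)
# Development versions of the two validation packages, from the URLs listed above
remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/main.zip")
remotes::install_url("https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip")
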
39 changes: 39 additions & 0 deletions .github/workflows/install_cache_dependencies.yml
@@ -0,0 +1,39 @@
name: Cache install

on:
  workflow_dispatch:

jobs:
  cache-R-library:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 1
          sparse-checkout: |
            .github

      - name: Setup R
        uses: r-lib/actions/setup-r@v2

      - name: Install system dependencies
        run: sudo apt-get install libcurl4-openssl-dev libudunits2-dev libgdal-dev

      - name: Cache R packages
        if: runner.os != 'Windows'
        uses: actions/cache@v2
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-${{ hashFiles('.github/requirements.txt') }}-1

      - name: Install dependencies
        run: |
          R -e 'Sys.setenv("NOT_CRAN" = TRUE)
          install.packages(c("arrow", "gh", "remotes"))
          Sys.unsetenv("NOT_CRAN")
          remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/main.zip")
          remotes::install_url("https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip")'
        env:
          GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
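
As a hedged illustration (not part of the workflow above), a short R check that a restored package cache actually contains the key dependencies; the package names come from requirements.txt and the install step above, everything else is an assumption.

# Hypothetical sanity check after the cache is restored into R_LIBS_USER
needed <- c("SMHvalidation", "hubUtils", "arrow", "gh", "remotes")
missing <- setdiff(needed, rownames(installed.packages()))
if (length(missing) > 0) {
  stop("Missing cached packages: ", paste(missing, collapse = ", "))
}
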
73 changes: 73 additions & 0 deletions .github/workflows/validation.yml
@@ -0,0 +1,73 @@
name: Validate and Visualize Submission
on:
  pull_request_target:
    types: [synchronize, opened, reopened]
    branches:
      - master
    paths:
      - 'model-output/**.parquet'

jobs:
  validates-files:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 1
          sparse-checkout: |
            .github
            code
            hub-config
            auxiliary-data

      - name: Setup R
        uses: r-lib/actions/setup-r@v2

      - name: Install system dependencies
        run: sudo apt-get install libcurl4-openssl-dev libudunits2-dev libgdal-dev

      - name: Cache R packages
        id: cache
        uses: actions/cache@v2
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-${{ hashFiles('.github/requirements.txt') }}-1

      - name: Install dependencies
        if: ( steps.cache.outputs.cache-hit != 'true' )
        run: |
          R -e 'Sys.setenv("NOT_CRAN" = TRUE)
          install.packages(c("arrow", "gh", "remotes"))
          Sys.unsetenv("NOT_CRAN")
          remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/main.zip")
          remotes::install_url("https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip")'
        env:
          GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

      - name: Run Validation - Open/reopen
        if: ( github.event.action != 'synchronize' )
        run: |
          Rscript code/validation.R
        env:
          GH_PR_NUMBER: ${{ github.event.pull_request.number }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Run Validation - Synchronize
        if: ( github.event.action == 'synchronize' )
        run: |
          Rscript code/validation.R
        env:
          GH_PR_NUMBER: ${{ github.event.pull_request.number }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GH_COMMIT_SHA: ${{ github.event.pull_request.head.sha }}

      - name: Upload Projection Plot as Artifact
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: validation_plot_${{ github.event.pull_request.number }}
          path: ./proj_plot/*
          if-no-files-found: ignore
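
A minimal sketch (assumed, not part of the commit) of running the validation script locally with the same environment variables the workflow sets; the PR number and token values are placeholders.

# Hypothetical local run mimicking the workflow environment
Sys.setenv(GH_PR_NUMBER = "123",                  # placeholder PR number
           GH_TOKEN = "<personal access token>")  # placeholder token
# GH_COMMIT_SHA is only set for 'synchronize' events; it is left unset here
source("code/validation.R")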

195 changes: 195 additions & 0 deletions code/validation.R
@@ -0,0 +1,195 @@
library(SMHvalidation)
library(gh)
library(dplyr)

# Check if the validation needs to run
if (nchar(Sys.getenv("GH_COMMIT_SHA")) > 1) {
test <- gh::gh(paste0("GET /repos/",
"midas-network/covid19-smh-research/commits/",
Sys.getenv("GH_COMMIT_SHA")))
check <- grepl("model-output/", unique(unlist(purrr::map(test$files,
"filename"))))
} else {
check <- TRUE
}

if (isFALSE(all(check))) {
test_tot <- NA
print("no update in model-output folder")
} else {
# Prerequisite
pop_path <- "auxiliary-data/location_census/locations.csv"
js_def_file <- "hub-config/tasks.json"
lst_gs <- NULL

  # check for submission files
pr_files <- gh::gh(paste0("GET /repos/",
"midas-network/covid19-smh-research/pulls/",
Sys.getenv("GH_PR_NUMBER"), "/files"))

pr_files_name <- purrr::map(pr_files, "filename")
pr_files_name <- pr_files_name[!"removed" == purrr::map(pr_files, "status")]
pr_sub_files <-
stringr::str_extract(pr_files_name,
"model-output/.+/\\d{4}-\\d{2}-\\d{2}(-.*)?")
pr_sub_files <- unique(na.omit(pr_sub_files))
pr_sub_files <- grep("(A|a)bstract", pr_sub_files, value = TRUE,
invert = TRUE)
round_id <- unique(stringr::str_extract(pr_sub_files,
"\\d{4}-\\d{2}-\\d{2}"))
config_json <- jsonlite::read_json(js_def_file)
rounds_ids <- unique(hubUtils::get_round_ids(config_json))
sel_round <- grepl(paste(round_id, collapse = "|"), rounds_ids)
  if (!any(sel_round)) {
stop("The round id in the submission file was not recognized, please ",
"verify")
}
  if (is.null(unlist(purrr::map(config_json$rounds[sel_round], "partition")))) {
    partition <- NULL
  } else {
    partition <- unlist(purrr::map(config_json$rounds[sel_round], "partition"))
  }
# Run validation on file corresponding to the submission file format
if (length(pr_sub_files) > 0) {
if (!(dir.exists(paste0(getwd(), "/proj_plot"))))
dir.create(paste0(getwd(), "/proj_plot"))
sub_file_date <- unique(stringr::str_extract(basename(pr_sub_files),
"\\d{4}-\\d{2}-\\d{2}"))
if (is.null(partition)) {
team_name <- unique(basename(dirname(pr_sub_files)))
group_files <- paste0(sub_file_date, "-", team_name)
} else {
group_files <- sub_file_date
file_paths <- stringr::str_extract(pr_sub_files,
"(?<=model-output/)(.+\\/)?")
team_name <- unique(unlist(purrr::map(strsplit(file_paths, "/"),1)))
}
test_tot <- lapply(group_files, function(y) {
# select submission files
pr_sub_files_group <- grep(y, pr_sub_files, value = TRUE)
pr_sub_files_lst <- pr_files[grepl(paste(pr_sub_files_group, collapse = "|"),
purrr::map(pr_files, "filename"))]
pr_sub_files_lst <-
pr_sub_files_lst[!grepl("(A|a)bstract",
purrr::map(pr_sub_files_lst, "filename"))]
# run validation on all files
test_tot <- lapply(seq_len(length(pr_sub_files_lst)), function(x) {
# submission file download
if (is.null(partition)) {
url_link <- URLdecode(pr_sub_files_lst[[x]]$raw_url)
download.file(url_link, basename(url_link))
} else {
file_part <- paste0(getwd(), "/part_sub/",
pr_sub_files_lst[[x]]$filename)
if (!(dir.exists(dirname(file_part))))
dir.create(dirname(file_part), recursive = TRUE)
url_link <- pr_sub_files_lst[[x]]$raw_url
download.file(url_link, file_part)
}
})
gc()
# run validation
if (sub_file_date > "2024-01-01") {
merge_col <- TRUE
n_decimal <- 1
} else {
merge_col <- FALSE
n_decimal <- NULL
}
if (is.null(partition)) {
val_path <- basename(pr_sub_files_group)
round_id <- NULL
} else {
val_path <- paste0(getwd(), "/part_sub/model-output/", team_name, "/")
round_id <- sub_file_date
}
arg_list <- list(path = val_path, js_def = js_def_file, lst_gs = lst_gs,
pop_path = pop_path, merge_sample_col = merge_col,
partition = partition, round_id = round_id,
n_decimal = n_decimal)
test <- capture.output(try(do.call(SMHvalidation::validate_submission,
arg_list)))
gc()
if (length(grep("Run validation on fil", test, invert = TRUE)) == 0) {
test <- try(do.call(SMHvalidation::validate_submission, arg_list))
test <- test[1]
gc()
}
# Visualization
df <- try({
arrow::open_dataset(val_path, partitioning = partition) %>%
dplyr::filter(output_type == "quantile") %>%
dplyr::collect()
})
gc()
# print(head(df))
if (all(class(df) != "try-error") && nrow(df) > 0) {
test_viz <- try(generate_validation_plots(
path_proj = val_path, lst_gs = NULL,
save_path = paste0(getwd(), "/proj_plot"), y_sqrt = FALSE,
plot_quantiles = c(0.025, 0.975), partition = partition))
} else {
test_viz <- NA
}
gc()
      if (inherits(test_viz, "try-error"))
        file.remove(dir(paste0(getwd(), "/proj_plot"), full.names = TRUE))
# list of the viz and validation results
test_tot <- list(valid = test, viz = test_viz)
# returns all output
return(test_tot)
})
} else {
test_tot <-
list(list(valid = paste0("No projection submission file in the standard ",
"SMH file format found in the Pull-Request. No ",
"validation was run.")))
}
}

if (!all(is.na(test_tot))) {
# Post validation results as comment on the open PR
test_valid <- purrr::map(test_tot, "valid")
message <- purrr::map(test_valid, paste, collapse = "\n")

lapply(seq_len(length(message)), function(x) {
gh::gh(paste0("POST /repos/", "midas-network/covid19-smh-research/",
"issues/", Sys.getenv("GH_PR_NUMBER"), "/comments"),
body = message[[x]],
.token = Sys.getenv("GH_TOKEN"))
})

# Post visualization results as comment on the open PR
test_viz <- purrr::map(test_tot, "viz")
if (any(!is.na(test_viz))) {
    message_plot <- paste0(
      "If the submission contains projection file(s) with quantile ",
      "projections, a PDF with visualization plots of the submission is ",
      "available for download from the GitHub Actions run. Please click on ",
      "'Details' to the right of the 'Validate submission' check. The PDF is ",
      "attached as a ZIP-file artifact of the GitHub Actions run. For more ",
      "information, please see ",
      "[here](https://docs.github.com/en/actions/managing-workflow-runs/downloading-workflow-artifacts).")

if (any(unlist(purrr::map(test_viz, class)) == "try-error")) {
      message_plot <- capture.output(
        cat(message_plot, "\n\n\U000274c Error: ",
            "The visualization encountered an issue and might not be",
            " available. If the validation does not return any error, please",
            " feel free to tag `@LucieContamin` with any question."))
}

gh::gh(paste0("POST /repos/", "midas-network/covid19-smh-research/",
"issues/", Sys.getenv("GH_PR_NUMBER"),"/comments"),
body = message_plot,
.token = Sys.getenv("GH_TOKEN"))
}


  # Pass or fail the GitHub Actions run
  if (any(grepl("(\U000274c )?Error", test_valid))) {
    stop("The submission contains one or more issues")
  } else if (any(grepl("Warning", test_valid))) {
    warning("The submission is accepted but contains some warnings")
  }
}
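
For reference, a hedged sketch of calling the validation function directly on a single submission file, mirroring the arguments assembled in arg_list above; the submission file name is a placeholder and the option values shown are only one possible combination.

# Hypothetical standalone call using the same arguments as arg_list above
SMHvalidation::validate_submission(
  path = "2024-04-28-team-model.parquet",   # placeholder submission file
  js_def = "hub-config/tasks.json",
  lst_gs = NULL,
  pop_path = "auxiliary-data/location_census/locations.csv",
  merge_sample_col = TRUE,
  partition = NULL,
  round_id = NULL,
  n_decimal = 1)
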
4 changes: 2 additions & 2 deletions hub-config/tasks.json
@@ -321,8 +321,8 @@
}
],
"submissions_due": {
"start": "2023-03-26",
"end": "2023-06-26"
"start": "2024-05-15",
"end": "2024-06-26"
}
}
]
6 changes: 3 additions & 3 deletions model-output/README.md
@@ -330,10 +330,10 @@ Teams should provide the following 23 quantiles:

For example:

-|origin_date|scenario_id|location|target|horizon|age_group|output_type|output_type_id|run_grouping|stochastic_run|value|
+|origin_date|scenario_id|location|target|horizon|race_ethnicity|output_type|output_type_id|run_grouping|stochastic_run|value|
 |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
-|2023-11-12|A-2023-10-27|US|inc hosp|1|0-0.99|quantile|0.010|NA|NA||
-|2023-11-12|A-2023-10-27|US|inc hosp|1|0-0.99|quantile|0.025|NA|NA||
+|2020-10-15|A-2020-05-01|37|inc case|1|asian|quantile|0.010|NA|NA||
+|2020-10-15|A-2020-05-01|37|inc case|1|asian|quantile|0.025|NA|NA||
 ||||||||||||
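
A hedged R sketch (not part of the commit) of building a skeleton quantile submission table with the columns shown in the example above; the two quantile levels are only a placeholder subset of the 23 required quantiles and the values are left empty.

# Hypothetical skeleton matching the column layout above
library(tibble)
quantile_levels <- c(0.010, 0.025)   # placeholder subset of the 23 quantiles
submission <- tibble(
  origin_date = "2020-10-15",
  scenario_id = "A-2020-05-01",
  location = "37",
  target = "inc case",
  horizon = 1,
  race_ethnicity = "asian",
  output_type = "quantile",
  output_type_id = quantile_levels,
  run_grouping = NA,
  stochastic_run = NA,
  value = NA_real_)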

