Skip to content

Commit

Permalink
implement per-session caching, closes #1
Browse files Browse the repository at this point in the history
  • Loading branch information
petrbouchal committed Jan 24, 2020
1 parent 98a3e9a commit c1dc4bd
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: czso
Title: Use Open Data from the Czech Statistical Office in R
Version: 0.1.1
Version: 0.1.2
Authors@R:
person(given = "Petr",
family = "Bouchal",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# czso 0.1.2

* added per-session caching to `get_catalogue()` and `get_table()`: downloaded files are reused from the session's temporary directory, and the new `force_redownload` parameter forces a fresh download

# czso 0.1.1

* fixed error when loading zipped files in `get_table()`
Expand Down
30 changes: 21 additions & 9 deletions R/core.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,22 @@ get_catalogue <- function(provider = "\\u010cesk\\u00fd statistick\\u00fd \\u00f
title_filter = NULL,
description_filter = NULL,
keyword_filter = NULL,
provider_filter = NULL)
provider_filter = NULL,
force_redownload = F)
{
if(!is.null(provider))
provider_uni <- stringi::stri_unescape_unicode(provider)
else provider_uni <- NULL
message("Reading full list of all datasets on data.gov.cz...")
dslist0 <- suppressWarnings(suppressMessages(vroom::vroom("https://data.gov.cz/soubor/datov%C3%A9-sady.csv",
td <- paste(tempdir(), "czso", sep = "/")
dir.create(td, showWarnings = F, recursive = T)
tf <- paste0(td, "/", "dataset_list.csv")
if(file.exists(tf) & !force_redownload) {
message(stringr::str_glue("File already in {td}, not downloading. Set `force_redownload` to TRUE if needed."))
} else {
utils::download.file("https://data.gov.cz/soubor/datov%C3%A9-sady.csv", tf, headers = c('User-Agent' = ua_header))
}
message("Reading full list of all datasets available on data.gov.cz...")
dslist0 <- suppressWarnings(suppressMessages(vroom::vroom(tf,
col_types = readr::cols(.default = "c")))) %>%
dplyr::rename_all(~stringi::stri_trans_general(., "latin-ascii")) %>%
dplyr::select(provider = poskytovatel,
Expand Down Expand Up @@ -123,16 +132,19 @@ get_resource_pointer <- function(dataset_id, resource_num = 1) {
#' \dontrun{
#' get_table("110080")
#' }
get_table <- function(dataset_id, resource_num = 1) {
get_table <- function(dataset_id, resource_num = 1, force_redownload = F) {
ptr <- get_resource_pointer(dataset_id)
url <- ptr$url
type <- ptr$format
ext <- tools::file_ext(url)
td <- paste0(tempdir(), "/czso/", dataset_id, "/")
dir.create(td, recursive = T, showWarnings = F)

dfile <- paste0(td, "ds_", dataset_id, ".", ext)
utils::download.file(url, destfile = dfile, headers = ua_header)
td <- paste(tempdir(), "czso", dataset_id, sep = "/")
dir.create(td, showWarnings = F, recursive = T)
dfile <- paste0(td, "/ds_", dataset_id, ".", ext)
if(file.exists(dfile) & !force_redownload) {
message(stringr::str_glue("File already in {td}, not downloading. Set `force_redownload` to TRUE if needed."))
} else {
utils::download.file(url, dfile, headers = c('User-Agent' = ua_header))
}

# print(dfile)

Expand Down
3 changes: 2 additions & 1 deletion man/get_catalogue.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c1dc4bd

Please sign in to comment.