Skip to content

Commit

Permalink
curl::multi_download()
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Aug 29, 2024
1 parent 241ae44 commit 35c254d
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 42 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ Depends:
Imports:
arrow (>= 15.0.1),
checkmate,
curl (>= 5.0.0),
dplyr,
duckplyr,
httr (>= 1.4.1),
tools
Suggests:
covr,
Expand Down
13 changes: 9 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# censobr v0.3.29999 dev

* Major changes
* Some functions `read_population`, `read_mortality`, `read_families`, `read_emigration` now include a new parameter `merge_households` (logical) to indicate whether the function should merge household variables to the output data. Closes [#31](https://github.com/ipeaGIT/censobr/issues/31)
* {censobr} now imports the {duckplyr} package, which is used for merging household data in the issue #31.
* New vignette showing how to work with larger-than-memory data. Closes [#42](https://github.com/ipeaGIT/censobr/issues/42)
* Some functions (`read_population`, `read_mortality`, `read_families`, `read_emigration`) now include a new parameter `merge_households` (logical) to indicate whether the function should merge household variables to the output data. Closes [#31](https://github.com/ipeaGIT/censobr/issues/31)
* {censobr} now imports the {duckplyr} package, which is used for merging household data. Closes issue [#31](https://github.com/ipeaGIT/censobr/issues/31).
* New vignette showing how to work with larger-than-memory data. Closes [#42](https://github.com/ipeaGIT/censobr/issues/42). The vignette still needs to be expanded with more examples, though.

* Minor changes
* Removed dependency on the {httr} package
* Now using `curl::multi_download()` to download files in parallel. This brings the advantage that the package now automatically detects whether the data/documentation file has been updated and should be downloaded again.

* Changes to data sets and files included in this version:
* Population microdata for the year 2000 now include a few columns that were not included before. Closes [#44](https://github.com/ipeaGIT/censobr/issues/44)
* Included additional columns and fixed minor error in data dictionary of 2010 microdata. Closes [#45](https://github.com/ipeaGIT/censobr/issues/45)
* Included additional columns and fixed minor errors in data dictionary of 2010 microdata. Closes [#45](https://github.com/ipeaGIT/censobr/issues/45)



# censobr v0.3.2
Expand Down
47 changes: 26 additions & 21 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,44 @@ download_file <- function(file_url = parent.frame()$file_url,
# create local dir
if (isTRUE(cache) & !dir.exists(censobr_env$cache_dir)) { dir.create(censobr_env$cache_dir, recursive=TRUE) }

# location of local file
# path to local file
local_file <- paste0(censobr_env$cache_dir,"/",file_name)

# cache message
cache_message(local_file, cache)

# If not caching, remove local file to download it again
if (cache==FALSE & file.exists(local_file)) {
unlink(local_file, recursive = T)
# this is necessary to silence download message when reading local file
if(file.exists(local_file) & isTRUE(cache)){
showProgress <- FALSE
}

# has the file been downloaded already? If not, download it
if (cache==FALSE |
!file.exists(local_file) |
file.info(local_file)$size == 0) {

# download data
try(silent = TRUE,
httr::GET(url=file_url,
if(showProgress==TRUE){ httr::progress()},
httr::write_disk(local_file, overwrite = TRUE),
config = httr::config(ssl_verifypeer = FALSE))
# download files
try(silent = TRUE,
downloaded_files <- curl::multi_download(
urls = file_url,
destfiles = local_file,
progress = showProgress,
resume = cache
)
)
}

# if anything fails, return NULL (fail gracefully)
if (any(!downloaded_files$success | is.na(downloaded_files$success))) {
msg <- paste(
"File cached locally seems to be corrupted. Please download it again using 'cache = FALSE'.",
sprintf("Alternatively, you can remove the corrupted file with 'censobr::censobr_cache(delete_file = \"%s\")'", basename(local_file)),
sep = "\n")
message(msg)
return(invisible(NULL))
}

# Halt function if download failed (file must exist and be at least 5000 bytes)
if (!file.exists(local_file) | file.info(local_file)$size < 5000) {
message('Internet connection not working properly.')
return(invisible(NULL))
}

} else {
return(local_file)
}
return(local_file)
} # nocov end


Expand All @@ -73,7 +78,7 @@ arrow_open_dataset <- function(filename){
error = function(e){
msg <- paste(
"File cached locally seems to be corrupted. Please download it again using 'cache = FALSE'.",
sprintf("Alternatively, you can remove the corrupted file with 'censobr::censobr_cache(delete_file = \"%s\")'", filename),
sprintf("Alternatively, you can remove the corrupted file with 'censobr::censobr_cache(delete_file = \"%s\")'", basename(filename)),
sep = "\n"
)
stop(msg)
Expand All @@ -84,7 +89,7 @@ arrow_open_dataset <- function(filename){
#' Message when caching file
#'
#' @param local_file The address of a file passed from the download_file function.
#' @param cache Logical.
#' @param cache Logical. Whether the cached data should be used.

#' @return A message
#'
Expand Down
26 changes: 10 additions & 16 deletions tests/tests_rafa/test_rafa.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,20 @@
link <- 'https://ftp.ibge.gov.br/Trabalho_e_Rendimento/Pesquisa_Nacional_por_Amostra_de_Domicilios_continua/Trimestral/Microdados/2023/PNADC_032023.zip'
file <- basename(link)

#### cache tests

tic()
httr::GET(url = link,
# httr::timeout(10),
httr::progress(),
httr::write_disk(file, overwrite = T))
toc()
system.time(
df <- read_families(year = 2000,
showProgress = T,
cache = T)
)

censobr_cache(delete_file = '2000_families')

tic()
link |>
httr2::request() |>
httr2::req_progress() |>
httr2::req_perform(path = file)
toc()



dici cenus tract 1970
arquivo dos 80
############3
# dici census tract 1970
# arquivo dos 80


# devtools::install_github("ipeaGIT/r5r", subdir = "r-package", force=T)
Expand Down

0 comments on commit 35c254d

Please sign in to comment.