diff --git a/DESCRIPTION b/DESCRIPTION index 81957bf..7516c3f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,9 +31,9 @@ Depends: Imports: arrow (>= 15.0.1), checkmate, + curl (>= 5.0.0), dplyr, duckplyr, - httr (>= 1.4.1), tools Suggests: covr, diff --git a/NEWS.md b/NEWS.md index dd74a11..4df9db7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,18 @@ # censobr v0.3.29999 dev * Major changes - * Some functions `read_population`, `read_mortality`, `read_families`, `read_emigration` now include a new parameter `merge_households` (logical) to indicate whether the function should merge household variables to the output data. Closes [#31](https://github.com/ipeaGIT/censobr/issues/31) - * {censobr} now imports the {duckplyr} package, which is used for merging hosuehold data in the issue #31. - * New vignette showing how to work with larger-than-memory data. Closes [#42](https://github.com/ipeaGIT/censobr/issues/42) + * Some functions (`read_population`, `read_mortality`, `read_families`, `read_emigration`) now include a new parameter `merge_households` (logical) to indicate whether the function should merge household variables to the output data. Closes [#31](https://github.com/ipeaGIT/censobr/issues/31) + * {censobr} now imports the {duckplyr} package, which is used for merging household data. Closes issue [#31](https://github.com/ipeaGIT/censobr/issues/31). + * New vignette showing how to work with larger-than-memory data. Closes [#42](https://github.com/ipeaGIT/censobr/issues/42). The vignette still needs to be expanded with more examples, though. + +* Minor changes + * Removed dependency on the {httr} package + * Now using `curl::multi_download()` to download files in parallel. This brings the advantage that the package now automatically detects whether the data/documentation file has been updated and should be downloaded again. 
* Changes to data sets and files included in this version: * Population microdata for the year 2000 now include a few columns that were not included before. Closes [#44](https://github.com/ipeaGIT/censobr/issues/44) - * Included additional columns and fixed minor error in data dictionary of 2010 microdata. Closes [#45](https://github.com/ipeaGIT/censobr/issues/45) + * Included additional columns and fixed minor errors in data dictionary of 2010 microdata. Closes [#45](https://github.com/ipeaGIT/censobr/issues/45) + # censobr v0.3.2 diff --git a/R/utils.R b/R/utils.R index 7cb7db0..3904a22 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,39 +21,44 @@ download_file <- function(file_url = parent.frame()$file_url, # create local dir if (isTRUE(cache) & !dir.exists(censobr_env$cache_dir)) { dir.create(censobr_env$cache_dir, recursive=TRUE) } - # location of local file + # path to local file local_file <- paste0(censobr_env$cache_dir,"/",file_name) # cache message cache_message(local_file, cache) - # If not caching, remove local file to download it again - if (cache==FALSE & file.exists(local_file)) { - unlink(local_file, recursive = T) + # this is necessary to silence download message when reading local file + if(file.exists(local_file) & isTRUE(cache)){ + showProgress <- FALSE } - # has the file been downloaded already? 
If not, download it - if (cache==FALSE | - !file.exists(local_file) | - file.info(local_file)$size == 0) { - - # download data - try(silent = TRUE, - httr::GET(url=file_url, - if(showProgress==TRUE){ httr::progress()}, - httr::write_disk(local_file, overwrite = TRUE), - config = httr::config(ssl_verifypeer = FALSE)) + # download files + try(silent = TRUE, + downloaded_files <- curl::multi_download( + urls = file_url, + destfiles = local_file, + progress = showProgress, + resume = cache + ) ) - } + + # if anything fails, return NULL (fail gracefully) + if (any(!downloaded_files$success | is.na(downloaded_files$success))) { + msg <- paste( + "File cached locally seems to be corrupted. Please download it again using 'cache = FALSE'.", + sprintf("Alternatively, you can remove the corrupted file with 'censobr::censobr_cache(delete_file = \"%s\")'", basename(local_file)), + sep = "\n") + message(msg) + return(invisible(NULL)) + } # Halt function if download failed (file must exist and be larger than 200 kb) if (!file.exists(local_file) | file.info(local_file)$size < 5000) { message('Internet connection not working properly.') return(invisible(NULL)) + } - } else { - return(local_file) - } + return(local_file) } # nocov end @@ -73,7 +78,7 @@ arrow_open_dataset <- function(filename){ error = function(e){ msg <- paste( "File cached locally seems to be corrupted. Please download it again using 'cache = FALSE'.", - sprintf("Alternatively, you can remove the corrupted file with 'censobr::censobr_cache(delete_file = \"%s\")'", filename), + sprintf("Alternatively, you can remove the corrupted file with 'censobr::censobr_cache(delete_file = \"%s\")'", basename(filename)), sep = "\n" ) stop(msg) @@ -84,7 +89,7 @@ arrow_open_dataset <- function(filename){ #' Message when caching file #' #' @param local_file The address of a file passed from the download_file function. -#' @param cache Logical. +#' @param cache Logical. Whether the cached data should be used. 
#' @return A message #' diff --git a/tests/tests_rafa/test_rafa.R b/tests/tests_rafa/test_rafa.R index 599bf2f..9200e3b 100644 --- a/tests/tests_rafa/test_rafa.R +++ b/tests/tests_rafa/test_rafa.R @@ -1,26 +1,20 @@ -link <- 'https://ftp.ibge.gov.br/Trabalho_e_Rendimento/Pesquisa_Nacional_por_Amostra_de_Domicilios_continua/Trimestral/Microdados/2023/PNADC_032023.zip' -file <- basename(link) +#### cache tests -tic() -httr::GET(url = link, - # httr::timeout(10), - httr::progress(), - httr::write_disk(file, overwrite = T)) -toc() +system.time( + df <- read_families(year = 2000, + showProgress = T, + cache = T) +) +censobr_cache(delete_file = '2000_families') -tic() -link |> - httr2::request() |> - httr2::req_progress() |> - httr2::req_perform(path = file) -toc() -dici cenus tract 1970 -arquivo dos 80 +############3 +# dici cenus tract 1970 +# arquivo dos 80 # devtools::install_github("ipeaGIT/r5r", subdir = "r-package", force=T)