From 5817449c254398e33322d939471058a7c7d34612 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 19 Feb 2025 17:54:20 -0500 Subject: [PATCH 1/9] Add the ERA5 download function. --- modules/data.atmosphere/NAMESPACE | 2 +- modules/data.atmosphere/R/ERA5_download.R | 142 ++++++++++++++++++ .../man/ERA5_cds_annual_download.Rd | 43 ++++++ 3 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 modules/data.atmosphere/R/ERA5_download.R create mode 100644 modules/data.atmosphere/man/ERA5_cds_annual_download.Rd diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE index e544337209..cc8d89a5c3 100644 --- a/modules/data.atmosphere/NAMESPACE +++ b/modules/data.atmosphere/NAMESPACE @@ -4,6 +4,7 @@ export(.download.raw.met.module) export(.extract.nc.module) export(.met2model.module) export(AirDens) +export(ERA5_cds_annual_download) export(ERA5_met_process) export(align.met) export(build_cf_variables_table_url) @@ -18,7 +19,6 @@ export(debias.met.regression) export(download.Ameriflux) export(download.AmerifluxLBL) export(download.CRUNCEP) -export(download.ERA5.old) export(download.FACE) export(download.Fluxnet2015) export(download.FluxnetLaThuile) diff --git a/modules/data.atmosphere/R/ERA5_download.R b/modules/data.atmosphere/R/ERA5_download.R new file mode 100644 index 0000000000..fc703f7a21 --- /dev/null +++ b/modules/data.atmosphere/R/ERA5_download.R @@ -0,0 +1,142 @@ +#' @description +#' This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API. +#' @title ERA5_cds_annual_download +#' +#' @param years Numeric: a series of years to be downloaded (e.g., 2012:2021). +#' @param months List: a list contains months to be downloaded (e.g., list("01", "02") to download files in Jan and Feb). +#' @param days List: a list contains days to be downloaded (e.g., list("01", "02") to download files in the first and second days). 
+#' @param times List: a list contains times to be downloaded (e.g., list('00:00','03:00') to download files for the times 12:00 and 3:00 am UTC). +#' @param area Character: a string contains the bounding box (formatted as "North/West/South/East") to be downloaded (e.g., "85/-179/7/-20"). +#' @param variables List: a list contains variables to be downloaded (e.g., list("2m_temperature","surface_pressure")). +#' @param outdir Character: physical path where the ERA5 data are stored. +#' @param auto.create.key Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE. +#' +#' @return A vector containing file paths to the downloaded files. +#' @export +#' +#' @examples +#' @author Dongchen Zhang +ERA5_cds_annual_download <- function(years, months, days, times, area, variables, outdir, auto.create.key = T) { + options(timeout=360000) + #load cdsapi from python environment. + tryCatch({ + cdsapi <- reticulate::import("cdsapi") + }, error = function(e) { + PEcAn.logger::logger.severe( + "Failed to load `cdsapi` Python library. ", + "Please make sure it is installed to a location accessible to `reticulate`.", + "You should be able to install it with the following command: ", + "`pip install --user cdsapi`.", + "The following error was thrown by `reticulate::import(\"cdsapi\")`: ", + conditionMessage(e) + ) + }) + #define function for building credential file. + #maybe as a helper function. 
+ getnetrc <- function (dl_dir) { + netrc <- file.path(dl_dir, ".cdsapirc") + if (file.exists(netrc) == FALSE || + any(grepl("https://cds.climate.copernicus.eu/api/v2", + readLines(netrc))) == FALSE) { + netrc_conn <- file(netrc) + writeLines(c( + sprintf( + "url: %s", + getPass::getPass(msg = "Enter URL from the following link \n (https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key):") + ), + sprintf( + "key: %s", + getPass::getPass(msg = "Enter KEY from the following link \n (https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key):") + ) + ), + netrc_conn) + close(netrc_conn) + message( + "A netrc file with your CDS Login credentials was stored in the output directory " + ) + } + return(netrc) + } + #check if the token exists for the cdsapi. + if (!file.exists(file.path(Sys.getenv("HOME"), ".cdsapirc")) & auto.create.key) { + getnetrc(Sys.getenv("HOME")) + } else if (!file.exists(file.path(Sys.getenv("HOME"), ".cdsapirc")) & !auto.create.key) { + PEcAn.logger::logger.severe( + "Please create a `${HOME}/.cdsapirc` file as described here:", + "https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key ." + ) + } + #grab the client object. + tryCatch({ + c <- cdsapi$Client() + }, error = function(e) { + PEcAn.logger::logger.severe( + "The following error was thrown by `cdsapi$Client()`: ", + conditionMessage(e) + ) + }) + # loop over years. + nc.paths <- c() + for (y in years) { + fname <- file.path(outdir, paste0("ERA5_", y, ".grib")) + # start retrieving data. 
+ # you need to have an account for downloaing the files + # Read the documantion for how to setup your account and settings before trying this + # https://confluence.ecmwf.int/display/CKB/How+to+download+ERA5#HowtodownloadERA5-3-DownloadERA5datathroughtheCDSAPI + c$retrieve( + 'reanalysis-era5-single-levels', + list( + 'product_type' = 'ensemble_members', + 'data_format' = 'grib', + "download_format" = "unarchived", + 'day' = days, + 'time' = times, + 'month' = months, + 'year' = as.character(y), + "area" = area, + 'variable' = variables + ), + fname + ) + # convert grib to nc file. + nc.path <- gsub(".grib", ".nc", fname, fixed = T) + cmd <- paste("grib_to_netcdf", fname, "-o", nc.path) + out <- system(cmd, intern = F, ignore.stdout = T, ignore.stderr = T) + # check if _0001 or _0005 exists in the nc variable names. + nc <- ncdf4::nc_open(nc.path, write = T) + var.names <- names(nc$var) + if (any(grepl("_000", var.names, fixed = T))) { + ind.use <- which(grepl("_0001", var.names, fixed = T)) + ind.aba <- which(grepl("_0005", var.names, fixed = T)) + # if it is only the case where only _0001 and _0005 are occurring. + if ((length(ind.use) + length(ind.aba)) == length(var.names) & + length(ind.use) == length(ind.aba)) { + # rename variable name with _0001 pattern to its origin name. + for (i in ind.use) { + nc <- ncdf4::ncvar_rename(nc, var.names[i], gsub(pattern = "_0001", replacement = "", x = var.names[i], fixed = T)) + } + # synchronize nc file. + ncdf4::nc_sync(nc) + ncdf4::nc_close(nc) + # delete variables end with _0005 to make the ERA5_preprocess function work. + # rename the nc file to the old file so we can generate a new nc file with the correct name. 
+ file.rename(nc.path, gsub(".nc", "_old.nc", nc.path, fixed = T)) + cmd <- paste("ncks -x -v @VARS@ @OLDNC@ @NEWNC@") + cmd <- gsub("@VARS@", paste(var.names[ind.aba], collapse = ","), cmd) + cmd <- gsub("@OLDNC@", gsub(".nc", "_old.nc", nc.path, fixed = T), cmd) + cmd <- gsub("@NEWNC@", nc.path, cmd) + out <- system(cmd, intern = F, ignore.stdout = T, ignore.stderr = T) + # delete the old nc file. + unlink(gsub(".nc", "_old.nc", nc.path, fixed = T)) + } else { + PEcAn.logger::logger.info("Unknown variable format. Please check the variable mannually!") + return(NA) + } + } + # store the path. + nc.paths <- c(nc.paths, nc.path) + # remove previous grib file. + unlink(fname) + } + return(nc.paths) +} \ No newline at end of file diff --git a/modules/data.atmosphere/man/ERA5_cds_annual_download.Rd b/modules/data.atmosphere/man/ERA5_cds_annual_download.Rd new file mode 100644 index 0000000000..5b6c97b6f1 --- /dev/null +++ b/modules/data.atmosphere/man/ERA5_cds_annual_download.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ERA5_download.R +\name{ERA5_cds_annual_download} +\alias{ERA5_cds_annual_download} +\title{ERA5_cds_annual_download} +\usage{ +ERA5_cds_annual_download( + years, + months, + days, + times, + area, + variables, + outdir, + auto.create.key = T +) +} +\arguments{ +\item{years}{Numeric: a series of years to be downloaded (e.g., 2012:2021).} + +\item{months}{List: a list contains months to be downloaded (e.g., list("01", "02") to download files in Jan and Feb).} + +\item{days}{List: a list contains days to be downloaded (e.g., list("01", "02") to download files in the first and second days).} + +\item{times}{List: a list contains times to be downloaded (e.g., list('00:00','03:00') to download files for the times 12:00 and 3:00 am UTC).} + +\item{area}{Character: a string contains the bounding box (formatted as "North/West/South/East") to be downloaded (e.g., "85/-179/7/-20").} + +\item{variables}{List: a 
list contains variables to be downloaded (e.g., list("2m_temperature","surface_pressure")).} + +\item{outdir}{Character: physical path where the ERA5 data are stored.} + +\item{auto.create.key}{Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE.} +} +\value{ +A vector containing file paths to the downloaded files. +} +\description{ +This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API. +} +\author{ +Dongchen Zhang +} From d85812ae64f1a14a005bc25500cb69a57bdf2e5d Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 19 Feb 2025 17:54:48 -0500 Subject: [PATCH 2/9] Remove the old ERA5 download function and tweak the script in the inst folder that does the download. --- modules/data.atmosphere/R/download.ERA5.R | 199 ------------------ .../inst/ERA5/ERA5_NA_download.R | 82 +++----- .../data.atmosphere/man/download.ERA5.old.Rd | 74 ------- 3 files changed, 33 insertions(+), 322 deletions(-) delete mode 100644 modules/data.atmosphere/R/download.ERA5.R delete mode 100644 modules/data.atmosphere/man/download.ERA5.old.Rd diff --git a/modules/data.atmosphere/R/download.ERA5.R b/modules/data.atmosphere/R/download.ERA5.R deleted file mode 100644 index 87c6e7a9d3..0000000000 --- a/modules/data.atmosphere/R/download.ERA5.R +++ /dev/null @@ -1,199 +0,0 @@ -#' Download ERA 5 data -#' -#' Link to [full data documentation](https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation). -#' -#' Under the hood, this function uses the Python `cdsapi` module, -#' which can be installed via `pip` (`pip install --user cdsapi`). The -#' module is accessed via the `reticulate` package. -#' -#' Using the CDS API requires you to create a free account at -#' https://cds.climate.copernicus.eu. 
Once you have done that, you -#' will need to configure the CDS API on your local machine by -#' creating a `${HOME}/.cdsapi` file, as described -#' [here](https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key). -#' -#' @param outfolder Directory where results should be written -#' @param product_types Character vector of product types, or `"all"`. -#' Must be one or more of: `"reanalysis"`, `"ensemble members"`, -#' `"ensemble mean"`, `"ensemble spread"` -#' @param reticulate_python Path to Python binary for `reticulate` -#' (passed to [reticulate::use_python()]). If `NULL` (default), use -#' the system default. -#' @param start_date,end_date Range of years to retrieve. Format is -#' `YYYY-MM-DD`. -#' @param lat.in,lon.in Site coordinates, decimal degrees (numeric) -#' @param overwrite Logical. If `FALSE` (default), skip any files with -#' the same target name (i.e. same variable) that already exist in -#' `outfolder`. If `TRUE`, silently overwrite existing files. -#' @param ... Currently unused. Allows soaking up additional arguments -#' to other methods. -#' @return Character vector of file names containing raw, downloaded -#' data (invisibly) -#' @author Alexey Shiklomanov -#' @md - # ^ tells Roxygen to interpret this fn's doc block as Markdown -#' @export -#' @examples -#' \dontrun{ -#' files <- download.ERA5( -#' "ERA5_output", -#' start_date = "2010-01-01", -#' end_date = "2010-02-01", -#' lat.in = 45.5594, -#' lon.in = -84.6738, -#' product_types = "all" -#' ) -#' } -download.ERA5.old <- function(outfolder, start_date, end_date, lat.in, lon.in, - product_types = "all", - overwrite = FALSE, - reticulate_python = NULL, - ...) { - PEcAn.logger::logger.warn( - "This function is an incomplete prototype! Use with caution!" 
- ) - - PEcAn.utils::need_packages("reticulate") - - if (!is.null(reticulate_python)) { - reticulate::use_python(reticulate_python) - } - - tryCatch({ - cdsapi <- reticulate::import("cdsapi") - }, error = function(e) { - PEcAn.logger::logger.severe( - "Failed to load `cdsapi` Python library. ", - "Please make sure it is installed to a location accessible to `reticulate`.", - "You should be able to install it with the following command: ", - "`pip install --user cdsapi`.", - "The following error was thrown by `reticulate::import(\"cdsapi\")`: ", - conditionMessage(e) - ) - }) - - - if (!file.exists(file.path(Sys.getenv("HOME"), ".cdsapirc"))) - PEcAn.logger::logger.severe( - "Please create a `${HOME}/.cdsapirc` file as described here:", - "https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key ." - ) - - - tryCatch({ - cclient <- cdsapi$Client() - }, error = function(e) { - PEcAn.logger::logger.severe( - "The following error was thrown by `cdsapi$Client()`: ", - conditionMessage(e) - ) - }) - - all_products <- c("reanalysis", "ensemble_members", - "ensemble mean", "ensemble_spread") - - if (product_types == "all") { - product_types <- all_products - } - - if (any(!product_types %in% all_products)) { - bad_products <- setdiff(product_types, all_products) - PEcAn.logger::logger.severe(sprintf( - "Invalid product types %s. 
Products must be one of the following: %s", - paste0("`", bad_products, "`", collapse = ", "), - paste0("`", all_products, "`", collapse = ", ") - )) - } - - # Full data documentation: - # https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation - variables <- tibble::tribble( - ~cf_name, ~units, ~api_name, ~ncdf_name, - "air_temperature", "Kelvin", "2m_temperature", "t2m", - "air_pressure", "Pa", "surface_pressure", NA_character_, - NA_character_, "Kelvin", "2m_dewpoint_temperature", NA_character_, - "precipitation_flux", "kg/m2/s", "total_precipitation", NA_character_, - "eastward_wind", "m/s", "10m_u_component_of_wind", NA_character_, - "northward_wind", "m/s", "10m_v_component_of_wind", NA_character_, - "surface_downwelling_shortwave_flux_in_air", "W/m2", "surface_solar_radiation_downwards", NA_character_, - "surface_downwelling_longwave_flux_in_air", "W/m2", "surface_thermal_radiation_downwards", NA_character_ - ) - nvar <- nrow(variables) - - # Spatial subset must be a bounding box (N, W, S, E). This sets the - # bounding box to a single point -- the closest coordinate at the - # 0.25 x 0.25 resolution of the product. - area <- rep(round(c(lat.in, lon.in) * 4) / 4, 2) - - files <- character() - dir.create(outfolder, showWarnings = FALSE) - - - # First, download all the files - for (i in seq_len(nvar)) { - var <- variables[["api_name"]][[i]] - PEcAn.logger::logger.debug(glue::glue( - "Downloading variable {i} of {nvar} ({var})." - )) - fname <- file.path(outfolder, paste("era5", var, "nc", sep = ".")) - if (file.exists(fname) && !overwrite) { - PEcAn.logger::logger.warn(glue::glue( - "File `{fname}` already exists, and `overwrite` is FALSE. ", - "Skipping to next variable." 
- )) - next - } - do_next <- tryCatch({ - cclient$retrieve( - "reanalysis-era5-single-levels", - list( - variable = var, - product_type = 'ensemble_members', - date = paste(start_date, end_date, sep = "/"), - time = "00/to/23/by/1", - area = area, - grid = c(0.25, 0.25), - format = "netcdf" - ), - fname - ) - FALSE - }, error = function(e) { - PEcAn.logger::logger.warn( - glue::glue( - "Failed to download variable `{var}`. ", - "Skipping to next variable. ", - "Error message was:\n", - conditionMessage(e) - ) - ) - TRUE - }) - - if (isTRUE(do_next)) next - files <- c(files, fname) - } - - # TODO: Return `data.frame`, like the other methods. - invisible(files) - - ## # Then, post-process each file - ## for (i in seq_len(files)) { - ## nc <- ncdf4::nc_open(files[i]) - ## shortname <- names(nc[["var"]]) - ## var_sub <- variables[variables[["ncdf_name"]] == shortname, ] - ## ncvar <- ncdf4::ncvar_def(var_sub[["cf_name"]], dim = ...) - ## # TODO: Check units, and perform conversions where necessary - ## # TODO: Convert to PEcAn standard format - ## } - - # TODO: Figure out how do download ensembles. - - # NOTE: Dew point temperature has to be converted to specific - # humidity. Can do something like the following: - # dewpoint <- ncdf4::ncvar_get(nc, "d2m") # Check units! - # airtemp <- ncdf4::ncvar_get(nc, "t2m") - # pressure <- ncdf4::ncvar_get(nc, "") # Make sure this is in Pa - # rh <- get.rh(airtemp, dewpoint) / 100 # Check units! 
- # qair <- rh2qair(rh, airtemp, pressure) # Pressure in Pa -} diff --git a/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R b/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R index 700cdf88f1..2bc076451a 100644 --- a/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R +++ b/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R @@ -2,52 +2,36 @@ library(reticulate) library(future) library(purrr) library(furrr) -setwd("/projectnb/dietzelab/dongchen/anchorSites/ERA5/") # change this to your own working directory -if (future::supportsMulticore()) { - future::plan(future::multicore) -} else { - future::plan(future::multisession) -} -options(timeout=360000) -c(2012:2021) %>% - future_map(function(year) { - - # you need to have an account for downloaing the files - # Read the documantion for how to setup your account and settings before trying this - # https://confluence.ecmwf.int/display/CKB/How+to+download+ERA5#HowtodownloadERA5-3-DownloadERA5datathroughtheCDSAPI - cdsapi <-import("cdsapi") - c <- cdsapi$Client() - - c$retrieve( - 'reanalysis-era5-single-levels', - list( - 'product_type' = 'ensemble_members', - 'format' = 'netcdf', - 'day' = list('01','02','03', - '04','05','06', - '07','08','09', - '10','11','12', - '13','14','15', - '16','17','18', - '19','20','21', - '22','23','24', - '25','26','27', - '28','29','30', - '31'), - 'time' = list('00:00','03:00','06:00', - '09:00','12:00','15:00', - '18:00','21:00'), - 'month' = list('01','02','03', - '04','05','06', - '07','08','09', - '10','11','12'), - 'year' = as.character(year), - "area" = "84/-179/14/-52", - 'variable' = list( "2m_temperature","surface_pressure", - "2m_dewpoint_temperature","total_precipitation", - "10m_u_component_of_wind","10m_v_component_of_wind", - "surface_solar_radiation_downwards","surface_thermal_radiation_downwards") - ), - paste0('ERA5_',year,'.nc') - ) - },.progress = T ) +outdir <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" +years <- 2012:2024 +months <- 
list('01','02','03', + '04','05','06', + '07','08','09', + '10','11','12') +days <- list('01','02','03', + '04','05','06', + '07','08','09', + '10','11','12', + '13','14','15', + '16','17','18', + '19','20','21', + '22','23','24', + '25','26','27', + '28','29','30', + '31') +times <- list('00:00','03:00','06:00', + '09:00','12:00','15:00', + '18:00','21:00') +area <- "85/-179/7/-20" +variables <- list( "2m_temperature","surface_pressure", + "2m_dewpoint_temperature","total_precipitation", + "10m_u_component_of_wind","10m_v_component_of_wind", + "surface_solar_radiation_downwards","surface_thermal_radiation_downwards") +paths <- PEcAn.data.atmosphere::ERA5_cds_annual_download(years = year, + months = months, + days = days, + times = times, + area = area, + variables = variables, + outdir = outdir, + auto.create.key = T) diff --git a/modules/data.atmosphere/man/download.ERA5.old.Rd b/modules/data.atmosphere/man/download.ERA5.old.Rd deleted file mode 100644 index 64199c95ae..0000000000 --- a/modules/data.atmosphere/man/download.ERA5.old.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/download.ERA5.R -\name{download.ERA5.old} -\alias{download.ERA5.old} -\title{Download ERA 5 data} -\usage{ -download.ERA5.old( - outfolder, - start_date, - end_date, - lat.in, - lon.in, - product_types = "all", - overwrite = FALSE, - reticulate_python = NULL, - ... -) -} -\arguments{ -\item{outfolder}{Directory where results should be written} - -\item{start_date, end_date}{Range of years to retrieve. Format is -\code{YYYY-MM-DD}.} - -\item{lat.in, lon.in}{Site coordinates, decimal degrees (numeric)} - -\item{product_types}{Character vector of product types, or \code{"all"}. -Must be one or more of: \code{"reanalysis"}, \code{"ensemble members"}, -\code{"ensemble mean"}, \code{"ensemble spread"}} - -\item{overwrite}{Logical. If \code{FALSE} (default), skip any files with -the same target name (i.e. 
same variable) that already exist in -\code{outfolder}. If \code{TRUE}, silently overwrite existing files.} - -\item{reticulate_python}{Path to Python binary for \code{reticulate} -(passed to \code{\link[reticulate:use_python]{reticulate::use_python()}}). If \code{NULL} (default), use -the system default.} - -\item{...}{Currently unused. Allows soaking up additional arguments -to other methods.} -} -\value{ -Character vector of file names containing raw, downloaded -data (invisibly) -} -\description{ -Link to \href{https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation}{full data documentation}. -} -\details{ -Under the hood, this function uses the Python \code{cdsapi} module, -which can be installed via \code{pip} (\verb{pip install --user cdsapi}). The -module is accessed via the \code{reticulate} package. - -Using the CDS API requires you to create a free account at -https://cds.climate.copernicus.eu. Once you have done that, you -will need to configure the CDS API on your local machine by -creating a \verb{$\{HOME\}/.cdsapi} file, as described -\href{https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key}{here}. -} -\examples{ -\dontrun{ -files <- download.ERA5( - "ERA5_output", - start_date = "2010-01-01", - end_date = "2010-02-01", - lat.in = 45.5594, - lon.in = -84.6738, - product_types = "all" -) -} -} -\author{ -Alexey Shiklomanov -} From 50a5cd8750537ed23b48f44a2f79baa4ecf43bbd Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 19 Feb 2025 20:21:42 -0500 Subject: [PATCH 3/9] Update dependency. 
--- docker/depends/pecan_package_dependencies.csv | 1 + modules/data.atmosphere/DESCRIPTION | 1 + 2 files changed, 2 insertions(+) diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv index c049d1f710..499fc088b4 100644 --- a/docker/depends/pecan_package_dependencies.csv +++ b/docker/depends/pecan_package_dependencies.csv @@ -79,6 +79,7 @@ "GEDI4R","*","modules/data.remote","Suggests",FALSE "geonames","> 0.998","modules/data.atmosphere","Imports",FALSE "getPass","*","base/remote","Suggests",FALSE +"getPass","*","modules/data.atmosphere","Suggests",FALSE "getPass","*","modules/data.land","Suggests",FALSE "getPass","*","modules/data.remote","Suggests",FALSE "ggmcmc","*","modules/meta.analysis","Suggests",FALSE diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION index ebfc9bbc63..9c8f2bb366 100644 --- a/modules/data.atmosphere/DESCRIPTION +++ b/modules/data.atmosphere/DESCRIPTION @@ -63,6 +63,7 @@ Suggests: foreach, furrr, future, + getPass, knitr, mockery, parallel, From 2cd16bf5096a49fb8ce5fab323642a99dc169e8a Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 19 Feb 2025 20:22:37 -0500 Subject: [PATCH 4/9] Update change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6547b2df78..9cdceaccda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha - Added new feature of downloading datasets from the NASA DAAC ORNL database. - Extended downscale function and created 'downscale_hrly' so that it handles more frequent data - Added 'aggregate' as a new feature for downscaled data +- Added ERA5 download function that applies to the new CDS API. ### Fixed From ed5c34428dd4972747eb83bcfb0078b4ba429f36 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 11:52:23 -0500 Subject: [PATCH 5/9] rename function. 
--- .../man/ERA5_cds_annual_download.Rd | 43 ------------------- 1 file changed, 43 deletions(-) delete mode 100644 modules/data.atmosphere/man/ERA5_cds_annual_download.Rd diff --git a/modules/data.atmosphere/man/ERA5_cds_annual_download.Rd b/modules/data.atmosphere/man/ERA5_cds_annual_download.Rd deleted file mode 100644 index 5b6c97b6f1..0000000000 --- a/modules/data.atmosphere/man/ERA5_cds_annual_download.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ERA5_download.R -\name{ERA5_cds_annual_download} -\alias{ERA5_cds_annual_download} -\title{ERA5_cds_annual_download} -\usage{ -ERA5_cds_annual_download( - years, - months, - days, - times, - area, - variables, - outdir, - auto.create.key = T -) -} -\arguments{ -\item{years}{Numeric: a series of years to be downloaded (e.g., 2012:2021).} - -\item{months}{List: a list contains months to be downloaded (e.g., list("01", "02") to download files in Jan and Feb).} - -\item{days}{List: a list contains days to be downloaded (e.g., list("01", "02") to download files in the first and second days).} - -\item{times}{List: a list contains times to be downloaded (e.g., list('00:00','03:00') to download files for the times 12:00 and 3:00 am UTC).} - -\item{area}{Character: a string contains the bounding box (formatted as "North/West/South/East") to be downloaded (e.g., "85/-179/7/-20").} - -\item{variables}{List: a list contains variables to be downloaded (e.g., list("2m_temperature","surface_pressure")).} - -\item{outdir}{Character: physical path where the ERA5 data are stored.} - -\item{auto.create.key}{Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE.} -} -\value{ -A vector containing file paths to the downloaded files. -} -\description{ -This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API. 
-} -\author{ -Dongchen Zhang -} From 2f3a1ca7ac8f12da2b9c8a7edebe5a20db937237 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 11:53:12 -0500 Subject: [PATCH 6/9] Apply Mike's suggestions. --- modules/data.atmosphere/R/ERA5_download.R | 61 ++++++++++++++++++----- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/modules/data.atmosphere/R/ERA5_download.R b/modules/data.atmosphere/R/ERA5_download.R index fc703f7a21..46a54d8152 100644 --- a/modules/data.atmosphere/R/ERA5_download.R +++ b/modules/data.atmosphere/R/ERA5_download.R @@ -2,22 +2,42 @@ #' This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API. #' @title ERA5_cds_annual_download #' -#' @param years Numeric: a series of years to be downloaded (e.g., 2012:2021). -#' @param months List: a list contains months to be downloaded (e.g., list("01", "02") to download files in Jan and Feb). -#' @param days List: a list contains days to be downloaded (e.g., list("01", "02") to download files in the first and second days). -#' @param times List: a list contains times to be downloaded (e.g., list('00:00','03:00') to download files for the times 12:00 and 3:00 am UTC). -#' @param area Character: a string contains the bounding box (formatted as "North/West/South/East") to be downloaded (e.g., "85/-179/7/-20"). -#' @param variables List: a list contains variables to be downloaded (e.g., list("2m_temperature","surface_pressure")). -#' @param outdir Character: physical path where the ERA5 data are stored. +#' @param outfolder Character: physical path where the ERA5 data are stored. +#' @param start_date character: the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date) +#' @param end_date character: the end date of the data to be downloaded. 
Format is YYYY-MM-DD (will only use the year part of the date) +#' @param extent numeric: a vector of numbers contains the bounding box (formatted as xmin, xmax, ymin, ymax) to be downloaded. +#' @param variables character: a vector contains variables to be downloaded (e.g., c("2m_temperature","surface_pressure")). #' @param auto.create.key Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE. +#' @param timeout numeric: the maximum time (in seconds) allowed to download the data. The default is 36000 seconds. #' #' @return A vector containing file paths to the downloaded files. #' @export #' -#' @examples +#' @importFrom magrittr %>% #' @author Dongchen Zhang -ERA5_cds_annual_download <- function(years, months, days, times, area, variables, outdir, auto.create.key = T) { - options(timeout=360000) +download.ERA5_cds_annual <- function(outfolder, start_date, end_date, extent, variables, auto.create.key = T, timeout = 36000) { + # check shell environments. + if ("try-error" %in% class(try(system("grib_to_netcdf"), silent = T))) { + PEcAn.logger::logger.info("The grib_to_netcdf function is not detected in shell command.") + return(NA) + } + if ("try-error" %in% class(try(system("ncks"), silent = T))) { + PEcAn.logger::logger.info("The ncks function is not detected in shell command.") + return(NA) + } + # setup timeout for download. + options(timeout=timeout) + # convert arguments to CDS API specific arguments. 
+ years <- sort(unique(lubridate::year(seq(lubridate::date(start_date), lubridate::date(end_date), "1 year")))) + months <- sort(unique(lubridate::month(seq(lubridate::date(start_date), lubridate::date(end_date), "1 month")))) %>% + purrr::map(function(d)sprintf("%02d", d)) + days <- sort(unique(lubridate::day(seq(lubridate::date(start_date), lubridate::date(end_date), "1 day")))) %>% + purrr::map(function(d)sprintf("%02d", d)) + times <- list('00:00','03:00','06:00', + '09:00','12:00','15:00', + '18:00','21:00') + area <- paste(c(extent[4], extent[1], extent[3], extent[2]), collapse = "/") + variables <- as.list(variables) #load cdsapi from python environment. tryCatch({ cdsapi <- reticulate::import("cdsapi") @@ -59,7 +79,12 @@ ERA5_cds_annual_download <- function(years, months, days, times, area, variables } #check if the token exists for the cdsapi. if (!file.exists(file.path(Sys.getenv("HOME"), ".cdsapirc")) & auto.create.key) { - getnetrc(Sys.getenv("HOME")) + if (!require(getPass)) { + PEcAn.logger::logger.info("The getPass pacakge is not installed for creating the API key.") + return(NA) + } else { + getnetrc(Sys.getenv("HOME")) + } } else if (!file.exists(file.path(Sys.getenv("HOME"), ".cdsapirc")) & !auto.create.key) { PEcAn.logger::logger.severe( "Please create a `${HOME}/.cdsapirc` file as described here:", @@ -78,7 +103,7 @@ ERA5_cds_annual_download <- function(years, months, days, times, area, variables # loop over years. nc.paths <- c() for (y in years) { - fname <- file.path(outdir, paste0("ERA5_", y, ".grib")) + fname <- file.path(outfolder, paste0("ERA5_", y, ".grib")) # start retrieving data. # you need to have an account for downloaing the files # Read the documantion for how to setup your account and settings before trying this @@ -138,5 +163,15 @@ ERA5_cds_annual_download <- function(years, months, days, times, area, variables # remove previous grib file. 
unlink(fname) } - return(nc.paths) + # construct results to meet the requirements of pecan.met workflow. + results <- vector("list", length = length(years)) + for (i in seq_along(results)) { + results[[i]] <- list(file = nc.paths[i], + host = PEcAn.remote::fqdn(), + startdate = paste0(paste(years[i], months[1], days[1], sep = "-"), " ", times[1], ":00"), + enddate = paste0(paste(years[i], months[length(months)], days[length(days)], sep = "-"), " ", times[length(times)], ":00"), + mimetype = "application/x-netcdf", + formatname = "ERA5_year.nc") + } + return(results) } \ No newline at end of file From 64d9b8d5370776df0e6be059ab1429f6b2b52094 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 11:53:19 -0500 Subject: [PATCH 7/9] Update documentation. --- modules/data.atmosphere/NAMESPACE | 3 +- .../man/download.ERA5_cds_annual.Rd | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 modules/data.atmosphere/man/download.ERA5_cds_annual.Rd diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE index cc8d89a5c3..4744072f9e 100644 --- a/modules/data.atmosphere/NAMESPACE +++ b/modules/data.atmosphere/NAMESPACE @@ -4,7 +4,6 @@ export(.download.raw.met.module) export(.extract.nc.module) export(.met2model.module) export(AirDens) -export(ERA5_cds_annual_download) export(ERA5_met_process) export(align.met) export(build_cf_variables_table_url) @@ -19,6 +18,7 @@ export(debias.met.regression) export(download.Ameriflux) export(download.AmerifluxLBL) export(download.CRUNCEP) +export(download.ERA5_cds_annual) export(download.FACE) export(download.Fluxnet2015) export(download.FluxnetLaThuile) @@ -109,6 +109,7 @@ export(upscale_met) export(wide2long) export(write_noaa_gefs_netcdf) importFrom(dplyr,"%>%") +importFrom(magrittr,"%>%") importFrom(rlang,.data) importFrom(rlang,.env) importFrom(sf,st_crs) diff --git a/modules/data.atmosphere/man/download.ERA5_cds_annual.Rd 
b/modules/data.atmosphere/man/download.ERA5_cds_annual.Rd new file mode 100644 index 0000000000..a8fce77ce5 --- /dev/null +++ b/modules/data.atmosphere/man/download.ERA5_cds_annual.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ERA5_download.R +\name{download.ERA5_cds_annual} +\alias{download.ERA5_cds_annual} +\title{ERA5_cds_annual_download} +\usage{ +download.ERA5_cds_annual( + outfolder, + start_date, + end_date, + extent, + variables, + auto.create.key = T, + timeout = 36000 +) +} +\arguments{ +\item{outfolder}{Character: physical path where the ERA5 data are stored.} + +\item{start_date}{character: the start date of the data to be downloaded. Format is YYYY-MM-DD; the years, months, and days spanned between start_date and end_date determine what is requested.} + +\item{end_date}{character: the end date of the data to be downloaded. Format is YYYY-MM-DD; the years, months, and days spanned between start_date and end_date determine what is requested.} + +\item{extent}{numeric: a numeric vector containing the bounding box (formatted as xmin, xmax, ymin, ymax) to be downloaded.} + +\item{variables}{character: a vector containing the variables to be downloaded (e.g., c("2m_temperature","surface_pressure")).} + +\item{auto.create.key}{Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE.} + +\item{timeout}{numeric: the maximum time (in seconds) allowed to download the data. The default is 36000 seconds.} +} +\value{ +A list with one element per downloaded year, each a list with the fields file, host, startdate, enddate, mimetype, and formatname. +} +\description{ +This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API. +} +\author{ +Dongchen Zhang +} From 589794cd9d2c97ad4e0e39f76312b6d6d22f1679 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 22:02:51 -0500 Subject: [PATCH 8/9] rename function because it can download sub annual data. 
--- .../{download.ERA5_cds_annual.Rd => download.ERA5_cds.Rd} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename modules/data.atmosphere/man/{download.ERA5_cds_annual.Rd => download.ERA5_cds.Rd} (93%) diff --git a/modules/data.atmosphere/man/download.ERA5_cds_annual.Rd b/modules/data.atmosphere/man/download.ERA5_cds.Rd similarity index 93% rename from modules/data.atmosphere/man/download.ERA5_cds_annual.Rd rename to modules/data.atmosphere/man/download.ERA5_cds.Rd index a8fce77ce5..5c8d1d460e 100644 --- a/modules/data.atmosphere/man/download.ERA5_cds_annual.Rd +++ b/modules/data.atmosphere/man/download.ERA5_cds.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ERA5_download.R -\name{download.ERA5_cds_annual} -\alias{download.ERA5_cds_annual} +\name{download.ERA5_cds} +\alias{download.ERA5_cds} \title{ERA5_cds_annual_download} \usage{ -download.ERA5_cds_annual( +download.ERA5_cds( outfolder, start_date, end_date, From 480bfce29a67e2f1cc51bf27e384438754541bed Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 22:03:14 -0500 Subject: [PATCH 9/9] update example script. 
--- modules/data.atmosphere/NAMESPACE | 2 +- modules/data.atmosphere/R/ERA5_download.R | 2 +- .../inst/ERA5/ERA5_NA_download.R | 55 ++++++------------- 3 files changed, 20 insertions(+), 39 deletions(-) diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE index 4744072f9e..35c1bb0dd7 100644 --- a/modules/data.atmosphere/NAMESPACE +++ b/modules/data.atmosphere/NAMESPACE @@ -18,7 +18,7 @@ export(debias.met.regression) export(download.Ameriflux) export(download.AmerifluxLBL) export(download.CRUNCEP) -export(download.ERA5_cds_annual) +export(download.ERA5_cds) export(download.FACE) export(download.Fluxnet2015) export(download.FluxnetLaThuile) diff --git a/modules/data.atmosphere/R/ERA5_download.R b/modules/data.atmosphere/R/ERA5_download.R index 46a54d8152..26a1ce2b53 100644 --- a/modules/data.atmosphere/R/ERA5_download.R +++ b/modules/data.atmosphere/R/ERA5_download.R @@ -15,7 +15,7 @@ #' #' @importFrom magrittr %>% #' @author Dongchen Zhang -download.ERA5_cds_annual <- function(outfolder, start_date, end_date, extent, variables, auto.create.key = T, timeout = 36000) { +download.ERA5_cds <- function(outfolder, start_date, end_date, extent, variables, auto.create.key = T, timeout = 36000) { # check shell environments. 
if ("try-error" %in% class(try(system("grib_to_netcdf"), silent = T))) { PEcAn.logger::logger.info("The grib_to_netcdf function is not detected in shell command.") diff --git a/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R b/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R index 2bc076451a..0cdbb7dfad 100644 --- a/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R +++ b/modules/data.atmosphere/inst/ERA5/ERA5_NA_download.R @@ -1,37 +1,18 @@ -library(reticulate) -library(future) -library(purrr) -library(furrr) -outdir <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" -years <- 2012:2024 -months <- list('01','02','03', - '04','05','06', - '07','08','09', - '10','11','12') -days <- list('01','02','03', - '04','05','06', - '07','08','09', - '10','11','12', - '13','14','15', - '16','17','18', - '19','20','21', - '22','23','24', - '25','26','27', - '28','29','30', - '31') -times <- list('00:00','03:00','06:00', - '09:00','12:00','15:00', - '18:00','21:00') -area <- "85/-179/7/-20" -variables <- list( "2m_temperature","surface_pressure", - "2m_dewpoint_temperature","total_precipitation", - "10m_u_component_of_wind","10m_v_component_of_wind", - "surface_solar_radiation_downwards","surface_thermal_radiation_downwards") -paths <- PEcAn.data.atmosphere::ERA5_cds_annual_download(years = year, - months = months, - days = days, - times = times, - area = area, - variables = variables, - outdir = outdir, - auto.create.key = T) +outfolder <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" +start_date <- "2012-01-01" +end_date <- "2021-12-31" +extent <- c(-179, -20, 7, 85) +variables <- c("2m_temperature", + "surface_pressure", + "2m_dewpoint_temperature", + "total_precipitation", + "10m_u_component_of_wind", + "10m_v_component_of_wind", + "surface_solar_radiation_downwards", + "surface_thermal_radiation_downwards") +results <- PEcAn.data.atmosphere::download.ERA5_cds(outfolder = outfolder, + start_date = start_date, + end_date = end_date, + extent = extent, + 
variables = variables, + auto.create.key = T)