diff --git a/.gitignore b/.gitignore index 2809289..dd5c0e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.prj +.lsp diff --git a/DESCRIPTION b/DESCRIPTION index 7090523..9cb10b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: econdatar Title: Automation of time series uploads and downloads -Version: 2.0.4 +Version: 2.0.5 Date: 2023-03-13 Authors@R: c(person("Byron", "Botha", role = c("aut", "cre"), email = "byron@codera.co.za"), person("Sebastian", "Krantz", role = "ctb")) diff --git a/R/read_econdata.R b/R/read_econdata.R index 8f47297..7462fc4 100644 --- a/R/read_econdata.R +++ b/R/read_econdata.R @@ -112,14 +112,23 @@ read_econdata <- function(id, ..., tidy = FALSE) { data_structure <- data_message[[2]]$structures[["data-structures"]][[1]][[2]] - series_dims <- sapply(data_structure$components, function(component) { + series_ids <- sapply(data_structure$components, function(component) { if (component[[1]] == "#sdmx.infomodel.datastructure.Dimension") { component[[2]][["concept-identity"]][[2]]$id } else { NA } - }) |> - na.omit() + }) + + series_pos <- sapply(data_structure$components, function(component) { + if (component[[1]] == "#sdmx.infomodel.datastructure.Dimension") { + component[[2]]$position + } else { + NA + } + }) + + series_dims <- na.omit(series_ids[order(series_pos)]) obs_attrs <- sapply(data_structure$components, function(component) { if (component[[1]] == "#sdmx.infomodel.datastructure.Attribute") { @@ -148,13 +157,13 @@ read_econdata <- function(id, ..., tidy = FALSE) { query_params <- list() - if (is.null(params$release) || params$release != "unreleased") { - - tryCatch(query_params$release <- strftime(params$release, "%Y-%m-%dT%H:%M:%S"), - error = function(e) { query_params$release <- NULL }) + if (is.null(params$release)) params$release = "latest" - if (is.null(query_params$release)) { + if (params$release != "unreleased") { + query_params$release <- tryCatch({ + strftime(params$release, "%Y-%m-%dT%H:%M:%S") + 
}, error = function(e) { response <- GET(env$repository$url, path = paste(env$repository$path, "datasets", @@ -169,36 +178,39 @@ read_econdata <- function(id, ..., tidy = FALSE) { data_message <- content(response, type = "application/json", encoding = "UTF-8") - if (is.null(params$release) || params$release == "latest") { - release <- tail(data_message$releases, n = 1)[[1]]$release |> + if (params$release == "latest") { + release <- head(data_message$releases, n = 1)[[1]]$release |> as.POSIXct(x, tz = "UTC", format = "%Y-%m-%dT%H:%M:%SZ") attr(release, "tzone") <- "Africa/Johannesburg" - query_params$release <- strftime(release, "%Y-%m-%dT%H:%M:%S") + + return(strftime(release, "%Y-%m-%dT%H:%M:%S")) } else { release <- sapply(data_message$releases, function(release) { - if(params$release == release$description) { - release$release - } else { - NA - } - }) |> - na.omit() |> - head(n = 1) + if(params$release == release$description) { + release$release + } else { + NA + } + }) |> + na.omit() |> + head(n = 1) if (length(release) != 0) { release <- as.POSIXct(release, tz = "UTC", format = "%Y-%m-%dT%H:%M:%SZ") attr(release, "tzone") <- "Africa/Johannesburg" - query_params$release <- strftime(release, "%Y-%m-%dT%H:%M:%S") + + return(strftime(release, "%Y-%m-%dT%H:%M:%S")) } else { message("Release not found, returning latest release instead.") release <- tail(data_message$releases, n = 1)[[1]]$release |> as.POSIXct(x, tz = "UTC", format = "%Y-%m-%dT%H:%M:%SZ") attr(release, "tzone") <- "Africa/Johannesburg" - query_params$release <- strftime(release, "%Y-%m-%dT%H:%M:%S") + + return(strftime(release, "%Y-%m-%dT%H:%M:%S")) } } - } + }) } if (!is.null(params$series_key)) { @@ -277,6 +289,13 @@ read_econdata <- function(id, ..., tidy = FALSE) { if (length(database) == 1) { return(database[[1]]) } else { - return(list("data-sets", database[[1]])) + if (tidy) { + names(database) <- + paste0("v", sapply(database, + function(x) attr(x, "metadata")$version)) + return(database) + 
} else { + return(database) + } } } diff --git a/R/read_structure.R b/R/read_structure.R deleted file mode 100644 index e69de29..0000000 diff --git a/R/write_release.R b/R/write_release.R index 43b57d0..d2e3694 100644 --- a/R/write_release.R +++ b/R/write_release.R @@ -26,7 +26,9 @@ write_release <- function(id, version, providerid, description, reset = FALSE, r if (!is.null(params$release)) { query_params$release <- params$release } else { - query_params$release <- format(Sys.time(), "%Y-%m-%dT%H:%M:%S") + query_params$release <- format(Sys.time(), + "%Y-%m-%dT%H:%M:%S", + tz = "Africa/Johannesburg") } diff --git a/R/write_structure.R b/R/write_structure.R deleted file mode 100644 index 1b51a6b..0000000 --- a/R/write_structure.R +++ /dev/null @@ -1,3 +0,0 @@ -write_structure <- function(x, create = FALSE) { - -} diff --git a/README.md b/README.md index b1f03b9..b80b195 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ ```r install.packages(c("remotes", "tcltk"), repos = "https://cran.mirror.ac.za") library("remotes") -install_github("coderaanalytics/econdatar", ref = "2.0.4") +install_github("coderaanalytics/econdatar", ref = "2.0.5") ``` Install from disk @@ -30,7 +30,7 @@ Or if selecting a particular release **(recommended)**, [see](https://github.com ```r library("remotes") remove.packages("econdatar") -install_github("coderaanalytics/econdatar", ref = "2.0.4") +install_github("coderaanalytics/econdatar", ref = "2.0.5") ``` Please see the [EconData blog](https://randomsample.co.za) for in depth tutorials diff --git a/man/read_econdata.Rd b/man/read_econdata.Rd index b5eebf4..b4b3b8a 100644 --- a/man/read_econdata.Rd +++ b/man/read_econdata.Rd @@ -5,7 +5,7 @@ read_econdata } \description{ -Returns the data set for the given dataflow - ECONDATA:id(version) and data provider - ECONDATA:id, as a list, or as tidy \emph{data.table}'s. Available data sets can be looked up from the data registry (http://www.econdata.co.za/FusionRegistry). 
Tidying can be done directly within \code{read_econdata()}, or ex-post using \code{econdata_tidy()}. +Returns the data for the given data set - ECONDATA:id(version), as a list, or as tidy \emph{data.table}'s. Available data sets can be looked up from the web platform (http://www.econdata.co.za). Tidying can be done directly within \code{read_econdata()}, or ex-post using \code{econdata_tidy()}. } \usage{ read_econdata(id, \dots, tidy = FALSE) @@ -18,13 +18,13 @@ econdata_tidy(x, \dots) \item{\dots}{Further \emph{Optional} arguments: \tabular{llll}{ - \code{agencyid} \tab\tab Agency responsible for the data definition. \cr\cr - \code{version} \tab\tab Version of the data definition. \cr\cr - \code{provideragencyid} \tab\tab Agency responsible for making the data available. \cr\cr - \code{providerid} \tab\tab Provider of the data. \cr\cr - \code{file} \tab\tab character. File name for retrieving JSON data from disk. \cr\cr - \code{username} \tab\tab character. EconData username. \cr\cr - \code{password} \tab\tab character. EconData password. \cr\cr + \code{agencyid} \tab\tab character. Agency responsible for the metadata creation/maintenance. \cr + \code{version} \tab\tab character. Version(s) of the data (different versions will have different metadata), or 'all' to return all available versions. \cr + \code{series_key} \tab\tab character. A character vector specifying a subset of time series (see the web platform (export function) for details). \cr + \code{release} \tab\tab character or time object with format \%Y-\%m-\%dT\%H:\%M:\%S. The release description, or a date/time which will return the data as it was at that moment, or 'latest', or 'unreleased'. \cr + \code{file} \tab\tab character. File name for retrieving JSON data from disk. \cr + \code{username} \tab\tab character. Web username. \cr + \code{password} \tab\tab character. Web password. 
\cr } } @@ -47,17 +47,15 @@ econdata_tidy(x, \dots) } } \details{ -Specifying the full dataflow and data provider details (as opposed to only using the data id) allows more fine-grained control over the data set being queried. This is not necessary if there is only a single definition of the data and a single provider, which is typically the case. - An EconData account (http://www.econdata.co.za) is required to use this function. The user must provide their credentials either through the function arguments, or by setting the ECONDATA_CREDENTIALS environment variable using the syntax: "username;password", e.g. \code{Sys.setenv(ECONDATA_CREDENTIALS="username;password")}. If credentials are not supplied by the aforementioned methods a GUI dialog will prompt the user for credentials. } \value{ %% ~Describe the value returned -If \code{tidy = FALSE}, a list of data frames is returned, where the names of the list are the EconData series codes, and each data frame has a single column named 'OBS_VALUE' containing the data, with corresponding dates attached as rownames. Each data frame further has a \code{"metadata"} attribute providing information about the series. The entire list of data frames also has a \code{"metadata"} attribute, providing information about the dataset. If multiple datasets (or versions of a dataset if \code{version} is left empty) are being queried, a list of such lists is returned. +If \code{tidy = FALSE}, a list of data frames is returned, where the names of the list are the EconData series codes, and each data frame has a single column named 'OBS_VALUE' containing the data, with corresponding dates attached as rownames. Each data frame further has a \code{"metadata"} attribute providing information about the series. The entire list of data frames also has a \code{"metadata"} attribute, providing information about the dataset. 
If multiple datasets (or versions of a dataset if \code{version} is specified as 'all') are being queried, a list of such lists is returned. -If \code{tidy = TRUE} and \code{wide = TRUE} (the default), a single \emph{data.table} is returned where the first column is the date, and the remaining columns are series named by their EconData codes. Each series has two attributes: \code{"label"} provides a variable label combining important metadata from the \code{"metadata"} attribute in the non-tidy format, and \code{"source.code"} gives the series code assigned by the original data provider. The table has the same dataset-level \code{"metadata"} attribute as the list of data frames if \code{tidy = FALSE}. If multiple datasets (or versions of a dataset if \code{version} is left empty) are being queried, a list of such \emph{data.table}'s is returned. +If \code{tidy = TRUE} and \code{wide = TRUE} (the default), a single \emph{data.table} is returned where the first column is the date, and the remaining columns are series named by their EconData codes. Each series has two attributes: \code{"label"} provides a variable label combining important metadata from the \code{"metadata"} attribute in the non-tidy format, and \code{"source.code"} gives the series code assigned by the original data provider. The table has the same dataset-level \code{"metadata"} attribute as the list of data frames if \code{tidy = FALSE}. If multiple datasets (or versions of a dataset if \code{version} is specified as 'all') are being queried, a list of such \emph{data.table}'s is returned. -If \code{tidy = TRUE} and \code{wide = FALSE} and \code{compact = FALSE} (the default), a named list of two \emph{data.table}'s is returned. The first, \code{"data"}, has columns 'code', 'date' and 'value' providing the data in a long format. The second, \code{"metadata"}, provides dataset and series-level matadata, with one row for each series. 
If \code{compact = TRUE}, these two datasets are combined, where all repetitive content is converted to factors for more efficient storage. If multiple datasets (or versions of a dataset if \code{version} is left empty) are being queried, \code{compact = FALSE} gives a nested list, whereas \code{compact = TRUE} binds everything together to a single long frame. In general, if \code{wide = FALSE}, no attributes are attached to the tables or columns in the tables. +If \code{tidy = TRUE} and \code{wide = FALSE} and \code{compact = FALSE} (the default), a named list of two \emph{data.table}'s is returned. The first, \code{"data"}, has columns 'code', 'date' and 'value' providing the data in a long format. The second, \code{"metadata"}, provides dataset and series-level metadata, with one row for each series. If \code{compact = TRUE}, these two datasets are combined, where all repetitive content is converted to factors for more efficient storage. If multiple datasets (or versions of a dataset if \code{version} is specified as 'all') are being queried, \code{compact = FALSE} gives a nested list, whereas \code{compact = TRUE} binds everything together to a single long frame. In general, if \code{wide = FALSE}, no attributes are attached to the tables or columns in the tables. %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} @@ -82,12 +80,16 @@ ELECTRICITY_LONG <- econdata_tidy(ELECTRICITY, wide = FALSE) with(ELECTRICITY_LONG, metadata[data, on = "data_key"]) # CPI Analytical Series: Different Revisions -CPI_ANL <- read_econdata(id = "CPI_ANL_SERIES") +CPI_ANL <- read_econdata(id = "CPI_ANL_SERIES", version = "all") CPI_ANL_WIDE <- econdata_tidy(CPI_ANL) CPI_ANL_LONG <- econdata_tidy(CPI_ANL, wide = FALSE, combine = TRUE) CPI_ANL_ALLMETA <- econdata_tidy(CPI_ANL, wide = FALSE, allmeta = TRUE) # v2.0 has some 0-obs series -# Can query a specific version by adding e.g. version = "2.0" to the call +# Can query a specific version by adding e.g. 
version = "2.0.0" to the call + +# Returns 5-10 years (daily average bond yields) not yet contained in the latest release +# (particularly useful for daily data that is released monthly) +MARKET_RATES <- read_econdata(id = "MARKET_RATES", series_key = "CMJD003.B.A", release = "unreleased") } }