Skip to content

Commit

Permalink
Data updates to 2022 FAOSTAT along with BYU and PCe processing updates (
Browse files Browse the repository at this point in the history
realxinzhao authored Oct 17, 2024

Verified

This commit was signed with the committer’s verified signature.
jettcc Jettcc
1 parent 1870dac commit 24a8d58
Showing 240 changed files with 24,443 additions and 10,162 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/draft-pdf.yml
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@ jobs:
# This should be the path to the paper within your repo.
paper-path: paper.md
- name: Upload
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v4
with:
name: paper
# This is the output path where Pandoc will write the compiled
18 changes: 15 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ Rplots.pdf
# Vignette files
inst/doc

# Proprietary files
# Potential proprietary files
IEA_EnergyBalances_2019.csv.gz
*CEDS_emissions.csv

@@ -33,8 +33,20 @@ IEA_EnergyBalances_2019.csv.gz


# FAOSTATA raw zip
inst/extdata/aglu/FAO/FAOSTAT/*.zip
inst/extdata/aglu/FAO/FAOSTAT_Archive/*
inst/extdata/FAOSTAT/*.zip
inst/extdata/FAOSTAT_Archive*/*

# Model-specific gcamfaostat output files
gcamfaostat*/

# Model-specific input files
inst/extdata/GCAM/mi_headers
inst/extdata/GCAM/ModelInterface
inst/extdata/GCAM/aglu/FAO/GCAMFAOSTAT*


# Developer
paper
developertemp

#docs
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: gcamfaostat
Type: Package
Title: Prepare, process, and synthesize FAOSTAT data for global agroeconomic and multisector dynamic modeling
Version: 1.0.0
Date: 2023-05-06
Version: 1.0.1
Date: 2024-10-17
Authors@R: c(person("Xin", "Zhao", email = "xin.zhao@pnnl.gov", role = c("cre", "aut"), comment = c(ORCID = "0000-0002-1801-4393")),
person("Maksym", "Chepeliev", role = "aut", comment = c(ORCID = "0000-0001-8585-2314")),
person("Pralit", "Patel", role = "aut", comment = c(ORCID = "0000-0003-3992-1061")),
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ export(approx_fun)
export(approx_fun_constant)
export(assert_FBS_balance)
export(change_iso_code)
export(chord_wrapper)
export(chunk_inputs)
export(chunk_outputs)
export(chunk_readylist)
1,010 changes: 131 additions & 879 deletions R/constants.R

Large diffs are not rendered by default.

176 changes: 16 additions & 160 deletions R/driver.R
Original file line number Diff line number Diff line change
@@ -141,9 +141,6 @@ tibbelize_outputs <- function(chunk_data, chunk_name) {
#' @param write_xml Write XML Batch chunk outputs to disk?
#' @param outdir Location to write output data (ignored if \code{write_outputs} is \code{FALSE})
#' @param xmldir Location to write output XML (ignored if \code{write_outputs} is \code{FALSE})
#' @param user_modifications A list of function names which implement a user mod chunk. See vignettes/usermod_vignette.Rmd for more details and examples.
#' @param xml_suffix A suffix to be appended at the end of all XML file name if not null. Such a feature is
#' useful when using \code{user_modifications} to generate alternative scenarios.
#' @return A list of all built data (or a data map tibble if requested).
#' @details The driver loads any necessary data from input files,
#' runs all code chunks in an order dictated by their dependencies,
@@ -164,9 +161,7 @@ driver <- function(all_data = empty_data(),
write_outputs = FALSE,
write_xml = write_outputs,
outdir = OUTPUTS_DIR, xmldir = XML_DIR,
quiet = FALSE,
user_modifications = NULL,
xml_suffix = NULL) {
quiet = FALSE) {

# If users ask to stop after a chunk, but also specify they want particular inputs,
# or if they ask to stop before a chunk, while asking for outputs, that's confusing.
@@ -193,16 +188,8 @@ driver <- function(all_data = empty_data(),
assert_that(is.logical(write_xml))
assert_that(is.logical(quiet))

# we need to use package data to set this in effect in such a way that drake does not notice
# and think all XML files need to be rebuilt with the suffix
if (!is.null(xml_suffix)){
xml.XML_SUFFIX <<- xml_suffix
}
if(!is.null(user_modifications) && is.null(xml_suffix)) {
warning("It is highly reccommended to utilize `xml_suffix` to distinguish XML inputs derived from `user_modifications`")
}

if(!quiet) cat("GCAM Data System v", as.character(utils::packageVersion(PACKAGE_NAME)), "\n", sep = "")
if(!quiet) cat("gcamfaostat Data System v", as.character(utils::packageVersion(PACKAGE_NAME)), "\n", sep = "")

chunklist <- find_chunks()
if(!quiet) cat("Found", nrow(chunklist), "chunks\n")
@@ -211,59 +198,6 @@ driver <- function(all_data = empty_data(),
chunkoutputs <- chunk_outputs(chunklist$name)
if(!quiet) cat("Found", nrow(chunkoutputs), "chunk data products\n")

# check if any user chunks are set in which case we need to adjust the
# chunklist/chunkinputs/chunkoutputs to shim the user chunk in place
if(!is.null(user_modifications)) {
# a user modification chunk uses a special command: driver.DECLARE_MODIFY
# to indicate which ds "objects" it wants to modify
# that chunk will then require those objects as inputs AND produce those
# objects as output
# these chunks will also be allowed to specify regular driver.DECLARE_INPUTS
# which will be passed as input but NOT output back out

# in order to shim the user modification in we adjust all chunk_inputs
# for any object to be modified to input the object with the constant
# data.USER_MOD_POSTFIX appended to the end instead
# and the user chunk will input the original object and output the appended
# data name

# first get a list of all objects that are to be modified
lapply(user_modifications, chunk_inputs, driver.DECLARE_MODIFY) %>%
bind_rows() %>%
# generate the mod data name by appending data.USER_MOD_POSTFIX
mutate(data_mod = paste0(input, data.USER_MOD_POSTFIX)) ->
modify_table

# adjust chunkinputs so chunks that require as input any object that is
# to be modified will now input the data.USER_MOD_POSTFIX appended name
chunkinputs %>%
left_join(select(modify_table, input, data_mod), by = c("input")) %>%
mutate(input = if_else(is.na(data_mod), input, data_mod),
from_file = if_else(is.na(data_mod), from_file, FALSE)) %>%
select(-data_mod) %>%
# add on the input requirements for the user mod chunk which are the
# original object names as well as any other driver.DECLARE_INPUTS they
# require
bind_rows(select(modify_table, -data_mod),
lapply(user_modifications, chunk_inputs, driver.DECLARE_INPUTS)) ->
chunkinputs

# add in the outputs from the user mod chunks which are the modified object names
# appended with data.USER_MOD_POSTFIX
lapply(user_modifications, chunk_outputs, driver.DECLARE_MODIFY) %>%
bind_rows() %>%
mutate(output = paste0(output, data.USER_MOD_POSTFIX)) %>%
bind_rows(chunkoutputs) ->
chunkoutputs

# now we just need to add the user mod chunks to the chunklist
bind_rows(chunklist,
tibble(name = user_modifications,
module = "user",
chunk = user_modifications,
disabled = FALSE)) ->
chunklist
}

# Keep track of chunk inputs for later pruning
chunkinputs %>%
@@ -456,9 +390,7 @@ driver <- function(all_data = empty_data(),
#' @param write_xml Write XML Batch chunk outputs to disk?
#' @param xmldir Location to write output XML (ignored if \code{write_outputs} is \code{FALSE})
#' @param quiet Suppress output?
#' @param user_modifications A list of function names which implement a user mod chunk. See vignettes/usermod_vignette.Rmd for more details and examples.
#' @param xml_suffix A suffix to be appended at the end of all XML file name if not null. Such a feature is
#' useful when using \code{user_modifications} to generate alternative scenarios.
#' @param write_csv_model If "GCAM", the CSV will be generated and saved to DIR_OUTPUT_CSV
#' @param ... Additional arguments to be forwarded on to \code{make}
#' @return A list of all built data (or a data map tibble if requested).
#' @importFrom magrittr "%>%"
@@ -474,13 +406,20 @@ driver_drake <- function(
outputs_of(return_outputs_of)),
return_data_map_only = FALSE,
return_plan_only = FALSE,
write_xml = FALSE,
xmldir = XML_DIR,
write_csv_model = FALSE,
csv_dir = paste0("output/gcamfaostat_", write_csv_model),
quiet = FALSE,
user_modifications = NULL,
xml_suffix = NULL,
...){

# assign write_csv_model value to OUTPUT_Export_CSV in the package environment
assign(x = "OUTPUT_Export_CSV", value = write_csv_model, envir = rlang::pkg_env("gcamfaostat"))

if (write_csv_model != FALSE) {

# assign output path (csv_dir) to DIR_OUTPUT_CSV in the package environment
assign(x = "DIR_OUTPUT_CSV", value = csv_dir, envir = rlang::pkg_env("gcamfaostat"))
dir.create(DIR_OUTPUT_CSV, showWarnings = FALSE, recursive = TRUE)
}

# We merely suggest drake as we can still run the data system via driver
# with out it. Ensure we have it before proceeding.
@@ -510,25 +449,16 @@ driver_drake <- function(
assert_that(is.null(return_data_names) | is.character(return_data_names))
assert_that(is.logical(return_data_map_only))
assert_that(is.logical(return_plan_only))
assert_that(is.logical(write_xml))
assert_that(is.logical(quiet))
# PREBUILT_DATA cannot be NULL
assert_that(!is.null(PREBUILT_DATA), msg = "PREBUILT_DATA is NULL")

# we need to use package data to set this in effect in such a way that drake does not notice
# and think all XML files need to be rebuilt with the suffix
if (!is.null(xml_suffix)){
xml.XML_SUFFIX <<- xml_suffix
}
if(!is.null(user_modifications) && is.null(xml_suffix)) {
warning("It is highly reccommended to utilize `xml_suffix` to distinguish XML inputs derived from `user_modifications`")
}

if(return_plan_only) {
assert_that(!return_data_map_only)
}

if(!quiet) message("GCAM Data System v", as.character(utils::packageVersion(PACKAGE_NAME)), sep = "")
if(!quiet) message("gcamfaostat Data System v", as.character(utils::packageVersion(PACKAGE_NAME)), sep = "")

chunklist <- find_chunks()
if(!quiet) message("Found ", nrow(chunklist), " chunks")
@@ -537,60 +467,6 @@ driver_drake <- function(
chunkoutputs <- chunk_outputs(chunklist$name)
if(!quiet) message("Found ", nrow(chunkoutputs), " chunk data products")

# check if any user chunks are set in which case we need to adjust the
# chunklist/chunkinputs/chunkoutputs to shim the user chunk in place
if(!is.null(user_modifications)) {
# a user modification chunk uses a special command: driver.DECLARE_MODIFY
# to indicate which ds "objects" it wants to modify
# that chunk will then require those objects as inputs AND produce those
# objects as output
# these chunks will also be allowed to specify regular driver.DECLARE_INPUTS
# which will be passed as input but NOT output back out

# in order to shim the user modification in we adjust all chunk_inputs
# for any object to be modified to input the object with the constant
# data.USER_MOD_POSTFIX appended to the end instead
# and the user chunk will input the original object and output the appended
# data name

# first get a list of all objects that are to be modified
lapply(user_modifications, chunk_inputs, driver.DECLARE_MODIFY) %>%
bind_rows() %>%
# generate the mod data name by appending data.USER_MOD_POSTFIX
mutate(data_mod = paste0(input, data.USER_MOD_POSTFIX)) ->
modify_table

# adjust chunkinputs so chunks that require as input any object that is
# to be modified will now input the data.USER_MOD_POSTFIX appended name
chunkinputs %>%
left_join(select(modify_table, input, data_mod), by = c("input")) %>%
mutate(input = if_else(is.na(data_mod), input, data_mod),
from_file = if_else(is.na(data_mod), from_file, FALSE)) %>%
select(-data_mod) %>%
# add on the input requirements for the user mod chunk which are the
# original object names as well as any other driver.DECLARE_INPUTS they
# require
bind_rows(select(modify_table, -data_mod),
lapply(user_modifications, chunk_inputs, driver.DECLARE_INPUTS)) ->
chunkinputs

# add in the outputs from the user mod chunks which are the modify object names
# appended with data.USER_MOD_POSTFIX
lapply(user_modifications, chunk_outputs, driver.DECLARE_MODIFY) %>%
bind_rows() %>%
mutate(output = paste0(output, data.USER_MOD_POSTFIX)) %>%
bind_rows(chunkoutputs) ->
chunkoutputs

# now we just need to add the user mod chunks to the chunklist
bind_rows(chunklist,
tibble(name = user_modifications,
module = "user",
chunk = user_modifications,
disabled = FALSE)) ->
chunklist
}

# Keep track of chunk inputs for later pruning
chunkinputs %>%
group_by(input) %>%
@@ -650,11 +526,6 @@ driver_drake <- function(
chunks_to_run <- c(unfound_inputs$input, chunklist$name)
}

if (write_xml == TRUE) {
dir.create(xmldir, showWarnings = FALSE, recursive = TRUE)
}


# Loop over each chunk and add a target for it and the command to build it
# as appropriate for if it is just loading a FILE or running an actual chunk.
target <- c()
@@ -712,7 +583,7 @@ driver_drake <- function(
# Also note we explicitly list just the inputs required for the chunk which is
# different than in driver where we give `all_data`, again this is for drake so it
# can match up target names to commands and develop the dependencies between them.
nsprefix <- if_else(chunk %in% user_modifications, "", paste0(PACKAGE_NAME, ":::"))
nsprefix <- paste0(PACKAGE_NAME, ":::")

command <- c(command, paste0(nsprefix, chunk, "('", driver.MAKE, "', c(", paste(make.names(input_names), collapse = ","), "))"))

@@ -728,21 +599,6 @@ driver_drake <- function(
target <- c(target, make.names(po))
command <- c(command, paste(chunk, '["', po, '"]', sep = ""))

# We need to seperate out XML outputs so that we can add commands
# to actually run the XML conversion and write out the gcam inputs
po_xml <- subset(chunkoutputs, name == chunk & to_xml)$output
if(write_xml && length(po_xml) > 0) {
# Add the xmldir to the XML output name and include those in the
# target list.
po_xml_path = file.path(xmldir, po_xml) %>% gsub("/{2,}", "/", .)# Don't want multiple consecutive slashes, as drake views that as separate object
target <- c(target, make.names(po_xml_path))
# Generate the command to run the XML conversion:
# `xml/out1.xml <- run_xml_conversion(set_xml_file_helper(out1.xml, file_out("xml/out1.xml")))`
# Note, the `file_out()` wrapper notifies drake the XML file is an output
# of this plan and allows it to know to re-produce missing/altered XML files
command <- c(command, paste0("run_xml_conversion(set_xml_file_helper(", po_xml, "[[1]],
file_out('", po_xml_path, "')))"))
}
}


13 changes: 5 additions & 8 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -7,12 +7,7 @@ PACKAGE_NAME <- "gcamfaostat"
# silence package check
utils::globalVariables(c(
".", "APE_comm", "APE_comm_source", "Area", "harvested", "Closing stocks",
"Export", "FALSE", "FAO_ag_items_PRODSTAT", "Feed", "Food",
"GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020",
"GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009",
"GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean",
"GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020",
"GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", "Import",
"Export", "FALSE", "FAO_ag_items_PRODSTAT", "Feed", "Food", "Import",
"Import_Demand_Share", "Loss", "Mapping_SUA_PrimaryEquivalent", "Opening stocks",
"Opening stocks1", "Ostock", "Ostock_Demand_Share", "Ostock_demand", "Other uses",
"PREBUILT_DATA", "Processed", "Production", "Q25asMin", "Regional demand",
@@ -35,7 +30,9 @@ utils::globalVariables(c(
"FAO_an_items_PRODSTAT", "FAOupdate", "GCAM_AgLU_SUA_APE_1973_2019",
"GCAM_DATA_MAP", "GCAM_commodity", "GCAM_region_names", "GCAM_subsector",
"Localupdate", "TM", "Tourist", "consumption", "datasetname", "localfilesize",
"moving_avg", "remotefilezize", "unit", "Tourist consumption", "xml.XML_SUFFIX"))
"moving_avg", "remotefilezize", "unit", "Tourist consumption", "xml.XML_SUFFIX",
"macronutrient_value", "source_item", "region_ID", "QCL_CROP_PRIMARY",
"DS_key_coproduct_item_code", "CBH", "QCL_Cake"))

#' find_header
#'
@@ -414,7 +411,7 @@ find_chunks <- function(pattern = "^module_[a-zA-Z\\.]*_.*$", include_disabled =

ls(name = parent.env(environment()), pattern = pattern) %>%
tibble::tibble(name = .,
disabled = grepl("_DISABLED$", name) | grepl(paste0("^module_.*", DISABLED_MODULES), name)) %>%
disabled = grepl("_DISABLED$", name) | grepl(paste0("^module_.*", DISABLED_MODULES, collapse = "|"), name)) %>%
filter(include_disabled | !disabled) %>%
tidyr::separate(name, into = c("x", "module", "chunk"), remove = FALSE,
sep = "_", extra = "merge") %>%
19 changes: 13 additions & 6 deletions R/xfaostat_L101_RawDataPreProc1_QCL.R
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
module_xfaostat_L101_RawDataPreProc1_QCL <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Trade_CropsLivestock_E_All_Data_Normalized")
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Production_Crops_Livestock_E_All_Data_Normalized"))

MODULE_OUTPUTS <-
c("QCL_wide", # Ag production quantity and harvested area
@@ -55,7 +55,10 @@ module_xfaostat_L101_RawDataPreProc1_QCL <- function(command, ...) {
# *[QCL] FAOSTAT Production and area ----

## Load raw data
FAOSTAT_load_raw_data(DATASETCODE = "QCL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "QCL", .Envir = Curr_Envir)

#FAOSTAT_load_raw_data("QCL", GET_MAPPINGCODE = "AreaCodes", .Envir = Curr_Envir)
#FAOSTAT_load_raw_data("QCL", GET_MAPPINGCODE = "ItemCodes", .Envir = Curr_Envir)

QCL %>% distinct(element_code, element)

@@ -76,10 +79,14 @@ module_xfaostat_L101_RawDataPreProc1_QCL <- function(command, ...) {
# Prod Popultn (5314) for Beewax and honey is removed since data is only before 1990
filter(element_code != 5314) %>%
# Remove NA for simplicity for now; expend.grid later
# All Coir (coconut fiber) is filtered out due to NA
# All Coir (coconut fiber; item_code == 813) was previously filtered out due to NA, but now available for a few regions
filter(!is.na(value)) %>%
# remove accent
rm_accent("item", "area") -> QCL1
rm_accent("item", "area") %>%
# need to ensure all area names came from the same source! 107 and 223
mutate(area = replace(area, area == "CA?te dIvoire", "Cote dIvoire"),
area = replace(area, area == "TA?rkiye", "Turkiye"))-> QCL1


QCL1 %>% spread(year, value) ->
QCL_wide
@@ -92,14 +99,14 @@ module_xfaostat_L101_RawDataPreProc1_QCL <- function(command, ...) {
add_title("FAO primary production country and code") %>%
add_units("NA") %>%
add_comments("FAO Country and code") %>%
add_precursors("aglu/FAO/FAOSTAT/Trade_CropsLivestock_E_All_Data_Normalized") ->
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Production_Crops_Livestock_E_All_Data_Normalized")) ->
QCL_area_code_map

QCL_wide %>%
add_title("FAO primary production (QCL)", overwrite = TRUE) %>%
add_units("ha/tonne") %>%
add_comments("Preprocessed FAOSTAT primary production") %>%
add_precursors("aglu/FAO/FAOSTAT/Trade_CropsLivestock_E_All_Data_Normalized") ->
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Production_Crops_Livestock_E_All_Data_Normalized")) ->
QCL_wide

verify_identical_prebuilt(QCL_area_code_map)
110 changes: 96 additions & 14 deletions R/xfaostat_L101_RawDataPreProc2_PP_PD_OA.R
Original file line number Diff line number Diff line change
@@ -18,17 +18,20 @@
module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Prices_E_All_Data_Normalized",
FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Deflators_E_All_Data_Normalized",
FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Population_E_All_Data_Normalized",
FILE = "aglu/FAO/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan",
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Prices_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Deflators_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Population_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Investment_CapitalStock_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Macro-Statistics_Key_Indicators_E_All_Data_Normalized"),
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Other_supplementary/GDP_deflator_Taiwan"),
"QCL_area_code_map")


MODULE_OUTPUTS <-
c("PP_wide", # Producer prices
"PD", # GDP deflator
"OA") # Population
"OA", # Population
"CS", # Capital stock
"MK") # Macro-Statistics (GDP)

if(command == driver.DECLARE_INPUTS) {
return(MODULE_INPUTS)
@@ -52,6 +55,8 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
PP_wide <- extract_prebuilt_data("PP_wide")
PD <- extract_prebuilt_data("PD")
OA <- extract_prebuilt_data("OA")
CS <- extract_prebuilt_data("CS")
MK <- extract_prebuilt_data("MK")

} else {

@@ -64,10 +69,18 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {

# *[PP] Producer price ----

FAOSTAT_load_raw_data(DATASETCODE = "PP", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "PP", .Envir = Curr_Envir)
# check data
PP %>% distinct(element, element_code, unit)

assertthat:: assert_that(
PP %>% distinct(element, element_code, unit) %>%
filter(element_code == 5539) %>% pull(element) == "Producer Price Index (2014-2016 = 100)",
msg = "Price index element changed; please check and update."
)
PriceIndexYear = 2015


PP %>%
filter(
area_code < 350,
@@ -95,7 +108,7 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
pp_baseindex = `Producer Price Index (2014-2016 = 100)`) %>%
filter(!is.na(pp_base)) %>%
group_by(area, area_code, item) %>%
filter(year == 2015) %>% within(rm(year)) %>%
filter(year == PriceIndexYear) %>% within(rm(year)) %>%
ungroup(),
by = c("area_code", "area", "item_code", "item")
) %>% mutate(
@@ -125,14 +138,15 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
add_units("USD/tonne") %>%
add_comments("Preprocessed FAOSTAT producer prices") %>%
add_precursors("QCL_area_code_map",
"aglu/FAO/FAOSTAT/Prices_E_All_Data_Normalized") ->
file.path(DIR_RAW_DATA_FAOSTAT, "Prices_E_All_Data_Normalized")) ->
PP_wide

verify_identical_prebuilt(PP_wide)

# [PD] FAO_GDP_deflators ----
#**************************************

FAOSTAT_load_raw_data(DATASETCODE = "PD", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "PD", .Envir = Curr_Envir)
# read in Taiwan values as FAO does not have Taiwan price data
# GDP_deflator_Taiwan

@@ -178,16 +192,16 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
add_units("Unitless") %>%
add_comments("Preprocessed FAOSTAT regional gdp deflators") %>%
add_precursors("QCL_area_code_map",
"aglu/FAO/FAOSTAT/Deflators_E_All_Data_Normalized",
"aglu/FAO/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan") ->
file.path(DIR_RAW_DATA_FAOSTAT, "Deflators_E_All_Data_Normalized"),
file.path(DIR_RAW_DATA_FAOSTAT, "Other_supplementary/GDP_deflator_Taiwan")) ->
PD

verify_identical_prebuilt(PD)


# *[OA]: Population ----

FAOSTAT_load_raw_data(DATASETCODE = "OA", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "OA", .Envir = Curr_Envir)

OA %>% distinct(element, element_code)
OA %>% distinct(item, item_code)
@@ -212,10 +226,78 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
add_title("FAO population") %>%
add_units("tonne") %>%
add_comments("Preprocessed FAO OA") %>%
add_precursors("aglu/FAO/FAOSTAT/Population_E_All_Data_Normalized") ->
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Population_E_All_Data_Normalized")) ->
OA

verify_identical_prebuilt(OA)

# *[CS]: Capital stock ----
FAOSTAT_load_raw_data(DATASETCODE = "CS", .Envir = Curr_Envir)

#CS %>% distinct(element, element_code)
#CS %>% distinct(item, item_code)
# check area mapping
# CS %>% filter(area_code < 400) %>%
# distinct(area, area_code) %>% full_join(QCL_area_code_map, by = c("area_code"))

CS %>% filter(area_code %in% QCL_area_code,
# only keep regions with production
element_code == 6184 # Value US$, 2015 prices
) %>%
select(area_code,
area,
item_code,
item,
element_code,
element,
year,
value,
unit) %>%
rm_accent("item", "area") -> CS1

### output OA ----
CS1 %>%
add_title("FAO capiral stock") %>%
add_units("2015 USD") %>%
add_comments("Preprocessed FAO CS") %>%
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Investment_CapitalStock_E_All_Data_Normalized")) ->
CS

verify_identical_prebuilt(CS)


# *[MK]: macroeconomic stat (GDP) ----
FAOSTAT_load_raw_data(DATASETCODE = "MK", .Envir = Curr_Envir)

#MK %>% distinct(element, element_code)
#MK %>% distinct(item, item_code)

MK %>%
filter(
area_code < 350, # rm aggregated
element_code %in% c(6110, 6184),
item_code == 22008) %>%
select(area_code,
area,
item_code,
item,
element_code,
element,
year,
value,
unit) %>%
rm_accent("item", "area") -> MK1

### output MK ----
MK1 %>%
add_title("FAO GDP") %>%
add_units("2015 million USD or nominal million USD") %>%
add_comments("Preprocessed FAO MK") %>%
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Macro-Statistics_Key_Indicators_E_All_Data_Normalized")) ->
MK

verify_identical_prebuilt(MK)

}

return_data(MODULE_OUTPUTS)
8 changes: 4 additions & 4 deletions R/xfaostat_L101_RawDataPreProc3_SCL_FBS.R
Original file line number Diff line number Diff line change
@@ -18,8 +18,8 @@
module_xfaostat_L101_RawDataPreProc3_SCL_FBS <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/SUA_Crops_Livestock_E_All_Data_Normalized",
FAOSTAT_FILE = "aglu/FAO/FAOSTAT/FoodBalanceSheets_E_All_Data_Normalized",
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "SUA_Crops_Livestock_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "FoodBalanceSheets_E_All_Data_Normalized"),
"QCL_area_code_map")

MODULE_OUTPUTS <-
@@ -108,7 +108,7 @@ module_xfaostat_L101_RawDataPreProc3_SCL_FBS <- function(command, ...) {
add_title("FAO supply utilization account dataset, 2010+, wide") %>%
add_units("tonne") %>%
add_comments("Preprocessed FAOSTAT SCL") %>%
add_precursors("aglu/FAO/FAOSTAT/SUA_Crops_Livestock_E_All_Data_Normalized",
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "SUA_Crops_Livestock_E_All_Data_Normalized"),
"QCL_area_code_map") ->
SCL_wide

@@ -147,7 +147,7 @@ module_xfaostat_L101_RawDataPreProc3_SCL_FBS <- function(command, ...) {
add_title("FAO food balance sheet, 2010-") %>%
add_units("1000 tonne") %>%
add_comments("Preprocessed FAOSTAT SCL") %>%
add_precursors("aglu/FAO/FAOSTAT/FoodBalanceSheets_E_All_Data_Normalized",
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "FoodBalanceSheets_E_All_Data_Normalized"),
"QCL_area_code_map") ->
FBS_wide

Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

#' module_xfaostat_L101_RawDataPreProc4_FBSH_CB
#' module_xfaostat_L101_RawDataPreProc4_FBSH_CBH
#'
#' Preprocess raw faostat data part 4 FBSH and CB
#' Preprocess raw faostat data part 4 FBSH and CBH
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
@@ -15,17 +15,15 @@
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na
#' @author XZ 2023
module_xfaostat_L101_RawDataPreProc4_FBSH_CB <- function(command, ...) {
module_xfaostat_L101_RawDataPreProc4_FBSH_CBH <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/FoodBalanceSheetsHistoric_E_All_Data_Normalized",
FAOSTAT_FILE = "aglu/FAO/FAOSTAT/CommodityBalances_(non-food)_E_All_Data_Normalized",
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "FoodBalanceSheetsHistoric_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "CommodityBalances_(non-food)_(-2013_old_methodology)_E_All_Data_Normalized"),
"QCL_area_code_map")

MODULE_OUTPUTS <-
c(#"FBSH", # Old food balance sheet
#"CB", # Old non food utilization accounting
"FBSH_CB_wide") # Combined FBSH and CB
c("FBSH_CBH_wide") # Combined Old food balance sheet (FBSH) and Old non food utilization accounting (CB)


if(command == driver.DECLARE_INPUTS) {
@@ -47,7 +45,7 @@ module_xfaostat_L101_RawDataPreProc4_FBSH_CB <- function(command, ...) {
if(Process_Raw_FAO_Data == FALSE) {

# Prebuilt data is read here ----
FBSH_CB_wide <- extract_prebuilt_data("FBSH_CB_wide")
FBSH_CBH_wide <- extract_prebuilt_data("FBSH_CBH_wide")

} else {

@@ -82,15 +80,15 @@ module_xfaostat_L101_RawDataPreProc4_FBSH_CB <- function(command, ...) {
unit) %>%
rm_accent("item", "area") -> FBSH1

## *[CB] Non-food Balance ----
## *[CBH] Non-food Balance ----

FAOSTAT_load_raw_data("CB", .Envir = Curr_Envir) # Old FBS-nonfood -2013
FAOSTAT_load_raw_data("CBH", .Envir = Curr_Envir) # Old FBS-nonfood -2013

assertthat::assert_that(CB %>% pull(year) %>% max <= 2013)
assertthat::assert_that(CBH %>% pull(year) %>% max <= 2013)

CB %>% distinct(element, element_code, unit)
CBH %>% distinct(element, element_code, unit)
# Keep population (old)
CB %>% filter(item_code < 2901,
CBH %>% filter(item_code < 2901,
year >= min(FAOSTAT_Hist_Year_FBSH),
!element_code %in% c(5300),
area_code %in% QCL_area_code) %>%
@@ -105,14 +103,14 @@ module_xfaostat_L101_RawDataPreProc4_FBSH_CB <- function(command, ...) {
unit) %>%
rm_accent("item", "area") %>%
mutate(value = value / 1000,
unit = "1000 tonnes") -> CB1 # convert to Kton
unit = "1000 tonnes") -> CBH1 # convert to Kton


## *FBSH_CB merge the two----
## *FBSH_CBH merge the two----
# load processed data

FBSH1 %>% distinct(item_code) %>%
dplyr::intersect(CB %>% distinct(item_code)) %>%
dplyr::intersect(CBH %>% distinct(item_code)) %>%
pull ->
dup_item_code

@@ -129,7 +127,7 @@ module_xfaostat_L101_RawDataPreProc4_FBSH_CB <- function(command, ...) {
) -> element_code_map

FBSH1 %>%
bind_rows(CB1 %>% filter(!item_code %in% dup_item_code)) %>%
bind_rows(CBH1 %>% filter(!item_code %in% dup_item_code)) %>%
filter(!element_code %in% c(645, 664, 674, 684, 511)) %>% # remove non-balance items
mutate(element = gsub(
" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)",
@@ -145,50 +143,51 @@ module_xfaostat_L101_RawDataPreProc4_FBSH_CB <- function(command, ...) {
# but adding back the FBSH one since FBS and FBSH have the same one
left_join(element_code_map,
by = "element") ->
FBSH_CB
FBSH_CBH

# Rice and products and Groundnuts related adjustments
# to make FBS and FBSH more consistent
SHELL_RATE_groundnuts <- 0.7
Mill_RATE_rice <- 0.667

FBSH_CB %>% distinct(element, element_code, unit)
FBSH_CB %>%
FBSH_CBH %>% distinct(element, element_code, unit)
FBSH_CBH %>%
filter(!item_code %in% c(2805, 2556)) %>%
# Adjust to Rice and products and Groundnuts in FBS and bind
bind_rows(
FBSH_CB %>% filter(item_code %in% c(2805)) %>%
FBSH_CBH %>% filter(item_code %in% c(2805)) %>%
mutate(
item = "Rice and products",
item_code = 2807,
value = value / Mill_RATE_rice
) %>%
bind_rows(
FBSH_CB %>% filter(item_code %in% c(2556)) %>%
FBSH_CBH %>% filter(item_code %in% c(2556)) %>%
mutate(
item = "Groundnuts",
item_code = 2552,
value = value / SHELL_RATE_groundnuts
)
)
) ->
FBSH_CB1
FBSH_CBH1

FBSH_CB1 %>% spread(year, value) ->
FBSH_CB_wide
FBSH_CBH1 %>% spread(year, value) ->
FBSH_CBH_wide

### output FBSH_CB and clean memory ----
### output FBSH_CBH and clean memory ----

FBSH_CB_wide %>%
add_title("FAO FBSH and CB, food and commodity balance before 2013, wide", overwrite = T) %>%
FBSH_CBH_wide %>%
add_title("FAO FBSH and CBH, food and commodity balance before 2013, wide", overwrite = T) %>%
add_units("1000 tonne") %>%
add_comments("Preprocessed FAO FBSH_CB") %>%
add_precursors("aglu/FAO/FAOSTAT/FoodBalanceSheetsHistoric_E_All_Data_Normalized",
"aglu/FAO/FAOSTAT/CommodityBalances_(non-food)_E_All_Data_Normalized",
add_comments("Preprocessed FAO FBSH_CBH") %>%
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "FoodBalanceSheetsHistoric_E_All_Data_Normalized"),
file.path(DIR_RAW_DATA_FAOSTAT, "CommodityBalances_(non-food)_(-2013_old_methodology)_E_All_Data_Normalized"),
"QCL_area_code_map") ->
FBSH_CB_wide
FBSH_CBH_wide

verify_identical_prebuilt(FBSH_CBH_wide)

verify_identical_prebuilt(FBSH_CB_wide)
}


4 changes: 2 additions & 2 deletions R/xfaostat_L101_RawDataPreProc5_TCL.R
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
module_xfaostat_L101_RawDataPreProc5_TCL <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Trade_CropsLivestock_E_All_Data_Normalized",
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Trade_CropsLivestock_E_All_Data_Normalized"),
"QCL_area_code_map")

MODULE_OUTPUTS <-
@@ -75,7 +75,7 @@ module_xfaostat_L101_RawDataPreProc5_TCL <- function(command, ...) {
add_title("FAO TCL") %>%
add_units("tonne") %>%
add_comments("Preprocessed FAO TCL") %>%
add_precursors("aglu/FAO/FAOSTAT/Trade_CropsLivestock_E_All_Data_Normalized",
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Trade_CropsLivestock_E_All_Data_Normalized"),
"QCL_area_code_map") ->
TCL_wide

4 changes: 2 additions & 2 deletions R/xfaostat_L101_RawDataPreProc6_TM.R
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Trade_DetailedTradeMatrix_E_All_Data_Normalized",
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Trade_DetailedTradeMatrix_E_All_Data_Normalized"),
"QCL_area_code_map")

MODULE_OUTPUTS <-
@@ -121,7 +121,7 @@ module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {
add_title("FAO bilateral trade (TM)", overwrite = T) %>%
add_units("tonne") %>%
add_comments("Preprocessed FAO TM_wide") %>%
add_precursors("aglu/FAO/FAOSTAT/Trade_DetailedTradeMatrix_E_All_Data_Normalized",
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Trade_DetailedTradeMatrix_E_All_Data_Normalized"),
"QCL_area_code_map") ->
TM_bilateral_wide

9 changes: 5 additions & 4 deletions R/xfaostat_L101_RawDataPreProc7_FO.R
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
module_xfaostat_L101_RawDataPreProc7_FO <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Forestry_E_All_Data_Normalized")
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Forestry_E_All_Data_Normalized"))

MODULE_OUTPUTS <-
c("FO_RoundwoodProducts") # Forestry data
@@ -48,12 +48,12 @@ module_xfaostat_L101_RawDataPreProc7_FO <- function(command, ...) {
# Load required inputs ----
get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)

FAOSTAT_load_raw_data(DATASETCODE = "FO", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "FO", .Envir = Curr_Envir)

FO %>% filter(year >= min(FAOSTAT_Hist_Year),
area_code < 350, # Rm aggregated area
item_code %in% c(1861, 1864, 1865, 2038, 1868, 1871,
1634, 1873, 1872, 1875)) %>%
1634, 1873, 1872, 1875, 1876)) %>%
# see meta data in https://www.fao.org/faostat/en/#data/FO
# 1861 Roundwood
# 1864 Wood fuel
@@ -65,6 +65,7 @@ module_xfaostat_L101_RawDataPreProc7_FO <- function(command, ...) {
# 1873 Wood-based panels
# 1872 Sawnwood
# 1875 Wood pulp
# 1876 Paper and paperboard
select(area_code,
area,
item_code,
@@ -84,7 +85,7 @@ module_xfaostat_L101_RawDataPreProc7_FO <- function(command, ...) {
add_title("FAO forestry data") %>%
add_units("m3") %>%
add_comments("FAO raw forestry data and main products") %>%
add_precursors("aglu/FAO/FAOSTAT/Forestry_E_All_Data_Normalized") ->
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Forestry_E_All_Data_Normalized")) ->
FO_RoundwoodProducts


12 changes: 6 additions & 6 deletions R/xfaostat_L101_RawDataPreProc8_RL_RFN.R
Original file line number Diff line number Diff line change
@@ -18,8 +18,8 @@
module_xfaostat_L101_RawDataPreProc8_RL_RFN <- function(command, ...) {

MODULE_INPUTS <-
c(FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Inputs_LandUse_E_All_Data_Normalized",
FAOSTAT_FILE = "aglu/FAO/FAOSTAT/Inputs_FertilizersNutrient_E_All_Data_Normalized")
c(FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Inputs_LandUse_E_All_Data_Normalized"),
FAOSTAT_FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Inputs_FertilizersNutrient_E_All_Data_Normalized"))

MODULE_OUTPUTS <-
c("RL", # Land
@@ -51,7 +51,7 @@ module_xfaostat_L101_RawDataPreProc8_RL_RFN <- function(command, ...) {
# Load required inputs ----
get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)

FAOSTAT_load_raw_data(DATASETCODE = "RL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "RL", .Envir = Curr_Envir)

RL %>%
filter(year >= min(FAOSTAT_Hist_Year),
@@ -65,15 +65,15 @@ module_xfaostat_L101_RawDataPreProc8_RL_RFN <- function(command, ...) {
add_title("FAO land data") %>%
add_units("ha") %>%
add_comments("FAO raw land data") %>%
add_precursors("aglu/FAO/FAOSTAT/Inputs_LandUse_E_All_Data_Normalized") ->
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Inputs_LandUse_E_All_Data_Normalized")) ->
RL

verify_identical_prebuilt(RL)



# RFN ----
FAOSTAT_load_raw_data(DATASETCODE = "RFN", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT, .Envir = Curr_Envir)
FAOSTAT_load_raw_data(DATASETCODE = "RFN", .Envir = Curr_Envir)

RFN %>%
filter(year %in% FAOSTAT_Hist_Year,
@@ -87,7 +87,7 @@ module_xfaostat_L101_RawDataPreProc8_RL_RFN <- function(command, ...) {
add_title("FAO fertilizer data") %>%
add_units("t N") %>%
add_comments("FAO raw fertilizer data") %>%
add_precursors("aglu/FAO/FAOSTAT/Inputs_FertilizersNutrient_E_All_Data_Normalized") ->
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Inputs_FertilizersNutrient_E_All_Data_Normalized")) ->
RFN

verify_identical_prebuilt(RFN)
39 changes: 21 additions & 18 deletions R/xfaostat_L102_ProductionArea.R
Original file line number Diff line number Diff line change
@@ -18,8 +18,9 @@
module_xfaostat_L102_ProductionArea <- function(command, ...) {

MODULE_INPUTS <-
c(#FILE = "aglu/FAO/FAO_an_items_PRODSTAT",
"QCL_wide", "FBS_wide", "FBSH_CB_wide")
c("QCL_wide",
"FBS_wide",
"FBSH_CBH_wide")

MODULE_OUTPUTS <-
c("QCL_PROD",
@@ -36,9 +37,8 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {

year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.
QCL_wide <- element_code <- element <- area_code <- item_code <- area <-
item <- unit <- FBS_wide <- FBSH_CB_wide <- GCAM_commodity <- Production <-
`Producing Animals/Slaughtered` <- Yield <- QCL_COMM_AN_PRIMARY_item <-
FAO_an_items_PRODSTAT_item <- NULL
item <- unit <- FBS_wide <- FBSH_CBH_wide <- GCAM_commodity <- Production <-
`Producing Animals/Slaughtered` <- Yield <- QCL_COMM_AN_PRIMARY_item <- NULL

all_data <- list(...)[[1]]

@@ -55,7 +55,7 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
FBS_wide %>% gather_years() %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBS

FBSH_CB_wide %>% gather_years() %>%
FBSH_CBH_wide %>% gather_years() %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBSH_CB


@@ -68,14 +68,15 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {

# Check elements
QCL %>% distinct(element, element_code, unit)
QCL %>% distinct(item, item_code) # 276 = 160 primary crop + 46 primary an + 17 live animal + 54 others
QCL %>% distinct(item, item_code) # 276 = 160 primary crop (-2) + 45 primary an + 17 live animal + 57 others

# QCL will be grouped by elements
# Pull primary crop items with positive harvested area 160
QCL %>% filter(element_code == 5312) %>% filter(value >0) %>%
# Pull primary crop items with positive harvested area 160 - 2
# Brazil nuts, with shell (216) and Mushrooms and truffles (449) are removed since FAO removed area harvested
QCL %>% filter(element_code == 5312) %>% filter(value > 0) %>%
distinct(item_code, item) -> QCL_COMM_CROP_PRIMARY

# Primary animal products, including fat hides etc. 46
# Primary animal products, including fat hides etc. 45
QCL %>% filter(element_code %in% c(5410, 5413, 5420, 5417, 5422, 5424, 5320)) %>%
distinct(item, item_code) -> QCL_COMM_AN_PRIMARY

@@ -101,7 +102,7 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
distinct(element,element_code, unit) -> UnitMap

QCL %>%
filter(item_code %in% c(QCL_COMM_CROP_PRIMARY %>% pull(item_code))) %>% #160 primary items
filter(item_code %in% c(QCL_COMM_CROP_PRIMARY %>% pull(item_code))) %>% #158 primary items
filter(element_code != 5419) %>% # rm yield
# complete all
complete(nesting(area_code, area), nesting(item_code, item), nesting(element_code, element, unit), year) %>%
@@ -116,11 +117,11 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
QCL_CROP_PRIMARY

#QCL_CROP_PRIMARY %>% FF_check_count_plot()
#QCL_CROP_PRIMARY %>% nrow()/(160); QCL_CROP_PRIMARY %>% distinct(year); QCL_CROP_PRIMARY %>% distinct(area_code)
#QCL_CROP_PRIMARY %>% nrow()/(158); QCL_CROP_PRIMARY %>% distinct(year); QCL_CROP_PRIMARY %>% distinct(area_code)

## QCL_COMM_AN_PRIMARY ----
#*******************************************
# 45 items 19 meat + 2 egg + 5 milk + (2+9) bee & hide + 13 fat & offal
# 45 items 19 meat + 2 egg + 5 milk + 1 bee + 18 fat & offal & hide & snails
QCL %>%
filter(item_code %in% c(QCL_COMM_AN_PRIMARY %>% pull(item_code))) %>%
distinct(element_code, element, unit)
@@ -406,7 +407,7 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
add_title("FAO live animal stock and production") %>%
add_units("various") %>%
add_comments("Detailed FAO QCL data processing for live animal and production") %>%
add_precursors("QCL_wide", "FBS_wide", "FBSH_CB_wide") ->
add_precursors("QCL_wide", "FBS_wide", "FBSH_CBH_wide") ->
QCL_AN_LIVEANIMAL

QCL_AN_PRIMARY_MILK %>%
@@ -451,9 +452,9 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
# dpi = 200, width = 9, height = 5 )
# rm(p)

QCL_ALL %>% distinct(year); # 60 years
QCL_ALL %>% distinct(year); # 53 years
QCL_ALL %>% distinct(element, element_code, unit) # QCL_COMM_AN_LIVEANIMAL_MEATEQ has no element_code
QCL_ALL %>% distinct(item) # 160 primary crop + 46 primary an + 54 others + 17 +12
QCL_ALL %>% distinct(item) # 158 primary crop + 45 primary an + 57 others + 17 +12


# QCL_ALL %>%
@@ -469,6 +470,8 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {

# P.S. Check primary product mapping ----

# Load "aglu/FAO/FAO_an_items_PRODSTAT"

#FAO_an_items_PRODSTAT <- FAO_an_items_PRODSTAT %>% filter(!is.na(GCAM_commodity))

# # Get fish items through mapping
@@ -494,8 +497,8 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
# FF_join_checkmap(c("QCL_COMM_CROP_PRIMARY", "FAO_ag_items_PRODSTAT"), "item_code", "item",.ENVIR = Curr_Envir) %>%
# mutate(match = if_else(QCL_COMM_CROP_PRIMARY_item == FAO_ag_items_PRODSTAT_item , T, F))
# checkitem %>% filter(is.na(match)|match == F)
# # 160 primary items (matching here) + 15/16 fodder crops
#
# 158 primary items (matching here) + 15/16 fodder crops

# checkitem <-
# FF_join_checkmap(c("QCL_COMM_AN_PRIMARY", "FAO_an_items_PRODSTAT"), "item_code", "item",.ENVIR = Curr_Envir) %>%
# mutate(match = if_else(QCL_COMM_AN_PRIMARY_item == FAO_an_items_PRODSTAT_item, T, F))
4 changes: 2 additions & 2 deletions R/xfaostat_L104_ProductionAreaAddFodder.R
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
module_xfaostat_L104_ProductionAreaAddFodder <- function(command, ...) {

MODULE_INPUTS <-
c(FILE = "aglu/FAO/FAOSTAT/Other_supplementary/FAO_fodder_Prod_t_HA_ha_PRODSTAT_2011",
c(FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Other_supplementary/FAO_fodder_Prod_t_HA_ha_PRODSTAT_2011"),
"QCL_area_code_map")

MODULE_OUTPUTS <-
@@ -103,7 +103,7 @@ module_xfaostat_L104_ProductionAreaAddFodder <- function(command, ...) {
add_title("Processed fodder crop production and area") %>%
add_units("tonne and ha") %>%
add_comments("Data is from old GCAM data v5.4") %>%
add_precursors("aglu/FAO/FAOSTAT/Other_supplementary/FAO_fodder_Prod_t_HA_ha_PRODSTAT_2011",
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "Other_supplementary/FAO_fodder_Prod_t_HA_ha_PRODSTAT_2011"),
"QCL_area_code_map")->
QCL_FODDERCROP

43 changes: 23 additions & 20 deletions R/xfaostat_L105_DataConnectionToSUA.R
Original file line number Diff line number Diff line change
@@ -18,12 +18,12 @@
module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

MODULE_INPUTS <-
c(FILE = "aglu/FAO/FAO_items",
c(FILE = file.path(DIR_RAW_DATA_FAOSTAT, "FAO_items"),
"QCL_PROD",
"QCL_AN_LIVEANIMAL_MEATEQ",
"TCL_wide",
"TM_bilateral_wide",
"FBSH_CB_wide",
"FBSH_CBH_wide",
"FBS_wide",
"SCL_wide")

@@ -38,7 +38,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.
SCL_wide <- element_code <- element <- area_code <- item_code <- area <-
item <- unit <- FBS_wide <- FBSH_CB_wide <- TCL_wide <- TM_bilateral_wide <-
item <- unit <- FBS_wide <- FBSH_CBH_wide <- TCL_wide <- TM_bilateral_wide <-
QCL_PROD <- FAO_items <- tier <- QCL <- oil <-
cake <- SCL_item_oil <- SCL_item_cake <- cake_rate <- cake_rate_world <-
DS_key_coproduct_item <- Production <- Import <- Export <- DS_demand <-
@@ -62,9 +62,9 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBS

FBSH_CB_wide %>% gather_years() %>%
FBSH_CBH_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBSH_CB
FAOSTAT_AREA_RM_NONEXIST() -> FBSH_CBH

TCL_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
@@ -74,7 +74,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
filter(value > 0) -> TM_bilateral

rm(SCL_wide, FBS_wide, FBSH_CB_wide, TCL_wide, TM_bilateral_wide)
rm(SCL_wide, FBS_wide, FBSH_CBH_wide, TCL_wide, TM_bilateral_wide)


# Get area code in QCL that is consistent with FBS e.g., after 2010 only
@@ -85,7 +85,8 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

SCL %>% distinct(element)
# Update SCL element name for convenience
SCL %>% mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element)) ->
# Will need to update element with element_code for better improvemence
SCL %>% mutate(element = gsub(" supply quantity \\(tonnes\\)| \\(non-food\\)| quantity| Quantity", "", element)) ->
SCL
SCL_element_new <-
c("Opening stocks", "Production", "Export", "Import", "Stock Variation",
@@ -138,7 +139,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
# keep only balance items
filter(!element_code %in% c(645, 664, 674, 684)) %>%
# simplify elements and make them consistent with SUA
mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element),
mutate(element = gsub("supply quantity \\(tonnes\\)| \\(non-food\\)| quantity| Quantity", "", element),
element = replace(element, element == "Losses", "Loss"),
element = replace(element, element == "Processing", "Processed")) %>%
# convert units back to tonnes first since FBS originally used 1000 tons
@@ -160,7 +161,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
# Merge Sudan regions to be consistent with data
# Mainly for storage data concerns
# And only keep data > min(FAOSTAT_Hist_Year_FBS)
for (.DF in c("SCL", "TCL_TM", "TCL_gross", "FBSH_CB", "FBS", "QCL_PROD")) {
for (.DF in c("SCL", "TCL_TM", "TCL_gross", "FBSH_CBH", "FBS", "QCL_PROD")) {
get(.DF) %>% filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
# merge Sudan and South Sudan
FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL(SUDAN2012_MERGE = T) %>%
@@ -385,8 +386,10 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

# 3. Process items in FAO_items to get Balanced SUA data ----
## 3.1 Bal_new_tier1 ----
# Tier1 includes 168 items with best sources e.g. bilateral trade (TM) prodstat (QCL) and supply-utilization-account (SCL)
# SCL has balanced data processed by FAO but the quality was poor with low consistency
# Tier1 includes 209 = 210-1 items with best sources e.g. bilateral trade (TM) prodstat (QCL) and supply-utilization-account (SCL)
# Note that item 237 Oil soybean was moved from Tier1 to Tier2 to use SCL for production due to Brazil data issue in QCL
# SCL has balanced data processed by FAO but the quality was poor with low consistency


Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 1) %>% pull(item_code)) %>%
SUA_TEMPLATE_LEFT_JOIN("QCL") %>%
@@ -406,7 +409,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
assert_FBS_balance(Bal_new_tier1)

## 3.2 Bal_new_tier2 ----
# Tier2 includes 139 items that had no data or low quality data in QCL so used production from SCL
# Tier2 includes 204 items that had no data or low quality data in QCL so used production from SCL

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 2) %>% pull(item_code)) %>%
SUA_TEMPLATE_LEFT_JOIN("TM") %>%
@@ -423,7 +426,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {


## 3.3 Bal_new_tier3 ----
# Tier3 includes 61 items that had QCL but no bilateral trade data
# Tier3 includes 21 items that had QCL but no bilateral trade data
# so use gross trade from SCL

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 3) %>% pull(item_code)) %>%
@@ -449,7 +452,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
assert_FBS_balance(Bal_new_tier3)

## 3.4 Bal_new_tier4 ----
# Tier4 includes 84 items included in SCL but not in Tier1-3
# Tier4 includes 40 items included in SCL but not in Tier1-3

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 4) %>% pull(item_code)) %>%
SUA_TEMPLATE_LEFT_JOIN("SCL") %>%
@@ -470,7 +473,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {


## 3.5 Bal_new_tier5 ----
#Tier7 includes 12 fish items from FBS and FBSH. Item code came from FBS as well
#Tier5 includes 12 fish items from FBS and FBSH. Item code came from FBS as well

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 5) %>% pull(item_code)) %>%
SUA_TEMPLATE_LEFT_JOIN("FBS") %>%
@@ -491,6 +494,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
## 3.6 Bal_new_tier6 ----
# Tier6 includes 29 items that included in QCL for production but not in Tier1 to Tier5
# "Rice, paddy (rice milled equivalent)" removed as not needed and excluded by FAOSTAT in 2023
# 773 (Flax, processed but not spun) is changed to 771 (Flax, raw or retted)

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 6) %>% pull(item_code)) %>%
SUA_TEMPLATE_LEFT_JOIN("QCL") %>%
@@ -523,9 +527,8 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
SUA_bal_adjust %>% # Unit is converted to 1000 tonnes!
left_join(FAO_items %>% select(item_code, item), by = "item_code") ->
Bal_new_tier6
assert_FBS_balance(.DF = Bal_new_tier6)


assert_FBS_balance(.DF = Bal_new_tier6)


## 3.7 Bal_new_tier7 ----
@@ -643,20 +646,20 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

assert_FBS_balance(.DF = Bal_new_all)

rm(TCL_gross, TCL_TM, SCL, FBS, FBSH_CB, FAO_items)
rm(TCL_gross, TCL_TM, SCL, FBS, FBSH_CBH, FAO_items)
rm(list = ls(pattern = "Bal_new_tier*"))


Bal_new_all %>%
add_title("Bal_new_all") %>%
add_units("Ktonne") %>%
add_comments("Preprocessed FAO SUA 2010 - 2021") %>%
add_precursors("aglu/FAO/FAO_items",
add_precursors(file.path(DIR_RAW_DATA_FAOSTAT, "FAO_items"),
"QCL_PROD",
"QCL_AN_LIVEANIMAL_MEATEQ",
"TCL_wide",
"TM_bilateral_wide",
"FBSH_CB_wide",
"FBSH_CBH_wide",
"FBS_wide",
"SCL_wide")->
Bal_new_all
16 changes: 8 additions & 8 deletions R/xfaostat_L106_FoodMacroNutrient.R
Original file line number Diff line number Diff line change
@@ -21,8 +21,8 @@ module_xfaostat_L106_FoodMacroNutrient <- function(command, ...) {
c("SCL_wide",
"FBS_wide",
"OA",
FILE = "aglu/FAO/FAO_an_items_cal_SUA",
FILE = "aglu/FAO/MAPPING_FAO_FBS_SUA")
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "FAO_an_items_cal_SUA"),
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Mapping_FAO_FBS_SUA"))

MODULE_OUTPUTS <-
c("SUA_food_macronutrient_rate")
@@ -39,7 +39,7 @@ module_xfaostat_L106_FoodMacroNutrient <- function(command, ...) {
value_world <- value_reg <- Diff <- p_Diff <- quantile <- `Food supply quantity (kg/capita/yr)` <-
`Protein supply quantity (g/capita/day)` <- `Fat supply quantity (g/capita/day)` <-
FAO_an_items_cal_SUA <- Mcal_t <- fat_Perc <- protein_Perc <- FAO_FBS_code <-
SCL_item_code <- CPC_code <- MAPPING_FAO_FBS_SUA <- unit <- element <-
SCL_item_code <- CPC_code <- Mapping_FAO_FBS_SUA <- unit <- element <-
area_code <- item_code <- element_code <- OA <- FBS_wide <- SCL_wide <-
`Food supply (kcal/capita/day)` <- NULL

@@ -125,7 +125,7 @@ module_xfaostat_L106_FoodMacroNutrient <- function(command, ...) {
#*******************************************

SCL %>% filter(element_code %in% c(261, 271, 281, 5141)) %>% #All 3 cal protein fats and food in ton
right_join(MAPPING_FAO_FBS_SUA %>%
right_join(Mapping_FAO_FBS_SUA %>%
filter(!is.na(CPC_code)) %>%
select(item_code = SCL_item_code, FAO_FBS_code, FBS_label),
by = "item_code") %>%
@@ -197,7 +197,7 @@ module_xfaostat_L106_FoodMacroNutrient <- function(command, ...) {
#*******************************************

# Adding the 12 fish item from FBS
MAPPING_FAO_FBS_SUA %>% filter(is.na(CPC_code)) %>%
Mapping_FAO_FBS_SUA %>% filter(is.na(CPC_code)) %>%
select(item = FBS_label, item_code = FAO_FBS_code) -> Fish_item

FBS %>% right_join(Fish_item, by = c("item_code", "item")) -> FBS_fish
@@ -245,7 +245,7 @@ module_xfaostat_L106_FoodMacroNutrient <- function(command, ...) {

rm(SUA_food_macronutrient, SUA_food_yearmean,
SUA_food_yearmean_fill, SUA_food_yearareamean,
OA, POP, SCL, FBS, MAPPING_FAO_FBS_SUA,
OA, POP, SCL, FBS, Mapping_FAO_FBS_SUA,
SUA_food_macronutrient_rate_nofish, FAO_an_items_cal_SUA)
rm(Fish_item)
rm(checkarea, checkitem, checkelement)
@@ -259,8 +259,8 @@ module_xfaostat_L106_FoodMacroNutrient <- function(command, ...) {
add_precursors("SCL_wide",
"FBS_wide",
"OA",
"aglu/FAO/FAO_an_items_cal_SUA",
"aglu/FAO/MAPPING_FAO_FBS_SUA") ->
file.path(DIR_RAW_DATA_FAOSTAT, "FAO_an_items_cal_SUA"),
file.path(DIR_RAW_DATA_FAOSTAT, "Mapping_FAO_FBS_SUA")) ->
SUA_food_macronutrient_rate

# P.S. ----
5 changes: 3 additions & 2 deletions R/xfaostat_L201_Forestry.R
Original file line number Diff line number Diff line change
@@ -42,14 +42,15 @@ module_xfaostat_L201_Forestry <- function(command, ...) {

## Proprocess and quick clean ----

c(1865, 1634, 1873, 872, 1875) -> Key_FO_Items
c(1865, 1634, 1873, 1872, 1875, 1876) -> Key_FO_Items
# 1865 Industrial roundwood
# 1634 Veneer sheets
# 1873 Wood-based panels
# 1872 Sawnwood
# 1875 Wood pulp
# 1876 Paper and paperboard

FO_RoundwoodProducts %>% filter(item_code %in% c(1865, 1634, 1873, 872, 1875)) ->
FO_RoundwoodProducts %>% filter(item_code %in% c(1865, 1634, 1873, 1872, 1875, 1876)) ->
L201.FO_RoundwoodProducts


63 changes: 38 additions & 25 deletions R/xfaostat_helper_funcs.R
Original file line number Diff line number Diff line change
@@ -12,8 +12,9 @@
#' @return Information of FAOSTAT input dataset
#' @export

gcamfaostat_metadata <- function(.DIR_RAW_DATA_FAOSTAT = DIR_RAW_DATA_FAOSTAT,
OnlyReturnDatasetCodeRequired = FALSE){
gcamfaostat_metadata <- function(.DIR_RAW_DATA_FAOSTAT = file.path("inst/extdata", DIR_RAW_DATA_FAOSTAT),
OnlyReturnDatasetCodeRequired = FALSE,
Save_metadata = FALSE){

assertthat::assert_that(OnlyReturnDatasetCodeRequired == TRUE|OnlyReturnDatasetCodeRequired == FALSE)

@@ -24,32 +25,41 @@ gcamfaostat_metadata <- function(.DIR_RAW_DATA_FAOSTAT = DIR_RAW_DATA_FAOSTAT,
"TCL", "TM", # Gross and bilateral trade
"SCL", # Supply utilization accounting
"FBS", "FBSH", # New and old food balance sheet
"CB", # Old non food utilization accounting
#"CB",
"CBH", # New and old non food utilization accounting
"RFN", # Fertilizer by nutrient
"RL", # Land Use
"FO", # Forest production and trade
"OA" # Population
"OA", # Population
"CS" # Capital stock
)

if (OnlyReturnDatasetCodeRequired == T) {
return(FAO_dataset_code_required)
}

DIR_FAOSTAT_METADATA <- file.path(.DIR_RAW_DATA_FAOSTAT, "metadata_log")
dir.create(DIR_FAOSTAT_METADATA, showWarnings = F)

# Save a table includes all FAOSTAT data info and links
fao_metadata <- FAOSTAT_metadata() %>% filter(datasetcode %in% FAO_dataset_code_required)
readr::write_csv(fao_metadata, file.path(DIR_FAOSTAT_METADATA, paste0("FAOSTAT_METADATA_", Sys.Date(),".csv")))
rlang::inform(paste0("A Full FAOSTAT metadata downloaded and updated in `",
file.path(.DIR_RAW_DATA_FAOSTAT, "metadata_log", "`")))

if (Save_metadata == TRUE) {

DIR_FAOSTAT_METADATA <- file.path(.DIR_RAW_DATA_FAOSTAT, "metadata_log")
dir.create(DIR_FAOSTAT_METADATA, showWarnings = F)

readr::write_csv(fao_metadata, file.path(DIR_FAOSTAT_METADATA, paste0("FAOSTAT_METADATA_", Sys.Date(),".csv")))
rlang::inform(paste0("A Full FAOSTAT metadata downloaded and updated in `",
file.path(.DIR_RAW_DATA_FAOSTAT, "metadata_log", "`")))
}


rlang::inform("---------------------------------------------------------")

rlang::inform(paste0("See returned table for the infomation of FAOSTAT dataset processed in this R package"))

DataCodePrebuilt <-
PREBUILT_DATA %>% names() %>% strsplit(split = "_") %>% unlist %>%
setdiff(c("wide", "Roundwood", "code", "area", "bilateral", "map"))
setdiff(c("wide", "RoundwoodProducts", "code", "area", "bilateral", "map"))

FF_rawdata_info(DATA_FOLDER = .DIR_RAW_DATA_FAOSTAT,
DATASETCODE = FAO_dataset_code_required,
@@ -88,7 +98,7 @@ gcamfaostat_metadata <- function(.DIR_RAW_DATA_FAOSTAT = DIR_RAW_DATA_FAOSTAT,
#' @export

FF_download_FAOSTAT <- function(DATASETCODE,
DATA_FOLDER = DIR_RAW_DATA_FAOSTAT,
DATA_FOLDER = file.path("inst/extdata", DIR_RAW_DATA_FAOSTAT),
OverWrite = FALSE){

FAOSTAT_metadata <- `download.file` <- NULL
@@ -98,7 +108,7 @@ FF_download_FAOSTAT <- function(DATASETCODE,
assertthat::assert_that(OverWrite == TRUE|OverWrite == FALSE)
assertthat::assert_that(length(DATASETCODE) == 1, msg = "Single dataset allowed; consider using a loop or the function FF_rawdata_info() for downloading multiple datasets")

dir.create(DIR_RAW_DATA_FAOSTAT, showWarnings = F)
dir.create(DATA_FOLDER, showWarnings = F)



@@ -144,11 +154,17 @@ FF_download_FAOSTAT <- function(DATASETCODE,

FF_download_RemoteArchive <-
function(DATASETCODE = NULL,
RemoteArchiveURL = "https://zenodo.org/record/8260225/files/",
DATA_FOLDER = DIR_RAW_DATA_FAOSTAT,
RemoteArchiveURL = "https://zenodo.org/record/13941470/files/",
DATA_FOLDER = file.path("inst/extdata", DIR_RAW_DATA_FAOSTAT),
OverWrite = FALSE){

warnings("The current archive is for GCAM v7 release, not the latest!")


# version v1.0.0 (August, 2022): "https://zenodo.org/record/8260225/files/"
# version v1.0.1_temp (June, 2024):"https://zenodo.org/record/11602356/files/"
# version v1.0.1 (October, 2024):"https://zenodo.org/record/13941470/files/"

warnings("The current archive is for gcamfaostat v1.0.1")

assertthat::assert_that(is.character(DATASETCODE))
assertthat::assert_that(is.character(RemoteArchiveURL))
@@ -209,7 +225,7 @@ FF_download_RemoteArchive <-
#' @export

FF_rawdata_info <- function(
DATA_FOLDER = DIR_RAW_DATA_FAOSTAT,
DATA_FOLDER = file.path("inst/extdata", DIR_RAW_DATA_FAOSTAT),
DATASETCODE,
DOWNLOAD_NONEXIST = FALSE,
FAOSTAT_or_Archive = "Archive"){
@@ -222,7 +238,10 @@ FF_rawdata_info <- function(
assertthat::assert_that(is.character(DATASETCODE))
assertthat::assert_that(is.logical(DOWNLOAD_NONEXIST))
assertthat::assert_that(FAOSTAT_or_Archive == "FAOSTAT"|FAOSTAT_or_Archive == "Archive")
assertthat::assert_that(file.exists(DATA_FOLDER))

if (!file.exists(DATA_FOLDER)) {
DATA_FOLDER <- "."
}

file.info(dir(DATA_FOLDER, full.names = T)) %>%
tibble::rownames_to_column(var = "filelocation") %>%
@@ -234,14 +253,8 @@ FF_rawdata_info <- function(
#localfilesize = utils:::format.object_size(size, "MB", digits = 0),
localfilesize = paste0(round(size/10^6, digits = 0), " MB" )) %>%
# Join the latest metadata
# Note that FAO raw data had a typo (missing space) in Trade_CropsLivestock_E_All_Data_(Normalized).zip
# Temporary fix here
# This was fixed in 2022 updates
right_join(FAOSTAT_metadata() %>% filter(datasetcode %in% DATASETCODE) %>%
mutate(filelocation = basename(filelocation)), #%>%
#mutate(filelocation = replace(filelocation,
# filelocation == "Trade_CropsLivestock_E_All_Data_(Normalized).zip",
# "Trade_Crops_Livestock_E_All_Data_(Normalized).zip")),
mutate(filelocation = basename(filelocation)),
by = "filelocation") %>%
transmute(datasetcode, datasetname,
FAOupdate = dateupdate, Localupdate = mtime,
@@ -327,7 +340,7 @@ FAOSTAT_metadata <- function (code = NULL){
#' @export

FAOSTAT_load_raw_data <- function(DATASETCODE,
DATA_FOLDER = DIR_RAW_DATA_FAOSTAT,
DATA_FOLDER = file.path("inst/extdata", DIR_RAW_DATA_FAOSTAT),
GET_MAPPINGCODE = NULL,
.Envir = NULL ){
assertthat::assert_that(is.character(DATASETCODE))
73 changes: 73 additions & 0 deletions R/xfaostat_visualization_funcs.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,78 @@



#' chord_wrapper: a wrapper function of the chordDiagram function
#' Title
#'
#' @param .DF Input data frame.
#' @param .FigTitle Figure title
#' @param .SaveDir The directory for saving the plot; the default is man/figures.
#' @param .GRIDCOLOR Colors of grid; consider using circlize::rand_color function
#' @param .ORDER Order of the gird
#' @param .SaveName Name of the saved file in .SaveDir
#' @param .SaveScaler A scaler controling the size (dpi = 600)
#' @importFrom grDevices dev.off png
#'
#' @return A saved plot in .SaveDir
#' @export


chord_wrapper <- function(.DF,
.FigTitle = NULL,
.SaveDir = "../man/figures",
.GRIDCOLOR = NULL,
.ORDER = NULL,
.SaveName,
.SaveScaler = 1){

# treemap save to a path
png(filename= file.path(.SaveDir, paste0(.SaveName,".png")), res = 600,
width= 7000 * .SaveScaler, height= 7000 * .SaveScaler )


chordDiagram(as.data.frame(.DF),
transparency = 0.5,
directional = 1,
direction.type = c("diffHeight", "arrows"),
diffHeight = -uh(2, "mm")
,link.arr.type = "big.arrow"
,annotationTrack = c("grid")
,grid.col = .GRIDCOLOR
,order = .ORDER
,preAllocateTracks = list(list(track.height = c(0.3))
,list(track.height = c(0.035))
))

title(main = .FigTitle)

circos.track(track.index = 3, panel.fun = function(x, y) {
circos.axis(h = 1, labels.cex = 0.8)
}, bg.border = NA)

circos.track(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim")
xplot = get.cell.meta.data("xplot")
ylim = get.cell.meta.data("ylim")
sector.name = get.cell.meta.data("sector.index")

#make text label vertical when space is too small; cex to adjust font size

if(abs(xplot[2] - xplot[1]) < 20 | abs(xplot[2] - xplot[1]) > 340) {
circos.text(mean(xlim), ylim[1], sector.name, facing = "clockwise",
niceFacing = TRUE, adj = c(0, 0.5), col = "black",
cex = 1)
} else {
circos.text(mean(xlim), ylim[1], sector.name, facing = "inside",
niceFacing = TRUE, adj = c(0.5, 0), col= "black",
cex = 1)
} }, bg.border = NA)

dev.off() #dump

circos.clear()
}


#' treemap_wrapper: a warpper function of the treemap function
#'
#' @param .DF Input data frame. The data frame needs to include index columns (need to be first columns) and a value column.
765 changes: 765 additions & 0 deletions R/yextension_L100_FoodBalanceSheet.R

Large diffs are not rendered by default.

193 changes: 125 additions & 68 deletions R/xfaostat_L999_CSVExport.R → R/yfaostat_GCAM_CSVExport.R

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions R/yfaostat_SUA_FBS_CSVExport.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

#' module_yfaostat_SUA_FBS_CSVExport
#'
#' Generate supply utilization balance in primary equivalent
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
#' \code{command} is "MAKE") all the generated outputs
#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities.
#' @importFrom assertthat assert_that
#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
#' first case_when vars
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na
#' @author XZ Sep2024
module_yfaostat_SUA_FBS_CSVExport <- function(command, ...) {

MODULE_INPUTS <-
c(FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Mapping_SUA_PrimaryEquivalent"),
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "SUA_item_code_map"),
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Mapping_FAO_iso_reg"),
"GCAM_APE_after2010",
"Bal_new_all",
"FBS_wide",
"FAO_Food_Macronutrient_All")

MODULE_OUTPUTS <-
c(CSV = "GCAMFAOSTAT_DataArchive_SUA")

if(command == driver.DECLARE_INPUTS) {
return(MODULE_INPUTS)
} else if(command == driver.DECLARE_OUTPUTS) {
return(MODULE_OUTPUTS)
} else if(command == driver.MAKE) {

year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.

all_data <- list(...)[[1]]


Curr_Envir <- environment()



if (OUTPUT_Export_CSV == "DataArchive_SUA_FBS") {

# Load required inputs ----
get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)


# SUA and FBS ----
## *SUA ----

Bal_new_all %>% filter(value != 0.0) %>%
transmute(area_code, item_code, element, year, value) %>%
add_title("GCAMFAOSTAT_DataArchive_SUA") %>%
add_units("1000 tonnes") %>%
add_comments("gcamfaostat Export CSV") %>%
add_precursors("Bal_new_all") ->
GCAMFAOSTAT_DataArchive_SUA


output_csv_data(
gcam_dataset = GCAMFAOSTAT_DataArchive_SUA,
out_filename = "GCAMFAOSTAT_DataArchive_SUA",
col_type_nonyear = "iifin",
title = "Supply_utilization_accounting for all FAO items in FAOSTAT_Hist_Year_FBS",
unit = "1000 tonnes",
code = "SCL",
description = "Data is compiled and generated by gcamfaostat. Data is balanced in trade, supply_utilization, and storage",
out_dir = DIR_OUTPUT_CSV,
GZIP = F)

rm(Bal_new_all)


}
else {

lapply(MODULE_OUTPUTS[MODULE_OUTPUTS %>% names() == "CSV"],
function(output){
assign(output, empty_data() %>%
add_title(output), envir = Curr_Envir)
})
}


return_data(MODULE_OUTPUTS)

} else {
stop("Unknown command")
}
}
39 changes: 25 additions & 14 deletions R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R
Original file line number Diff line number Diff line change
@@ -69,7 +69,6 @@ module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) {

get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)


# Get Supply-utilization account (SUA) elements and use as factor
All_Bal_element <- levels(GCAMFAOSTAT_SUA$element)
All_Bal_element <- factor(All_Bal_element, levels = All_Bal_element)
@@ -143,10 +142,10 @@ module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) {

# need to remove gross trade when export > production
# to maintain triangle the inequality rule
# Note that Prod < export is still possivle due to "residuals"
# Note that Prod < export is still possible due to "residuals"
DF_SUA_Agg_TradeAdj %>%
filter(element %in% c("Production", "Import", "Export")) %>%
spread(element, value, fill=0.0) %>%
spread(element, value, fill = 0.0) %>%
mutate(value = pmax(Production - Export, -Import)) %>%
filter(value < 0) %>%
select(-Production, -Import, -Export) ->
@@ -539,6 +538,7 @@ module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) {
## Loop through all GCAM_commodity with available data ----

FAO_SUA_Kt_2010to2019_R %>%
# In both left_join here, APE_comm could be NA after the join and they are removed later
left_join(Mapping_SUA_PrimaryEquivalent_ID %>%
select(APE_comm, item_code = sink_item_code, nest_level) %>% distinct(), by = c("item_code")) %>%
left_join(Mapping_SUA_PrimaryEquivalent_ID %>%
@@ -590,7 +590,8 @@ module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) {
Mapping_item_FBS_GCAM %>%
select(item_code, GCAM_commodity)%>%
filter(!is.na(GCAM_commodity)) %>%
left_join(FBSH_CB, by = "item_code") %>%
# Using inner_join since not all items in FBSH_CB are used; left_join would work the same
inner_join(FBSH_CB, by = "item_code") %>%
left_join_error_no_match(AGLU_ctry %>% distinct(area_code = FAO_country_code, iso), by = c("area_code")) %>%
left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
@@ -713,39 +714,48 @@ module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) {
if (isFALSE(.DF %>%
spread(element, value) %>%
mutate(`Regional supply` = Production + `Import`,
`Regional demand` = `Export` + Feed + Food + `Other uses`,
`Regional demand` = `Export` + Feed + Food + `Stock Variation` + Loss + `Other uses`,
bal = abs(`Regional supply` - `Regional demand`)) %>%
filter(bal > 0.0001) %>% nrow() == 0)) {
warning("Regional supply != Regional demand + Residuals")
}

# 4. Balanced in all dimensions
assertthat::assert_that(.DF %>% nrow() ==
.DF %>% distinct(year) %>% nrow *
.DF %>% distinct(GCAM_commodity) %>% nrow *
.DF %>% distinct(element) %>% nrow *
.DF %>% distinct(region) %>% nrow)
.DF %>% filter(year < 2010) -> .DF1
assertthat::assert_that(.DF1 %>% nrow() ==
.DF1 %>% distinct(year) %>% nrow *
.DF1 %>% distinct(GCAM_commodity) %>% nrow *
.DF1 %>% distinct(element) %>% nrow *
.DF1 %>% distinct(region) %>% nrow)

.DF %>% filter(year >= 2010) -> .DF1
assertthat::assert_that(.DF1 %>% nrow() ==
.DF1 %>% distinct(year) %>% nrow *
.DF1 %>% distinct(GCAM_commodity) %>% nrow *
.DF1 %>% distinct(element) %>% nrow *
.DF1 %>% distinct(region) %>% nrow)

}


# 4.2. Connect and bind data from two periods ----

GCAM_AgLU_SUA_APE_1973_2019 <-
GCAM_APE_before2010 %>%
bind_rows(GCAM_APE_after2010) %>%
mutate(unit = "1000 tonnes") %>%
# clean and aggregate elements not using
filter(!element %in% c("Regional demand", "Regional supply",
"Opening stocks", "Closing stocks")) %>%
filter(!element %in% c("Regional demand", "Regional supply")) %>%
mutate(element = replace(element,
element %in% c("Stock Variation", "Processed",
"Seed", "Residuals", "Loss"),
element %in% c("Processed",
"Seed", "Residuals"),
"Other uses")) %>%
dplyr::group_by_at(dplyr::vars(-value)) %>%
summarise(value = sum(value), .groups = "drop")

## Check balance
GCAM_AgLU_SUA_APE_1973_2019 %>% Check_Balance_SUA

rm(GCAM_APE_before2010, GCAM_APE_after2010)


@@ -942,6 +952,7 @@ module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) {
# SUA_Items_Food includes both GCAM and NonGCAM(NEC)
SUA_item_code_map %>%
filter(item_code %in% unique(GCAMFAOSTAT_MacroNutrientRate$item_code)) %>%
# NA expected here
left_join(SUA_Items_GCAM %>% select(-item), by = "item_code") %>%
# For NA GCAM_commodity: not elsewhere classified (NEC)
# So we would know % of food calories not included in GCAM commodities
211 changes: 175 additions & 36 deletions R/zaglu_L100.FAO_SUA_connection.R

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions R/zaglu_L100.FAO_preprocessing_OtherData.R
Original file line number Diff line number Diff line change
@@ -138,7 +138,7 @@ module_aglu_L100.FAO_preprocessing_OtherData <- function(command, ...) {
gather_years() %>%
# NA area values that should not exist, e.g., USSR after 1991
filter(!is.na(value)) %>%
FAO_REG_YEAR_MAP_FOREST ->
FAO_REG_YEAR_MAP ->
L100.For_bal


@@ -147,7 +147,7 @@ module_aglu_L100.FAO_preprocessing_OtherData <- function(command, ...) {
filter(element == "Production") %>%
add_title("FAO forestry production by country, year") %>%
add_comments("FAO primary roundwood production") %>%
add_units("m3") %>%
add_units("m3/t") %>%
add_precursors("aglu/FAO/GCAMFAOSTAT_ForProdTrade",
"aglu/AGLU_ctry",
"common/iso_GCAM_regID") ->
@@ -159,7 +159,7 @@ module_aglu_L100.FAO_preprocessing_OtherData <- function(command, ...) {
filter(element == "Export") %>%
add_title("FAO forestry export by country, year") %>%
add_comments("FAO primary roundwood gross export") %>%
add_units("m3") %>%
add_units("m3/t") %>%
add_precursors("aglu/FAO/GCAMFAOSTAT_ForProdTrade",
"aglu/AGLU_ctry",
"common/iso_GCAM_regID") ->
@@ -171,7 +171,7 @@ module_aglu_L100.FAO_preprocessing_OtherData <- function(command, ...) {
filter(element == "Import") %>%
add_title("FAO forestry import by country, year") %>%
add_comments("FAO primary roundwood gross import") %>%
add_units("m3") %>%
add_units("m3/t") %>%
add_precursors("aglu/FAO/GCAMFAOSTAT_ForProdTrade",
"aglu/AGLU_ctry", "common/iso_GCAM_regID") ->
L100.FAO_For_Imp_m3
26 changes: 15 additions & 11 deletions R/zaglu_L110.For_FAO_R_Y.R
Original file line number Diff line number Diff line change
@@ -27,7 +27,8 @@ module_aglu_L110.For_FAO_R_Y <- function(command, ...) {
FILE="aglu/A_forest_mapping")

MODULE_OUTPUTS <-
c("L110.For_ALL_bm3_R_Y","L110.IO_Coefs_pulp")
c("L110.For_ALL_bm3_R_Y",
"L110.IO_Coefs_pulp")

if(command == driver.DECLARE_INPUTS) {
return(MODULE_INPUTS)
@@ -71,6 +72,7 @@ module_aglu_L110.For_FAO_R_Y <- function(command, ...) {
# Use spread to have columns for Prod and NetExp instead of flow and value
# Add a column for the variable consumption, Cons=Prod-NetExp
L110.FAO_For_ALL_m3 %>%
filter(item %in% A_forest_mapping$item) %>%
left_join_error_no_match(A_forest_mapping, by = c("item")) %>%
# take the combined tibble
# do a left join on For_ALL tibble, match up the iso labels from the iso tibble,
@@ -140,42 +142,44 @@ module_aglu_L110.For_FAO_R_Y <- function(command, ...) {

#First separate out roundwood consumption
L110.For_ALL_bm3_R_Y %>%
filter(GCAM_commodity==aglu.FOREST_supply_sector) %>%
filter(GCAM_commodity==aglu.FOREST_SUPPLY_SECTOR) %>%
select(GCAM_region_ID,year,roundwood_cons=Cons_bm3)->L110.Roundwood_Cons

#Join the same with commoditties.
#Join the same with commodities.
L110.For_ALL_bm3_R_Y %>%
filter(GCAM_commodity %in% aglu.FOREST_commodities) %>%
filter(GCAM_commodity %in% aglu.FOREST_COMMODITIES) %>%
select(GCAM_region_ID,year,GCAM_commodity,Prod_bm3) %>%
spread(GCAM_commodity,Prod_bm3) %>%
left_join_error_no_match(L110.Roundwood_Cons, by = c("GCAM_region_ID","year")) %>%
#Assume that pulpwood has a coeff of 5.14 sawtimber is the remaining. There are a couple of adjustments that need to be made.
mutate(#First adjust sawnwood production here
#sawnwood= if_else(sawnwood > 2 *roundwood_cons, roundwood_cons *0.05,sawnwood),
after_pulp = roundwood_cons-(woodpulp*aglu.FOREST_pulp_conversion),
after_pulp = roundwood_cons-(woodpulp*aglu.FOREST_PULP_CONVERSION),
#If a country does not have enough roundwood cons to produce saw, increase it.
roundwood_cons=if_else(after_pulp <0, woodpulp*aglu.FOREST_pulp_conversion*1.1,roundwood_cons),
after_pulp = roundwood_cons-(woodpulp*aglu.FOREST_pulp_conversion),
roundwood_cons=if_else(after_pulp <0, woodpulp*aglu.FOREST_PULP_CONVERSION*1.1,roundwood_cons),
after_pulp = roundwood_cons-(woodpulp*aglu.FOREST_PULP_CONVERSION),
#Now calculate pulp IO here
IO=after_pulp/sawnwood,
#We are going to run in a scenario where the coef is less than 1 in some places.
IO= if_else(IO < 1,1,IO),
#Add a max value on the IO here,
# Note that Central Asia may need a bigger than 10 IO to avoid negative production in next steps
IO= if_else(IO > 10,10,IO),
IO= if_else(sawnwood==0, 0,IO),
roundwood_cons=(woodpulp*aglu.FOREST_pulp_conversion)+(IO*sawnwood)) ->L110.IO_Coefs_pulp
roundwood_cons=(woodpulp*aglu.FOREST_PULP_CONVERSION)+(IO*sawnwood)) ->L110.IO_Coefs_pulp

#Since we increased roundwood cons in some places, increase production proportionately
L110.For_ALL_bm3_R_Y %>%
filter(GCAM_commodity==aglu.FOREST_supply_sector) %>%
filter(GCAM_commodity==aglu.FOREST_SUPPLY_SECTOR) %>%
left_join_error_no_match(L110.IO_Coefs_pulp %>% select(GCAM_region_ID,year,roundwood_cons), by = c("GCAM_region_ID","year")) %>%
# We could add other used for forest later instead of adjustments
mutate(diff=roundwood_cons-Cons_bm3,
Prod_bm3= Prod_bm3+diff,
Cons_bm3=roundwood_cons) %>%
select(colnames(L110.For_ALL_bm3_R_Y))->L110.For_ALL_bm3_R_Y_Primary

L110.For_ALL_bm3_R_Y %>%
filter(GCAM_commodity!=aglu.FOREST_supply_sector) %>%
filter(GCAM_commodity!=aglu.FOREST_SUPPLY_SECTOR) %>%
bind_rows(L110.For_ALL_bm3_R_Y_Primary)->L110.For_ALL_bm3_R_Y


@@ -188,7 +192,7 @@ module_aglu_L110.For_FAO_R_Y <- function(command, ...) {
# replace_na here only affect Taiwan, which we did not have trade data.
L110.For_ALL_bm3_R_Y %>%
left_join(
L100.FAO_For_Exp_m3 %>%
L100.FAO_For_Exp_m3 %>% filter(item %in% A_forest_mapping$item) %>%
left_join_error_no_match(A_forest_mapping, by = c("item")) %>%
mutate( # add the forest commodity label
value = CONV_M3_BM3 * value, # convert the value units from m3 to bm3, had to add this constant to constants.R
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -53,14 +53,17 @@ The package is documented in the [online manual](https://jgcri.github.io/gcamfao

#### 3. Modify configurations
* To export csv output files, in `constants.R`,
* set `OUTPUT_Export_CSV` to `TRUE`
* specify the directory path (`DIR_OUTPUT_CSV`) for output files.
* set `Process_Raw_FAO_Data` to `TRUE` if raw data have been downloaded, `PREBUILT_DATA` will be used otherwise.
* specify the modules to be excluded (`DISABLED_MODULES`) if needed.
* modify data years. Most FAOSTAT datasets are available for`1960 - 2022`.

#### 4. Run the driver
* `driver_drake()`
* if `write_csv_model = GCAM` in the function, related CSV will be exported to `output/gcamfaostat_GCAM`
* Users can add and design data flows for other models.

#### 5. Use data and package functions
* Data saved in `DIR_OUTPUT_CSV` can be used in downstream models.
* Data saved in output can be used in downstream models.
* Once `drive_drake` has been run, all the intermediate data are saved and can be explored (see examples in [Use Cases](https://jgcri.github.io/gcamfaostat/articles/vignette_use_cases.html) and [Visualization](https://jgcri.github.io/gcamfaostat/articles/vignette_visualization.html).

***
@@ -69,7 +72,7 @@ The package is documented in the [online manual](https://jgcri.github.io/gcamfao


* **`gcamfaostat`** processes [input data](https://jgcri.github.io/gcamfaostat/articles/vignette_preparing_data.html#metadata) to output data in a format that is needed for downstream processing and modeling, e.g., [data used in gcamdata-aglu-FAO](https://github.com/JGCRI/gcam-core/tree/master/input/gcamdata/inst/extdata/aglu/FAO) (see the schematic below).
* Input data was stored in the [Prebuilt Data](https://github.com/JGCRI/gcamfaostat/blob/main/data/PREBUILT_DATA.rda) of the package. The raw data is archived on Zenodo (see Zhao (2022) and URL in the [`FF_download_RemoteArchive`](https://github.com/JGCRI/gcamfaostat/blob/main/R/xfaostat_helper_funcs.R#L144) function) to ensure the processing is 100% replicable. Users can also download the latest data using [`FF_download_FAOSTAT`](https://github.com/JGCRI/gcamfaostat/blob/main/R/xfaostat_helper_funcs.R#90).
* Input data was stored in the [Prebuilt Data](https://github.com/JGCRI/gcamfaostat/blob/main/data/PREBUILT_DATA.rda) of the package. The raw data is archived on Zenodo (see Zhao (2024) and URL in the [`FF_download_RemoteArchive`](https://github.com/JGCRI/gcamfaostat/blob/main/R/xfaostat_helper_funcs.R#L144) function) to ensure the processing is 100% replicable. Users can also download the latest data using [`FF_download_FAOSTAT`](https://github.com/JGCRI/gcamfaostat/blob/main/R/xfaostat_helper_funcs.R#90).
* All intermediate processing and data flows are transparent and traceable. See [Processing Flow](https://jgcri.github.io/gcamfaostat/articles/vignette_processing_flow.html) for data-tracing examples.

![](man/figures/Fig_data_processing_flow.jpg)
@@ -99,7 +102,7 @@ We welcome and value community contributions to gcamfaostat. Please read our [Co
- Narayan et al., (2021). ambrosia: An R package for calculating and analyzing food demand that is responsive to changing incomes and prices. Journal of Open Source Software, 6(59), 2890. https://doi.org/10.21105/joss.02890
- Zhao, Xin, Katherine V. Calvin, Marshall A. Wise, and Gokul Iyer. "The role of global agricultural market integration in multiregional economic modeling: Using hindcast experiments to validate an Armington model." Economic Analysis and Policy 72 (2021): 1-17. https://doi.org/10.1016/j.eap.2021.07.007
- Zhao, Xin and Marshall Wise. "Core Model Proposal# 360: GCAM agriculture and land use (AgLU) data and method updates: connecting land hectares to food calories." PNNL https://jgcri.github.io/gcam-doc/cmp/CMP_360-AgLU_data_method_updates.pdf
- Zhao, Xin (2022). FAOSTAT AgLU data Archive GCAMv7 (1.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.8260225
- Zhao, Xin (2024). FAOSTAT AgLU data Archive GCAMv7 (1.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.13941470



Binary file added data-raw/Maize2020.xlsx
Binary file not shown.
Binary file added data-raw/Wheat2020.xlsx
Binary file not shown.
378 changes: 378 additions & 0 deletions data-raw/compare_SUA_FBS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,378 @@
Bal_new_all %>% filter(value != 0) %>%
spread(year, value) %>%
write.csv("B.csv")
GCAM_APE_after2010 %>%
spread(year, value) %>%
write.csv("A.csv")


# This module compares the new PCE data vs FAO FBS

MODULE_INPUTS <-
c("GCAM_APE_after2010",
"Bal_new_all",
"FAO_Food_Macronutrient_All",
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Mapping_SUA_PrimaryEquivalent"),
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "SUA_item_code_map"),
FILE = file.path(DIR_RAW_DATA_FAOSTAT, "Mapping_FAO_iso_reg") )

MODULE_INPUTS %>% load_from_cache() -> all_data

get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)

GCAM_APE_after2010 %>% distinct(GCAM_commodity)

Bal_new_all %>% filter(value != 0)
GCAM_APE_after2010 %>% filter(value != 0)



SUA_item_code_map %>% select(item, item_code) -> SUA_item_code_map
FAO_Food_Macronutrient_All -> FAO_Food_Macronutrient_rate



Bal_new_all %>% filter(value != 0.0) %>%
transmute(area_code, item_code, element, year, value) ->
GCAMFAOSTAT_SUA

# Goal
# Compare SUA & FBS for wheat/corn at global & regional scales
# But adding details



Mapping_SUA_PrimaryEquivalent %>%
left_join_error_no_match(SUA_item_code_map %>% select(item, item_code) %>%
rename(sink_item_code = item_code), by=c("sink_item" = "item")) %>%
left_join_error_no_match(SUA_item_code_map %>% select(item, item_code) %>%
rename(source_item_code = item_code), by=c("source_item" = "item")) %>%
mutate(APE_comm = as.factor(APE_comm)) ->
Mapping_SUA_PrimaryEquivalent_ID


Mapping_SUA_PrimaryEquivalent_ID %>%
distinct(APE_comm) %>% pull

# china mainland 41
# usa 231
AC = 231
APE_COMM_NAME <- "Wheat"


Mapping_SUA_PrimaryEquivalent_ID %>%
filter(APE_comm == APE_COMM_NAME) %>%
distinct(sink_FBS_item) %>% pull ->
FAO_FBS_COMM_NAME

Mapping_SUA_PrimaryEquivalent_ID %>%
filter(APE_comm == APE_COMM_NAME) %>%
select(sink_item_code, source_item_code) %>% unlist %>% unique() ->
SUACode

GCAMFAOSTAT_SUA %>%
filter(item_code %in% SUACode) ->
GCAMFAOSTAT_SUA_sector

GCAMFAOSTAT_SUA_sector %>%
filter(area_code== AC) %>%
group_by_at(vars(-area_code, -value)) %>%
summarize(value = sum(value), .groups = "drop") %>%
left_join_error_no_match(SUA_item_code_map) %>% select(-item_code) %>%
filter(year == 2020) %>% spread(item, value) ->
GCAMFAOSTAT_SUA_sector1


FAO_Food_Macronutrient_rate %>%
filter(area_code== AC) %>%
filter(item_code %in% SUACode) %>%
group_by_at(vars(year, item_code, element = macronutrient)) %>%
summarize(value = sum(value), .groups = "drop") %>%
left_join_error_no_match(SUA_item_code_map) %>% select(-item_code) %>%
filter(year == 2020) %>%
spread(item, value) %>%
bind_rows(
GCAMFAOSTAT_SUA_sector1
) ->
GCAMFAOSTAT_SUA_sector1_2020





"FBS_wide" %>% load_from_cache() %>% first() -> FBS_wide

FBS_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBS


FBS %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
# keep only balance items
filter(!element_code %in% c(645, 664, 674, 684)) %>%
# simplify elements and make them consistent with SUA
mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element),
element = replace(element, element == "Losses", "Loss"),
element = replace(element, element == "Processing", "Processed")) %>%
# convert units back to tonnes first since FBS originally used 1000 tons
mutate(value = value) ->
FBS1


FBS1 %>% filter(year == 2020) %>%
filter(area_code== AC) %>%
filter(item %in% FAO_FBS_COMM_NAME ) %>%
group_by_at(vars(-area_code, -area, -value)) %>%
summarize(value = sum(value), .groups = "drop") ->
FAO_FBS_Old


GCAM_APE_after2010 %>%
filter(year == 2020) %>% filter(region_ID == AC) %>%
filter(GCAM_commodity == APE_COMM_NAME) %>%
group_by_at(vars(-region_ID, -value)) %>%
summarize(value = sum(value)) %>%
mutate(GCAM_commodity = "SUA") %>%
spread(GCAM_commodity, value) ->
SUA_FBS_New

FAO_FBS_Old %>%
select(-element_code) %>%
rename(FBS = value) %>%
full_join(SUA_FBS_New) %>%
full_join(
GCAMFAOSTAT_SUA_sector1_2020, by = join_by(element, year)
) -> Compare


C %>% readr::write_csv("Maize2020_usa.csv")




#---- All region ----


# china mainland 41
# usa 231

APE_COMM_NAME <- "Wheat"


Mapping_SUA_PrimaryEquivalent_ID %>%
filter(APE_comm == APE_COMM_NAME) %>%
distinct(sink_FBS_item) %>% pull ->
FAO_FBS_COMM_NAME

Mapping_SUA_PrimaryEquivalent_ID %>%
filter(APE_comm == APE_COMM_NAME) %>%
select(sink_item_code, source_item_code) %>% unlist %>% unique() ->
SUACode

GCAMFAOSTAT_SUA %>%
filter(item_code %in% SUACode) ->
GCAMFAOSTAT_SUA_sector

GCAMFAOSTAT_SUA_sector %>%
group_by_at(vars(-value)) %>%
summarize(value = sum(value), .groups = "drop") %>%
left_join_error_no_match(SUA_item_code_map) %>% select(-item_code) %>%
filter(year == 2020) %>%
spread(item, value, fill = 0) ->
GCAMFAOSTAT_SUA_sector1


FAO_Food_Macronutrient_rate %>%
filter(item_code %in% SUACode) %>%
group_by_at(vars(area_code, year, item_code, element = macronutrient)) %>%
summarize(value = sum(value), .groups = "drop") %>%
left_join_error_no_match(SUA_item_code_map) %>% select(-item_code) %>%
filter(year == 2020) %>%
mutate(value = if_else(element %in% c("MtProtein", "MtFat"), value * 1000, value)) %>%
mutate(element = replace(element, element == "MKcal", "Calorie"),
element = replace(element, element == "MtProtein", "Protein"),
element = replace(element, element == "MtFat", "Fat")) %>%
spread(item, value) %>%
bind_rows(
GCAMFAOSTAT_SUA_sector1
) -> GCAMFAOSTAT_SUA_sector1_2020





"FBS_wide" %>% load_from_cache() %>% first() -> FBS_wide

FBS_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBS


FBS %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
# keep only balance items
filter(!element_code %in% c(645, 664, 674, 684)) %>%
# simplify elements and make them consistent with SUA
mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element),
element = replace(element, element == "Losses", "Loss"),
element = replace(element, element == "Processing", "Processed")) %>%
# convert units back to tonnes first since FBS originally used 1000 tons
mutate(value = value) ->
FBS1


FBS1 %>% filter(year == 2020) %>%
filter(item %in% FAO_FBS_COMM_NAME ) %>%
group_by_at(vars(-area, -value)) %>%
summarize(value = sum(value), .groups = "drop") %>%
mutate(element = replace(element, element == "Food supply (kcal)", "Calorie"),
element = replace(element, element == "Protein supply quantity (t)", "Protein"),
element = replace(element, element == "Fat supply quantity (t)", "Fat")) %>%
mutate(value = if_else(element %in% c("Protein", "Fat"), value / 1000, value)) %>%
mutate(unit = replace(unit, is.na(unit)|unit == "t", "1000 t")) ->
FAO_FBS_Old


GCAM_APE_after2010 %>%
filter(year == 2020) %>%
filter(GCAM_commodity == APE_COMM_NAME) %>%
group_by_at(vars(-value)) %>%
summarize(value = sum(value)) %>% ungroup() %>%
mutate(GCAM_commodity = "SUA") %>%
spread(GCAM_commodity, value) %>%
rename(area_code = region_ID)->
SUA_FBS_New

GCAMFAOSTAT_SUA_sector1_2020 %>%
gather(item, value, -area_code:-element) %>%
group_by_at(vars(-value, -item)) %>%
summarize(SUA_sum = sum(value), .groups = "drop") ->
SUA_FBS_New_sum

FAO_FBS_Old %>%
select(-element_code) %>% filter(unit == "1000 t") %>%
spread(element, value, fill = 0) %>%
mutate(`Regional supply` = Production + Import,
`Regional demand` = Export + Food + Feed + Processed + Seed + Loss + `Other uses` + `Tourist consumption`,
Residuals = `Regional supply` - `Regional demand` - `Stock Variation`) %>%
gather(element, value, -area_code:-year) %>%
bind_rows(
FAO_FBS_Old %>% select(-element_code) %>% filter(unit != "1000 t")
) %>%
rename(FBS = value) %>%
full_join(SUA_FBS_New) %>%
full_join(SUA_FBS_New_sum) %>%
mutate(SUA = if_else(element %in% c("Calorie", "Protein", "Fat"), SUA_sum, SUA)) %>%
full_join(
GCAMFAOSTAT_SUA_sector1_2020
) -> Compare

Compare %>% filter(area_code==203) %>% write.csv("A.csv")
Compare %>% filter(element == "Residuals") %>%
select(area_code:SUA) %>%
filter(area_code == AC)

Compare %>%
filter(element == "Residuals") %>%
select(area_code:SUA) -> A

Compare %>%
filter(element == "Residuals") %>%
select(area_code:SUA) %>%
group_by(item, year) %>% filter(!is.na(item)) %>% #filter(is.na(SUA))
summarize(FBS = sum(FBS), SUA = sum(SUA, na.rm = T))
filter(area_code == AC)

Compare %>% filter(area_code == AC) -> C


C %>% readr::write_csv("Maize2020_usa.csv")




---
-----

Mapping_SUA_PrimaryEquivalent %>%
left_join_error_no_match(SUA_item_code_map %>% rename(sink_item_code = item_code), by=c("sink_item" = "item")) %>%
left_join_error_no_match(SUA_item_code_map %>% rename(source_item_code = item_code), by=c("source_item" = "item")) %>%
mutate(APE_comm = as.factor(APE_comm)) ->
Mapping_SUA_PrimaryEquivalent_ID

Mapping_SUA_PrimaryEquivalent_ID %>%
filter(APE_comm == APE_COMM_NAME) %>%
select(sink_item_code, source_item_code) %>% unlist %>% unique() -> SUACode

GCAMFAOSTAT_SUA %>%
filter(item_code %in% SUACode) ->
GCAMFAOSTAT_SUA_Wheat

GCAMFAOSTAT_SUA_Wheat %>% #filter(area_code== AC) %>%
group_by_at(vars(-area_code, -value)) %>%
summarize(value = sum(value), .groups = "drop") %>%
left_join_error_no_match(SUA_item_code_map) %>% select(-item_code) %>%
filter(year == 2020) %>% spread(item, value) ->
GCAMFAOSTAT_SUA_Wheat1


FAO_Food_Macronutrient_rate %>% #filter(area_code== AC) %>%
filter(item_code %in% SUACode) %>%
group_by_at(vars(year, item_code, element = macronutrient)) %>%
summarize(value = sum(value), .groups = "drop") %>%
left_join_error_no_match(SUA_item_code_map) %>% select(-item_code) %>%
filter(year == 2020) %>%
spread(item, value) %>%
bind_rows(
GCAMFAOSTAT_SUA_Wheat1
) ->
GCAMFAOSTAT_SUA_Wheat_2020









FBS %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
# keep only balance items
filter(!element_code %in% c(645, 664, 674, 684)) %>%
# simplify elements and make them consistent with SUA
mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element),
element = replace(element, element == "Losses", "Loss"),
element = replace(element, element == "Processing", "Processed")) %>%
# convert units back to tonnes first since FBS originally used 1000 tons
mutate(value = value) ->
FBS1


FBS1 %>% filter(year == 2020) %>%
filter(area_code== AC) %>%
filter(item %in% FAO_FBS_COMM_NAME ) %>%
group_by_at(vars(-area_code, -area, -value)) %>%
summarize(value = sum(value), .groups = "drop") -> A


GCAM_APE_after2010 %>% filter(year == 2020) %>% filter(region_ID == AC) %>%
filter(GCAM_commodity == APE_COMM_NAME) %>%
group_by_at(vars(-region_ID, -value)) %>%
summarize(value = sum(value)) %>%
mutate(GCAM_commodity = "SUA") %>%
spread(GCAM_commodity, value) -> B

A %>% select(-element_code) %>%
rename(FBS = value) %>%
full_join(B) %>%
full_join(
GCAMFAOSTAT_SUA_Wheat_2020, by = join_by(element, year)
) -> C


C %>% readr::write_csv("Maize2020.csv")



10 changes: 4 additions & 6 deletions data-raw/generate_package_data.R
Original file line number Diff line number Diff line change
@@ -48,14 +48,16 @@ prebuilt_data_names <- c(
# outputs of module_xfaostat_L101_RawDataPreProc2_PP_PD_OA
c("PP_wide", # Producer prices
"PD", # GDP deflator
"OA"), # Population
"OA", # Population
"CS", # Capital stock
"MK"), # Macro-Statistics (GDP)

# outputs of module_xfaostat_L101_RawDataPreProc3_SCL_FBS
c("SCL_wide", # Supply utilization accounting
"FBS_wide"), # New food balance sheet

# outputs of module_xfaostat_L101_RawDataPreProc4_FBSH_CB
c("FBSH_CB_wide"), # Combined FBSH and CB
c("FBSH_CBH_wide"), # Combined FBSH and CB

# outputs of module_xfaostat_L101_RawDataPreProc5_TCL
c("TCL_wide"), # Gross trade
@@ -82,10 +84,6 @@ prebuilt_data_names <- c(
# replicated in data.R for that purpose.
if(USE_DRIVER_DRAKE) {
PREBUILT_DATA <- load_from_cache(prebuilt_data_names)
} else {
PREBUILT_DATA <- driver(write_outputs = FALSE,
write_xml = FALSE,
return_data_names = prebuilt_data_names)
}
# Save these objects as external data (i.e. requires explicit call to `data()` to load)
usethis::use_data(PREBUILT_DATA, overwrite = TRUE, internal = FALSE)
Loading

0 comments on commit 24a8d58

Please sign in to comment.