Skip to content

Commit

Permalink
Merge pull request #4 from dzvoti:v0.3.0-alpha
Browse files Browse the repository at this point in the history
V0.3.0-alpha
  • Loading branch information
dzvoti authored Jun 7, 2024
2 parents 01c1772 + 22da1ee commit 277421d
Show file tree
Hide file tree
Showing 13 changed files with 515 additions and 123 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ rsconnect/
data/
excludeR/
.Rproj.user
R/calculateMnFoodVehicleHhReach.R
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: effectivenessCalculations
Title: Micronutrient Action Policy Support Tool Effectiveness Calculations
Version: 0.2.0
Version: 1.0.0
Authors@R:
person("Liberty", "Mlambo", , "liberty.mlambo@nottingham.ac.uk", role = c("aut", "cre","cph"),
comment = c(ORCID = "0000-0003-3740-1345"))
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@ export(calculateBaselinePrevalenceHighIntakeRiskAfe)
export(calculateConsPerAfe)
export(calculateFoodVehicleHouseholdReach)
export(calculateMNsContent)
export(calculate_pre_and_post_lsff_summaries_afe)
export(checkData)
export(createFortifiableFoodItemsTable)
export(createMasterNct)
export(enforceNumeric)
export(getMnThresholds)
export(loadCsvFiles)
export(load_rda_files)
export(previewData)
importFrom(dplyr,case_when)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(readxl,read_excel)
importFrom(stats,median)
importFrom(utils,head)
256 changes: 256 additions & 0 deletions R/calculate_pre_and_post_lsff_summaries_afe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#' Calculate Pre and Post LSFF Nutrient Summaries
#'
#' This function calculates summaries of nutrient inadequacy before and after large-scale food fortification (LSFF) for different administrative groups using the Adult Female Equivalent (AFE) Method.
#'
#' @param householdConsumptionDf A dataframe containing household consumption data. Must contain columns: "householdId", "amountConsumedInG".
#' @param householdDetailsDf A dataframe containing household details. Must contain columns: "householdId", "memberCount".
#' @param nctListDf A dataframe containing nutrient composition tables. Must contain column: "nutrient".
#' @param intakeThresholdsDf A dataframe containing intake thresholds for nutrients. Must contain columns: "nutrient", "ear".
#' @param aggregationGroup A character vector of administrative groups to aggregate the data. Must not be empty. Defaults to c("admin0Name", "admin1Name").
#' @param fortifiableFoodItemsDf A dataframe containing fortifiable food items. Generated using the function `createFortifiableFoodItemsTable()`.
#' @param foodVehicleName A character string specifying the name of the food vehicle for fortification. Defaults to "wheat flour".
#' @param years A numeric vector specifying the years for which LSFF is analyzed. Defaults to 2021:2024.
#' @param MNList A character vector of nutrients to be included in the analysis. Defaults to "A". Must not be empty.
#'
#' @return A dataframe with the summaries of nutrient inadequacy for the specified administrative groups before and after LSFF.
#' @export
#'
#' @examples
#' \dontrun{
#' calculate_pre_and_post_lsff_summaries(
#' householdConsumptionDf = householdConsumption,
#' householdDetailsDf = householdDetails,
#' nctListDf = nctList,
#' intakeThresholdsDf = intakeThresholds,
#' aggregationGroup = c("admin0Name", "admin1Name"),
#' fortifiableFoodItemsDf = createFortifiableFoodItemsTable(),
#' foodVehicleName = "wheat flour",
#' years = c(2021:2024),
#' MNList = c("A", "Ca")
#' )
#' }
calculate_pre_and_post_lsff_summaries_afe <- function(
householdConsumptionDf = householdConsumption,
householdDetailsDf = householdDetails,
nctListDf = nctList,
intakeThresholdsDf = intakeThresholds,
aggregationGroup = c("admin0Name", "admin1Name"),
fortifiableFoodItemsDf = createFortifiableFoodItemsTable(),
foodVehicleName = "wheat flour",
years = c(2021:2024),
MNList = "A") {
# Define required columns
requiredConsumptionCols <- c("householdId", "amountConsumedInG")
requiredDetailsCols <- c("householdId", "memberCount")
requiredNctCols <- c("micronutrientId")
requiredIntakeCols <- c("nutrient", "CND")

# Check if MNList is a character vector
if (!is.character(MNList)) {
stop("MNList must be a character vector e.g. c('A', 'Ca')")
}

# Check if aggregationGroup is a character vector
if (!is.character(aggregationGroup)) {
stop("aggregationGroup must be a character vector e.g. c('admin0Name', 'admin1Name')")
}

# Check if MNList and aggregationGroup are not empty
if (length(aggregationGroup) == 0) {
stop("aggregationGroup cannot be empty")
}

# Check if input dataframes have required columns
if (!all(requiredConsumptionCols %in% names(householdConsumptionDf))) {
stop(paste("householdConsumptionDf must contain the following columns:", paste(requiredConsumptionCols, collapse = ", ")))
}

if (!all(requiredDetailsCols %in% names(householdDetailsDf))) {
stop(paste("householdDetailsDf must contain the following column:", paste(requiredDetailsCols, collapse = ", ")))
}

if (!all(requiredNctCols %in% names(nctListDf))) {
stop(paste("nctListDf must contain the following columns:", paste(requiredNctCols, collapse = ", ")))
}

if (!all(requiredIntakeCols %in% names(intakeThresholdsDf))) {
stop(paste("intakeThresholdsDf must contain the following columns:", paste(requiredIntakeCols, collapse = ", ")))
}

# Use the createMasterNct function to create a master NCT
masterNCT <- effectivenessCalculations::createMasterNct(nctList)

# Filter the fortifiable food items to get the food vehicle
fortifiableFoodVehicle <- fortifiableFoodItemsDf |>
dplyr::filter(food_vehicle_name == foodVehicleName)

## Create a wider format for the intakeThresholds
earThreshholds <- intakeThresholdsDf |>
dplyr::select(nutrient, ear) |>
# Remove rows where ear is NA
dplyr::filter(!is.na(ear)) |>
# Leave thresholds for the nutrients in the MNList
dplyr::filter(nutrient %in% MNList) |>
tidyr::pivot_wider(names_from = nutrient, values_from = ear) |>
# Convert all columns to numeric
dplyr::mutate_all(as.numeric) |>
# Add a suffix of "ear" to the column names
dplyr::rename_with(~ paste0(., "SupplyEarThreshold"), dplyr::everything())

# Process the consumption data
# Load the consumption data
enrichedHouseholdConsumption <- householdConsumptionDf |>
# Not necessary by its a personal preference
tibble::as_tibble() |>
# Join the household details to the consumption data (Joining columns with the same name)
dplyr::left_join(householdDetailsDf) |>
# Join the master NCT to the consumption data
dplyr::left_join(masterNCT) |>
dplyr::left_join(fortifiableFoodVehicle, by = c("foodGenusId" = "food_genus_id")) |>
# Convert all columns needed for calculations to numeric
dplyr::mutate_at(c("amountConsumedInG", "afeFactor", "fortifiable_portion", MNList), as.numeric) |>
dplyr::mutate(dplyr::across(MNList, ~ . / afeFactor), amountConsumedInGAfe = amountConsumedInG / afeFactor) |>
dplyr::bind_cols(earThreshholds)

# Calculate HH count summaries
HHCountSummaries <- enrichedHouseholdConsumption |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::distinct(householdId) |>
dplyr::summarise(householdsCount = dplyr::n())

# Fortification vehicle reach summaries
fortificationVehicleReach <- enrichedHouseholdConsumption |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::filter(!is.na(food_vehicle_name)) |>
dplyr::distinct(householdId) |>
dplyr::summarize(fortification_vehicle_reach_hh_count = dplyr::n())

# Mean and median fortification vehicle amounts consumed
fortificationVehicleAmountsConsumedAfe <- enrichedHouseholdConsumption |>
dplyr::filter(!is.na(food_vehicle_name)) |>
dplyr::group_by(householdId) |>
dplyr::summarize(median_fortification_vehicle_amountConsumedInGAfe = median(amountConsumedInGAfe, na.rm = TRUE), mean_fortification_vehicle_amountConsumedInGAfe = mean(amountConsumedInGAfe, na.rm = TRUE)) |>
dplyr::left_join(householdDetailsDf) |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::summarize(
median_fortification_vehicle_amountConsumedInGAfe = median(median_fortification_vehicle_amountConsumedInGAfe, na.rm = TRUE), mean_fortification_vehicle_amountConsumedInGAfe = mean(mean_fortification_vehicle_amountConsumedInGAfe, na.rm = TRUE)
)

# Average daily consumption per AFE
amountConsumedPerDayAfe <- enrichedHouseholdConsumption |>
dplyr::group_by(householdId) |>
dplyr::summarize(
dailyAmountConsumedPerAfeInG = sum(amountConsumedInG / 100 * afeFactor, na.rm = TRUE)
) |>
dplyr::left_join(householdDetailsDf) |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::summarize(
meanDailyAmountConsumedPerAfeInG = mean(dailyAmountConsumedPerAfeInG, na.rm = TRUE),
medianDailyAmountConsumedPerAfeInG = median(dailyAmountConsumedPerAfeInG, na.rm = TRUE)
)

# Amount consumed containing fortificant
amountConsumedContainingFortificant <- enrichedHouseholdConsumption |>
dplyr::group_by(householdId) |>
dplyr::filter(!is.na(food_vehicle_name)) |>
dplyr::summarize(
dailyAmountConsumedPerAfeInG = sum(amountConsumedInG / afeFactor, na.rm = TRUE)
) |>
dplyr::left_join(householdDetailsDf) |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::summarize(
meanDailyamountConsumedContainingFortificantInG = mean(dailyAmountConsumedPerAfeInG, na.rm = TRUE),
medianDailyAmountConsumedContainingFortificantInG = median(dailyAmountConsumedPerAfeInG, na.rm = TRUE)
)

# Merge the summaries
initialSummaries <- HHCountSummaries |>
dplyr::left_join(fortificationVehicleReach) |>
dplyr::left_join(amountConsumedPerDayAfe) |>
dplyr::left_join(amountConsumedContainingFortificant) |>
dplyr::left_join(fortificationVehicleAmountsConsumedAfe)

for (nutrient in MNList) {
enrichedHouseholdConsumption[paste0(nutrient, "_BaseSupply")] <- enrichedHouseholdConsumption[nutrient] / 100 * enrichedHouseholdConsumption["amountConsumedInG"]

for (year in years) {
# Calculate the supply of the nutrient with LSFF per food item
enrichedHouseholdConsumption[paste0(nutrient, "_", year, "_LSFFSupply")] <- enrichedHouseholdConsumption[paste0(nutrient, "_BaseSupply")] * yearAverageFortificationLevel(fortification_vehicle = foodVehicleName, Year = year, MN = nutrient) * enrichedHouseholdConsumption["fortifiable_portion"] / 100
}
}

# Aggregate nutrient supplies by household
nutrientSupply <- enrichedHouseholdConsumption |>
dplyr::group_by(householdId) |>
dplyr::summarize(
dplyr::across(dplyr::ends_with("_BaseSupply"), ~ sum(.x, na.rm = TRUE), .names = "{.col}"),
dplyr::across(dplyr::ends_with("_LSFFSupply"), ~ sum(.x, na.rm = TRUE), .names = "{.col}")
)

# Calculate mean and median nutrient supplies
# TODO: These were checked and are consistent with the maps tool.
medianNutrientSupplySummaries <- nutrientSupply |>
dplyr::left_join(householdDetailsDf) |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::summarize(
dplyr::across(dplyr::ends_with("_BaseSupply"), ~ round(mean(.x, na.rm = TRUE), 0), .names = "{.col}MeanSupply"),
dplyr::across(dplyr::ends_with("_BaseSupply"), ~ round(median(.x, na.rm = TRUE), 0), .names = "{.col}MedianSupply")
)

# Add _BaseSupply and _LSFFSupply for each nutrient and year combo
for (nutrient in MNList) {
for (year in years) {
nutrientSupply[paste0(nutrient, "_", year, "_BaseAndLSFFTotalSupply")] <- nutrientSupply[paste0(nutrient, "_BaseSupply")] + nutrientSupply[paste0(nutrient, "_", year, "_LSFFSupply")]
}
}

# Remerge the household details
enrichedNutrientSupply <- nutrientSupply |>
dplyr::left_join(householdDetailsDf) |>
dplyr::bind_cols(earThreshholds)

# Create adequacy columns for each Baseline and LSFF nutrient supply
for (nutrient in MNList) {
if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"))) {
enrichedNutrientSupply[paste0(nutrient, "_base_supply_ear_inadequacy")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_BaseSupply")] >= effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"), 0, 1)
}
for (year in years) {
if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"))) {
enrichedNutrientSupply[paste0(nutrient, "_", year, "_base_and_lsff_ear_inadequacy")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_", year, "_BaseAndLSFFTotalSupply")] >= effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"), 0, 1)
}
}
}

# Check if the intake is above the Upper Limit
for (nutrient in MNList) {
if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"))) {
enrichedNutrientSupply[paste0(nutrient, "_base_ul_exceedance")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_BaseSupply")] > effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"), 1, 0)
}
for (year in years) {
if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"))) {
enrichedNutrientSupply[paste0(nutrient, "_", year, "_base_and_lsff_ul_exceedance")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_", year, "_BaseAndLSFFTotalSupply")] > effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"), 1, 0)
}
}
}

# Create adequacy summaries
inadequacySummarries <- enrichedNutrientSupply |>
dplyr::left_join(householdDetailsDf) |>
dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
dplyr::summarize(
dplyr::across(dplyr::ends_with("_base_supply_ear_inadequacy"), ~ sum(.x, na.rm = TRUE), .names = "{.col}_count"),
dplyr::across(dplyr::ends_with("_base_ul_exceedance"), ~ sum(.x, na.rm = TRUE), .names = "{.col}_count")
) |>
dplyr::left_join(initialSummaries) |>
dplyr::mutate(dplyr::across(dplyr::ends_with("_count"), ~ round((.x * 100 / householdsCount), 2), .names = "{.col}_perc"))

# Get the column order for the data
columnOrder <- sort(names(inadequacySummarries))

# Reorder the columns for better readability
finalSummarries <- inadequacySummarries |>
dplyr::select(dplyr::all_of(columnOrder)) |>
dplyr::select(aggregationGroup, householdsCount, dplyr::everything())

return(finalSummarries)
}
67 changes: 67 additions & 0 deletions R/createFortifiableFoodItemsTable.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#' Create Fortifiable Food Items Table
#'
#' This function loads food group, food genus, and food vehicle data from Excel sheets,
#' processes the data to identify fortifiable food groups and items, and returns a table
#' of fortifiable food items with associated vehicle IDs and fortifiable portions.
#'
#' @return A data frame containing fortifiable food items with their respective vehicle IDs
#' and fortifiable portions.
#' @importFrom readxl read_excel
#' @importFrom dplyr select mutate case_when left_join
#' @export
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' fortifiable_food_items <- createFortifiableFoodItemsTable()
#' }
createFortifiableFoodItemsTable <- function(
food_file = "data/sd123/food group genus vehicle data.xlsx",
food_groups_sheet = "food_group",
food_genus_sheet = "food_genus",
food_vehicle_sheet = "food_vehicle") {
# Load data
food_groups <- readxl::read_excel(food_file, sheet = food_groups_sheet)
food_genus <- readxl::read_excel(food_file, sheet = food_genus_sheet)
food_vehicle <- readxl::read_excel(food_file, sheet = food_vehicle_sheet)

# Create columns for food_vehicle_id and fortifiable_portion
# Create fortificants at food group level and merge with food_vehicle
fortifiable_food_groups <- food_groups |>
dplyr::select(food_group_id, food_group_name) |>
dplyr::mutate(
food_vehicle_id = dplyr::case_when(
food_group_id == "2035" ~ 1,
food_group_id == "2028" ~ 4,
food_group_id == "2037" ~ 2,
food_group_id == "2020" ~ 4,
TRUE ~ NA_integer_
),
fortifiable_portion = dplyr::case_when(
food_group_id == "2035" & food_vehicle_id == 1 ~ 100,
food_group_id == "2028" & food_vehicle_id == 4 ~ 100,
food_group_id == "2020" & food_vehicle_id == 4 ~ 50,
TRUE ~ NA_integer_
)
) |>
dplyr::left_join(food_vehicle, by = "food_vehicle_id")

# Merge fortifiable_food_groups with food_genus
fortifiable_food_items <- food_genus |>
dplyr::left_join(fortifiable_food_groups, by = c("food_group_id", "food_group_name"))

# Override fortifiable_portion for food_genus with multiple fortificants
fortifiable_food_items <- fortifiable_food_items |>
dplyr::mutate(
fortifiable_portion = dplyr::case_when(
food_genus_id == "F0020.06" & food_vehicle_id == 1 ~ 80,
food_genus_id == "23140.03.02" & food_vehicle_id == 1 ~ 0,
"bread" %in% food_genus_name & food_vehicle_id == 1 ~ 75, # Katie email. 08-May-2024
food_genus_name == "buns" & food_vehicle_id == 1 ~ 33,
TRUE ~ fortifiable_portion
)
)

# Return fortifiable_food_items
return(fortifiable_food_items)
}
6 changes: 4 additions & 2 deletions R/getMnThresholds.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
#'
#' @examples
#' \dontrun{
#' intakeThresholds <- data.frame(nutrient = c("vitaminA", "vitaminB", "vitaminC"),
#' ear = c(0.5, 0.6, 0.7), ul = c(1, 1.2, 1.3), unitAdequacy = c(0.8, 0.9, 1))
#' intakeThresholds <- data.frame(
#' nutrient = c("vitaminA", "vitaminB", "vitaminC"),
#' ear = c(0.5, 0.6, 0.7), ul = c(1, 1.2, 1.3), unitAdequacy = c(0.8, 0.9, 1)
#' )
#' thresholds <- getMnThresholds(intakeThresholds, "vitaminA", "ul")
#' }
#'
Expand Down
20 changes: 20 additions & 0 deletions R/loadAverageFortificationLevels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#' Year Average Fortification Level
#'
#' This function loads the average fortification levels from a CSV file located in the data directory,
#' and then returns the average fortification level for a specific year and micronutrient.
#'
#' @param fortification_vehicle A string specifying the fortification vehicle.
#' @param Year An integer specifying the year.
#' @param MN A string specifying the micronutrient.
#' @param fortificationLevels A dataframe containing the average fortification levels.
#' @return A numeric value representing the average fortification level for the specified year and micronutrient.
#' @examples
#' \dontrun{
#' avgFortificationLevel <- yearAverageFortificationLevel("wheat flour", 2024, "A")
#' }
#' @keywords internal
yearAverageFortificationLevel <- function(fortification_vehicle, Year, MN, fortificationLevels = readr::read_csv(here::here("data/average_fortification_levels.csv"))) {
yearAverageFortificationLevel <- fortificationLevels[fortificationLevels$Year == Year, MN][[1]]

return(yearAverageFortificationLevel)
}
Loading

0 comments on commit 277421d

Please sign in to comment.