Merge pull request #4 from dzvoti:v0.3.0-alpha

V0.3.0-alpha
micronutrientsupport · Jun 7, 2024 · 277421d · 277421d
2 parents 01c1772 + 22da1ee
commit 277421d
Show file tree

Hide file tree

Showing 13 changed files with 515 additions and 123 deletions.
diff --git a/.gitignore b/.gitignore
@@ -52,3 +52,4 @@ rsconnect/
 data/
 excludeR/
 .Rproj.user
+R/calculateMnFoodVehicleHhReach.R
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: effectivenessCalculations
 Title: Micronutrient Action Policy Support Tool Effectiveness Calculations
-Version: 0.2.0
+Version: 1.0.0
 Authors@R: 
     person("Liberty", "Mlambo", , "liberty.mlambo@nottingham.ac.uk", role = c("aut", "cre","cph"),
            comment = c(ORCID = "0000-0003-3740-1345"))

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,12 +6,19 @@ export(calculateBaselinePrevalenceHighIntakeRiskAfe)
 export(calculateConsPerAfe)
 export(calculateFoodVehicleHouseholdReach)
 export(calculateMNsContent)
+export(calculate_pre_and_post_lsff_summaries_afe)
 export(checkData)
+export(createFortifiableFoodItemsTable)
 export(createMasterNct)
 export(enforceNumeric)
 export(getMnThresholds)
 export(loadCsvFiles)
 export(load_rda_files)
 export(previewData)
+importFrom(dplyr,case_when)
+importFrom(dplyr,left_join)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(readxl,read_excel)
 importFrom(stats,median)
 importFrom(utils,head)
diff --git a/R/calculate_pre_and_post_lsff_summaries_afe.R b/R/calculate_pre_and_post_lsff_summaries_afe.R
@@ -0,0 +1,256 @@
+#' Calculate Pre and Post LSFF Nutrient Summaries
+#'
+#' This function calculates summaries of nutrient inadequacy before and after large-scale food fortification (LSFF) for different administrative groups using the Adult Female Equivalent (AFE) Method.
+#'
+#' @param householdConsumptionDf A dataframe containing household consumption data. Must contain columns: "householdId", "amountConsumedInG".
+#' @param householdDetailsDf A dataframe containing household details. Must contain columns: "householdId", "memberCount".
+#' @param nctListDf A dataframe containing nutrient composition tables. Must contain column: "nutrient".
+#' @param intakeThresholdsDf A dataframe containing intake thresholds for nutrients. Must contain columns: "nutrient", "ear".
+#' @param aggregationGroup A character vector of administrative groups to aggregate the data. Must not be empty. Defaults to c("admin0Name", "admin1Name").
+#' @param fortifiableFoodItemsDf A dataframe containing fortifiable food items. Generated using the function `createFortifiableFoodItemsTable()`.
+#' @param foodVehicleName A character string specifying the name of the food vehicle for fortification. Defaults to "wheat flour".
+#' @param years A numeric vector specifying the years for which LSFF is analyzed. Defaults to 2021:2024.
+#' @param MNList A character vector of nutrients to be included in the analysis. Defaults to "A". Must not be empty.
+#'
+#' @return A dataframe with the summaries of nutrient inadequacy for the specified administrative groups before and after LSFF.
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' calculate_pre_and_post_lsff_summaries(
+#'     householdConsumptionDf = householdConsumption,
+#'     householdDetailsDf = householdDetails,
+#'     nctListDf = nctList,
+#'     intakeThresholdsDf = intakeThresholds,
+#'     aggregationGroup = c("admin0Name", "admin1Name"),
+#'     fortifiableFoodItemsDf = createFortifiableFoodItemsTable(),
+#'     foodVehicleName = "wheat flour",
+#'     years = c(2021:2024),
+#'     MNList = c("A", "Ca")
+#' )
+#' }
+calculate_pre_and_post_lsff_summaries_afe <- function(
+    householdConsumptionDf = householdConsumption,
+    householdDetailsDf = householdDetails,
+    nctListDf = nctList,
+    intakeThresholdsDf = intakeThresholds,
+    aggregationGroup = c("admin0Name", "admin1Name"),
+    fortifiableFoodItemsDf = createFortifiableFoodItemsTable(),
+    foodVehicleName = "wheat flour",
+    years = c(2021:2024),
+    MNList = "A") {
+    # Define required columns
+    requiredConsumptionCols <- c("householdId", "amountConsumedInG")
+    requiredDetailsCols <- c("householdId", "memberCount")
+    requiredNctCols <- c("micronutrientId")
+    requiredIntakeCols <- c("nutrient", "CND")
+
+    # Check if MNList is a character vector
+    if (!is.character(MNList)) {
+        stop("MNList must be a character vector e.g. c('A', 'Ca')")
+    }
+
+    # Check if aggregationGroup is a character vector
+    if (!is.character(aggregationGroup)) {
+        stop("aggregationGroup must be a character vector e.g. c('admin0Name', 'admin1Name')")
+    }
+
+    # Check if MNList and aggregationGroup are not empty
+    if (length(aggregationGroup) == 0) {
+        stop("aggregationGroup cannot be empty")
+    }
+
+    # Check if input dataframes have required columns
+    if (!all(requiredConsumptionCols %in% names(householdConsumptionDf))) {
+        stop(paste("householdConsumptionDf must contain the following columns:", paste(requiredConsumptionCols, collapse = ", ")))
+    }
+
+    if (!all(requiredDetailsCols %in% names(householdDetailsDf))) {
+        stop(paste("householdDetailsDf must contain the following column:", paste(requiredDetailsCols, collapse = ", ")))
+    }
+
+    if (!all(requiredNctCols %in% names(nctListDf))) {
+        stop(paste("nctListDf must contain the following columns:", paste(requiredNctCols, collapse = ", ")))
+    }
+
+    if (!all(requiredIntakeCols %in% names(intakeThresholdsDf))) {
+        stop(paste("intakeThresholdsDf must contain the following columns:", paste(requiredIntakeCols, collapse = ", ")))
+    }
+
+    # Use the createMasterNct function to create a master NCT
+    masterNCT <- effectivenessCalculations::createMasterNct(nctList)
+
+    # Filter the fortifiable food items to get the food vehicle
+    fortifiableFoodVehicle <- fortifiableFoodItemsDf |>
+        dplyr::filter(food_vehicle_name == foodVehicleName)
+
+    ## Create a wider format for the intakeThresholds
+    earThreshholds <- intakeThresholdsDf |>
+        dplyr::select(nutrient, ear) |>
+        # Remove rows where ear is NA
+        dplyr::filter(!is.na(ear)) |>
+        # Leave thresholds for the nutrients in the MNList
+        dplyr::filter(nutrient %in% MNList) |>
+        tidyr::pivot_wider(names_from = nutrient, values_from = ear) |>
+        # Convert all columns to numeric
+        dplyr::mutate_all(as.numeric) |>
+        # Add a suffix of "ear" to the column names
+        dplyr::rename_with(~ paste0(., "SupplyEarThreshold"), dplyr::everything())
+
+    # Process the consumption data
+    # Load the consumption data
+    enrichedHouseholdConsumption <- householdConsumptionDf |>
+        # Not necessary by its a personal preference
+        tibble::as_tibble() |>
+        # Join the household details to the consumption data (Joining columns with the same name)
+        dplyr::left_join(householdDetailsDf) |>
+        # Join the master NCT to the consumption data
+        dplyr::left_join(masterNCT) |>
+        dplyr::left_join(fortifiableFoodVehicle, by = c("foodGenusId" = "food_genus_id")) |>
+        # Convert all columns needed for calculations to numeric
+        dplyr::mutate_at(c("amountConsumedInG", "afeFactor", "fortifiable_portion", MNList), as.numeric) |>
+        dplyr::mutate(dplyr::across(MNList, ~ . / afeFactor), amountConsumedInGAfe = amountConsumedInG / afeFactor) |>
+        dplyr::bind_cols(earThreshholds)
+
+    # Calculate HH count summaries
+    HHCountSummaries <- enrichedHouseholdConsumption |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::distinct(householdId) |>
+        dplyr::summarise(householdsCount = dplyr::n())
+
+    # Fortification vehicle reach summaries
+    fortificationVehicleReach <- enrichedHouseholdConsumption |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::filter(!is.na(food_vehicle_name)) |>
+        dplyr::distinct(householdId) |>
+        dplyr::summarize(fortification_vehicle_reach_hh_count = dplyr::n())
+
+    # Mean and median fortification vehicle amounts consumed
+    fortificationVehicleAmountsConsumedAfe <- enrichedHouseholdConsumption |>
+        dplyr::filter(!is.na(food_vehicle_name)) |>
+        dplyr::group_by(householdId) |>
+        dplyr::summarize(median_fortification_vehicle_amountConsumedInGAfe = median(amountConsumedInGAfe, na.rm = TRUE), mean_fortification_vehicle_amountConsumedInGAfe = mean(amountConsumedInGAfe, na.rm = TRUE)) |>
+        dplyr::left_join(householdDetailsDf) |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::summarize(
+            median_fortification_vehicle_amountConsumedInGAfe = median(median_fortification_vehicle_amountConsumedInGAfe, na.rm = TRUE), mean_fortification_vehicle_amountConsumedInGAfe = mean(mean_fortification_vehicle_amountConsumedInGAfe, na.rm = TRUE)
+        )
+
+    # Average daily consumption per AFE
+    amountConsumedPerDayAfe <- enrichedHouseholdConsumption |>
+        dplyr::group_by(householdId) |>
+        dplyr::summarize(
+            dailyAmountConsumedPerAfeInG = sum(amountConsumedInG / 100 * afeFactor, na.rm = TRUE)
+        ) |>
+        dplyr::left_join(householdDetailsDf) |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::summarize(
+            meanDailyAmountConsumedPerAfeInG = mean(dailyAmountConsumedPerAfeInG, na.rm = TRUE),
+            medianDailyAmountConsumedPerAfeInG = median(dailyAmountConsumedPerAfeInG, na.rm = TRUE)
+        )
+
+    # Amount consumed containing fortificant
+    amountConsumedContainingFortificant <- enrichedHouseholdConsumption |>
+        dplyr::group_by(householdId) |>
+        dplyr::filter(!is.na(food_vehicle_name)) |>
+        dplyr::summarize(
+            dailyAmountConsumedPerAfeInG = sum(amountConsumedInG / afeFactor, na.rm = TRUE)
+        ) |>
+        dplyr::left_join(householdDetailsDf) |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::summarize(
+            meanDailyamountConsumedContainingFortificantInG = mean(dailyAmountConsumedPerAfeInG, na.rm = TRUE),
+            medianDailyAmountConsumedContainingFortificantInG = median(dailyAmountConsumedPerAfeInG, na.rm = TRUE)
+        )
+
+    # Merge the summaries
+    initialSummaries <- HHCountSummaries |>
+        dplyr::left_join(fortificationVehicleReach) |>
+        dplyr::left_join(amountConsumedPerDayAfe) |>
+        dplyr::left_join(amountConsumedContainingFortificant) |>
+        dplyr::left_join(fortificationVehicleAmountsConsumedAfe)
+
+    for (nutrient in MNList) {
+        enrichedHouseholdConsumption[paste0(nutrient, "_BaseSupply")] <- enrichedHouseholdConsumption[nutrient] / 100 * enrichedHouseholdConsumption["amountConsumedInG"]
+
+        for (year in years) {
+            # Calculate the supply of the nutrient with LSFF per food item
+            enrichedHouseholdConsumption[paste0(nutrient, "_", year, "_LSFFSupply")] <- enrichedHouseholdConsumption[paste0(nutrient, "_BaseSupply")] * yearAverageFortificationLevel(fortification_vehicle = foodVehicleName, Year = year, MN = nutrient) * enrichedHouseholdConsumption["fortifiable_portion"] / 100
+        }
+    }
+
+    # Aggregate nutrient supplies by household
+    nutrientSupply <- enrichedHouseholdConsumption |>
+        dplyr::group_by(householdId) |>
+        dplyr::summarize(
+            dplyr::across(dplyr::ends_with("_BaseSupply"), ~ sum(.x, na.rm = TRUE), .names = "{.col}"),
+            dplyr::across(dplyr::ends_with("_LSFFSupply"), ~ sum(.x, na.rm = TRUE), .names = "{.col}")
+        )
+
+    # Calculate mean and median nutrient supplies
+    # TODO: These were checked and are consistent with the maps tool.
+    medianNutrientSupplySummaries <- nutrientSupply |>
+        dplyr::left_join(householdDetailsDf) |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::summarize(
+            dplyr::across(dplyr::ends_with("_BaseSupply"), ~ round(mean(.x, na.rm = TRUE), 0), .names = "{.col}MeanSupply"),
+            dplyr::across(dplyr::ends_with("_BaseSupply"), ~ round(median(.x, na.rm = TRUE), 0), .names = "{.col}MedianSupply")
+        )
+
+    # Add _BaseSupply and _LSFFSupply for each nutrient and year combo
+    for (nutrient in MNList) {
+        for (year in years) {
+            nutrientSupply[paste0(nutrient, "_", year, "_BaseAndLSFFTotalSupply")] <- nutrientSupply[paste0(nutrient, "_BaseSupply")] + nutrientSupply[paste0(nutrient, "_", year, "_LSFFSupply")]
+        }
+    }
+
+    # Remerge the household details
+    enrichedNutrientSupply <- nutrientSupply |>
+        dplyr::left_join(householdDetailsDf) |>
+        dplyr::bind_cols(earThreshholds)
+
+    # Create adequacy columns for each Baseline and LSFF nutrient supply
+    for (nutrient in MNList) {
+        if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"))) {
+            enrichedNutrientSupply[paste0(nutrient, "_base_supply_ear_inadequacy")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_BaseSupply")] >= effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"), 0, 1)
+        }
+        for (year in years) {
+            if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"))) {
+                enrichedNutrientSupply[paste0(nutrient, "_", year, "_base_and_lsff_ear_inadequacy")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_", year, "_BaseAndLSFFTotalSupply")] >= effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ear"), 0, 1)
+            }
+        }
+    }
+
+    # Check if the intake is above the Upper Limit
+    for (nutrient in MNList) {
+        if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"))) {
+            enrichedNutrientSupply[paste0(nutrient, "_base_ul_exceedance")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_BaseSupply")] > effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"), 1, 0)
+        }
+        for (year in years) {
+            if (!is.na(effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"))) {
+                enrichedNutrientSupply[paste0(nutrient, "_", year, "_base_and_lsff_ul_exceedance")] <- ifelse(enrichedNutrientSupply[paste0(nutrient, "_", year, "_BaseAndLSFFTotalSupply")] > effectivenessCalculations::getMnThresholds(intakeThresholdsDf, nutrient, "ul"), 1, 0)
+            }
+        }
+    }
+
+    # Create adequacy summaries
+    inadequacySummarries <- enrichedNutrientSupply |>
+        dplyr::left_join(householdDetailsDf) |>
+        dplyr::group_by(dplyr::across(dplyr::all_of(aggregationGroup))) |>
+        dplyr::summarize(
+            dplyr::across(dplyr::ends_with("_base_supply_ear_inadequacy"), ~ sum(.x, na.rm = TRUE), .names = "{.col}_count"),
+            dplyr::across(dplyr::ends_with("_base_ul_exceedance"), ~ sum(.x, na.rm = TRUE), .names = "{.col}_count")
+        ) |>
+        dplyr::left_join(initialSummaries) |>
+        dplyr::mutate(dplyr::across(dplyr::ends_with("_count"), ~ round((.x * 100 / householdsCount), 2), .names = "{.col}_perc"))
+
+    # Get the column order for the data
+    columnOrder <- sort(names(inadequacySummarries))
+
+    # Reorder the columns for better readability
+    finalSummarries <- inadequacySummarries |>
+        dplyr::select(dplyr::all_of(columnOrder)) |>
+        dplyr::select(aggregationGroup, householdsCount, dplyr::everything())
+
+    return(finalSummarries)
+}
diff --git a/R/createFortifiableFoodItemsTable.R b/R/createFortifiableFoodItemsTable.R
@@ -0,0 +1,67 @@
+#' Create Fortifiable Food Items Table
+#'
+#' This function loads food group, food genus, and food vehicle data from Excel sheets,
+#' processes the data to identify fortifiable food groups and items, and returns a table
+#' of fortifiable food items with associated vehicle IDs and fortifiable portions.
+#'
+#' @return A data frame containing fortifiable food items with their respective vehicle IDs
+#' and fortifiable portions.
+#' @importFrom readxl read_excel
+#' @importFrom dplyr select mutate case_when left_join
+#' @export
+#' @keywords internal
+#'
+#' @examples
+#' \dontrun{
+#' fortifiable_food_items <- createFortifiableFoodItemsTable()
+#' }
+createFortifiableFoodItemsTable <- function(
+    food_file = "data/sd123/food group genus vehicle data.xlsx",
+    food_groups_sheet = "food_group",
+    food_genus_sheet = "food_genus",
+    food_vehicle_sheet = "food_vehicle") {
+    # Load data
+    food_groups <- readxl::read_excel(food_file, sheet = food_groups_sheet)
+    food_genus <- readxl::read_excel(food_file, sheet = food_genus_sheet)
+    food_vehicle <- readxl::read_excel(food_file, sheet = food_vehicle_sheet)
+
+    # Create columns for food_vehicle_id and fortifiable_portion
+    # Create fortificants at food group level and merge with food_vehicle
+    fortifiable_food_groups <- food_groups |>
+        dplyr::select(food_group_id, food_group_name) |>
+        dplyr::mutate(
+            food_vehicle_id = dplyr::case_when(
+                food_group_id == "2035" ~ 1,
+                food_group_id == "2028" ~ 4,
+                food_group_id == "2037" ~ 2,
+                food_group_id == "2020" ~ 4,
+                TRUE ~ NA_integer_
+            ),
+            fortifiable_portion = dplyr::case_when(
+                food_group_id == "2035" & food_vehicle_id == 1 ~ 100,
+                food_group_id == "2028" & food_vehicle_id == 4 ~ 100,
+                food_group_id == "2020" & food_vehicle_id == 4 ~ 50,
+                TRUE ~ NA_integer_
+            )
+        ) |>
+        dplyr::left_join(food_vehicle, by = "food_vehicle_id")
+
+    # Merge fortifiable_food_groups with food_genus
+    fortifiable_food_items <- food_genus |>
+        dplyr::left_join(fortifiable_food_groups, by = c("food_group_id", "food_group_name"))
+
+    # Override fortifiable_portion for food_genus with multiple fortificants
+    fortifiable_food_items <- fortifiable_food_items |>
+        dplyr::mutate(
+            fortifiable_portion = dplyr::case_when(
+                food_genus_id == "F0020.06" & food_vehicle_id == 1 ~ 80,
+                food_genus_id == "23140.03.02" & food_vehicle_id == 1 ~ 0,
+                "bread" %in% food_genus_name & food_vehicle_id == 1 ~ 75, # Katie email. 08-May-2024
+                food_genus_name == "buns" & food_vehicle_id == 1 ~ 33,
+                TRUE ~ fortifiable_portion
+            )
+        )
+
+    # Return fortifiable_food_items
+    return(fortifiable_food_items)
+}
diff --git a/R/getMnThresholds.R b/R/getMnThresholds.R
@@ -10,8 +10,10 @@
 #'
 #' @examples
 #' \dontrun{
-#' intakeThresholds <- data.frame(nutrient = c("vitaminA", "vitaminB", "vitaminC"),
-#'  ear = c(0.5, 0.6, 0.7), ul = c(1, 1.2, 1.3), unitAdequacy = c(0.8, 0.9, 1))
+#' intakeThresholds <- data.frame(
+#'     nutrient = c("vitaminA", "vitaminB", "vitaminC"),
+#'     ear = c(0.5, 0.6, 0.7), ul = c(1, 1.2, 1.3), unitAdequacy = c(0.8, 0.9, 1)
+#' )
 #' thresholds <- getMnThresholds(intakeThresholds, "vitaminA", "ul")
 #' }
 #'

diff --git a/R/loadAverageFortificationLevels.R b/R/loadAverageFortificationLevels.R
@@ -0,0 +1,20 @@
+#' Year Average Fortification Level
+#'
+#' This function loads the average fortification levels from a CSV file located in the data directory,
+#' and then returns the average fortification level for a specific year and micronutrient.
+#'
+#' @param fortification_vehicle A string specifying the fortification vehicle.
+#' @param Year An integer specifying the year.
+#' @param MN A string specifying the micronutrient.
+#' @param fortificationLevels A dataframe containing the average fortification levels.
+#' @return A numeric value representing the average fortification level for the specified year and micronutrient.
+#' @examples
+#' \dontrun{
+#' avgFortificationLevel <- yearAverageFortificationLevel("wheat flour", 2024, "A")
+#' }
+#' @keywords internal
+yearAverageFortificationLevel <- function(fortification_vehicle, Year, MN, fortificationLevels = readr::read_csv(here::here("data/average_fortification_levels.csv"))) {
+    yearAverageFortificationLevel <- fortificationLevels[fortificationLevels$Year == Year, MN][[1]]
+
+    return(yearAverageFortificationLevel)
+}