diff --git a/.Rbuildignore b/.Rbuildignore index 0f0c78d..ca1efbd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^pkgdown$ ^doc$ ^Meta$ +^data-raw$ diff --git a/NAMESPACE b/NAMESPACE index 45d8c00..b3fa0d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,20 +1,12 @@ # Generated by roxygen2: do not edit by hand export(age_ratio_test) -export(assign_penalty_points_age_sex_ratio) -export(assign_penalty_points_flags_and_sd) -export(assign_penalty_points_skew_kurt) export(check_plausibility_mfaz) export(check_plausibility_muac) export(check_plausibility_wfhz) export(check_sample_size) -export(classify_age_sex_ratio) export(classify_overall_quality) -export(classify_percent_flagged) -export(classify_sd) -export(classify_skew_kurt) export(compute_combined_prevalence) -export(compute_mfaz_prevalence) export(compute_muac_prevalence) export(compute_quality_score) export(compute_wfhz_prevalence) diff --git a/R/age.R b/R/age.R index 24caefe..3771d0d 100644 --- a/R/age.R +++ b/R/age.R @@ -1,27 +1,30 @@ #' -#' Recode age variable from months to days +#' Calculate child's age in days #' -#' @param x A numeric vector containing values of age in months. +#' @param x A double vector of child's age in months. +#' +#' @returns A double vector of the same length as `x` of age in days. #' -#' @returns A numeric vector with values corresponding to age in days #' compute_month_to_days <- function(x) { x * (365.25 / 12) } + + + +#' +#' Calculate child's age in months #' -#' Get age in months from birth-date and the data when data was collected. +#' @description +#' Calculate child's age in months based on date of birth and the data collection date. #' -#' `compute_age_in_months()` works inside [dplyr::mutate()] or [base::transform()] -#' It helps you to compute age in months from a pair of birth date and survey date. +#' @param surv_date A vector of class `Date` for data collection date. #' -#' @param surv_date,birth_date Vectors containing dates. `surv_date` refers to the day, -#' month and year when the data was collected; while `birth_date` refers to the date -#' when the child was born. +#' @param birth_date A vector of class `Date` for child's date of birth. #' -#' @returns A vector of name `age` storing age in months, a mix of double and -#' integer and `NA` for missing value if any of the processed age in months is -#' < 6 or > 59.99 months. +#' @returns A vector of class `double` for child's age in months with two decimal places. +#' Any value less than 6.0 and greater than or equal to 60.0 months will be set to `NA`. #' #' compute_age_in_months <- function (surv_date, birth_date) { @@ -31,34 +34,38 @@ compute_age_in_months <- function (surv_date, birth_date) { age_mo <- ifelse(age_mo < 6.0 | age_mo >= 60.0, NA, age_mo) } + + + #' -#' Transform age in months and age in days with a data frame +#' Wrangle child's age +#' +#' @description +#' Wrangle child's age for downstream analysis. This includes calculating age +#' in months based on the date of data collection and child's date of birth and +#' setting to `NA` the age values that are less than 6.0 and greater than or equal +#' to 60.0 months old. #' -#' `process_age()` helps you get the variable age in the right format and ready -#' to be used for downstream workflow, i.e., get z-scores, as well as exclude -#' age values that are out-of-range. +#' @param df A dataset of class `data.frame` to process age from. #' -#' @param df The input data frame. +#' @param svdate A vector of class `Date` for date of data collection. +#' Default is `NULL`. 
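A minimal sketch of the age rules documented above, using made-up values rather than the package's own functions: age in days is taken as months multiplied by 365.25 / 12, and ages outside the 6.0 to just under 60.0 month window are set to NA.

age_mo <- c(4.5, 11.73, 36.00, 59.99, 60.00)              # hypothetical ages in months
age_days <- age_mo * (365.25 / 12)                        # age in days
age_mo <- ifelse(age_mo < 6 | age_mo >= 60, NA, age_mo)   # out-of-range ages become NA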
#' -#' @param svdate,birdate Vectors containing dates. `svdate` refers to the day, month -#' and year when the data was collected; while `birdate` refers to the date when the -#' child was born (birth-date). By default, both arguments are `NULL`. This is -#' makes `process_age()` work even in data sets where either survey date or birth- -#' data is not available, so the `process_age()` works on already given age variable. +#' @param birdate A vector of class `Date` for child's date of birth. +#' Default is `NULL`. #' -#' @param age A numeric vector containing already given age in months, usually an -#' integer in the input data as it is estimated using local event calendars. -#' `age` will typically be available on a particular row when `birth_date` of -#' that same row is missing. +#' @param age A vector of class `integer` of age in months, usually estimated +#' using local event calendars. #' -#' @returns A data frame of the same length as the input data frame, but of a -#' different width. If `svdate` or `birdate` are available, two new vectors are added -#' to the data frame: `age` in months with two decimal places and `age_day` which -#' is age in days with decimal two decimal places. +#' @returns A `data.frame` based on `df`. The variable `age` that is required to be +#' included in `df` will be filled where applicable with the age in months for +#' each row of data in `df`. A new variable for `df` named `age_days` will be +#' created. Values for `age` and `age_days` for children less than 6.0 and greater +#' than or equal to 60.0 months old will be set to `NA`. #' #' @examples #' -#' # Have a sample data ---- +#' ## A sample data ---- #' df <- data.frame( #' survy_date = as.Date(c( #' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), @@ -67,9 +74,13 @@ compute_age_in_months <- function (surv_date, birth_date) { #' age = c(NA, 36, NA, NA, NA) #' ) #' -#' ## Apply function ---- +#' ## Apply the function ---- #' df |> -#' process_age(svdate = "survy_date", birdate = "birthdate", age = age) +#' process_age( +#' svdate = "survy_date", +#' birdate = "birthdate", +#' age = age +#' ) #' #' @export #' @@ -95,42 +106,40 @@ process_age <- function(df, svdate = NULL, birdate = NULL, age) { tibble::as_tibble(df) } + + #' -#' Age ratio test on children aged 6:23 over 24:59 months +#' Test for statistical difference between the proportion of children aged 24 to +#' 59 months old over those aged 6 to 23 months old #' #' @description -#' As documented in [nipnTK::ageRatioTest()], age ratio test is an age-related -#' test of survey data quality. This includes other assessments as screenings, -#' sentinel sites, etc. Different to [nipnTK::ageRatioTest()], in `age_ratio_test()` -#' the ratio of children is calculate from children 6-23 months to the number of -#' children age 24-59 months. The ratio is then compared to the expected ratio -#' (set at 0.66). Then the difference between the observed ratio is compared to -#' the expected using a Chi-squared test. -#' -#' `age_ratio_test()` should only be used for MUAC checks. This particularly -#' useful as allows you to determine if downstream your analysis you should -#' consider adjusting your MUAC prevalence, should there be more younger children -#' than older children in your survey, screening or sentinel site data. If you -#' wish to get the age ratio for children 6-29/30-59 like in SMART Methodology, -#' then you should use [nipnTK::ageRatioTest()] NOT `age_ratio_test()`. 
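As a rough illustration of the test described above, and not of the package's internal implementation: the observed proportion of children aged 24 to 59 months old is compared against the expected 0.66 with a chi-squared test. The `ages` vector below is hypothetical.

ages <- sample(6:59, 300, replace = TRUE)        # hypothetical ages in months
n_over <- sum(ages >= 24, na.rm = TRUE)          # children aged 24 to 59 months
n_total <- sum(!is.na(ages))
observedP <- n_over / n_total                    # observed proportion
prop.test(n_over, n_total, p = 0.66)$p.value     # chi-squared test against 0.66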
-#' -#' @param age A vector storing values about child's age in months. -#' -#' @param .expectedP The expected proportion of children aged 24-59 months over -#' children aged 6-29 months, considered to be of 0.66 according to the +#' Calculate the observed age ratio of children aged 24 to 59 months old over +#' those aged 6 to 23 months old and test if there is a statistical difference +#' between the observed and the expected. +#' +#' @param age A double vector of age in months. +#' +#' @param .expectedP The expected proportion of children aged 24 to 59 months +#' old over those aged 6 to 23 months old. This is estimated to be 0.66 as in the #' [SMART MUAC tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/). #' -#' @returns A list three statistics: `p` for p-value, `observedR` for observed ratio -#' from your data, `observedP` for observed proportion of children 24-59 months -#' over the universe of your sample data. +#' @returns A vector of class `list` of three statistics: `p` for p-value of the +#' statistical difference between the observed and the expected proportion of +#' children aged 24 to 59 months old over those aged 6 to 23 months old; +#' `observedR` and `observedP` for the observed ratio and proportion respectively. #' -#' @examples +#' @details +#' This function should be used specifically for assessing MUAC data. For +#' age ratio tests of children aged 6 to 29 months old over 30 to 59 months old, as +#' performed in the SMART plausibility check, use [nipnTK::ageRatioTest()] instead. #' -#' ## Have a sample data ---- -#' age <- seq(6,59) |> sample(300, replace = TRUE) +#' @examples #' -#' ## Apply the function ---- -#' age_ratio_test(age, .expectedP = 0.66) +#' ## An example of application using `anthro.02` dataset ---- +#' age_ratio_test( +#' age = anthro.02$age, +#' .expectedP = 0.66 +#' ) #' #' @export #' diff --git a/R/case_definitions.R b/R/case_definitions.R index 0b719b1..2696f6f 100644 --- a/R/case_definitions.R +++ b/R/case_definitions.R @@ -1,24 +1,31 @@ #' -#' Case-Definition: is an observation acutely malnourished? +#' Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria #' -#' [define_wasting_cases_muac()], [define_wasting_cases_whz()] and -#' [define_wasting_cases_combined()] help you get through with your wasting -#' case-definition for each observation. It should be used inside dplyr::mutate() -#' or base::transform(). It was designed to be used inside [define_wasting()]. +#' @description +#' Define if a given observation in the dataset is wasted or not, on the basis of +#' WFHZ, MFAZ, MUAC and the combined criteria. +#' +#' @param df A dataset object of class `data.frame` to use. +#' +#' @param muac A vector of class `integer` of MUAC values in millimeters. +#' +#' @param zscore A vector of class `double` of WFHZ values (with 3 decimal places). +#' +#' @param edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param cases A choice of the form of wasting to be defined. #' -#' @param muac An integer vector containing MUAC measurements in mm. -#' @param zscore A double vector containing weight-for-height zscores with 3 -#' decimal places. -#' @param edema A character vector of "y" = Yes, "n" = No bilateral edema. -#' Default is NULL. -#' @param cases A choice of wasting case definition you wish to apply. For combined -#' acute malnutrition with [define_wasting_cases_combined()] cases options are: -#' c("cgam", "csam", "cmam"). 
+#' @param base A choice of the criterion on which the case-definition should be based. #' -#' @returns A numeric vector of the same size as the input vector, with values ranging -#' between 1=Yes and 0=No. +#' @returns A vector of class `numeric` of dummy values: 1 for case and 0 +#' for not case. +#' +#' @details +#' Use `define_wasting()` to add the case-definitions to data frame. +#' +#' @rdname case_definition #' -#' @rdname case_definitions #' define_wasting_cases_muac <- function(muac, edema = NULL, cases = c("gam", "sam", "mam")) { @@ -46,7 +53,7 @@ define_wasting_cases_muac <- function(muac, edema = NULL, #' #' -#' @rdname case_definitions +#' @rdname case_definition #' #' define_wasting_cases_whz <- function(zscore, edema = NULL, @@ -75,7 +82,7 @@ define_wasting_cases_whz <- function(zscore, edema = NULL, #' #' -#' @rdname case_definitions +#' @rdname case_definition #' #' define_wasting_cases_combined <- function(zscore, muac, edema = NULL, @@ -104,45 +111,28 @@ define_wasting_cases_combined <- function(zscore, muac, edema = NULL, } -# Function to add new vectors with case definitions ---------------------------- -#' -#' Add acute malnutrition case-definitions to the data frame -#' -#' Use `define_wasting()` to add the case-definitions in your input data frame. -#' -#' @param df The data frame object containing the vectors with zscores, muac and -#' edema. -#' @param zscore The vector storing zscores values with 3 decimal places. -#' @param muac An integer vector containing MUAC measurements in mm. -#' @param edema A character vector of "y" = Yes, "n" = No bilateral edema. -#' Default is NULL. -#' @param base A choice of options to which your case definition should be based on. -#' -#' @returns A data frame with three vectors added to the input data frame: "gam", -#' "sam" and "mam". If base = "combined" the vector names change to "cgam", -#' "csam" and "cmam" for combined global, severe and moderate acute malnutrition -#' respectively. #' #' @examples -#' # MUAC-based case-definition ---- +#' +#' ## Weight-for-height based case-definition ---- #' x <- anthro.02 |> #' define_wasting( -#' muac = muac, +#' zscore = wfhz, #' edema = edema, -#' base = "muac" +#' base = "wfhz" #' ) #' head(x) #' -#' # Weight-for-height based case-definition ---- +#' ## MUAC-based case-definition ---- #' x <- anthro.02 |> #' define_wasting( -#' zscore = wfhz, +#' muac = muac, #' edema = edema, -#' base = "wfhz" +#' base = "muac" #' ) #' head(x) #' -#' # Combined case-definition ---- +#' ## Combined case-definition ---- #' x <- anthro.02 |> #' define_wasting( #' zscore = wfhz, @@ -152,6 +142,8 @@ define_wasting_cases_combined <- function(zscore, muac, edema = NULL, #' ) #' head(x) #' +#' @rdname case_definition +#' #' @export #' define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, @@ -231,23 +223,16 @@ define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, } #' -#' A helper function to classify nutritional status into SAM, MAM or not wasted -#' -#' @description -#' `classify_wasting_for_cdc_approach()` is used a helper inside -#' [apply_cdc_age_weighting()] to classify nutritional status into "sam", "mam" -#' or "not wasted" and then the vector returned is used downstream to calculate -#' the proportions of children with severe and moderate acute malnutrition. +#' Classify wasting into severe or moderate wasting to be used in the +#' SMART MUAC tool weighting approach #' -#' @param muac An integer vector containing MUAC values. They should be in -#' millimeters. 
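For orientation, a sketch of the usual MUAC and WFHZ cut-offs behind the case-definitions and the classification described above, with hypothetical values; the package's own `define_wasting_cases_*()` functions remain the reference. MUAC is in millimeters, WFHZ in z-scores, and edema is coded "y"/"n".

muac <- c(110, 118, 130)
wfhz <- c(-3.2, -2.4, -1.1)
edema <- c("n", "n", "y")
gam_by_muac <- ifelse(muac < 125 | edema == "y", 1, 0)   # global wasting by MUAC
sam_by_muac <- ifelse(muac < 115 | edema == "y", 1, 0)   # severe wasting by MUAC
gam_by_wfhz <- ifelse(wfhz < -2 | edema == "y", 1, 0)    # global wasting by WFHZ
sam_by_wfhz <- ifelse(wfhz < -3 | edema == "y", 1, 0)    # severe wasting by WFHZ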
+#' @param muac A vector of class `integer` of MUAC values in millimeters. #' -#' @param .edema Optional. Its a vector containing data on bilateral pitting -#' edema coded as "y" for yes and "n" for no. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @returns A numeric vector of the same size as the input vector with values ranging -#' between "sam", "mam" and "not wasted" for severe, moderate acute malnutrition and not -#' acutely malnourished, respectively. +#' @returns A vector of class `character` of the same length as `muac` and `.edema` +#' indicating if a child is severe or moderately wasted or not wasted. #' #' classify_wasting_for_cdc_approach <- function(muac, .edema = NULL) { diff --git a/R/data.R b/R/data.R index 56c0647..00b8661 100644 --- a/R/data.R +++ b/R/data.R @@ -1,12 +1,12 @@ #' -#' Raw data of a district level representative survey +#' A sample data of district level SMART surveys with location anonymised #' #' @description -#' #' `anthro.01` is about a two-stage and PPS cluster sampling survey data -#' conducted in two district following the SMART survey methodology in two -#' livelihood zones. The location information was anonymized for confidentiality. +#' `anthro.01` is a two-stage cluster-based survey with probability of selection +#' of clusters proportional to the size of the population. The survey employed +#' the SMART methodology. #' -#' @format A tibble with 1191 rows and 11 columns. +#' @format A tibble of 1,191 rows and 11 columns. #' #' | **Variable** | **Description** | #' | :--- | :--- | @@ -22,31 +22,25 @@ #' | *edema* | Edema, "n" = no, "y" = yes | #' | *muac* | Mid-upper arm circumference (mm) | #' +#' @source Anonymous +#' #' @examples #' anthro.01 #' +#' "anthro.01" #' -#' -#' Province representative survey conducted in Mozambique +#' A sample of an already wrangled survey data #' #' @description -#' `anthro.02` is about a household budget survey conducted in Mozambique in -#' 2019/2020, known as IOF (*Inquérito ao Orçamento Familiar* in portuguese). -#' The data is publicly available [here](https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access). -#' The survey had a module on nutrition with anthropometric measurements taken -#' from children age 0-59 months for weight-for-height and 6-59 months for MUAC. -#' *IOF* is a cluster and PPS-based, survey, with sampling done in two stages, -#' designed to give representative estimates at province level. Its data -#' collection spans for a period of 12 months, with anthropometric measurements -#' taken during that period too. Read the [Bureau of Statistic's website on -#' IOF](https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling) for -#' more details. -#' -#' `anthro.02` has been processed for this package's purpose. -#' -#' @format A tibble with 2267 rows and 14 columns. +#' A household budget survey data conducted in Mozambique in +#' 2019/2020, known as *IOF* (*Inquérito ao Orçamento Familiar* in Portuguese). *IOF* +#' is a two-stage cluster-based survey, representative at province level (admin 2), +#' with probability of the selection of the clusters proportional to the size of +#' the population. Its data collection spans for a period of 12 months. +#' +#' @format A tibble of 2,267 rows and 14 columns. 
#' #' |**Variable** | **Description** | #' | :--- | :---| @@ -65,29 +59,36 @@ #' | *mfaz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_mfaz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Mozambique National Institute of Statistics. The data is publicly +#' available at . +#' Data was wrangled using this package's wranglers. Details about survey design +#' can be gotten from: +#' #' @examples #' anthro.02 #' -#' "anthro.02" #' -#' District level SMART surveys conducted in four district in Mozambique +#' A sample data of district level SMART surveys conducted in Mozambique #' #' @description -#' This example data contains survey data of four districts. Two of them have their WFHZ -#' standard deviation classified as problematic, and the are other two within range of -#' acceptable standard deviation. The data is used to test the performance of WFHZ based -#' prevalence when used on a data set with multiple survey areas that may or not have -#' different classification for standard deviation that may warrant different analysis -#' approach, as the function is designed for. +#' `anthro.03` contains survey data of four districts. Each district dataset +#' presents distinct data quality scenarios that requires tailored prevalence +#' analysis approach: two districts show a problematic WFHZ standard deviation +#' whilst the remaining are all within range. +#' +#' This sample data is useful to demonstrate the use of the prevalence functions on +#' a multi-area survey data where there can be variations in the rating of +#' acceptability of the standard deviation, hence require different analyses approaches +#' for each area to ensure accurate estimation. #' #' @format A tibble of 943 x 9. #' #' |**Variable** | **Description** | #' | :--- | :---| -#' | *district* | The administrative unit (admin 1) where data was collected. | +#' | *district* | The location where data was collected | #' | *cluster* | Primary sampling unit | #' | *team* | Survey teams | #' | *sex* | Sex, "m" = boys, "f" = girls | @@ -97,6 +98,8 @@ #' | *edema* | Edema, "n" = no, "y" = yes | #' | *muac* | Mid-upper arm circumference (mm) | #' +#' @source Anonymous +#' #' @examples #' anthro.03 #' @@ -105,24 +108,29 @@ #' -#' MUAC data from a community-based sentinel site from an anonymized location +#' +#' A sample data of a community-based sentinel site from an anonymized location #' #' @description -#' Data in `anthro.04` was generated from a community-based sentinel site of three provinces. -#' Each province data set holds different scenarios that informs the appropriate analysis -#' approach to follow. One province (province 3) has its MFAZ standard deviation and age -#' ratio tests classified as problematic. Another province (province 2) has its age ratio -#' classified as problematic, but with a within range standard deviation. Lastly, province 1 -#' has both tests falling within range of nor problematic. The data is used to test the -#' performance of `[compute_muac_prevalence()]` based when used on a multiple survey areas -#' data that may or not have on the aforementioned test that may then warrant a different -#' analysis approach, as the function is designed for. +#' Data was generated through a community-based sentinel site conducted +#' across three provinces. 
Each province's dataset presents distinct +#' data quality scenarios, requiring tailored prevalence analysis: +#' + "Province 1" has MFAZ's standard deviation and age ratio test rating of +#' acceptability falling within range; +#' + "Province 2" has age ratio rated as problematic but with an acceptable +#' standard deviation of MFAZ; +#' + "Province 3" has both tests rated as problematic. +#' +#' This sample data is useful to demonstrate the use of prevalence functions on +#' a multi-area survey data where variations in the rating of acceptability of the +#' standard deviation exist, hence require different analyses approaches for each +#' area to ensure accurate estimation. #' #' @format A tibble of 3,002 x 8. #' #' |**Variable** | **Description** | #' | :--- | :---| -#' | *province* | +#' | *province* | location where data was collected | #' | *cluster* | Primary sampling unit | #' | *sex* | Sex, "m" = boys, "f" = girls | #' | *age* | calculated age in months with two decimal places | @@ -131,20 +139,17 @@ #' | *mfaz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_mfaz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Anonymous +#' #' @examples #' anthro.04 #' +#' "anthro.04" #' -#' A SMART survey data with standard deviation on weight-for-height zscores -#' classified as problematic -#' -#' @description -#' A SMART survey data with weight-for-height data where standard deviation is -#' problematic. The data is used to test that `compute_wfhz_prevalence()` works as -#' designed for when standard deviation is problematic. +#' A sample SMART survey data with WFHZ standard deviation rated as problematic #' #' @format A tibble with 303 rows and 6 columns. #' @@ -157,6 +162,8 @@ #' | *wfhz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_wfhz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Anonymous +#' #' @examples #' wfhz.01 #' @@ -165,7 +172,7 @@ #' -#' A MUAC screening data from an anonymized setting +#' A sample MUAC screening data from an anonymized setting #' #' @format A tibble with 661 rows and 4 columns. #' @@ -176,18 +183,15 @@ #' | *edema* | Edema, "n" = no, "y" = yes | #' | *muac* | Mid-upper arm circumference (mm) | #' +#' @source Anonymous +#' #' @examples #' mfaz.01 #' "mfaz.01" -#' A SMART survey data with MUAC #' -#' @description -#' A SMART survey data collected in an anonymized location. This data has -#' mfaz standard deviation and age ratio within range for a normal prevalence -#' analysis. It is, thus, used to check if `compute_muac_prevalence()` performs -#' as designed. +#' A sample SMART survey data with MUAC #' #' @format A tibble with 303 rows and 7 columns. #' @@ -200,6 +204,8 @@ #' | *mfaz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_mfaz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Anonymous +#' #' @examples #' mfaz.02 #' diff --git a/R/data_processors.R b/R/data_processors.R deleted file mode 100644 index a3a3b97..0000000 --- a/R/data_processors.R +++ /dev/null @@ -1,337 +0,0 @@ -#' -#' -#' Identify and flag outliers in WHZ, MFAZ, and crude MUAC datasets -#' -#' Outliers are extreme values that far away from the mean, that are unlikely to -#' be correct measurements. `flag_outliers()` helps you to identify any extreme -#' values in your dataset in two different ways. Outliers in WHZ are identified -#' based on the [SMART Methodology.](https://smartmethodology.org/). 
-#' MFAZ follows the same approach, while crude MUAC's approach is based on a -#' fixed range (<100mm and >200mm), based a multicountry research findings by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478) -#' -#' @param x A numeric value from the variable storing either WHZ or MFAZ or crude -#' MUAC's observations in the dataset, as applicable. -#' -#' @param type The method you wish `flag_outliers()` to identify flags on. -#' A choice between "zscore" and "crude". If you wish to get flags for WHZ or -#' MFAZ, set `method = "zscore"`. Alternatively, if your wish to get flags for -#' crude MUAC, set `method = "crude"`. The default is "zscore". If by mistake -#' a different option is supplied, an error will be thrown with a message -#' guiding you what to do. -#' -#' @return A vector of two values: 1 and 0, where 1 signifies flagged value and -#' 0 not flagged. -#' -#' @examples -#' -#' # Sample data of crude MUAC ---- -#' x <- c(90, 110, 140, 200, 119, 235) -#' -#' # Apply `flag_outliers()` with type set to "crude" ---- -#' flag_outliers(x, type = "crude") -#' -#' # Sample data of MFAZ ---- -#' x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828) -#' -#' # Apply `flag_outliers()` with type set to "zscore" ---- -#' flag_outliers(x, type = "zscore") -#' -#' @export -#' -flag_outliers <- function(x, type = c("zscore", "crude")) { - type <- match.arg(type) - - if (type == "zscore") { - mean_zscore <- mean(x, na.rm = TRUE) - flags <- ifelse((x < (mean_zscore - 3) | x > (mean_zscore + 3)), 1, 0) - flags <- ifelse(is.na(x), NA, flags) - flags - - } else { - flags <- ifelse(x < 100 | x > 200, 1, 0) - flags <- ifelse(is.na(x), NA, flags) - flags - } -} - - -#' -#' -#' Remove detected outliers -#' -#' @description -#' `remove_flags()` removes flags detected by [flag_outliers()]. It helps you -#' compute your statistics when flags needs to be removed, such as in standard -#' deviation. -#' -#' @param x A numeric vector containing zscore or crude MUAC values. -#' -#' @param unit A choice of the units to which you wish remove flags on. variable into. -#' -#' @returns A vector of same size, with flagged data replaced by `NA`s. -#' -remove_flags <- function(x, unit = c("zscore", "crude")) { - - ## Match arguments ---- - unit <- match.arg(unit) - - ## Control flow based on unit ---- - switch( - unit, - ### Remove flags when unit = "zscore" ---- - "zscore" = { - mean_x <- mean(x, na.rm = TRUE) - zs <- ifelse((x < (mean_x - 3) | x > (mean_x + 3)) | is.na(x), NA_real_, x) - }, - ### Remove flags when unit = "crude" ---- - "crude" = { - cr <- ifelse(x < 100 | x > 200 | is.na(x), NA_integer_, x) - } - ) -} - - -#' -#' -#' -#' Recode crude MUAC variable into either centimeters or millimeters -#' -#' @description -#' Sometimes, a vector containing MUAC values may be in centimeters or in -#' millimeters. You may want to get in the right format to use with -#' [zscorer::addWGSR] or [nipnTK::digitPreference()]. `recode_muac()` helps you -#' getting the vector in the right format for the job! It works inside works -#' inside [dplyr::mutate()] or [base::transform()]. -#' -#' @param muac A numeric vector storing values for MUAC that can be in centimeters -#' or in millimeters. -#' -#' @param unit A choice of the units to which you wish to convert your MUAC -#' variable into. -#' -#' @returns A transformed vector into the unit you wish to have. 
-#' -#' @examples -#' # Have an input data with muac in mm ---- -#' muac <- seq(90, 250, by = 4) -#' -#' # Apply recode ---- -#' recode_muac(muac, unit = "cm") -#' -#' # Have an input data with muac in mm ---- -#' muac <- seq(9.0, 25.0, by = 0.2) -#' -#' # Apply recode ---- -#' recode_muac(muac, unit = "mm") -#' -#' @export -#' -recode_muac <- function(muac, unit = c("cm", "mm")) { - - ## Check if unit's arguments match ---- - stopifnot(unit %in% c("cm", "mm")) - - ## Recode muac conditionally ---- - switch( - unit, - ### Recode to millimeters ---- - "mm" = {muac <- muac * 10}, - ### Recode to centimeters ---- - "cm" = {muac <- muac / 10}, - stop("Invalid 'units' argument. Please choose either 'cm' or 'mm'.") - ) -} - - -#' -#' -#' -#' Process MUAC data a get it ready for analyses -#' -#' @description -#' `process_muac_data()` gets your input data ready for downstream MUAC related -#' analysis. -#' -#' @param df The input data frame with variables sex, age and MUAC. -#' -#' @param sex A vector storing values about whether the child is a boy or a girl. -#' The variable name must be named sex, otherwise it will not work. -#' -#' @param muac A vector storing crude MUAC values. -#' -#' @param age A vector storing values about child's age in months. The variable -#' name must be named age, otherwise it will not work. For instance, if given as -#' following: age = months it will not work. -#' -#' @param .recode_sex Logical. It asks whether you should recode your sex variable -#' to the required shape to use in `process_muac_data()`. The default values for -#' sex are 1 for boys and 2 for girls. Setting `.recode_sex = TRUE` works on "m" -#' and "f" values. If your vector is in any different shape, you should put it in -#' "m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -#' SMART software, then you should leave `.recode_sex` at its default: `TRUE`. -#' -#' @param .recode_muac Logical. Choose between `TRUE` or `FALSE` if you wish or -#' not to recode the MUAC variable into the required format to work on. -#' -#' @param unit A choice of the units to which you wish to convert your MUAC -#' variable into. -#' -#' @returns A data frame of the same length as the input data, but with a -#' different width as explained:When `age` is available in the input data and -#' supplied, `process_muac_data` will return as output a data frame with two -#' new variables `mfaz` and `flags`. `mfaz` stores MUAC-for-age z-score (MFAZ) -#' values and `flags` tells you whether a given z-score is an outlier or not. -#' This job is done by [flag_outliers()]. If age is not available in the input -#' data, therefore not possible to supply in this function, `process_muac_data` -#' will only return `flags`. This will refer to flags based on crude MUAC. 
-#' -#' @examples -#' -#' ## Have a sample data ---- -#' -#' df <- data.frame( -#' survey_date = as.Date(c( -#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), -#' birthdate = as.Date(c( -#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), -#' age = c(NA, 36, NA, NA, NA), -#' sex = c("m", "f", "m", "m", "f"), -#' muac = c(110, 130, 300, 123, 125) -#' ) -#' -#' ## Apply function ---- -#' df |> -#' process_age( -#' svdate = "survey_date", -#' birdate = "birthdate", -#' age = age -#' ) |> -#' process_muac_data( -#' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" -#' ) -#' -#' @export -#' -process_muac_data <- function(df, - sex, muac, age = NULL, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = c("cm", "mm", "none")) { - unit <- match.arg(unit) - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - rec_muac <- quote( - if (.recode_muac && unit == "cm") { - muac <- recode_muac({{ muac }}, unit = "cm") - } else if (.recode_muac && unit == "mm") { - muac <- recode_muac({{ muac }}, unit = "mm") - } else { - {{ muac }} - } - ) - - if (!is.null({{ age }})) { - df <- df |> - mutate( - muac = !!rec_muac, - sex = !!recode_sex, - ) |> - addWGSR( - sex = "sex", - firstPart = "muac", - secondPart = "age_days", - index = "mfa", - digits = 3 - )|> - mutate( - flag_mfaz = do.call(flag_outliers, list(.data$mfaz, type = "zscore")) - ) - } else { - df <- df |> - mutate( - sex = !!recode_sex, - flag_muac = do.call(flag_outliers, list({{ muac }}, type = "crude")) - ) - } - tibble::as_tibble(df) -} - - -# Function to process Weight-for-height data ----------------------------------- - -#' -#' Process Weight-for-Height data get it ready for analyses -#' -#' `process_wfhz_data()` gets your input data ready for downstream WHZ related -#' analysis. -#' -#' @param df The input data frame with variables sex, age and MUAC. -#' -#' @param sex A vector storing values about whether the child is a boy or a girl. -#' -#' @param weight,height Vectors storing weight values in kilograms and height -#' values in centimeters, respectively. -#' -#' @param .recode_sex Logical. It asks whether you should recode your sex variable -#' to the required shape to use in `process_wfhz_data()`. The default values for -#' sex are 1 = boys and 2 = girls. Setting `.recode_sex = TRUE` works on "m" -#' and "f" values. If your vector is in any different shape, you should put it in -#' "m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -#' SMART software, then you should leave `.recode_sex` at its default: `TRUE`. -#' -#' @returns A data frame of the same length as the input data, but with a different -#' width: two new variables `wfhz` and `flags`. `wfhz` stores weight-for-height -#' z-score values with three decimal places. `flags` tells you whether a given -#' z-score is an outlier or not. This job is done by [flag_outliers()]. 
-#' -#' @examples -#' ## Have a sample data ---- -#' anthro.01 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) -#' -#' @export -#' -process_wfhz_data <- function(df, sex, weight, height, .recode_sex = TRUE) { - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - df <- df |> - mutate( - sex = !!recode_sex - ) |> - addWGSR( - sex = {{ "sex" }}, - firstPart = {{ "weight" }}, - secondPart = {{ "height" }}, - index = "wfh", - digits = 3 - ) |> - mutate( - flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) - ) - tibble::as_tibble(df) -} diff --git a/R/pretty_tables.R b/R/pretty_tables.R index 623c5ed..2b0c7f7 100644 --- a/R/pretty_tables.R +++ b/R/pretty_tables.R @@ -1,45 +1,38 @@ -#' Get a prettified formatted and presentable output table +#' Get a formatted and presentable output table for the plausibility checkers #' -#' You may want to share the plausibility report in a table. You usually care for -#' a well formatted and pretty table, with values rounded, scientific notations -#' converted into conventional notations, etc. `generate_pretty_table_mfaz()`, -#' `generate_pretty_table_wfhz()` and `generate_pretty_table_muac()` does that -#' for you so you already. +#' @description +#' Useful to getting the output returned from the plausibility checkers +#' into a presentable format. It converts scientific notations to standard +#' notations, round values and rename columns to meaningful names. #' -#' @param df An output data frame returned by [check_plausibility_mfaz()], -#' [check_plausibility_wfhz()] or [check_plausibility_muac()]. +#' @param df A summary table object of class `data.frame` returned by the +#' plausibility checkers. #' -#' @returns An output data frame of the same size as the input, but with values -#' formatted, columns renamed, and ready to share. +#' @returns A `data.frame` as `df`. Columns are renamed, values formatted and +#' ready to be shared. 
#' #' @examples #' -#' ## Plausibility check on MFAZ ---- +#' ## Check the plausibility of WFHZ data ---- #' #' anthro.01 |> -#' process_age( -#' svdate = "dos", -#' birdate = "dob", -#' age = age -#' ) |> -#' process_muac_data( +#' process_wfhz_data( #' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE #' ) |> -#' check_plausibility_mfaz( -#' flags = flag_mfaz, +#' check_plausibility_wfhz( #' sex = sex, -#' muac = muac, #' age = age, +#' weight = weight, +#' height = height, +#' flags = flag_wfhz, #' area = area #' ) |> -#' generate_pretty_table_mfaz() +#' generate_pretty_table_wfhz() #' -#' ## Plausibility check on absolute MUAC ---- +#' ## Check the plausibility of MUAC data ---- #' #' anthro.01 |> #' process_muac_data( @@ -57,25 +50,30 @@ #' ) |> #' generate_pretty_table_muac() #' -#' ## Plausibility check on WFHZ ---- +#' ## Check the plausibility of MFAZ data ---- #' #' anthro.01 |> -#' process_wfhz_data( +#' process_age( +#' svdate = "dos", +#' birdate = "dob", +#' age = age +#' ) |> +#' process_muac_data( #' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE +#' age = "age", +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' unit = "cm" #' ) |> -#' check_plausibility_wfhz( +#' check_plausibility_mfaz( +#' flags = flag_mfaz, #' sex = sex, +#' muac = muac, #' age = age, -#' weight = weight, -#' height = height, -#' flags = flag_wfhz, #' area = area #' ) |> -#' generate_pretty_table_wfhz() -#' +#' generate_pretty_table_mfaz() #' #' @rdname pretty_table #' diff --git a/R/prevalence_combined.R b/R/prevalence_combined.R index 149a086..c73f7b2 100644 --- a/R/prevalence_combined.R +++ b/R/prevalence_combined.R @@ -1,5 +1,5 @@ #' -#' Compute combined prevalence of acute malnutrition +#' Compute combined prevalence of wasting #' #' @rdname combined_prevalence #' @@ -63,50 +63,45 @@ compute_pps_based_combined_prevalence <- function(df, #' #' -#' Compute combined prevalence of acute malnutrition +#' Compute the prevalence of combined wasting #' #' @description -#' `compute_combined_prevalence()` is handy function to compute the combined prevalence of -#' acute malnutrition using the WFHZ and the absolute values of MUAC and edema for case -#' definition. Under the hood, before prevalence computations begin, it first evaluates the -#' status of WFHZ, MFAZ's standard deviation and age ratio test, as documented in -#' [compute_wfhz_prevalence()] and [compute_muac_prevalence()]. Then, it decides on the -#' appropriate analysis approach to employ depending on the outcome of the aforementioned -#' checks: (i) if either WFHZ, MFAZ standard deviation as well as age ratio test are not -#' simultaneously problematic, a complex sample-based prevalence analysis (for a two-stage -#' PPS cluster sampling) is computed; (ii) all other possibilities will involve either one -#' of the z-scores or the age ratio test being problematic, thus NA (for Not Applicable) -#' get thrown to output table. -#' -#' A concept of "combined flags" is introduced here. This consists on creating a new vector -#' (cflags) of the same length as the input vectors (wfhz_flags and mfaz_flags) and assesses -#' if any element of either input vector is a flag (1), then that element is labelled as -#' flag (1) in the "cflags" vector, otherwise is not flag (0). 
This ensures that all -#' flagged observations in the WFHZ data and in MFAZ data are excluded for the combined -#' prevalence analysis. -#' -#' @param df A data frame object returned by [process_muac_data()] and [process_wfhz_data()]. -#' The process_***_data function will have to used both to prepare the input data to be used -#' in the `compute_combined_prevalence()`. The order of which comes first does not matter, -#' however, since the muac data processor transforms MUAC values into centimeters, those -#' need to be put back into millimeter. This can be achieved my using [recode_muac()] inside -#' [dplyr::mutate()] or [base::transform()] (see example number 3 below). -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) -#' the function will assume self weights, like in ENA for SMART, if otherwise given, the -#' weighted analysis will be computed. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. If you are working on a single survey data, set -#' .summary_by = NULL (default). If this argument is not used, the function will error. -#' -#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -#' areas in the data set) x 17. -#' +#' The prevalence is calculated in accordance with the complex sample design +#' properties inherent to surveys. This includes weighting of survey data where +#' applicable. When either the acceptability of the standard deviation of WFHZ or +#' of the age ratio test is problematic, prevalence is not calculated. +#' +#' @param df An already wrangled dataset of class `data.frame` to use. Both +#' wranglers (of WFHZ and of MUAC) need to be used sequentially, regardless of the +#' order. Note that MUAC values should be converted to millimeters after using +#' the MUAC wrangler. +#' +#' @param .wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self-weighted survey, as in the ENA for SMART software; +#' otherwise a weighted analysis is computed. +#' +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. +#' +#' @returns A summarised table of class `data.frame` for the descriptive +#' statistics about combined wasting. +#' +#' @details +#' A concept of "combined flags" is introduced in this function. It consists of +#' defining as flag any observation that is flagged in either `flag_wfhz` or +#' `flag_mfaz` vectors. A new column `cflags` for combined flags is created and +#' added to `df`. This ensures that all flagged observations from both WFHZ +#' and MFAZ data are excluded from the combined prevalence analysis. 
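A minimal sketch of how such a combined flag can be derived; the function's own implementation may differ in detail.

flag_wfhz <- c(1, 0, 0)
flag_mfaz <- c(0, 1, 0)
cflags <- ifelse(flag_wfhz == 1 | flag_mfaz == 1, 1, 0)   # gives 1, 1, 0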
+#' +#' *The table below shows an overview of how `cflags` are defined* +#' | **flag_wfhz** | **flag_mfaz** | **cflags** | +#' | :---: | :---: | :---: | +#' | 1 | 0 | 1 | +#' | 0 | 1 | 1 | +#' | 0 | 0 | 0 | #' #' @examples #' diff --git a/R/prevalence_mfaz.R b/R/prevalence_mfaz.R index 454f068..185bd32 100644 --- a/R/prevalence_mfaz.R +++ b/R/prevalence_mfaz.R @@ -1,29 +1,4 @@ #' -#' Compute a MUAC-for-age z-score based prevalence estimates of data collected from a two-stage -#' cluster survey sample design, with the first stage sampling done with Probability -#' Proportional to the size of population -#' -#' @description -#' Create a survey design object using the [srvyr::as_survey_design()] and then calculate -#' the survey means as well the sum of positive cases. -#' -#' @param df A data frame object returned by [process_muac_data()]. -#' this will contain the wrangled vectors that are read inside the function. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. -#' -#' @returns A tibble of size depending on the number of groups of the vector given to -#' `.summary_by` or if set to NULL, and of length 17. #' #' compute_pps_based_mfaz_prevalence <- function(df, @@ -82,67 +57,9 @@ compute_pps_based_mfaz_prevalence <- function(df, } #' -#' Compute acute malnutrition prevalence based on MUAC-for-age z-scores (MFAZ) -#' -#' @description -#' `compute_mfaz_prevalence()` is a handy function designed to dynamically compute acute -#' malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -#' WFHZ's standard deviation (SD) after removing flags, and then it decides on the -#' appropriate prevalence analysis approach to follow: if SD is anything between excellent -#' and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -#' cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -#' with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -#' was also designed to work around survey weights. -#' The function also super handy to work on large data sets with multiple survey areas. For -#' this, the aforementioned conditionals are checked for each survey area in a summarized -#' data frame and prevalence get computed according to each row's scenario. -#' -#' @param df A data frame object returned by [process_muac_data()]. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. 
-#' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. If you are working on a single survey data, set -#' .summary_by = NULL (default). -#' -#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -#' areas in the data set) x 17. -#' -#' @examples -#' -#' ## When .summary_by = NULL ---- -#' compute_mfaz_prevalence( -#' df = anthro.04, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' ## When .summary_by is not set to NULL ---- -#' compute_mfaz_prevalence( -#' df = anthro.04, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = province -#' ) #' -#' ## When a weighted analysis is needed ---- -#' ### This example uses a different data set with survey weights ---- -#' compute_mfaz_prevalence( -#' df = anthro.02, -#' .wt = "wtfactor", -#' .edema = edema, -#' .summary_by = province -#' ) +#' @rdname prevalence #' -#' @export #' compute_mfaz_prevalence <- function(df, .wt = NULL, diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R index 51a11f4..38de555 100644 --- a/R/prevalence_muac.R +++ b/R/prevalence_muac.R @@ -1,15 +1,26 @@ #' -#' A helper function to tell how to go about MUAC prevalence analysis based on -#' on the output of age ratio and standard deviation test results +#' A helper function to determine the MUAC prevalence analysis approach to follow #' -#' @param age_ratio_class,sd_class Character vectors storing age ratio's p-values -#' and standard deviation's classification, respectively. +#' @description +#' It determines the analysis approach to follow for a given analysis area on +#' the basis of the rate of acceptability of the age ratio test and the standard +#' deviation analysis result. +#' +#' @param age_ratio_class A vector of class `character` of the acceptability +#' classification of the age ratio test result. +#' +#' @param sd_class A vector of class `character` of the acceptability +#' classification of the standard deviation analysis result. +#' +#' @returns A vector of class `character` of the same length as the input vectors, +#' containing values indicating the analysis approach for each analysis area: "weighted", +#' "unweighted" and "missing". +#' +#' @details +#' When "weighted", the CDC weighting approach is applied to correct for +#' age bias; when "unweighted" a normal complex sample analysis is applied; when +#' "missing" `NA` gets thrown. #' -#' @returns A character vector of the same length containing the indication of -#' what to do for the MUAC prevalence analysis: "weighted", "unweighted" and -#' "missing". If "weighted", the CDC weighting approach is applied to correct for -#' age bias. If "unweighted" a normal complex sample analysis is applied, and for -#' the latter, NA are thrown. #' #' tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { @@ -24,34 +35,27 @@ tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { #' #' -#' Correct the observed MUAC prevalence when there is an unbalanced sample -#' between children under 2 and over two years old +#' Apply the CDC/SMART prevalence weighting approach on MUAC data #' #' @description -#' As documented in the SMART MUAC tool and in the literature, MUAC shows a known -#' bias towards younger children. In a balanced sample, it is expected to have -#' nearly two thirds of the sample to be of children over two years old. 
If too -#' few older children are included in the sample, the weighted tool should be used. +#' Calculate a weighted prevalence estimate of MUAC by adding the proportion of +#' children under 2 years to twice the proportion of children over 2 and then +#' dividing by 3. #' -#' `apply_cdc_age_weighting()` does that. It takes the proportion of children -#' under 2 and adds to the product of 2 times the proportion of children over two, -#' then divided by 3. The use of this function is informed by the output of -#' [age_ratio_test()]. There is difference between this function and that in the -#' SMART plausibility check. Consider reading the documentation before use. +#' @param muac A vector of class `integer` of MUAC values (in mm). #' -#' @param muac An integer vector containing MUAC measurements in mm. +#' @param age A vector of class `double` of child's age in months. #' -#' @param age A double vector containing age in months with at least 2 decimal -#' places. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @param .edema Optional. If given, it should be a character vector of "y" = Yes, -#' "n" = No bilateral edema. +#' @param status A choice of the form of wasting to be defined. #' -#' @param status If you wish to get the prevalence/proportions of severe or -#' moderate acute malnutrition. Set `status = "sam" or status = "mam"` for the -#' former or latter, respectively. +#' @returns A vector of class `numeric` of length and size 1. +#' +#' @details +#' This function is informed by the output of [age_ratio_test()]. #' -#' @returns A numeric vector of length and size 1. #' apply_cdc_age_weighting <- function(muac, age, .edema = NULL, status = c("sam", "mam")) { @@ -85,32 +89,18 @@ apply_cdc_age_weighting <- function(muac, age, #' +#' Apply the CDC/SMART prevalence weighting approach on MUAC data #' -#' A wrapper function to compute of `apply_cdc_age_weighting()` that allows to work on -#' a data frame -#' -#' @description -#' `compute_weighted_prevalence()` is the main function use to compute age adjusted MUAC -#' prevalence where there are excess of children 6:23 over 24:59 months. It allows the -#' computations to be done on a data frame. The function is used inside the main and -#' exported function to compute MUAC based prevalence. Before computing the prevalence, -#' the function first removed the flagged data so the computations are performed on -#' non-flagged observations. +#' @param df An already wrangled dataset object of class `data.frame` to use. #' -#' @param df A data frame object returned by [process_muac_data()] this will contain the -#' wrangled vectors that are read inside the function. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. +#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. 
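A worked example of the weighting described above, with hypothetical proportions:

p_under2 <- 0.05                             # proportion wasted among children 6-23 months
p_over2 <- 0.02                              # proportion wasted among children 24-59 months
weighted_p <- (p_under2 + 2 * p_over2) / 3   # 0.03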
#' -#' @returns A tibble with length and size varying according to use of `.summary_by`. -#' If set to NULL, a tibble of 1 x 3 is returned, otherwise the size of the tibble with be -#' corresponding to the number of groups/areas in the vector given to `.summary_by`, but -#' with the same length. +#' @returns A table of class `data.frame` of dimensions that vary based on +#' `.summary_by`, containing the results. #' #' compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { @@ -149,31 +139,6 @@ compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { #' #' -#' Compute MUAC based prevalence estimates of data collected from a two-stage cluster -#' survey sample design, with the first stage sampling done with Probability Proportional -#' to the size of population -#' -#' @description -#' Create a survey design object using the [srvyr::as_survey_design()] and then calculate -#' the survey means as well the sum of positive cases. -#' -#' @param df A data frame object returned by [process_muac_data()]. -#' this will contain the wrangled vectors that are read inside the function. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. -#' -#' @returns A tibble of size depending on the number of groups of the vector given to -#' `.summary_by` or if set to NULL, and of length 17. -#' #' #' compute_pps_based_muac_prevalence <- function(df, @@ -226,44 +191,13 @@ compute_pps_based_muac_prevalence <- function(df, #' #' +#' @rdname prevalence #' -#' Compute acute malnutrition prevalence based on MUAC (the absolute values) +#' @examples #' -#' @description -#' `compute_muac_prevalence()` is a handy function designed to dynamically compute acute -#' malnutrition's prevalence using the absolute values of MUAC, however using the MFAZ for -#' quality checks before advancing to prevalence computations. Under the hood, the function -#' first checks the status of MFAZ's standard deviation (SD) after removing flags, and -#' the status of age ratio among children aged 6:23 vs 24:59 months. Then it decides on the -#' appropriate prevalence analysis approach to follow: (i) if SD & age ratio are both not -#' problematic, a complex sample-based prevalence analysis (for a two-stage PPS -#' cluster sampling) is computed; (ii) if MFAZ's SD is not problematic, but age ratio test -#' is, the CDC/SMART MUAC tool weighting approach is used to compute the prevalence; (iii) -#' lastly, if MFAZ's SD is problematic even if age ratio test is not, no prevalence -#' analysis is computed and NA (of Not Applicable) are thrown. -#' The function also super handy to work on large data sets with multiple survey areas. For -#' this, the aforementioned conditionals are checked for each survey areas in a summarized -#' data frame and prevalence get computed according to each row's scenario. 
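A sketch of the decision rule just described, assuming the classifications arrive as strings such as "Problematic"; `choose_muac_approach()` is a hypothetical name used only for illustration, while the package's own helper is `tell_muac_analysis_strategy()`.

choose_muac_approach <- function(age_ratio_class, sd_class) {
  if (sd_class == "Problematic") {
    "missing"      # problematic MFAZ standard deviation: no prevalence reported (NA)
  } else if (age_ratio_class == "Problematic") {
    "weighted"     # problematic age ratio with acceptable SD: CDC/SMART age weighting
  } else {
    "unweighted"   # both acceptable: standard complex-sample analysis
  }
}
choose_muac_approach("Problematic", "Excellent")   # "weighted"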
-#' -#' @param df A data frame object returned by [process_muac_data()]. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. If you are working on a single survey data, set -#' .summary_by = NULL (default). If this argument is not used, the function will error. -#' -#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -#' areas in the data set) x 17. +#' ## An example of application of `compute_muac_prevalence()` ---- #' -#' @examples -#' ## When .summary.by = NULL ---- +#' ### When .summary.by = NULL ---- #' #' x <- compute_muac_prevalence( #' df = anthro.04, @@ -274,7 +208,7 @@ compute_pps_based_muac_prevalence <- function(df, #' #' print(x) #' -#' ## When .summary_by is not set to NULL ---- +#' ### When .summary_by is not set to NULL ---- #' #' p <- compute_muac_prevalence( #' df = anthro.04, @@ -285,7 +219,6 @@ compute_pps_based_muac_prevalence <- function(df, #' #' print(p) #' -#' #' @export #' compute_muac_prevalence <- function(df, diff --git a/R/prevalence_wfhz.R b/R/prevalence_wfhz.R index 0997a22..b976c87 100644 --- a/R/prevalence_wfhz.R +++ b/R/prevalence_wfhz.R @@ -1,29 +1,132 @@ #' -#' Compute a weight-for-height based prevalence estimates of data collected from a two-stage -#' cluster survey sample design, with the first stage sampling done with Probability -#' Proportional to the size of population +#' Compute the prevalence estimates of wasting on the basis of WFHZ, MFAZ or MUAC #' #' @description -#' Create a survey design object using the [srvyr::as_survey_design()] and then calculate -#' the survey means as well the sum of positive cases. +#' The prevalence is calculated in accordance with the complex sample design +#' properties inherent to surveys. This includes weighting the survey data where +#' applicable and applying PROBIT method estimation (for WFHZ) when the standard +#' deviation is problematic. This is as in the SMART Methodology. #' -#' @param df A data frame object returned by [process_wfhz_data()]. -#' this will contain the wrangled vectors that are read inside the function. +#' @param df An already wrangled dataset object of class `data.frame` to use. #' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. +#' @param .wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; +#' otherwise, when a vector of weights if supplied, weighted analysis is computed. #' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. +#' @param .edema A vector of class `character` of edema. 
Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. +#' +#' @returns A summarised table of class `data.frame` of the descriptive +#' statistics about wasting. +#' +#' @examples +#' ## An example of application of `compute_wfhz_prevalence()` ---- +#' +#' ### When .summary_by = NULL ---- +#' anthro.03 |> +#' process_wfhz_data( +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) |> +#' compute_wfhz_prevalence( +#' .wt = NULL, +#' .edema = edema, +#' .summary_by = NULL +#' ) +#' +#' ### When .summary_by is not set to NULL ---- +#' +#' anthro.03 |> +#' process_wfhz_data( +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) |> +#' compute_wfhz_prevalence( +#' .wt = NULL, +#' .edema = edema, +#' .summary_by = district +#' ) +#' +#' ### When a weighted analysis is needed ---- +#' +#' anthro.02 |> +#' compute_wfhz_prevalence( +#' .wt = "wtfactor", +#' .edema = edema, +#' .summary_by = province +#' ) +#' +#' @rdname prevalence +#' +#' @export +#' +compute_wfhz_prevalence <- function(df, + .wt = NULL, + .edema = NULL, + .summary_by = NULL) { + + ## Difuse argument .summary_by ---- + .summary_by <- rlang::enquo(.summary_by) + + ## An empty vector type list ---- + results <- list() + + if (!rlang::quo_is_null(.summary_by)) { + ## Grouped summary of standard deviation classification ---- + x <- summarise( + df, + std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)), + .by = !!.summary_by + ) + } else { + ## Non-grouped summary ---- + x <- summarise( + df, + std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)) + ) + } + + ## Iterate over data frame to compute prevalence according to the SD ---- + for (i in seq_len(nrow(x))) { + if (!rlang::quo_is_null(.summary_by)) { + area <- dplyr::pull(x, !!.summary_by)[i] + data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) + } else { + data <- df + } + + std <- x$std[i] + if (std != "Problematic") { + ### Compute standard complex sample based prevalence analysis ---- + result <- compute_pps_based_wfhz_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) + } else { + ### Compute grouped PROBIT based prevalence ---- + if (!rlang::quo_is_null(.summary_by)) { + result <- compute_probit_prevalence(data, !!.summary_by, .for = "wfhz") + } else { + ### Compute non-grouped PROBIT based prevalence ---- + result <- compute_probit_prevalence(data, .for = "wfhz") + } + } + results[[i]] <- result + } + dplyr::bind_rows(results) |> + dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> + dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> + dplyr::relocate(.data$mam_p, .after = .data$mam_n) +} + + + #' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. #' -#' @returns A tibble of size depending on the number of groups of the vector given to -#' `.summary_by` or if set to NULL, and of length 17. #' #' compute_pps_based_wfhz_prevalence <- function(df, @@ -84,19 +187,31 @@ compute_pps_based_wfhz_prevalence <- function(df, #' #' -#' Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach. 
+#' Compute the prevalence estimates of wasting on the basis of the PROBIT method.
#'
#' @description
-#' This approach is only applied for when WFHZ standard deviation's is problematic. The
-#' PROBIT approach estimates the prevalence of acute malnutrition indirectly by computing
-#' the area under the tail of the curve from negative infinitive to the given threshold
-#' through the cumulative normal distribution function using the mean and standard deviation.
+#' This approach is applied when the standard deviation of WFHZ is problematic.
+#' The PROBIT method estimates the prevalence of wasting indirectly by calculating
+#' the area under the tail of the curve, from negative infinity to
+#' the given threshold, using the cumulative normal distribution function with
+#' the mean and standard deviation as inputs.
+#'
+#' @param df An already wrangled dataset object of class `data.frame` to use.
+#'
+#' @param x A vector of class `double` of WFHZ or MFAZ values.
+#'
+#' @param .status A choice of the form of wasting for which the prevalence should
+#' be estimated.
+#'
+#' @param .summary_by A vector of class `character` of the geographical areas
+#' where the data was collected and for which the analysis should be performed.
+#'
+#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index.
#'
-#' @param x A double vector containing the z-score values
-#' @param .status A choice on the nutritional status you wish to apply the PROBIT approach
-#' on. Default is "gam" for global acute malnutrition.
+#' @returns A summarised table of class `data.frame` of the prevalence estimates.
+#' No confidence intervals are yielded.
#'
-#' @returns A numeric value (double) corresponding to the point prevalence estimate.
+#' @rdname probit-method
#'
#'
apply_probit_approach <- function(x, .status = c("gam", "sam")) {
@@ -111,30 +226,10 @@ apply_probit_approach <- function(x, .status = c("gam", "sam")) {
}
+
#'
#'
-#' Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach
-#'
-#' @description
-#' This function is a helper function used inside [compute_wfhz_prevalence()] and
-#' [compute_mfaz_prevalence()]. It is used to compute PROBIT based prevalence depending
-#' on the status of standard deviation. For more details, check the documentation of the
-#' aforementioned functions.
-#'
-#' @param df A data frame object returned by [process_wfhz_data()] or by [process_muac_data()]
-#' They will contain the wrangled vectors that are read inside the function.
-#'
-#' @param .summary_by A character vector containing data on the geographical areas where
-#' the data was collected. This is to group the survey design object into different
-#' geographical areas in the data and allow for summaries to be computed for each of them.
-#' Default is NULL.
-#'
-#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index you wish
-#' to compute PROBIT prevalence on.
-#'
-#' @returns A tibble with the PROBIT based point prevalence for global, severe and moderate
-#' acute malnutrition.
-#'
+#' @rdname probit-method
#'
compute_probit_prevalence <- function(df,
.summary_by = NULL,
@@ -206,134 +301,3 @@ compute_probit_prevalence <- function(df,
}
)
}
-
-#'
-#' Compute acute malnutrition prevalence based on weight-for-height z-scores (WFHZ),
-#' MUAC-for-age z-scores (MFAZ), MUAC and combined
-#'
-#' @description
-#' `compute_wfhz_prevalence()` is a handy function designed to dynamically compute acute
-#' malnutrition's prevalence using WFHZ.
Under the hood, it first checks the status of -#' WFHZ's standard deviation (SD) after removing flags, and then it decides on the -#' appropriate prevalence analysis approach to follow: if SD is anything between excellent -#' and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -#' cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -#' with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -#' was also designed to work around survey weights. -#' The function also super handy to work on large data sets with multiple survey areas. For -#' this, the aforementioned conditionals are checked for each survey areas in a summarized -#' data frame and prevalence get computed according to each row's scenario. -#' -#' @param df A data frame object returned by [process_wfhz_data()]. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. If you are working on a single survey data, set -#' .summary_by = NULL (default). -#' -#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -#' areas in the data set) x 17. -#' -#' @examples -#' -#' ## When .summary_by = NULL ---- -#' anthro.03 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' compute_wfhz_prevalence( -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' ## When .summary_by is not set to NULL ---- -#' anthro.03 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' compute_wfhz_prevalence( -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = district -#' ) -#' -#' ## When a weighted analysis is needed ---- -#' anthro.02 |> -#' compute_wfhz_prevalence( -#' .wt = "wtfactor", -#' .edema = edema, -#' .summary_by = province -#' ) -#' -#' @export -#' -compute_wfhz_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - - ## Difuse argument .summary_by ---- - .summary_by <- rlang::enquo(.summary_by) - - ## An empty vector type list ---- - results <- list() - - if (!rlang::quo_is_null(.summary_by)) { - ## Grouped summary of standard deviation classification ---- - x <- summarise( - df, - std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)), - .by = !!.summary_by - ) - } else { - ## Non-grouped summary ---- - x <- summarise( - df, - std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)) - ) - } - - ## Iterate over data frame to compute prevalence according to the SD ---- - for (i in seq_len(nrow(x))) { - if (!rlang::quo_is_null(.summary_by)) { - area <- dplyr::pull(x, !!.summary_by)[i] - data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) - } else { - data <- df - } - - std <- x$std[i] - if (std != "Problematic") { - ### Compute standard complex sample based prevalence analysis ---- - result <- 
compute_pps_based_wfhz_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) - } else { - ### Compute grouped PROBIT based prevalence ---- - if (!rlang::quo_is_null(.summary_by)) { - result <- compute_probit_prevalence(data, !!.summary_by, .for = "wfhz") - } else { - ### Compute non-grouped PROBIT based prevalence ---- - result <- compute_probit_prevalence(data, .for = "wfhz") - } - } - results[[i]] <- result - } - dplyr::bind_rows(results) |> - dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> - dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> - dplyr::relocate(.data$mam_p, .after = .data$mam_n) -} - diff --git a/R/quality_checkers.R b/R/quality_auditors.R similarity index 71% rename from R/quality_checkers.R rename to R/quality_auditors.R index eb1a463..2174280 100644 --- a/R/quality_checkers.R +++ b/R/quality_auditors.R @@ -1,43 +1,34 @@ #' -#' Plausibility checkers: MUAC-for-age z-scores, Weight-for-Height z-scores and -#' MUAC +#' Check the plausibility of the data #' #' @description -#' `check_plausibility_mfaz()`, `check_plausibility_wfhz()` and -#' `check_plausibility_muac()` lets you know the quality of your data, based on -#' the statistics around MUAC-for-age zscores, weight-for-height z-scores and on -#' crude MUAC, respectively. Note that `check_plausibility_wfhz()` is all about -#' WHZ only. If you wish to know about MUAC checks consider using either -#' `check_plausibility_mfaz()` or `check_plausibility_muac()` +#' Verify the overall acceptability of the data through a set of +#' structured tests around sampling and measurement-related biases in the data. #' -#' @param df A data frame object returned by [process_muac_data()] for -#' `check_plausibility_mfaz()` and `check_plausibility_muac()` and returned by -#' [process_wfhz_data()] for `check_plausibility_wfhz()`. +#' @param df A dataset object of class `data.frame` to check. It should have been +#' wrangled using this package's wranglers. #' -#' @param sex A vector telling whether a given child is a boy or girl. +#' @param sex A vector of class `numeric` of child's sex: 1 for boy and 2 for girl. #' -#' @param age A vector containing children's age in months. +#' @param age A vector of class `double` of child's age in months. #' -#' @param muac A vector containing MUAC measurements. +#' @param muac A vector of class `double` of child's MUAC in centimeters. #' -#' @param weight A vector containing weight measurements in kilograms. +#' @param weight A vector of class `double` of child's weight in kilograms. #' -#' @param height A vector containing height measurements in centimeters. +#' @param height A vector of class `double` of child's height in centimeters. #' -#' @param flags A character vector telling whether or not an observation is an -#' outlier. +#' @param flags A vector of class `numeric` of flagged observations. #' -#' @param area A vector with values on where was the data collected. If you are -#' analyzing a data set with just one area, provide it anyway to -#' `check_plausibility_mfaz()` or `check_plausibility_wfhz()` -#' -#' @returns A summarized data frame containing quality checks statistics and -#' respective classification. +#' @param area A vector of class `character` of the geographical location where +#' data was collected and for which the analysis should be aggregated. #' +#' @returns A summarised `data.frame` of plausibility test results and their +#' respective acceptability ratings. 
#' #' @examples #' -#' ## Check Plausibility: MFAZ ---- +#' ## Check the plausibility of WFHZ data ---- #' #' anthro.01 |> #' process_age( @@ -45,23 +36,22 @@ #' birdate = "dob", #' age = age #' ) |> -#' process_muac_data( +#' process_wfhz_data( #' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE #' ) |> -#' check_plausibility_mfaz( -#' flags = flag_mfaz, +#' check_plausibility_wfhz( #' sex = sex, -#' muac = muac, #' age = age, +#' weight = weight, +#' height = height, +#' flags = flag_wfhz, #' area = area #' ) #' -#' ## Check Plausibility: WFHZ ---- +#' ## Check the plausibility of MFAZ data ---- #' #' anthro.01 |> #' process_age( @@ -69,22 +59,23 @@ #' birdate = "dob", #' age = age #' ) |> -#' process_wfhz_data( +#' process_muac_data( #' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE +#' age = "age", +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' unit = "cm" #' ) |> -#' check_plausibility_wfhz( +#' check_plausibility_mfaz( +#' flags = flag_mfaz, #' sex = sex, +#' muac = muac, #' age = age, -#' weight = weight, -#' height = height, -#' flags = flag_wfhz, #' area = area #' ) #' -#' ## Check Plausibility: MUAC ---- +#' ## Check the plausibility of the absolute MUAC values ---- #' #' anthro.01 |> #' process_muac_data( @@ -101,7 +92,7 @@ #' muac = muac #' ) #' -#' @rdname plausibility_checkers +#' @rdname plausibility-check #' #' @export #' @@ -114,7 +105,7 @@ check_plausibility_mfaz <- function(df, sex, muac, age, flags, area) { n = n(), flagged = sum({{ flags }}, na.rm = TRUE) / n(), flagged_class = classify_percent_flagged(.data$flagged, type = "mfaz"), - sex_ratio = sexRatioTest({{ sex }}, code = c(1, 2))$p, + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), age_ratio = age_ratio_test({{ age }}, .expectedP = 0.66)$p, age_ratio_class = classify_age_sex_ratio(.data$age_ratio), @@ -148,7 +139,7 @@ check_plausibility_mfaz <- function(df, sex, muac, age, flags, area) { #' #' -#' @rdname plausibility_checkers +#' @rdname plausibility-check #' #' @export #' @@ -162,7 +153,7 @@ check_plausibility_wfhz <- function(df, sex, age, weight, height, flags, area) { n = n(), flagged = sum({{ flags }}, na.rm = TRUE) / n(), flagged_class = classify_percent_flagged(.data$flagged, type = "whz"), - sex_ratio = sexRatioTest({{ sex }}, code = c(1, 2))$p, + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), age_ratio = ageRatioTest({{ age }}, ratio = 0.85)$p, age_ratio_class = classify_age_sex_ratio(.data$age_ratio), @@ -198,7 +189,7 @@ check_plausibility_wfhz <- function(df, sex, age, weight, height, flags, area) { #' -#' @rdname plausibility_checkers +#' @rdname plausibility-check #' #' @export #' @@ -210,7 +201,7 @@ check_plausibility_muac <- function(df, flags, sex, muac) { n = n(), flagged = sum({{ flags }}, na.rm = TRUE) / n(), flagged_class = classify_percent_flagged(.data$flagged, type = "crude"), - sex_ratio = sexRatioTest({{ sex }}, code = c(1, 2))[["p"]], + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))[["p"]], sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), dps = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dps"]], dps_class = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dpsClass"]], diff --git a/R/quality_classifiers.R b/R/quality_classifiers.R deleted file 
mode 100644 index 2698dc6..0000000 --- a/R/quality_classifiers.R +++ /dev/null @@ -1,246 +0,0 @@ -#' -#' Classify how much high is the proportion of flagged data -#' -#' @description -#' `classify_percent_flagged()` tells you how much high is the proportion of -#' of flagged data in your data set, an indication of quality of data. Its a -#' reusable function for MFAZ, WHZ and crude MUAC. The cut-offs for MFAZ and -#' crude MUAC are the same with the upper limit of 2%. This is based on the -#' research findings by [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478), -#' from a multi-country analysis, found that the correlation between the mean -#' MFAZ and crude MUAC was almost perfect (r=99). As for WHZ, the cut-offs are -#' exactly those in the [SMART Methodology](https://smartmethodology.org/). -#' -#' @param p A numeric vector containing the proportions of flagged data -#' -#' @param type The method to which you wish to classify how much high are the -#' proportions of flagged data. A choice between "mfaz" for MFAZ, "whz" for WHZ -#' and "crude" for crude MUAC. -#' -#' @returns A character vector with the correspondent classification of the -#' amount of flagged data. The categories of classification ranges are: -#' "Excellent", "Good", "Acceptable", "Problematic". -#' -#' @examples -#' -#' ## Take a vector with the proportions of flagged data ---- -#' prop <- c(0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, -#' 0.03, 0.03, 0.04, 0.000001, 0) -#' -#' ## Apply the function setting type to "whz" for instance ---- -#' classify_percent_flagged(prop, type = "whz") -#' -#' @export -#' -classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { - - type <- match.arg(type) - - if (type == "mfaz" || type == "crude") { - - ## classify percent of outliers in MFAZ ---- - x <- cut( - x = p, - breaks = c(0, 0.01, 0.015, 0.02, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - } - - if (type == "whz") { - - ## classify percent of outliers in WHZ ---- - x <- cut( - x = p, - breaks = c(0, 0.025, 0.05, 0.075, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - } - x -} - - -#' -#' Classify how much high is the difference in age ration and in sex ratio -#' -#' -#' @description -#' `classify_age_sex_ratio()` works on the results yielded by [nipnTK::ageRatioTest()]. -#' It helps you know how much high is the statistical difference between children -#' age 6-29 months of those age 30-59 months. Likewise, with regard to sex, -#' function works on the results yielded by [nipnTK::sexRatioTest()] to know -#' how much high is the difference between boy and girls in your sample data. -#' -#' @param p A numeric vector containing the test p-values. -#' -#' @returns A character vector with the correspondent classification. -#' -#' @examples -#' -#' ## Have a numeric vector storing p-values ---- -#' pvalues <- c(0, 0, 0.01, 0.011, 0.2, 0.015, 0.016, 0.017, -#' 0.05, 0.06,0.03, 0.03, 0.04, 0.000001, 0.07 -#' ) -#' -#' ## Apply the function ---- -#' classify_age_sex_ratio(pvalues) -#' -#' @export -#' -classify_age_sex_ratio <- function(p) { - case_when( - p > 0.1 ~ "Excellent", - p > 0.05 ~ "Good", - p > 0.001 ~ "Acceptable", - TRUE ~ "Problematic" - ) -} - - -#' -#' Classify how much high is the value of standard deviation -#' -#' -#' @description -#' `classify_sd()` helps you to know the magnitude of the data's standard -#' deviation. 
You can use this function for either WHZ, MFAZ or crude MUAC. -#' Cut-offs for WHZ are based on the [SMART Methodology](https://smartmethodology.org/). -#' Cut-offs for MFAZ are also based on SMART, but informed by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). -#' For crude MUAC, the cut-offs are based on the -#' [IPC AMN guidelines](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/) -#' -#' @param sd A numeric vector containing values for standard deviation of the -#' method you wish the work on. -#' -#' @param type The method to which you wish to classify how much high is the -#' value of standard deviation. A choice between "zscore" MFAZ or WHZ and -#' "crude" for crude MUAC. -#' -#' @returns A character vector with the correspondent classification. -#' -#' @examples -#' -#' ## Have a vector with standard deviation ---- -#' sdvalues <- seq(0.7, 1.3, by = 0.001) |> -#' sample(size = 9, replace = TRUE) -#' -#' ## Apply the function with `type = "zscore` ---- -#' classify_sd(sdvalues, type = "zscore") -#' -#' ## Using `type = "crude"` ---- -#' ### Create sample data ---- -#' sdvalues <- seq(9, 30, by = 2) |> -#' sample(size = 20, replace = TRUE) -#' -#' ### Apply the function with `type = "crude"` ---- -#' classify_sd(sdvalues, type = "crude") -#' -#' @export -#' -classify_sd <- function(sd, type = c("zscore", "crude")) { - - type <- match.arg(type) - - if (type == "zscore") { - - ## Classify WHZ and MFAZ-based standard deviation ---- - x <- case_when( - sd > 0.9 & sd < 1.1 ~ "Excellent", - sd > 0.85 & sd < 1.15 ~ "Good", - sd > 0.8 & sd < 1.20 ~ "Acceptable", - TRUE ~ "Problematic" - ) - } - - if (type == "crude") { - - ## Classify crude MUAC-based standard deviation ---- - x <- cut( - x = sd, - breaks = c(-Inf, 13, 14, 15, Inf), - labels = c("Excellent", "Acceptable", "Poor", "Problematic"), - include.lowest = FALSE, - right = FALSE - ) - } - x -} - - -#' -#' Classify how much high is the value of Skewness and Kurtosis -#' -#' -#' @description -#' `classify_skew_kurt()` helps you to know the magnitude of the Skewness and -#' Kurtosis from your data. This is only useful for WHZ and MFAZ. The function -#' works on the results yielded by [nipnTK::skewKurt()]. -#' Cut-offs for WHZ are based on the [SMART Methodology](https://smartmethodology.org/). -#' -#' @param sk A numeric vector containing values of either Skewness or Kurtosis. -#' -#' @returns A character vector with the correspondent classification. -#' -#' @examples -#' -#' #Have a numeric vector storing values for skewness or kurtosis ---- -#' sk <- seq(-5, 1, by = 0.05) |> sample(size = 20, replace = TRUE) -#' -#' # Apply function -#' classify_skew_kurt(sk) -#' -#' @export -#' -classify_skew_kurt <- function(sk) { - cut( - x = sk, - breaks = c(-Inf, 0.2, 0.4, 0.6, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = FALSE, - right = FALSE - ) -} - -#' -#' Get the overall data quality classification -#' -#' -#' @description -#' `classify_overall_quality()` helps you in knowing the overall status of your -#' data quality. It classifies the overall score generated by -#' [compute_quality_score()] into four categories, as it is done in the -#' [SMART Methodology](https://smartmethodology.org/), -#' namely: "Excellent", "Good", "Acceptable" and "Problematic". Beware that -#' the overall classification should be used as an indication to further -#' scrutinize of data before taking the decision to validate or invalidate the -#' results. 
-#' -#' @param df A data frame containing a vector with the quality scores generated by -#' [compute_quality_score()]. -#' -#' @returns A character vector of the same length, but a different width as the -#' input `df` is returned with a new column called `quality_class`. -#' -#' -#' @export -#' -classify_overall_quality <- function(df) { - - qclass <- with( - df, - data.frame( - quality_class <- cut( - x = quality_score, - breaks = c(0, 9, 14, 24, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - ) - ) - qclass$quality_class -} diff --git a/R/quality_raters.R b/R/quality_raters.R new file mode 100644 index 0000000..74fe00e --- /dev/null +++ b/R/quality_raters.R @@ -0,0 +1,174 @@ +#' +#' Rate the acceptability of the standard deviation and the percentage of flagged +#' data +#' +#' @description +#' Rate how much high is the standard deviation and the percentage of flagged +#' data in the dataset, hence it's acceptability. +#' +#' @param p A vector of class `double` of the proportions of flagged values in +#' the dataset. +#' +#' @param sd A vector of class `double` of the values of the standard deviation. +#' +#' @param type A choice between "wfhz", "mfaz" and "crude" for the basis on which +#' the rating should be done. +#' +#' @returns A vector of class `character` for the acceptability rate. +#' +#' @details +#' The ranges of acceptability are: "Excellent", "Good", "Acceptable", "Problematic". +#' The cut-offs for WFHZ are as in the [SMART Methodology](https://smartmethodology.org/). +#' For the MFAZ and the absolute MUAC values, the maximum acceptable limit for +#' outliers is 2%, as recommended by +#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). +#' Cut-offs for the standard deviation of the absolute MUAC values are based on the +#' [IPC AMN guidelines](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). +#' +#' +#' @rdname raters +#' +classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { + + type <- match.arg(type) + + if (type == "mfaz" || type == "crude") { + + ## classify percent of outliers in MFAZ ---- + x <- cut( + x = p, + breaks = c(0, 0.01, 0.015, 0.02, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE + ) + } + + if (type == "whz") { + + ## classify percent of outliers in WHZ ---- + x <- cut( + x = p, + breaks = c(0, 0.025, 0.05, 0.075, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE + ) + } + x +} + +#' +#' +#' @rdname raters +#' +classify_sd <- function(sd, type = c("zscore", "crude")) { + + type <- match.arg(type) + + if (type == "zscore") { + + ## Classify WHZ and MFAZ-based standard deviation ---- + x <- case_when( + sd > 0.9 & sd < 1.1 ~ "Excellent", + sd > 0.85 & sd < 1.15 ~ "Good", + sd > 0.8 & sd < 1.20 ~ "Acceptable", + TRUE ~ "Problematic" + ) + } + + if (type == "crude") { + + ## Classify crude MUAC-based standard deviation ---- + x <- cut( + x = sd, + breaks = c(-Inf, 13, 14, 15, Inf), + labels = c("Excellent", "Acceptable", "Poor", "Problematic"), + include.lowest = FALSE, + right = FALSE + ) + } + x +} + + +#' +#' Rate the acceptability of the age and sex ratio test p-values +#' +#' @param p A vector of class `double` of the age or sex ratio test p-values. +#' +#' @returns A vector of class `character` of the same length as `p` for the +#' acceptability rate. 
+#' +#' +classify_age_sex_ratio <- function(p) { + case_when( + p > 0.1 ~ "Excellent", + p > 0.05 ~ "Good", + p > 0.001 ~ "Acceptable", + TRUE ~ "Problematic" + ) +} + + +#' +#' Rate the acceptability of the skewness and kurtosis test results +#' +#' @param sk A vector of class `double` for skewness or kurtosis test results. +#' +#' @returns A vector of class `character` of the same length as `sk` for the +#' acceptability rate. +#' +#' +classify_skew_kurt <- function(sk) { + cut( + x = sk, + breaks = c(-Inf, 0.2, 0.4, 0.6, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = FALSE, + right = FALSE + ) +} + +#' +#' +#' Rate the overall acceptability score +#' +#' @description +#' Rate the overall acceptability score into "Excellent", "Good", "Acceptable" and +#' "Problematic". +#' +#' @param df A dataset of class `data.frame` containing a vector of the overall +#' acceptability score as yielded from [compute_quality_score()]. +#' +#' @returns A `data.frame` based on `df`. A new column `quality_class` for the +#' overall acceptability rate is created and added to `df`. +#' +#' @examples +#' ## A sample data ---- +#' +#' df <- data.frame( +#' quality_score = 29 +#' ) +#' +#' ## Apply the function ---- +#' classify_overall_quality(df) +#' +#' @export +#' +classify_overall_quality <- function(df) { + + qclass <- with( + df, + data.frame( + quality_class <- cut( + x = quality_score, + breaks = c(0, 9, 14, 24, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE + ) + ) + ) + qclass$quality_class +} diff --git a/R/quality_scorers.R b/R/quality_scorers.R index 35f2f70..58e3490 100644 --- a/R/quality_scorers.R +++ b/R/quality_scorers.R @@ -1,28 +1,20 @@ #' -#' Assign a penalty point for the amount of proportion flagged data and standard deviation +#' Score the acceptability classification of the standard deviation and percentage +#' of flagged data test results #' #' @description -#' The function assigns a penalty score for a given category of test classification. -#' The score range varies between 0 (when "Excellent") to 20 (when "Problematic") for -#' both flagged data and standard deviation. This was borrowed from the -#' [ENA for SMART software](https://smartmethodology.org/) -#' In the SMART Methodology, flagged data and standard deviation are tho test -#' criteria that gets the highest penalty scores, so it is here. +#' Attribute a penalty point based on the acceptability classification in which +#' the plausibility test result falls. #' -#' @param x A character vector containing the test classifications of proportion -#' of flagged data and the value of standard deviation. +#' @param x A vector of class `character` of acceptability classification of the +#' plausibility test results. #' -#' @returns A numeric vector with the corresponding penalty points (scores) according -#' to the classification. +#' @returns A vector of class `integer` of the same length as `x` for the score. #' -#' @examples +#' @details +#' The scoring criteria is as in [SMART Plausibility checks](https://smartmethodology.org/). 
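As a rough illustration of the penalty scoring described above: only the 0 and 20 endpoints for flagged data and standard deviation are stated in this documentation, so the intermediate values below are assumed for the sketch and the packaged scorers define the actual scale.

library(dplyr)

# Acceptability ratings as produced by the raters above
ratings <- c("Excellent", "Good", "Acceptable", "Problematic")

# Assumed intermediate point values; only 0 and 20 are given in the docs
case_when(
  ratings == "Excellent" ~ 0,
  ratings == "Good" ~ 5,
  ratings == "Acceptable" ~ 10,
  TRUE ~ 20
)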
#' -#' ## Sample data ---- -#' x <- c("Excellent", "Problematic", "Acceptable", "Good") -#' ## Apply the function ---- -#' assign_penalty_points_flags_and_sd(x) -#' -#' @export +#' @rdname scorer #' assign_penalty_points_flags_and_sd <- function(x) { case_when( @@ -33,29 +25,10 @@ assign_penalty_points_flags_and_sd <- function(x) { ) } + #' -#' Assign a penalty point for the amount of selection biases in age and sex ratios -#' -#' @description -#' The function assigns a penalty score for a age and sex ratio's test classification. -#' The score range varies between 0 (when "Excellent") to 10 (when "Problematic") for -#' both, according to the [ENA for SMART software](https://smartmethodology.org/). -#' -#' @param x A numeric vector containing p-values from either age or sex ratio -#' test results. -#' -#' @returns A numeric vector with the corresponding penalty points (scores) according -#' to the classification. -#' -#' @examples -#' -#' ## A vector storing age ratio or sex ratio p-values' classification ---- -#' x <- c("Excellent", "Problematic", "Acceptable", "Good") -#' -#' ## Apply the function ---- -#' assign_penalty_points_age_sex_ratio(x) #' -#' @export +#' @rdname scorer #' assign_penalty_points_age_sex_ratio <- function(x) { case_when( @@ -68,28 +41,7 @@ assign_penalty_points_age_sex_ratio <- function(x) { #' #' -#' Assign a penalty point for the amount of issues in Skweness and Kurtosis -#' -#' @description -#' The function assigns a penalty score for a Skewness and Kurtosis test classification. -#' The score range varies between 0 (when "Excellent") to 5 (when "Problematic") for -#' both, according to the [ENA for SMART software](https://smartmethodology.org/). -#' -#' @param x A numeric vector containing Skewness or Kurtosis test results classification. -#' -#' @returns A numeric vector with the corresponding penalty points (scores) according -#' to the classification. -#' -#' @examples -#' -#' ## A vector storing Skewness or Kurtosis test classification ---- -#' -#' x <- c("Excellent", "Problematic", "Acceptable", "Good") -#' -#' ## Apply the function ---- -#' assign_penalty_points_skew_kurt(x) -#' -#' @export +#' @rdname scorer #' assign_penalty_points_skew_kurt <- function(x) { case_when( @@ -101,33 +53,24 @@ assign_penalty_points_skew_kurt <- function(x) { } #' -#' Get the overall WHZ or MFAZ's quality score #' +#' Get the overall acceptability score from the acceptability classification scores #' #' @description -#' `compute_quality_score()` provides the overall quality score of either WHZ or MFAZ, -#' by adding up the scores across each test criteria. This is an input to -#' [classify_overall_quality()]. -#' -#' @param df A data frame containing the scores. If you wish the get the overall -#' quality score for MFAZ, the input data frame must have seven (7) required -#' columns containing test classification of flagged data, sex ratio, age ratio, -#' standard deviation, skewness, kurtosis, crude MUAC's digit preference. -#' Alternatively, if you wish to get the quality score of WHZ, then the input -#' data frame must have the exact same columns in the plausibility report of the -#' ENA for SMART software. -#' -#' @param type The method you wish to get the overall quality score for. -#' A choice between "mfaz" and "whz". If you wish to know the overall survey -#' score of your WHZ data, set `type = whz`, otherwise set `type = mfaz` for -#' MFAZ. If by mistake a different input choice is given, an error will be -#' thrown with a message guiding how to go about. 
-#'
-#' @returns A vector (named `"quality_score"`) with the overall quality scores.
+#' Calculate the total amount of penalty points based on each plausibility test
+#' result acceptability classification for WFHZ and MFAZ.
+#'
+#' @param df A dataset object of class `data.frame` to calculate from.
+#'
+#' @param type A choice between "wfhz" and "mfaz" for the basis on which the
+#' calculations should be made.
+#'
+#' @returns A `data.frame` based on `df` with a new column named `"quality_score"`
+#' for the overall acceptability (quality) score.
#'
#' @examples
-#' # example code
-#' ## Create a `df` object ----
+#'
+#' ## A sample data ----
#'
#' df <- data.frame(
#'   flagged_class = "Excellent",
@@ -139,13 +82,9 @@ assign_penalty_points_skew_kurt <- function(x) {
#'   kurt_class = "Acceptable"
#' )
#'
-#' ## Apply function ----
+#' ## Apply the function ----
#' compute_quality_score(df, type = "mfaz")
#'
-#' # You can also choose to chain the functions with a pipe operator ----
-#' df |>
-#'   compute_quality_score(type = "mfaz")
-#'
#' @export
#'
compute_quality_score <- function(df, type = c("mfaz", "whz")) {
diff --git a/R/sample_size.R b/R/sample_size.R
index 85267fd..a1645dd 100644
--- a/R/sample_size.R
+++ b/R/sample_size.R
@@ -1,51 +1,34 @@
#'
-#' Check IPC AMN Sample Size Requirements
+#' Check whether the IPC Acute Malnutrition sample size requirements were met
#'
#' @description
-#' Evidence used in [IPC](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/)
-#' comes from different sources, collected in different ways,
-#' namely: representative surveys, screenings or even data from community-based
-#' surveillance system - the sentinel sites. IPC AMN protocols have set minimum
-#' sampling a sample size requirements for each. For cluster-based
-#' representative surveys, there must be at least 25 primary sampling unit (PSUs).
-#' On screening, there ware two ways: i. exhaustive screening (door-to-door) or
-#' ii. sampled screening. For this, there should be at least three sites (i.e.,
-#' villages or communities, etc). `check_sample_size()` checks the
-#' on sampled screening.
+#' Verify whether the minimum sample size requirements for the area of analysis
+#' were met, in accordance with the IPC Acute Malnutrition (IPC AMN) protocols.
#'
-#' `check_sample_size()` helps you know if your data meets the at least
-#' IPC AMN minimum requirements. This function should be used before proceeding
-#' to checking the quality of measurements. Doing this saves you from avoid
-#' working on data that do not meet the minimum requirements, as it will not be
-#' used in any IPC analysis.
+#' @param df A dataset of class `data.frame` to check.
#'
-#' @param df A data frame containing the required variables.
+#' @param .group A vector of class `integer` of the cluster IDs for surveys, or
+#' the site IDs for screenings and sentinel sites.
#'
-#' @param .group A vector containing the ID's of the primary sampling unit.
-#' Usually and ideally a numeric vector, but sometimes this variables may come as
-#' a character vector. Either way, `check_sample_size()` will execute
-#' the task accordingly.
+#' @param .data_type A choice between "survey" for survey data, "screening" for
+#' screening data or "ssite" for community-based sentinel site data.
#'
-#' @param .data_type The data collection method: survey, screening or sentinel sites.
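A minimal sketch of the kind of summary this sample size check is about, assuming the survey threshold of at least 25 PSUs noted above; `anthro.01` and its `cluster` variable are used as in the package examples, and the column names mirror those described for the returned table.

library(dplyr)

# Count primary sampling units and observations, then compare against the
# assumed IPC AMN minimum of 25 PSUs for cluster-based surveys
anthro.01 |>
  summarise(
    groups = n_distinct(cluster),
    n_obs = n(),
    meet_ipc = groups >= 25
  )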
-#' If you wish to check IPC AMN requirements on surveys were met, set
-#' method = "survey"; for screening set method = "screening" and for sentinel
-#' sites set method = "ssite". If by mistake a different parameter is given,
-#' an error will be thrown and the function will stop, but with a guidance on
-#' how to go about.
+#' @returns A summarised table of three columns: `groups` for the total number
+#' of unique cluster or screening or site IDs; `n_obs` for the respective total
+#' number of children; and `meet_ipc` for whether the IPC AMN requirements were met.
+#'
+#' @details
+#' [The IPC Manual](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/).
#'
-#' @returns `check_sample_size()` returns an output of the same type
-#' as the input (data frame), but of a different size. By default, the function
-#' returns a summary of length 1 (one row), but with three new columns added to
-#' the input data frame: `groups` (for survey), or sites (for screening or sentinel
-#' sites) `n_obs` and `meet_ipc`. The first will store the total number of PSUs
-#' in the sample. `n_obs` will store the total number of rows/observations and
-#' `meet_ipc` is a logical vector to say whether or not the IPC AMN minimum
-#' criteria for sample size was met. This is flexible according to the method you
-#' select with `.data_type = " "`.
#'
#' @examples
-#' # Have an input data frame --------------------------------------------------
-#' check_sample_size(anthro.01, .group = cluster, .data_type = "survey")
+#'
+#' anthro.01 |>
+#'   dplyr::group_by(area) |>
+#'   check_sample_size(
+#'     .group = cluster,
+#'     .data_type = "survey"
+#'   )
#'
#' @export
#'
diff --git a/R/sysdata.rda b/R/sysdata.rda
new file mode 100644
index 0000000..d774818
Binary files /dev/null and b/R/sysdata.rda differ
diff --git a/R/wranglers.R b/R/wranglers.R
new file mode 100644
index 0000000..8026225
--- /dev/null
+++ b/R/wranglers.R
@@ -0,0 +1,318 @@
+#'
+#'
+#' Identify and flag outliers
+#'
+#' @description
+#' Outliers are extreme values that deviate remarkably from the survey mean, making
+#' them unlikely to be accurate measurements. This function detects and signals
+#' them based on a criterion set for the WFHZ, the MFAZ and for the absolute MUAC
+#' values.
+#'
+#' @param x A vector of class `double` of WFHZ or MFAZ or absolute MUAC values.
+#' The latter should be in millimeters.
+#'
+#' @param type A choice between `zscore` and `crude` for where outliers should be
+#' detected and flagged from.
+#'
+#' @param unit A choice between `zscore` and `crude` for the basis on which flagged
+#' values should be removed.
+#'
+#' @return A vector of the same length as `x` of flagged observations that are
+#' outliers: 1 for a flagged value and 0 for a non-flagged value.
+#'
+#' @details
+#' The flagging criterion used for the WFHZ and the MFAZ is as in
+#' [SMART plausibility check](https://smartmethodology.org/). A fixed flagging
+#' criterion is used for the absolute MUAC values. This is as recommended by
+#' [Bilukha, O., & Kianian, B.
(2023).](https://doi.org/10.1111/mcn.13478) +#' +#' +#' @examples +#' +#' ## Sample data for absolute MUAC values ---- +#' x <- anthro.01$muac +#' +#' ## Apply the function with type set to "crude" ---- +#' flag_outliers(x, type = "crude") +#' +#' ## Sample data for MFAZ or for WFHZ values ---- +#' x <- anthro.02$mfaz +#' +#' # Apply the function with type set to "zscore" ---- +#' flag_outliers(x, type = "zscore") +#' +#' @rdname outliers +#' @export +#' +flag_outliers <- function(x, type = c("zscore", "crude")) { + type <- match.arg(type) + + if (type == "zscore") { + mean_zscore <- mean(x, na.rm = TRUE) + flags <- ifelse((x < (mean_zscore - 3) | x > (mean_zscore + 3)), 1, 0) + flags <- ifelse(is.na(x), NA, flags) + flags + + } else { + flags <- ifelse(x < 100 | x > 200, 1, 0) + flags <- ifelse(is.na(x), NA, flags) + flags + } +} + + +#' +#' +#' Remove outliers +#' +#' @rdname outliers +#' +remove_flags <- function(x, unit = c("zscore", "crude")) { + + ## Match arguments ---- + unit <- match.arg(unit) + + ## Control flow based on unit ---- + switch( + unit, + ### Remove flags when unit = "zscore" ---- + "zscore" = { + mean_x <- mean(x, na.rm = TRUE) + zs <- ifelse((x < (mean_x - 3) | x > (mean_x + 3)) | is.na(x), NA_real_, x) + }, + ### Remove flags when unit = "crude" ---- + "crude" = { + cr <- ifelse(x < 100 | x > 200 | is.na(x), NA_integer_, x) + } + ) +} + + +#' +#' +#' +#' Convert MUAC values to either centimeters or millimeters +#' +#' @description +#' Recode the MUAC values to either centimeters or millimeters as required. +#' +#' @param muac A vector of class `double` or `integer` of the absolute MUAC values. +#' +#' @param unit A choice of the unit to which the MUAC values should be converted. +#' +#' @returns A numeric vector of the same length `muac`, with values converted +#' to the chosen unit. +#' +#' @examples +#' +#' ## Recode from millimeters to centimeters ---- +#' muac <- anthro.01$muac +#' muac_cm <- recode_muac(muac, unit = "cm") +#' +#' ## Using the `muac_cm` object to recode it back to "mm" ---- +#' muac_mm <- recode_muac(muac_cm, unit = "mm") +#' +#' @export +#' +recode_muac <- function(muac, unit = c("cm", "mm")) { + + ## Check if unit's arguments match ---- + stopifnot(unit %in% c("cm", "mm")) + + ## Recode muac conditionally ---- + switch( + unit, + ### Recode to millimeters ---- + "mm" = {muac <- muac * 10}, + ### Recode to centimeters ---- + "cm" = {muac <- muac / 10}, + stop("Invalid 'units' argument. Please choose either 'cm' or 'mm'.") + ) +} + + +#' +#' +#' Wrangle weight-for-height and MUAC data +#' +#' @description +#' This function performs data wrangling by calculating weight-for-height +#' and MUAC-for-age z-scores, followed by the detection and flagging of outliers. +#' For MUAC data, if age is not supplies, z-scores do not get computed. In such +#' cases, outlier detection and flagging are based on the absolute MUAC values. +#' +#' @param df A dataset of class `data.frame` to wrangle data from. +#' +#' @param sex A numeric or character vector of child's sex. Code values should +#' be 1 or "m" for boy and 2 or "f" for girl. The variable name must be sex, +#' otherwise it will not work. +#' +#' @param .recode_sex Logical. Default is `FALSE`. Setting to `TRUE` assumes that +#' the sex variable is a character vector of values "m" for boys and "f" for girls +#' and will recode them to 1 and 2 respectively. +#' +#' @param muac A vector of class `double` or `integer` of the absolute MUAC values. +#' +#' @param .recode_muac Logical. Default is `FALSE`. 
Set to `TRUE` if MUAC values +#' should be converted to either centimeters or millimeters. +#' +#' @param unit A choice of the unit to which the MUAC values should be converted. +#' "cm" for centimeters, "mm" for millimeters and "none" to leave as it is. +#' +#' @param age A double vector of child's age in months. It must be named age, +#' otherwise it will not work. +#' +#' @param weight A vector of class `double` of child's weight in kilograms. +#' +#' @param height A vector of class `double` of child's height in centimeters. +#' +#' @returns A data frame based on `df`. New variables named `wfhz` and +#' `flag_wfhz`, of child's weight-for-height z-scores and flags, or `mfaz` and +#' `flag_mfaz`, of child's MUAC-for-age z-scores and flags, will be created. For +#' MUAC, when age is not supplied only `flag_muac` variable is created. +#' This refers to flags based on the absolute MUAC values as recommended by +#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). +#' +#' @details +#' The flagging criterion used for the WFHZ and MFAZ is as in +#' [SMART plausibility check](https://smartmethodology.org/). A fixed flagging +#' criterion is used for the absolute MUAC values. This is as recommended by +#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478) +#' +#' @examples +#' +#' ## An example application of `process_wfhz_data()` ---- +#' +#' anthro.01 |> +#' process_wfhz_data( +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) +#' +#' ## An example application of `process_muac_data()` ---- +#' +#' ### Sample data ---- +#' df <- data.frame( +#' survey_date = as.Date(c( +#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), +#' birthdate = as.Date(c( +#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), +#' age = c(NA, 36, NA, NA, NA), +#' sex = c("m", "f", "m", "m", "f"), +#' muac = c(110, 130, 300, 123, 125) +#' ) +#' +#' ### The application of the function ---- +#' +#' df |> +#' process_age( +#' svdate = "survey_date", +#' birdate = "birthdate", +#' age = age +#' ) |> +#' process_muac_data( +#' sex = sex, +#' age = "age", +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' unit = "cm" +#' ) +#' +#' @rdname wrangler +#' +#' @export +#' + +process_wfhz_data <- function(df, + sex, + weight, + height, + .recode_sex = TRUE) { + + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else { + {{ sex }} + } + ) + + df <- df |> + mutate( + sex = !!recode_sex + ) |> + addWGSR( + sex = {{ "sex" }}, + firstPart = {{ "weight" }}, + secondPart = {{ "height" }}, + index = "wfh", + digits = 3 + ) |> + mutate( + flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) + ) + tibble::as_tibble(df) +} + + + +#' +#' @rdname wrangler +#' +#' @export +#' +process_muac_data <- function(df, + sex, + muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = TRUE, + unit = c("cm", "mm", "none")) { + unit <- match.arg(unit) + + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else { + {{ sex }} + } + ) + + rec_muac <- quote( + if (.recode_muac && unit == "cm") { + muac <- recode_muac({{ muac }}, unit = "cm") + } else if (.recode_muac && unit == "mm") { + muac <- recode_muac({{ muac }}, unit = "mm") + } else { + {{ muac }} + } + ) + + if (!is.null({{ age }})) { + df <- df |> + mutate( + muac = !!rec_muac, + sex = !!recode_sex, + ) |> + addWGSR( + sex = "sex", + firstPart = "muac", + secondPart = "age_days", + 
index = "mfa", + digits = 3 + )|> + mutate( + flag_mfaz = do.call(flag_outliers, list(.data$mfaz, type = "zscore")) + ) + } else { + df <- df |> + mutate( + sex = !!recode_sex, + flag_muac = do.call(flag_outliers, list({{ muac }}, type = "crude")) + ) + } + tibble::as_tibble(df) +} diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R new file mode 100644 index 0000000..482b9a2 --- /dev/null +++ b/data-raw/DATASET.R @@ -0,0 +1,2 @@ +# Internal data ---- +usethis::use_data(wfhz.01, mfaz.01, mfaz.02, internal = TRUE, overwrite = TRUE) diff --git a/inst/WORDLIST b/inst/WORDLIST index aab8cc6..77fc839 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -13,22 +13,18 @@ IPC Inquérito Kianian Lifecycle -MAM MFAZ MFAZ's MUAC -MUAC's Maravia Metuge ORCID Oftentimes Orçamento -PSUs +PSU +PSU's R's -Skweness WFHZ -WFHZ's -WHZ WIP analyzing anthro @@ -36,21 +32,16 @@ ao callout centimeters cflags -cgam -cmam -csam dob dplyr edema etc -gam ipc mam mfaz millimeter millimeters muac -multicountry mwana mwana’s nipnTK @@ -58,11 +49,9 @@ nutriverse obs offs plausibile -portuguese requeriments sam ssite -tho tibble undernutrition wfhz @@ -70,5 +59,4 @@ whz wtfactor zscore zscorer -zscores ’s diff --git a/man/age_ratio_test.Rd b/man/age_ratio_test.Rd index bc6c521..9b8fe1c 100644 --- a/man/age_ratio_test.Rd +++ b/man/age_ratio_test.Rd @@ -2,44 +2,40 @@ % Please edit documentation in R/age.R \name{age_ratio_test} \alias{age_ratio_test} -\title{Age ratio test on children aged 6:23 over 24:59 months} +\title{Test for statistical difference between the proportion of children aged 24 to +59 months old over those aged 6 to 23 months old} \usage{ age_ratio_test(age, .expectedP = 0.66) } \arguments{ -\item{age}{A vector storing values about child's age in months.} +\item{age}{A double vector of age in months.} -\item{.expectedP}{The expected proportion of children aged 24-59 months over -children aged 6-29 months, considered to be of 0.66 according to the +\item{.expectedP}{The expected proportion of children aged 24 to 59 months +old over those aged 6 to 23 months old. This is estimated to be 0.66 as in the \href{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}{SMART MUAC tool}.} } \value{ -A list three statistics: \code{p} for p-value, \code{observedR} for observed ratio -from your data, \code{observedP} for observed proportion of children 24-59 months -over the universe of your sample data. +A vector of class \code{list} of three statistics: \code{p} for p-value of the +statistical difference between the observed and the expected proportion of +children aged 24 to 59 months old over those aged 6 to 23 months old; +\code{observedR} and \code{observedP} for the observed ratio and proportion respectively. + +@details +This function should be used specifically for assessing MUAC data. For +age ratio tests of children aged 6 to 29 months old over 30 to 59 months old, as +performed in the SMART plausibility check, use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} instead. } \description{ -As documented in \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}}, age ratio test is an age-related -test of survey data quality. This includes other assessments as screenings, -sentinel sites, etc. Different to \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}}, in \code{age_ratio_test()} -the ratio of children is calculate from children 6-23 months to the number of -children age 24-59 months. The ratio is then compared to the expected ratio -(set at 0.66). 
Then the difference between the observed ratio is compared to -the expected using a Chi-squared test. - -\code{age_ratio_test()} should only be used for MUAC checks. This particularly -useful as allows you to determine if downstream your analysis you should -consider adjusting your MUAC prevalence, should there be more younger children -than older children in your survey, screening or sentinel site data. If you -wish to get the age ratio for children 6-29/30-59 like in SMART Methodology, -then you should use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} NOT \code{age_ratio_test()}. +Calculate the observed age ratio of children aged 24 to 59 months old over +those aged 6 to 23 months old and test if there is a statistical difference +between the observed and the expected. } \examples{ -## Have a sample data ---- -age <- seq(6,59) |> sample(300, replace = TRUE) - -## Apply the function ---- -age_ratio_test(age, .expectedP = 0.66) +## An example of application using `anthro.02` dataset ---- +age_ratio_test( +age = anthro.02$age, +.expectedP = 0.66 +) } diff --git a/man/anthro.01.Rd b/man/anthro.01.Rd index d744cac..158e764 100644 --- a/man/anthro.01.Rd +++ b/man/anthro.01.Rd @@ -3,9 +3,9 @@ \docType{data} \name{anthro.01} \alias{anthro.01} -\title{Raw data of a district level representative survey} +\title{A sample data of district level SMART surveys with location anonymised} \format{ -A tibble with 1191 rows and 11 columns.\tabular{ll}{ +A tibble of 1,191 rows and 11 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr \emph{area} \tab Location where the survey took place \cr \emph{dos} \tab Survey date \cr @@ -20,16 +20,20 @@ A tibble with 1191 rows and 11 columns.\tabular{ll}{ \emph{muac} \tab Mid-upper arm circumference (mm) \cr } } +\source{ +Anonymous +} \usage{ anthro.01 } \description{ -#' \code{anthro.01} is about a two-stage and PPS cluster sampling survey data -conducted in two district following the SMART survey methodology in two -livelihood zones. The location information was anonymized for confidentiality. +\code{anthro.01} is a two-stage cluster-based survey with probability of selection +of clusters proportional to the size of the population. The survey employed +the SMART methodology. } \examples{ anthro.01 + } \keyword{datasets} diff --git a/man/anthro.02.Rd b/man/anthro.02.Rd index 8f64088..bc62b6e 100644 --- a/man/anthro.02.Rd +++ b/man/anthro.02.Rd @@ -3,9 +3,9 @@ \docType{data} \name{anthro.02} \alias{anthro.02} -\title{Province representative survey conducted in Mozambique} +\title{A sample of an already wrangled survey data} \format{ -A tibble with 2267 rows and 14 columns.\tabular{ll}{ +A tibble of 2,267 rows and 14 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr \emph{province} \tab The administrative unit (admin 1) where data was collected. \cr \emph{strata} \tab Rural and Urban \cr @@ -23,26 +23,24 @@ A tibble with 2267 rows and 14 columns.\tabular{ll}{ \emph{flag_mfaz} \tab Flagged observations. 1=flagged, 0=not flagged \cr } } +\source{ +Mozambique National Institute of Statistics. The data is publicly +available at \url{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access}. +Data was wrangled using this package's wranglers. 
Details about survey design +can be gotten from: \url{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling} +} \usage{ anthro.02 } \description{ -\code{anthro.02} is about a household budget survey conducted in Mozambique in -2019/2020, known as IOF (\emph{Inquérito ao Orçamento Familiar} in portuguese). -The data is publicly available \href{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access}{here}. -The survey had a module on nutrition with anthropometric measurements taken -from children age 0-59 months for weight-for-height and 6-59 months for MUAC. -\emph{IOF} is a cluster and PPS-based, survey, with sampling done in two stages, -designed to give representative estimates at province level. Its data -collection spans for a period of 12 months, with anthropometric measurements -taken during that period too. Read the \href{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling}{Bureau of Statistic's website on IOF} for -more details. - -\code{anthro.02} has been processed for this package's purpose. +A household budget survey data conducted in Mozambique in +2019/2020, known as \emph{IOF} (\emph{Inquérito ao Orçamento Familiar} in Portuguese). \emph{IOF} +is a two-stage cluster-based survey, representative at province level (admin 2), +with probability of the selection of the clusters proportional to the size of +the population. Its data collection spans for a period of 12 months. } \examples{ anthro.02 - } \keyword{datasets} diff --git a/man/anthro.03.Rd b/man/anthro.03.Rd index dc5ae52..1d5414e 100644 --- a/man/anthro.03.Rd +++ b/man/anthro.03.Rd @@ -3,11 +3,11 @@ \docType{data} \name{anthro.03} \alias{anthro.03} -\title{District level SMART surveys conducted in four district in Mozambique} +\title{A sample data of district level SMART surveys conducted in Mozambique} \format{ A tibble of 943 x 9.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr - \emph{district} \tab The administrative unit (admin 1) where data was collected. \cr + \emph{district} \tab The location where data was collected \cr \emph{cluster} \tab Primary sampling unit \cr \emph{team} \tab Survey teams \cr \emph{sex} \tab Sex, "m" = boys, "f" = girls \cr @@ -18,16 +18,22 @@ A tibble of 943 x 9.\tabular{ll}{ \emph{muac} \tab Mid-upper arm circumference (mm) \cr } } +\source{ +Anonymous +} \usage{ anthro.03 } \description{ -This example data contains survey data of four districts. Two of them have their WFHZ -standard deviation classified as problematic, and the are other two within range of -acceptable standard deviation. The data is used to test the performance of WFHZ based -prevalence when used on a data set with multiple survey areas that may or not have -different classification for standard deviation that may warrant different analysis -approach, as the function is designed for. +\code{anthro.03} contains survey data of four districts. Each district dataset +presents distinct data quality scenarios that requires tailored prevalence +analysis approach: two districts show a problematic WFHZ standard deviation +whilst the remaining are all within range. + +This sample data is useful to demonstrate the use of the prevalence functions on +a multi-area survey data where there can be variations in the rating of +acceptability of the standard deviation, hence require different analyses approaches +for each area to ensure accurate estimation. 
}
\examples{
anthro.03
diff --git a/man/anthro.04.Rd b/man/anthro.04.Rd
index db29727..5cfae6e 100644
--- a/man/anthro.04.Rd
+++ b/man/anthro.04.Rd
@@ -3,11 +3,11 @@
\docType{data}
\name{anthro.04}
\alias{anthro.04}
-\title{MUAC data from a community-based sentinel site from an anonymized location}
+\title{A sample data of a community-based sentinel site from an anonymized location}
\format{
A tibble of 3,002 x 8.\tabular{ll}{
\strong{Variable} \tab \strong{Description} \cr
- \emph{province} \tab \cr
+ \emph{province} \tab Location where data was collected \cr
\emph{cluster} \tab Primary sampling unit \cr
\emph{sex} \tab Sex, "m" = boys, "f" = girls \cr
\emph{age} \tab calculated age in months with two decimal places \cr
@@ -17,22 +17,32 @@ A tibble of 3,002 x 8.\tabular{ll}{
\emph{flag_mfaz} \tab Flagged observations. 1=flagged, 0=not flagged \cr
}
}
+\source{
+Anonymous
+}
\usage{
anthro.04
}
\description{
-Data in \code{anthro.04} was generated from a community-based sentinel site of three provinces.
-Each province data set holds different scenarios that informs the appropriate analysis
-approach to follow. One province (province 3) has its MFAZ standard deviation and age
-ratio tests classified as problematic. Another province (province 2) has its age ratio
-classified as problematic, but with a within range standard deviation. Lastly, province 1
-has both tests falling within range of nor problematic. The data is used to test the
-performance of \verb{[compute_muac_prevalence()]} based when used on a multiple survey areas
-data that may or not have on the aforementioned test that may then warrant a different
-analysis approach, as the function is designed for.
+Data was generated through community-based sentinel sites conducted
+across three provinces. Each province's dataset presents distinct
+data quality scenarios, requiring a tailored prevalence analysis:
+\itemize{
+\item "Province 1" has MFAZ's standard deviation and age ratio test rating of
+acceptability falling within range;
+\item "Province 2" has age ratio rated as problematic but with an acceptable
+standard deviation of MFAZ;
+\item "Province 3" has both tests rated as problematic.
+}
+
+This sample data is useful to demonstrate the use of prevalence functions on
+multi-area data where variations in the rating of acceptability of the
+standard deviation exist, hence requiring different analysis approaches for each
+area to ensure accurate estimation.
}
\examples{
anthro.04
+
}
\keyword{datasets}
diff --git a/man/apply_cdc_age_weighting.Rd b/man/apply_cdc_age_weighting.Rd
index 9ceddd0..ef4bea0 100644
--- a/man/apply_cdc_age_weighting.Rd
+++ b/man/apply_cdc_age_weighting.Rd
@@ -2,36 +2,28 @@
% Please edit documentation in R/prevalence_muac.R
\name{apply_cdc_age_weighting}
\alias{apply_cdc_age_weighting}
-\title{Correct the observed MUAC prevalence when there is an unbalanced sample
-between children under 2 and over two years old}
+\title{Apply the CDC/SMART prevalence weighting approach on MUAC data}
\usage{
apply_cdc_age_weighting(muac, age, .edema = NULL, status = c("sam", "mam"))
}
\arguments{
-\item{muac}{An integer vector containing MUAC measurements in mm.}
+\item{muac}{A vector of class \code{integer} of MUAC values (in mm).}
-\item{age}{A double vector containing age in months with at least 2 decimal
-places.}
+\item{age}{A vector of class \code{double} of child's age in months.}
-\item{.edema}{Optional.
If given, it should be a character vector of "y" = Yes, -"n" = No bilateral edema.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -\item{status}{If you wish to get the prevalence/proportions of severe or -moderate acute malnutrition. Set \verb{status = "sam" or status = "mam"} for the -former or latter, respectively.} +\item{status}{A choice of the form of wasting to be defined.} } \value{ -A numeric vector of length and size 1. +A vector of class \code{numeric} of length and size 1. } \description{ -As documented in the SMART MUAC tool and in the literature, MUAC shows a known -bias towards younger children. In a balanced sample, it is expected to have -nearly two thirds of the sample to be of children over two years old. If too -few older children are included in the sample, the weighted tool should be used. - -\code{apply_cdc_age_weighting()} does that. It takes the proportion of children -under 2 and adds to the product of 2 times the proportion of children over two, -then divided by 3. The use of this function is informed by the output of -\code{\link[=age_ratio_test]{age_ratio_test()}}. There is difference between this function and that in the -SMART plausibility check. Consider reading the documentation before use. +Calculate a weighted prevalence estimate of MUAC by adding the proportion of +children under 2 years to twice the proportion of children over 2 and then +dividing by 3. +} +\details{ +This function is informed by the output of \code{\link[=age_ratio_test]{age_ratio_test()}}. } diff --git a/man/apply_probit_approach.Rd b/man/apply_probit_approach.Rd deleted file mode 100644 index 9e45a5a..0000000 --- a/man/apply_probit_approach.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{apply_probit_approach} -\alias{apply_probit_approach} -\title{Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach.} -\usage{ -apply_probit_approach(x, .status = c("gam", "sam")) -} -\arguments{ -\item{x}{A double vector containing the z-score values} - -\item{.status}{A choice on the nutritional status you wish to apply the PROBIT approach -on. Default is "gam" for global acute malnutrition.} -} -\value{ -A numeric value (double) corresponding to the point prevalence estimate. -} -\description{ -This approach is only applied for when WFHZ standard deviation's is problematic. The -PROBIT approach estimates the prevalence of acute malnutrition indirectly by computing -the area under the tail of the curve from negative infinitive to the given threshold -through the cumulative normal distribution function using the mean and standard deviation. 
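Returning to \code{apply_cdc_age_weighting()} documented above: the weighting it describes is a small calculation. A sketch with hypothetical proportions, not package output:

## Hypothetical proportions of, say, SAM among children 6-23 and 24-59 months old
p_under2 <- 0.045
p_over2  <- 0.028

## Weighted estimate as described: (p_under2 + 2 * p_over2) / 3
(p_under2 + 2 * p_over2) / 3
#> [1] 0.03366667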
-} diff --git a/man/assign_penalty_points_age_sex_ratio.Rd b/man/assign_penalty_points_age_sex_ratio.Rd deleted file mode 100644 index b4f5704..0000000 --- a/man/assign_penalty_points_age_sex_ratio.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_scorers.R -\name{assign_penalty_points_age_sex_ratio} -\alias{assign_penalty_points_age_sex_ratio} -\title{Assign a penalty point for the amount of selection biases in age and sex ratios} -\usage{ -assign_penalty_points_age_sex_ratio(x) -} -\arguments{ -\item{x}{A numeric vector containing p-values from either age or sex ratio -test results.} -} -\value{ -A numeric vector with the corresponding penalty points (scores) according -to the classification. -} -\description{ -The function assigns a penalty score for a age and sex ratio's test classification. -The score range varies between 0 (when "Excellent") to 10 (when "Problematic") for -both, according to the \href{https://smartmethodology.org/}{ENA for SMART software}. -} -\examples{ - -## A vector storing age ratio or sex ratio p-values' classification ---- -x <- c("Excellent", "Problematic", "Acceptable", "Good") - -## Apply the function ---- -assign_penalty_points_age_sex_ratio(x) - -} diff --git a/man/assign_penalty_points_flags_and_sd.Rd b/man/assign_penalty_points_flags_and_sd.Rd deleted file mode 100644 index 429dea5..0000000 --- a/man/assign_penalty_points_flags_and_sd.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_scorers.R -\name{assign_penalty_points_flags_and_sd} -\alias{assign_penalty_points_flags_and_sd} -\title{Assign a penalty point for the amount of proportion flagged data and standard deviation} -\usage{ -assign_penalty_points_flags_and_sd(x) -} -\arguments{ -\item{x}{A character vector containing the test classifications of proportion -of flagged data and the value of standard deviation.} -} -\value{ -A numeric vector with the corresponding penalty points (scores) according -to the classification. -} -\description{ -The function assigns a penalty score for a given category of test classification. -The score range varies between 0 (when "Excellent") to 20 (when "Problematic") for -both flagged data and standard deviation. This was borrowed from the -\href{https://smartmethodology.org/}{ENA for SMART software} -In the SMART Methodology, flagged data and standard deviation are tho test -criteria that gets the highest penalty scores, so it is here. -} -\examples{ - -## Sample data ---- -x <- c("Excellent", "Problematic", "Acceptable", "Good") -## Apply the function ---- -assign_penalty_points_flags_and_sd(x) - -} diff --git a/man/assign_penalty_points_skew_kurt.Rd b/man/assign_penalty_points_skew_kurt.Rd deleted file mode 100644 index d8b456e..0000000 --- a/man/assign_penalty_points_skew_kurt.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_scorers.R -\name{assign_penalty_points_skew_kurt} -\alias{assign_penalty_points_skew_kurt} -\title{Assign a penalty point for the amount of issues in Skweness and Kurtosis} -\usage{ -assign_penalty_points_skew_kurt(x) -} -\arguments{ -\item{x}{A numeric vector containing Skewness or Kurtosis test results classification.} -} -\value{ -A numeric vector with the corresponding penalty points (scores) according -to the classification. 
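The age and sex ratio scorer described above maps a rating to a penalty between 0 ("Excellent") and 10 ("Problematic"). A sketch of that shape, using only the two endpoints quoted on this page (the intermediate penalties for "Good" and "Acceptable" are not stated here, so they are left as \code{NA}); this is an illustration, not the package's code:

score_age_sex_ratio <- function(rating) {
  dplyr::case_when(
    rating == "Excellent"   ~ 0,
    rating == "Problematic" ~ 10,
    TRUE ~ NA_real_   # "Good"/"Acceptable" take intermediate penalties
  )
}
score_age_sex_ratio(c("Excellent", "Problematic", "Acceptable", "Good"))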
-} -\description{ -The function assigns a penalty score for a Skewness and Kurtosis test classification. -The score range varies between 0 (when "Excellent") to 5 (when "Problematic") for -both, according to the \href{https://smartmethodology.org/}{ENA for SMART software}. -} -\examples{ - -## A vector storing Skewness or Kurtosis test classification ---- - -x <- c("Excellent", "Problematic", "Acceptable", "Good") - -## Apply the function ---- -assign_penalty_points_skew_kurt(x) - -} diff --git a/man/case_definition.Rd b/man/case_definition.Rd new file mode 100644 index 0000000..4de9245 --- /dev/null +++ b/man/case_definition.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/case_definitions.R +\name{define_wasting_cases_muac} +\alias{define_wasting_cases_muac} +\alias{define_wasting_cases_whz} +\alias{define_wasting_cases_combined} +\alias{define_wasting} +\title{Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria} +\usage{ +define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam")) + +define_wasting_cases_whz(zscore, edema = NULL, cases = c("gam", "sam", "mam")) + +define_wasting_cases_combined( + zscore, + muac, + edema = NULL, + cases = c("cgam", "csam", "cmam") +) + +define_wasting( + df, + zscore = NULL, + muac = NULL, + edema = NULL, + base = c("wfhz", "muac", "combined") +) +} +\arguments{ +\item{muac}{A vector of class \code{integer} of MUAC values in millimeters.} + +\item{edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} + +\item{cases}{A choice of the form of wasting to be defined.} + +\item{zscore}{A vector of class \code{double} of WFHZ values (with 3 decimal places).} + +\item{df}{A dataset object of class \code{data.frame} to use.} + +\item{base}{A choice of the criterion on which the case-definition should be based.} +} +\value{ +A vector of class \code{numeric} of dummy values: 1 for case and 0 +for not case. +} +\description{ +Define if a given observation in the dataset is wasted or not, on the basis of +WFHZ, MFAZ, MUAC and the combined criteria. +} +\details{ +Use \code{define_wasting()} to add the case-definitions to data frame. 
+} +\examples{ + +## Weight-for-height based case-definition ---- +x <- anthro.02 |> +define_wasting( +zscore = wfhz, +edema = edema, +base = "wfhz" +) +head(x) + +## MUAC-based case-definition ---- +x <- anthro.02 |> +define_wasting( +muac = muac, +edema = edema, +base = "muac" +) +head(x) + +## Combined case-definition ---- +x <- anthro.02 |> +define_wasting( +zscore = wfhz, +muac = muac, +edema = edema, +base = "combined" +) +head(x) + +} diff --git a/man/case_definitions.Rd b/man/case_definitions.Rd deleted file mode 100644 index 5c7c6c0..0000000 --- a/man/case_definitions.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_definitions.R -\name{define_wasting_cases_muac} -\alias{define_wasting_cases_muac} -\alias{define_wasting_cases_whz} -\alias{define_wasting_cases_combined} -\title{Case-Definition: is an observation acutely malnourished?} -\usage{ -define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam")) - -define_wasting_cases_whz(zscore, edema = NULL, cases = c("gam", "sam", "mam")) - -define_wasting_cases_combined( - zscore, - muac, - edema = NULL, - cases = c("cgam", "csam", "cmam") -) -} -\arguments{ -\item{muac}{An integer vector containing MUAC measurements in mm.} - -\item{edema}{A character vector of "y" = Yes, "n" = No bilateral edema. -Default is NULL.} - -\item{cases}{A choice of wasting case definition you wish to apply. For combined -acute malnutrition with \code{\link[=define_wasting_cases_combined]{define_wasting_cases_combined()}} cases options are: -c("cgam", "csam", "cmam").} - -\item{zscore}{A double vector containing weight-for-height zscores with 3 -decimal places.} -} -\value{ -A numeric vector of the same size as the input vector, with values ranging -between 1=Yes and 0=No. -} -\description{ -\code{\link[=define_wasting_cases_muac]{define_wasting_cases_muac()}}, \code{\link[=define_wasting_cases_whz]{define_wasting_cases_whz()}} and -\code{\link[=define_wasting_cases_combined]{define_wasting_cases_combined()}} help you get through with your wasting -case-definition for each observation. It should be used inside dplyr::mutate() -or base::transform(). It was designed to be used inside \code{\link[=define_wasting]{define_wasting()}}. -} diff --git a/man/check_sample_size.Rd b/man/check_sample_size.Rd index abffacb..f2cb5d9 100644 --- a/man/check_sample_size.Rd +++ b/man/check_sample_size.Rd @@ -2,56 +2,38 @@ % Please edit documentation in R/sample_size.R \name{check_sample_size} \alias{check_sample_size} -\title{Check IPC AMN Sample Size Requirements} +\title{Check whether the IPC Acute Malnutrition sample size requirements were met} \usage{ check_sample_size(df, .group, .data_type = c("survey", "screening", "ssite")) } \arguments{ -\item{df}{A data frame containing the required variables.} +\item{df}{A dataset of class \code{data.frame} to check.} -\item{.group}{A vector containing the ID's of the primary sampling unit. -Usually and ideally a numeric vector, but sometimes this variables may come as -a character vector. Either way, \code{check_sample_size()} will execute -the task accordingly.} +\item{.group}{A vector of class \code{integer} of the cluster ID's for survey, +screening or site ID's for screenings and sentinel sites.} -\item{.data_type}{The data collection method: survey, screening or sentinel sites. 
-If you wish to check IPC AMN requirements on surveys were met, set -method = "survey"; for screening set method = "screening" and for sentinel -sites set method = "ssite". If by mistake a different parameter is given, -an error will be thrown and the function will stop, but with a guidance on -how to go about.} +\item{.data_type}{A choice between "survey" for survey data, "screening" for +screening data or "ssite" for community-based sentinel site data.} } \value{ -\code{check_sample_size()} returns an output of the same type -as the input (data frame), but of a different size. By default, the function -returns a summary of length 1 (one row), but with three new columns added to -the input data frame: \code{groups} (for survey), or sites (for screening or sentinel -sites) \code{n_obs} and \code{meet_ipc}. The first will store the total number of PSUs -in the sample. \code{n_obs} will store the total number of rows/observations and -\code{meet_ipc} is a logical vector to say whether or not the IPC AMN minimum -criteria for sample size was met. This is flexible according to the method you -select with \code{.data_type = " "}. +A summarised table of three columns: \code{groups} for the total number +of unique cluster or screening or site IDs; \code{n_obs} for the respective total +number of children; and \code{meet_ipc} for whether the IPC AMN requirements were met. } \description{ -Evidence used in \href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC} -comes from different sources, collected in different ways, -namely: representative surveys, screenings or even data from community-based -surveillance system - the sentinel sites. IPC AMN protocols have set minimum -sampling a sample size requirements for each. For cluster-based -representative surveys, there must be at least 25 primary sampling unit (PSUs). -On screening, there ware two ways: i. exhaustive screening (door-to-door) or -ii. sampled screening. For this, there should be at least three sites (i.e., -villages or communities, etc). \code{check_sample_size()} checks the -on sampled screening. - -\code{check_sample_size()} helps you know if your data meets the at least -IPC AMN minimum requirements. This function should be used before proceeding -to checking the quality of measurements. Doing this saves you from avoid -working on data that do not meet the minimum requirements, as it will not be -used in any IPC analysis. +Verify whether the minimum sample size requirements for the area of analysis +were met, in accordance with the IPC Acute Malnutrition (IPC AMN) protocols. +} +\details{ +\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{The IPC Manual}. 
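In terms of what gets computed, the summary described above amounts to counting PSUs and children per analysis area and comparing against the IPC AMN minima quoted in the earlier version of this page (at least 25 PSUs for surveys; at least three sites for sampled screenings). A rough dplyr sketch of that logic, not the package's implementation:

## Survey case: at least 25 PSUs per analysis area
anthro.01 |>
  dplyr::group_by(area) |>
  dplyr::summarise(
    groups   = dplyr::n_distinct(cluster),  # number of PSUs
    n_obs    = dplyr::n(),                  # number of children
    meet_ipc = groups >= 25
  )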
} \examples{ -# Have an input data frame -------------------------------------------------- -check_sample_size(anthro.01, .group = cluster, .data_type = "survey") + +anthro.01 |> +dplyr::group_by(area) |> +check_sample_size( +.group = cluster, +.data_type = "survey" +) } diff --git a/man/classify_age_sex_ratio.Rd b/man/classify_age_sex_ratio.Rd index 9b6cb0f..ffb2aaa 100644 --- a/man/classify_age_sex_ratio.Rd +++ b/man/classify_age_sex_ratio.Rd @@ -1,32 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R +% Please edit documentation in R/quality_raters.R \name{classify_age_sex_ratio} \alias{classify_age_sex_ratio} -\title{Classify how much high is the difference in age ration and in sex ratio} +\title{Rate the acceptability of the age and sex ratio test p-values} \usage{ classify_age_sex_ratio(p) } \arguments{ -\item{p}{A numeric vector containing the test p-values.} +\item{p}{A vector of class \code{double} of the age or sex ratio test p-values.} } \value{ -A character vector with the correspondent classification. +A vector of class \code{character} of the same length as \code{p} for the +acceptability rate. } \description{ -\code{classify_age_sex_ratio()} works on the results yielded by \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}}. -It helps you know how much high is the statistical difference between children -age 6-29 months of those age 30-59 months. Likewise, with regard to sex, -function works on the results yielded by \code{\link[nipnTK:sexRatioTest]{nipnTK::sexRatioTest()}} to know -how much high is the difference between boy and girls in your sample data. -} -\examples{ - -## Have a numeric vector storing p-values ---- -pvalues <- c(0, 0, 0.01, 0.011, 0.2, 0.015, 0.016, 0.017, -0.05, 0.06,0.03, 0.03, 0.04, 0.000001, 0.07 -) - -## Apply the function ---- -classify_age_sex_ratio(pvalues) - +Rate the acceptability of the age and sex ratio test p-values } diff --git a/man/classify_overall_quality.Rd b/man/classify_overall_quality.Rd index 96e827a..7a61406 100644 --- a/man/classify_overall_quality.Rd +++ b/man/classify_overall_quality.Rd @@ -1,26 +1,31 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R +% Please edit documentation in R/quality_raters.R \name{classify_overall_quality} \alias{classify_overall_quality} -\title{Get the overall data quality classification} +\title{Rate the overall acceptability score} \usage{ classify_overall_quality(df) } \arguments{ -\item{df}{A data frame containing a vector with the quality scores generated by -\code{\link[=compute_quality_score]{compute_quality_score()}}.} +\item{df}{A dataset of class \code{data.frame} containing a vector of the overall +acceptability score as yielded from \code{\link[=compute_quality_score]{compute_quality_score()}}.} } \value{ -A character vector of the same length, but a different width as the -input \code{df} is returned with a new column called \code{quality_class}. +A \code{data.frame} based on \code{df}. A new column \code{quality_class} for the +overall acceptability rate is created and added to \code{df}. } \description{ -\code{classify_overall_quality()} helps you in knowing the overall status of your -data quality. It classifies the overall score generated by -\code{\link[=compute_quality_score]{compute_quality_score()}} into four categories, as it is done in the -\href{https://smartmethodology.org/}{SMART Methodology}, -namely: "Excellent", "Good", "Acceptable" and "Problematic". 
Beware that -the overall classification should be used as an indication to further -scrutinize of data before taking the decision to validate or invalidate the -results. +Rate the overall acceptability score into "Excellent", "Good", "Acceptable" and +"Problematic". +} +\examples{ +## A sample data ---- + +df <- data.frame( +quality_score = 29 +) + +## Apply the function ---- +classify_overall_quality(df) + } diff --git a/man/classify_percent_flagged.Rd b/man/classify_percent_flagged.Rd deleted file mode 100644 index 6cda3bb..0000000 --- a/man/classify_percent_flagged.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R -\name{classify_percent_flagged} -\alias{classify_percent_flagged} -\title{Classify how much high is the proportion of flagged data} -\usage{ -classify_percent_flagged(p, type = c("mfaz", "whz", "crude")) -} -\arguments{ -\item{p}{A numeric vector containing the proportions of flagged data} - -\item{type}{The method to which you wish to classify how much high are the -proportions of flagged data. A choice between "mfaz" for MFAZ, "whz" for WHZ -and "crude" for crude MUAC.} -} -\value{ -A character vector with the correspondent classification of the -amount of flagged data. The categories of classification ranges are: -"Excellent", "Good", "Acceptable", "Problematic". -} -\description{ -\code{classify_percent_flagged()} tells you how much high is the proportion of -of flagged data in your data set, an indication of quality of data. Its a -reusable function for MFAZ, WHZ and crude MUAC. The cut-offs for MFAZ and -crude MUAC are the same with the upper limit of 2\%. This is based on the -research findings by \href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}, -from a multi-country analysis, found that the correlation between the mean -MFAZ and crude MUAC was almost perfect (r=99). As for WHZ, the cut-offs are -exactly those in the \href{https://smartmethodology.org/}{SMART Methodology}. -} -\examples{ - -## Take a vector with the proportions of flagged data ---- -prop <- c(0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, -0.03, 0.03, 0.04, 0.000001, 0) - -## Apply the function setting type to "whz" for instance ---- -classify_percent_flagged(prop, type = "whz") - -} diff --git a/man/classify_sd.Rd b/man/classify_sd.Rd deleted file mode 100644 index 7ff76c2..0000000 --- a/man/classify_sd.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R -\name{classify_sd} -\alias{classify_sd} -\title{Classify how much high is the value of standard deviation} -\usage{ -classify_sd(sd, type = c("zscore", "crude")) -} -\arguments{ -\item{sd}{A numeric vector containing values for standard deviation of the -method you wish the work on.} - -\item{type}{The method to which you wish to classify how much high is the -value of standard deviation. A choice between "zscore" MFAZ or WHZ and -"crude" for crude MUAC.} -} -\value{ -A character vector with the correspondent classification. -} -\description{ -\code{classify_sd()} helps you to know the magnitude of the data's standard -deviation. You can use this function for either WHZ, MFAZ or crude MUAC. -Cut-offs for WHZ are based on the \href{https://smartmethodology.org/}{SMART Methodology}. -Cut-offs for MFAZ are also based on SMART, but informed by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. 
-For crude MUAC, the cut-offs are based on the -\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC AMN guidelines} -} -\examples{ - -## Have a vector with standard deviation ---- -sdvalues <- seq(0.7, 1.3, by = 0.001) |> -sample(size = 9, replace = TRUE) - -## Apply the function with `type = "zscore` ---- -classify_sd(sdvalues, type = "zscore") - -## Using `type = "crude"` ---- -### Create sample data ---- -sdvalues <- seq(9, 30, by = 2) |> -sample(size = 20, replace = TRUE) - -### Apply the function with `type = "crude"` ---- -classify_sd(sdvalues, type = "crude") - -} diff --git a/man/classify_skew_kurt.Rd b/man/classify_skew_kurt.Rd index 58c0006..f8a08ae 100644 --- a/man/classify_skew_kurt.Rd +++ b/man/classify_skew_kurt.Rd @@ -1,29 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R +% Please edit documentation in R/quality_raters.R \name{classify_skew_kurt} \alias{classify_skew_kurt} -\title{Classify how much high is the value of Skewness and Kurtosis} +\title{Rate the acceptability of the skewness and kurtosis test results} \usage{ classify_skew_kurt(sk) } \arguments{ -\item{sk}{A numeric vector containing values of either Skewness or Kurtosis.} +\item{sk}{A vector of class \code{double} for skewness or kurtosis test results.} } \value{ -A character vector with the correspondent classification. +A vector of class \code{character} of the same length as \code{sk} for the +acceptability rate. } \description{ -\code{classify_skew_kurt()} helps you to know the magnitude of the Skewness and -Kurtosis from your data. This is only useful for WHZ and MFAZ. The function -works on the results yielded by \code{\link[nipnTK:skewKurt]{nipnTK::skewKurt()}}. -Cut-offs for WHZ are based on the \href{https://smartmethodology.org/}{SMART Methodology}. -} -\examples{ - -#Have a numeric vector storing values for skewness or kurtosis ---- -sk <- seq(-5, 1, by = 0.05) |> sample(size = 20, replace = TRUE) - -# Apply function -classify_skew_kurt(sk) - +Rate the acceptability of the skewness and kurtosis test results } diff --git a/man/classify_wasting_for_cdc_approach.Rd b/man/classify_wasting_for_cdc_approach.Rd index 8135317..26f11d4 100644 --- a/man/classify_wasting_for_cdc_approach.Rd +++ b/man/classify_wasting_for_cdc_approach.Rd @@ -2,25 +2,22 @@ % Please edit documentation in R/case_definitions.R \name{classify_wasting_for_cdc_approach} \alias{classify_wasting_for_cdc_approach} -\title{A helper function to classify nutritional status into SAM, MAM or not wasted} +\title{Classify wasting into severe or moderate wasting to be used in the +SMART MUAC tool weighting approach} \usage{ classify_wasting_for_cdc_approach(muac, .edema = NULL) } \arguments{ -\item{muac}{An integer vector containing MUAC values. They should be in -millimeters.} +\item{muac}{A vector of class \code{integer} of MUAC values in millimeters.} -\item{.edema}{Optional. Its a vector containing data on bilateral pitting -edema coded as "y" for yes and "n" for no.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} } \value{ -A numeric vector of the same size as the input vector with values ranging -between "sam", "mam" and "not wasted" for severe, moderate acute malnutrition and not -acutely malnourished, respectively. 
+A vector of class \code{character} of the same length as \code{muac} and \code{.edema} +indicating if a child is severe or moderately wasted or not wasted. } \description{ -\code{classify_wasting_for_cdc_approach()} is used a helper inside -\code{\link[=apply_cdc_age_weighting]{apply_cdc_age_weighting()}} to classify nutritional status into "sam", "mam" -or "not wasted" and then the vector returned is used downstream to calculate -the proportions of children with severe and moderate acute malnutrition. +Classify wasting into severe or moderate wasting to be used in the +SMART MUAC tool weighting approach } diff --git a/man/combined_prevalence.Rd b/man/combined_prevalence.Rd index 0103eeb..131e552 100644 --- a/man/combined_prevalence.Rd +++ b/man/combined_prevalence.Rd @@ -3,7 +3,7 @@ \name{compute_pps_based_combined_prevalence} \alias{compute_pps_based_combined_prevalence} \alias{compute_combined_prevalence} -\title{Compute combined prevalence of acute malnutrition} +\title{Compute combined prevalence of wasting} \usage{ compute_pps_based_combined_prevalence( df, @@ -15,49 +15,44 @@ compute_pps_based_combined_prevalence( compute_combined_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) } \arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} and \code{\link[=process_wfhz_data]{process_wfhz_data()}}. -The process_***_data function will have to used both to prepare the input data to be used -in the \code{compute_combined_prevalence()}. The order of which comes first does not matter, -however, since the muac data processor transforms MUAC values into centimeters, those -need to be put back into millimeter. This can be achieved my using \code{\link[=recode_muac]{recode_muac()}} inside -\code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}} (see example number 3 below).} +\item{df}{An already wrangled dataset of class \code{data.frame} to use. Both +wranglers (of WFHZ and of MUAC) need to be used sequentially, regardless of the +order. Note that MUAC values should be converted to millimeters after using +the MUAC wrangler.} -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) -the function will assume self weights, like in ENA for SMART, if otherwise given, the -weighted analysis will be computed.} +\item{.wt}{A vector of class \code{double} of the final survey weights. Default is +\code{NULL} assuming a self-weighted survey, as in the ENA for SMART software; +otherwise a weighted analysis is computed.} -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default). If this argument is not used, the function will error.} +\item{.summary_by}{A vector of class \code{character} of the geographical areas +where the data was collected and for which the analysis should be performed.} } \value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. 
+A summarised table of class \code{data.frame} for the descriptive +statistics about combined wasting. } \description{ -\code{compute_combined_prevalence()} is handy function to compute the combined prevalence of -acute malnutrition using the WFHZ and the absolute values of MUAC and edema for case -definition. Under the hood, before prevalence computations begin, it first evaluates the -status of WFHZ, MFAZ's standard deviation and age ratio test, as documented in -\code{\link[=compute_wfhz_prevalence]{compute_wfhz_prevalence()}} and \code{\link[=compute_muac_prevalence]{compute_muac_prevalence()}}. Then, it decides on the -appropriate analysis approach to employ depending on the outcome of the aforementioned -checks: (i) if either WFHZ, MFAZ standard deviation as well as age ratio test are not -simultaneously problematic, a complex sample-based prevalence analysis (for a two-stage -PPS cluster sampling) is computed; (ii) all other possibilities will involve either one -of the z-scores or the age ratio test being problematic, thus NA (for Not Applicable) -get thrown to output table. +The prevalence is calculated in accordance with the complex sample design +properties inherent to surveys. This includes weighting of survey data where +applicable. When either the acceptability of the standard deviation of WFHZ or +of the age ratio test is problematic, prevalence is not calculated. +} +\details{ +A concept of "combined flags" is introduced in this function. It consists of +defining as flag any observation that is flagged in either \code{flag_wfhz} or +\code{flag_mfaz} vectors. A new column \code{cflags} for combined flags is created and +added to \code{df}. This ensures that all flagged observations from both WFHZ +and MFAZ data are excluded from the combined prevalence analysis. -A concept of "combined flags" is introduced here. This consists on creating a new vector -(cflags) of the same length as the input vectors (wfhz_flags and mfaz_flags) and assesses -if any element of either input vector is a flag (1), then that element is labelled as -flag (1) in the "cflags" vector, otherwise is not flag (0). This ensures that all -flagged observations in the WFHZ data and in MFAZ data are excluded for the combined -prevalence analysis. +\emph{The table below shows an overview of how \code{cflags} are defined}\tabular{ccc}{ + \strong{flag_wfhz} \tab \strong{flag_mfaz} \tab \strong{cflags} \cr + 1 \tab 0 \tab 1 \cr + 0 \tab 1 \tab 1 \cr + 0 \tab 0 \tab 0 \cr +} } \examples{ diff --git a/man/compute_age_in_months.Rd b/man/compute_age_in_months.Rd index a592857..4e96d75 100644 --- a/man/compute_age_in_months.Rd +++ b/man/compute_age_in_months.Rd @@ -2,21 +2,19 @@ % Please edit documentation in R/age.R \name{compute_age_in_months} \alias{compute_age_in_months} -\title{Get age in months from birth-date and the data when data was collected.} +\title{Calculate child's age in months} \usage{ compute_age_in_months(surv_date, birth_date) } \arguments{ -\item{surv_date, birth_date}{Vectors containing dates. \code{surv_date} refers to the day, -month and year when the data was collected; while \code{birth_date} refers to the date -when the child was born.} +\item{surv_date}{A vector of class \code{Date} for data collection date.} + +\item{birth_date}{A vector of class \code{Date} for child's date of birth.} } \value{ -A vector of name \code{age} storing age in months, a mix of double and -integer and \code{NA} for missing value if any of the processed age in months is -< 6 or > 59.99 months. 
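Tying back to the combined-flag rule tabulated above for \code{compute_combined_prevalence()}: in code the definition reduces to a single comparison. A sketch assuming, as for an already wrangled dataset such as \code{anthro.02}, that both \code{flag_wfhz} and \code{flag_mfaz} are present:

## Flag an observation if it is flagged on either criterion
anthro.02 |>
  dplyr::mutate(cflags = as.integer(flag_wfhz == 1 | flag_mfaz == 1)) |>
  dplyr::count(cflags)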
+A vector of class \code{double} for child's age in months with two decimal places. +Any value less than 6.0 and greater than or equal to 60.0 months will be set to \code{NA}. } \description{ -\code{compute_age_in_months()} works inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}} -It helps you to compute age in months from a pair of birth date and survey date. +Calculate child's age in months based on date of birth and the data collection date. } diff --git a/man/compute_mfaz_prevalence.Rd b/man/compute_mfaz_prevalence.Rd deleted file mode 100644 index 177c303..0000000 --- a/man/compute_mfaz_prevalence.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_mfaz.R -\name{compute_mfaz_prevalence} -\alias{compute_mfaz_prevalence} -\title{Compute acute malnutrition prevalence based on MUAC-for-age z-scores (MFAZ)} -\usage{ -compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default).} -} -\value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. -} -\description{ -\code{compute_mfaz_prevalence()} is a handy function designed to dynamically compute acute -malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -WFHZ's standard deviation (SD) after removing flags, and then it decides on the -appropriate prevalence analysis approach to follow: if SD is anything between excellent -and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -was also designed to work around survey weights. -The function also super handy to work on large data sets with multiple survey areas. For -this, the aforementioned conditionals are checked for each survey area in a summarized -data frame and prevalence get computed according to each row's scenario. 
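The PROBIT re-calculation referred to above (and on the removed \code{apply_probit_approach} page earlier) is, numerically, a normal-CDF tail area taken at the case-defining threshold, using the observed mean and a standard deviation fixed at 1. A sketch on simulated z-scores, assuming the conventional -2 and -3 z-score cut-offs for GAM and SAM:

set.seed(123)
z  <- rnorm(500, mean = -0.6, sd = 1.1)   # hypothetical non-flagged z-scores
mu <- mean(z, na.rm = TRUE)

gam <- pnorm(-2, mean = mu, sd = 1)   # area below -2 z-scores
sam <- pnorm(-3, mean = mu, sd = 1)   # area below -3 z-scores
mam <- gam - sam
round(c(gam = gam, sam = sam, mam = mam), 3)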
-} -\examples{ - -## When .summary_by = NULL ---- -compute_mfaz_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -## When .summary_by is not set to NULL ---- -compute_mfaz_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = province -) - -## When a weighted analysis is needed ---- -### This example uses a different data set with survey weights ---- -compute_mfaz_prevalence( -df = anthro.02, -.wt = "wtfactor", -.edema = edema, -.summary_by = province -) - -} diff --git a/man/compute_month_to_days.Rd b/man/compute_month_to_days.Rd index b3aaa20..790fd25 100644 --- a/man/compute_month_to_days.Rd +++ b/man/compute_month_to_days.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/age.R \name{compute_month_to_days} \alias{compute_month_to_days} -\title{Recode age variable from months to days} +\title{Calculate child's age in days} \usage{ compute_month_to_days(x) } \arguments{ -\item{x}{A numeric vector containing values of age in months.} +\item{x}{A double vector of child's age in months.} } \value{ -A numeric vector with values corresponding to age in days +A double vector of the same length as \code{x} of age in days. } \description{ -Recode age variable from months to days +Calculate child's age in days } diff --git a/man/compute_muac_prevalence.Rd b/man/compute_muac_prevalence.Rd deleted file mode 100644 index a9e104b..0000000 --- a/man/compute_muac_prevalence.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{compute_muac_prevalence} -\alias{compute_muac_prevalence} -\title{Compute acute malnutrition prevalence based on MUAC (the absolute values)} -\usage{ -compute_muac_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default). If this argument is not used, the function will error.} -} -\value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. -} -\description{ -\code{compute_muac_prevalence()} is a handy function designed to dynamically compute acute -malnutrition's prevalence using the absolute values of MUAC, however using the MFAZ for -quality checks before advancing to prevalence computations. Under the hood, the function -first checks the status of MFAZ's standard deviation (SD) after removing flags, and -the status of age ratio among children aged 6:23 vs 24:59 months. 
Then it decides on the -appropriate prevalence analysis approach to follow: (i) if SD & age ratio are both not -problematic, a complex sample-based prevalence analysis (for a two-stage PPS -cluster sampling) is computed; (ii) if MFAZ's SD is not problematic, but age ratio test -is, the CDC/SMART MUAC tool weighting approach is used to compute the prevalence; (iii) -lastly, if MFAZ's SD is problematic even if age ratio test is not, no prevalence -analysis is computed and NA (of Not Applicable) are thrown. -The function also super handy to work on large data sets with multiple survey areas. For -this, the aforementioned conditionals are checked for each survey areas in a summarized -data frame and prevalence get computed according to each row's scenario. -} -\examples{ -## When .summary.by = NULL ---- - -x <- compute_muac_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -print(x) - -## When .summary_by is not set to NULL ---- - -p <- compute_muac_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = province -) - -print(p) - - -} diff --git a/man/compute_pps_based_mfaz_prevalence.Rd b/man/compute_pps_based_mfaz_prevalence.Rd deleted file mode 100644 index 26642b5..0000000 --- a/man/compute_pps_based_mfaz_prevalence.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_mfaz.R -\name{compute_pps_based_mfaz_prevalence} -\alias{compute_pps_based_mfaz_prevalence} -\title{Compute a MUAC-for-age z-score based prevalence estimates of data collected from a two-stage -cluster survey sample design, with the first stage sampling done with Probability -Proportional to the size of population} -\usage{ -compute_pps_based_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}. -this will contain the wrangled vectors that are read inside the function.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. - -@returns A tibble of size depending on the number of groups of the vector given to -\code{.summary_by} or if set to NULL, and of length 17.} -} -\description{ -Create a survey design object using the \code{\link[srvyr:as_survey_design]{srvyr::as_survey_design()}} and then calculate -the survey means as well the sum of positive cases. 
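The design-based step described here can be sketched directly with srvyr. Column names follow \code{anthro.02} where documented (\code{strata}, \code{wtfactor}, \code{province}, \code{wfhz}, \code{edema}); the cluster ID column name is assumed, and the \code{gam} case column comes from \code{define_wasting()}. An outline of the idea, not the package's internal code:

library(srvyr)

anthro.02 |>
  define_wasting(zscore = wfhz, edema = edema, base = "wfhz") |>
  as_survey_design(ids = cluster, strata = strata, weights = wtfactor) |>
  group_by(province) |>
  summarise(
    gam_p = survey_mean(gam, vartype = "ci", na.rm = TRUE),  # survey mean = prevalence
    gam_n = survey_total(gam, na.rm = TRUE)                  # sum of positive cases
  )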
-} diff --git a/man/compute_pps_based_muac_prevalence.Rd b/man/compute_pps_based_muac_prevalence.Rd deleted file mode 100644 index 4df63df..0000000 --- a/man/compute_pps_based_muac_prevalence.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{compute_pps_based_muac_prevalence} -\alias{compute_pps_based_muac_prevalence} -\title{Compute MUAC based prevalence estimates of data collected from a two-stage cluster -survey sample design, with the first stage sampling done with Probability Proportional -to the size of population} -\usage{ -compute_pps_based_muac_prevalence( - df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL -) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}. -this will contain the wrangled vectors that are read inside the function.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. - -@returns A tibble of size depending on the number of groups of the vector given to -\code{.summary_by} or if set to NULL, and of length 17.} -} -\description{ -Create a survey design object using the \code{\link[srvyr:as_survey_design]{srvyr::as_survey_design()}} and then calculate -the survey means as well the sum of positive cases. -} diff --git a/man/compute_pps_based_wfhz_prevalence.Rd b/man/compute_pps_based_wfhz_prevalence.Rd deleted file mode 100644 index 626dcd8..0000000 --- a/man/compute_pps_based_wfhz_prevalence.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{compute_pps_based_wfhz_prevalence} -\alias{compute_pps_based_wfhz_prevalence} -\title{Compute a weight-for-height based prevalence estimates of data collected from a two-stage -cluster survey sample design, with the first stage sampling done with Probability -Proportional to the size of population} -\usage{ -compute_pps_based_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_wfhz_data]{process_wfhz_data()}}. -this will contain the wrangled vectors that are read inside the function.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. 
- -@returns A tibble of size depending on the number of groups of the vector given to -\code{.summary_by} or if set to NULL, and of length 17.} -} -\description{ -Create a survey design object using the \code{\link[srvyr:as_survey_design]{srvyr::as_survey_design()}} and then calculate -the survey means as well the sum of positive cases. -} diff --git a/man/compute_probit_prevalence.Rd b/man/compute_probit_prevalence.Rd deleted file mode 100644 index ae539b4..0000000 --- a/man/compute_probit_prevalence.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{compute_probit_prevalence} -\alias{compute_probit_prevalence} -\title{Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach} -\usage{ -compute_probit_prevalence(df, .summary_by = NULL, .for = c("wfhz", "mfaz")) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_wfhz_data]{process_wfhz_data()}} or by \code{\link[=process_muac_data]{process_muac_data()}} -They will contain the wrangled vectors that are read inside the function.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. -Default is NULL.} - -\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index you wish -to compute PROBIT prevalence on.} -} -\value{ -A tibble with the PROBIT based point prevalence for global, severe and moderate -acute malnutrition. -} -\description{ -This function is a helper function used inside \code{\link[=compute_wfhz_prevalence]{compute_wfhz_prevalence()}} and -\code{\link[=compute_mfaz_prevalence]{compute_mfaz_prevalence()}}. It is used to compute PROBIT based prevalence depending -on the status of standard deviation. For more details, check the documentation of the -aforementioned functions. -} diff --git a/man/compute_quality_score.Rd b/man/compute_quality_score.Rd index 1ed4ee3..ebe4266 100644 --- a/man/compute_quality_score.Rd +++ b/man/compute_quality_score.Rd @@ -2,36 +2,27 @@ % Please edit documentation in R/quality_scorers.R \name{compute_quality_score} \alias{compute_quality_score} -\title{Get the overall WHZ or MFAZ's quality score} +\title{Get the overall acceptability score from the acceptability classification scores} \usage{ compute_quality_score(df, type = c("mfaz", "whz")) } \arguments{ -\item{df}{A data frame containing the scores. If you wish the get the overall -quality score for MFAZ, the input data frame must have seven (7) required -columns containing test classification of flagged data, sex ratio, age ratio, -standard deviation, skewness, kurtosis, crude MUAC's digit preference. -Alternatively, if you wish to get the quality score of WHZ, then the input -data frame must have the exact same columns in the plausibility report of the -ENA for SMART software.} +\item{df}{A dataset object of class \code{data.frame} to calculate from.} -\item{type}{The method you wish to get the overall quality score for. -A choice between "mfaz" and "whz". If you wish to know the overall survey -score of your WHZ data, set \code{type = whz}, otherwise set \code{type = mfaz} for -MFAZ. 
If by mistake a different input choice is given, an error will be -thrown with a message guiding how to go about.} +\item{type}{A choice between "wfhz" and "mfaz" for the basis on which the +calculations should be made.} } \value{ -A vector (named \code{"quality_score"}) with the overall quality scores. +A \code{data.frame} based on \code{df} with a new column named \code{"quality_score"} +for the overall of acceptability (of quality) score. } \description{ -\code{compute_quality_score()} provides the overall quality score of either WHZ or MFAZ, -by adding up the scores across each test criteria. This is an input to -\code{\link[=classify_overall_quality]{classify_overall_quality()}}. +Calculate the total amount of penalty points based on each plausibility test +result acceptability classification for WFHZ and MFAZ. } \examples{ -# example code -## Create a `df` object ---- + +## A sample data ---- df <- data.frame( flagged_class = "Excellent", @@ -43,11 +34,7 @@ skew_class = "Good", kurt_class = "Acceptable" ) -## Apply function ---- +## Apply the function ---- compute_quality_score(df, type = "mfaz") -# You can also choose to chain the functions with a pipe operator ---- -df |> -compute_quality_score(type = "mfaz") - } diff --git a/man/compute_weighted_prevalence.Rd b/man/compute_weighted_prevalence.Rd index 609ef7f..e55d415 100644 --- a/man/compute_weighted_prevalence.Rd +++ b/man/compute_weighted_prevalence.Rd @@ -2,34 +2,23 @@ % Please edit documentation in R/prevalence_muac.R \name{compute_weighted_prevalence} \alias{compute_weighted_prevalence} -\title{A wrapper function to compute of \code{apply_cdc_age_weighting()} that allows to work on -a data frame} +\title{Apply the CDC/SMART prevalence weighting approach on MUAC data} \usage{ compute_weighted_prevalence(df, .edema = NULL, .summary_by = NULL) } \arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} this will contain the -wrangled vectors that are read inside the function.} +\item{df}{An already wrangled dataset object of class \code{data.frame} to use.} -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them.} +\item{.summary_by}{A vector of class \code{character} of the geographical areas +where the data was collected and for which the analysis should be performed.} } \value{ -A tibble with length and size varying according to use of \code{.summary_by}. -If set to NULL, a tibble of 1 x 3 is returned, otherwise the size of the tibble with be -corresponding to the number of groups/areas in the vector given to \code{.summary_by}, but -with the same length. +A table of class \code{data.frame} of dimensions that vary based on +\code{.summary_by}, containing the results. } \description{ -\code{compute_weighted_prevalence()} is the main function use to compute age adjusted MUAC -prevalence where there are excess of children 6:23 over 24:59 months. It allows the -computations to be done on a data frame. 
The function is used inside the main and -exported function to compute MUAC based prevalence. Before computing the prevalence, -the function first removed the flagged data so the computations are performed on -non-flagged observations. +Apply the CDC/SMART prevalence weighting approach on MUAC data } diff --git a/man/compute_wfhz_prevalence.Rd b/man/compute_wfhz_prevalence.Rd deleted file mode 100644 index 7a8adb7..0000000 --- a/man/compute_wfhz_prevalence.Rd +++ /dev/null @@ -1,81 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{compute_wfhz_prevalence} -\alias{compute_wfhz_prevalence} -\title{Compute acute malnutrition prevalence based on weight-for-height z-scores (WFHZ), -MUAC-for-age z-scores (MFAZ), MUAC and combined} -\usage{ -compute_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_wfhz_data]{process_wfhz_data()}}.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default).} -} -\value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. -} -\description{ -\code{compute_wfhz_prevalence()} is a handy function designed to dynamically compute acute -malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -WFHZ's standard deviation (SD) after removing flags, and then it decides on the -appropriate prevalence analysis approach to follow: if SD is anything between excellent -and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -was also designed to work around survey weights. -The function also super handy to work on large data sets with multiple survey areas. For -this, the aforementioned conditionals are checked for each survey areas in a summarized -data frame and prevalence get computed according to each row's scenario. 
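The branching just described (and the parallel logic on the MFAZ and MUAC pages above) comes down to a single decision on the standard deviation rating. A sketch of the rule only, not the package's internal code:

## Rule as documented: anything from "Excellent" to "Acceptable" keeps the
## design-based estimate; a "Problematic" SD triggers the PROBIT re-calculation.
analysis_approach <- function(sd_rating) {
  if (sd_rating %in% c("Excellent", "Good", "Acceptable")) {
    "complex sample-based estimate"
  } else {
    "PROBIT estimate (observed mean, SD = 1)"
  }
}
analysis_approach("Problematic")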
-} -\examples{ - -## When .summary_by = NULL ---- -anthro.03 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -compute_wfhz_prevalence( -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -## When .summary_by is not set to NULL ---- -anthro.03 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -compute_wfhz_prevalence( -.wt = NULL, -.edema = edema, -.summary_by = district -) - -## When a weighted analysis is needed ---- -anthro.02 |> -compute_wfhz_prevalence( -.wt = "wtfactor", -.edema = edema, -.summary_by = province -) - -} diff --git a/man/define_wasting.Rd b/man/define_wasting.Rd deleted file mode 100644 index d62fab8..0000000 --- a/man/define_wasting.Rd +++ /dev/null @@ -1,66 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_definitions.R -\name{define_wasting} -\alias{define_wasting} -\title{Add acute malnutrition case-definitions to the data frame} -\usage{ -define_wasting( - df, - zscore = NULL, - muac = NULL, - edema = NULL, - base = c("wfhz", "muac", "combined") -) -} -\arguments{ -\item{df}{The data frame object containing the vectors with zscores, muac and -edema.} - -\item{zscore}{The vector storing zscores values with 3 decimal places.} - -\item{muac}{An integer vector containing MUAC measurements in mm.} - -\item{edema}{A character vector of "y" = Yes, "n" = No bilateral edema. -Default is NULL.} - -\item{base}{A choice of options to which your case definition should be based on.} -} -\value{ -A data frame with three vectors added to the input data frame: "gam", -"sam" and "mam". If base = "combined" the vector names change to "cgam", -"csam" and "cmam" for combined global, severe and moderate acute malnutrition -respectively. -} -\description{ -Use \code{define_wasting()} to add the case-definitions in your input data frame. -} -\examples{ -# MUAC-based case-definition ---- -x <- anthro.02 |> -define_wasting( -muac = muac, -edema = edema, -base = "muac" -) -head(x) - -# Weight-for-height based case-definition ---- -x <- anthro.02 |> -define_wasting( -zscore = wfhz, -edema = edema, -base = "wfhz" -) -head(x) - -# Combined case-definition ---- -x <- anthro.02 |> -define_wasting( -zscore = wfhz, -muac = muac, -edema = edema, -base = "combined" -) -head(x) - -} diff --git a/man/flag_outliers.Rd b/man/flag_outliers.Rd deleted file mode 100644 index 7bbe8b3..0000000 --- a/man/flag_outliers.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_processors.R -\name{flag_outliers} -\alias{flag_outliers} -\title{Identify and flag outliers in WHZ, MFAZ, and crude MUAC datasets} -\usage{ -flag_outliers(x, type = c("zscore", "crude")) -} -\arguments{ -\item{x}{A numeric value from the variable storing either WHZ or MFAZ or crude -MUAC's observations in the dataset, as applicable.} - -\item{type}{The method you wish \code{flag_outliers()} to identify flags on. -A choice between "zscore" and "crude". If you wish to get flags for WHZ or -MFAZ, set \code{method = "zscore"}. Alternatively, if your wish to get flags for -crude MUAC, set \code{method = "crude"}. The default is "zscore". If by mistake -a different option is supplied, an error will be thrown with a message -guiding you what to do.} -} -\value{ -A vector of two values: 1 and 0, where 1 signifies flagged value and -0 not flagged. 
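# A minimal sketch of the case definitions behind the removed define_wasting()
# page, using the standard WHO cut-offs (GAM: WFHZ < -2 or bilateral edema;
# SAM: WFHZ < -3 or edema; MAM: GAM but not SAM). The cut-offs are assumed from
# common practice rather than read from this package's source.
wfhz  <- c(-3.2, -2.5, -1.0)
edema <- c("n", "n", "y")
gam <- as.integer(wfhz < -2 | edema == "y")
sam <- as.integer(wfhz < -3 | edema == "y")
mam <- as.integer(gam == 1 & sam == 0)
data.frame(wfhz, edema, gam, sam, mam)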
-}
-\description{
-Outliers are extreme values that far away from the mean, that are unlikely to
-be correct measurements. \code{flag_outliers()} helps you to identify any extreme
-values in your dataset in two different ways. Outliers in WHZ are identified
-based on the \href{https://smartmethodology.org/}{SMART Methodology.}.
-MFAZ follows the same approach, while crude MUAC's approach is based on a
-fixed range (<100mm and >200mm), based a multicountry research findings by
-\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}
-}
-\examples{
-
-# Sample data of crude MUAC ----
-x <- c(90, 110, 140, 200, 119, 235)
-
-# Apply `flag_outliers()` with type set to "crude" ----
-flag_outliers(x, type = "crude")
-
-# Sample data of MFAZ ----
-x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828)
-
-# Apply `flag_outliers()` with type set to "zscore" ----
-flag_outliers(x, type = "zscore")
-
-}
diff --git a/man/mfaz.01.Rd b/man/mfaz.01.Rd
index a0cd322..cce5e02 100644
--- a/man/mfaz.01.Rd
+++ b/man/mfaz.01.Rd
@@ -3,7 +3,7 @@
\docType{data}
\name{mfaz.01}
\alias{mfaz.01}
-\title{A MUAC screening data from an anonymized setting}
+\title{A sample MUAC screening data from an anonymized setting}
\format{
A tibble with 661 rows and 4 columns.\tabular{ll}{
\strong{Variable} \tab \strong{Description} \cr
@@ -13,11 +13,14 @@ A tibble with 661 rows and 4 columns.\tabular{ll}{
\emph{muac} \tab Mid-upper arm circumference (mm) \cr
}
}
+\source{
+Anonymous
+}
\usage{
mfaz.01
}
\description{
-A MUAC screening data from an anonymized setting
+A sample MUAC screening data from an anonymized setting
}
\examples{
mfaz.01
diff --git a/man/mfaz.02.Rd b/man/mfaz.02.Rd
index d83da35..655c3bb 100644
--- a/man/mfaz.02.Rd
+++ b/man/mfaz.02.Rd
@@ -3,7 +3,7 @@
\docType{data}
\name{mfaz.02}
\alias{mfaz.02}
-\title{A SMART survey data with MUAC}
+\title{A sample SMART survey data with MUAC}
\format{
A tibble with 303 rows and 7 columns.\tabular{ll}{
\strong{Variable} \tab \strong{Description} \cr
@@ -15,14 +15,14 @@ A tibble with 303 rows and 7 columns.\tabular{ll}{
\emph{flag_mfaz} \tab Flagged observations. 1=flagged, 0=not flagged \cr
}
}
+\source{
+Anonymous
+}
\usage{
mfaz.02
}
\description{
-A SMART survey data collected in an anonymized location. This data has
-mfaz standard deviation and age ratio within range for a normal prevalence
-analysis. It is, thus, used to check if \code{compute_muac_prevalence()} performs
-as designed.
+A sample SMART survey data with MUAC
}
\examples{
mfaz.02
diff --git a/man/outliers.Rd b/man/outliers.Rd
new file mode 100644
index 0000000..43c063c
--- /dev/null
+++ b/man/outliers.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wranglers.R
+\name{flag_outliers}
+\alias{flag_outliers}
+\alias{remove_flags}
+\title{Identify and flag outliers}
+\usage{
+flag_outliers(x, type = c("zscore", "crude"))
+
+remove_flags(x, unit = c("zscore", "crude"))
+}
+\arguments{
+\item{x}{A vector of class \code{double} of WFHZ or MFAZ or absolute MUAC values.
+The latter should be in millimeters.}
+
+\item{type}{A choice between \code{zscore} and \code{crude} for the type of values
+from which outliers should be detected and flagged.}
+
+\item{unit}{A choice between \code{zscore} and \code{crude} for the type of values
+from which outliers should be detected and flagged.}
+}
+\value{
+A vector of the same length as \code{x} of flagged observations that are
+outliers: 1 for a flagged value and 0 for a non-flagged value.
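# A minimal sketch of how the two helpers documented in outliers.Rd are meant to
# combine: flag first, then blank out the flagged values before computing summary
# statistics. It assumes the bundled anthro.02 data carries an mfaz column, as
# the examples further below suggest.
z <- anthro.02$mfaz
flags <- flag_outliers(z, type = "zscore")   # 1 = outlier, 0 = not an outlier
z_clean <- remove_flags(z, unit = "zscore")  # flagged values replaced with NA
sd(z_clean, na.rm = TRUE)                    # SD computed on non-flagged values only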
+} +\description{ +Outliers are extreme values that deviate remarkably from the survey mean, making +them unlikely to be accurate measurements. This function detects and signals +them based on a criterion set for the WFHZ, the MFAZ and for the absolute MUAC +values. +} +\details{ +The flagging criterion used for the WFHZ and the MFAZ is as in +\href{https://smartmethodology.org/}{SMART plausibility check}. A fixed flagging +criterion is used for the absolute MUAC values. This is as recommended by +\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).} +} +\examples{ + +## Sample data for absolute MUAC values ---- +x <- anthro.01$muac + +## Apply the function with type set to "crude" ---- +flag_outliers(x, type = "crude") + +## Sample data for MFAZ or for WFHZ values ---- +x <- anthro.02$mfaz + +# Apply the function with type set to "zscore" ---- +flag_outliers(x, type = "zscore") + +} diff --git a/man/plausibility-check.Rd b/man/plausibility-check.Rd new file mode 100644 index 0000000..8c78981 --- /dev/null +++ b/man/plausibility-check.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_auditors.R +\name{check_plausibility_mfaz} +\alias{check_plausibility_mfaz} +\alias{check_plausibility_wfhz} +\alias{check_plausibility_muac} +\title{Check the plausibility of the data} +\usage{ +check_plausibility_mfaz(df, sex, muac, age, flags, area) + +check_plausibility_wfhz(df, sex, age, weight, height, flags, area) + +check_plausibility_muac(df, flags, sex, muac) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to check. It should have been +wrangled using this package's wranglers.} + +\item{sex}{A vector of class \code{numeric} of child's sex: 1 for boy and 2 for girl.} + +\item{muac}{A vector of class \code{double} of child's MUAC in centimeters.} + +\item{age}{A vector of class \code{double} of child's age in months.} + +\item{flags}{A vector of class \code{numeric} of flagged observations.} + +\item{area}{A vector of class \code{character} of the geographical location where +data was collected and for which the analysis should be aggregated.} + +\item{weight}{A vector of class \code{double} of child's weight in kilograms.} + +\item{height}{A vector of class \code{double} of child's height in centimeters.} +} +\value{ +A summarised \code{data.frame} of plausibility test results and their +respective acceptability ratings. +} +\description{ +Verify the overall acceptability of the data through a set of +structured tests around sampling and measurement-related biases in the data. 
+} +\examples{ + +## Check the plausibility of WFHZ data ---- + +anthro.01 |> +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_wfhz_data( +sex = sex, +weight = weight, +height = height, +.recode_sex = TRUE +) |> +check_plausibility_wfhz( +sex = sex, +age = age, +weight = weight, +height = height, +flags = flag_wfhz, +area = area +) + +## Check the plausibility of MFAZ data ---- + +anthro.01 |> +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_muac_data( +sex = sex, +age = "age", +muac = muac, +.recode_sex = TRUE, +.recode_muac = TRUE, +unit = "cm" +) |> +check_plausibility_mfaz( +flags = flag_mfaz, +sex = sex, +muac = muac, +age = age, +area = area +) + +## Check the plausibility of the absolute MUAC values ---- + +anthro.01 |> +process_muac_data( +sex = sex, +muac = muac, +age = NULL, +.recode_sex = TRUE, +.recode_muac = FALSE, +unit = "none" +) |> +check_plausibility_muac( +flags = flag_muac, +sex = sex, +muac = muac +) + +} diff --git a/man/plausibility_checkers.Rd b/man/plausibility_checkers.Rd deleted file mode 100644 index 5a32f12..0000000 --- a/man/plausibility_checkers.Rd +++ /dev/null @@ -1,116 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_checkers.R -\name{check_plausibility_mfaz} -\alias{check_plausibility_mfaz} -\alias{check_plausibility_wfhz} -\alias{check_plausibility_muac} -\title{Plausibility checkers: MUAC-for-age z-scores, Weight-for-Height z-scores and -MUAC} -\usage{ -check_plausibility_mfaz(df, sex, muac, age, flags, area) - -check_plausibility_wfhz(df, sex, age, weight, height, flags, area) - -check_plausibility_muac(df, flags, sex, muac) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} for -\code{check_plausibility_mfaz()} and \code{check_plausibility_muac()} and returned by -\code{\link[=process_wfhz_data]{process_wfhz_data()}} for \code{check_plausibility_wfhz()}.} - -\item{sex}{A vector telling whether a given child is a boy or girl.} - -\item{muac}{A vector containing MUAC measurements.} - -\item{age}{A vector containing children's age in months.} - -\item{flags}{A character vector telling whether or not an observation is an -outlier.} - -\item{area}{A vector with values on where was the data collected. If you are -analyzing a data set with just one area, provide it anyway to -\code{check_plausibility_mfaz()} or \code{check_plausibility_wfhz()}} - -\item{weight}{A vector containing weight measurements in kilograms.} - -\item{height}{A vector containing height measurements in centimeters.} -} -\value{ -A summarized data frame containing quality checks statistics and -respective classification. -} -\description{ -\code{check_plausibility_mfaz()}, \code{check_plausibility_wfhz()} and -\code{check_plausibility_muac()} lets you know the quality of your data, based on -the statistics around MUAC-for-age zscores, weight-for-height z-scores and on -crude MUAC, respectively. Note that \code{check_plausibility_wfhz()} is all about -WHZ only. 
If you wish to know about MUAC checks consider using either
-\code{check_plausibility_mfaz()} or \code{check_plausibility_muac()}
-}
-\examples{
-
-## Check Plausibility: MFAZ ----
-
-anthro.01 |>
-process_age(
-svdate = "dos",
-birdate = "dob",
-age = age
-) |>
-process_muac_data(
-sex = sex,
-age = "age",
-muac = muac,
-.recode_sex = TRUE,
-.recode_muac = TRUE,
-unit = "cm"
-) |>
-check_plausibility_mfaz(
-flags = flag_mfaz,
-sex = sex,
-muac = muac,
-age = age,
-area = area
-)
-
-## Check Plausibility: WFHZ ----
-
-anthro.01 |>
-process_age(
-svdate = "dos",
-birdate = "dob",
-age = age
-) |>
-process_wfhz_data(
-sex = sex,
-weight = weight,
-height = height,
-.recode_sex = TRUE
-) |>
-check_plausibility_wfhz(
-sex = sex,
-age = age,
-weight = weight,
-height = height,
-flags = flag_wfhz,
-area = area
-)
-
-## Check Plausibility: MUAC ----
-
-anthro.01 |>
-process_muac_data(
-sex = sex,
-muac = muac,
-age = NULL,
-.recode_sex = TRUE,
-.recode_muac = FALSE,
-unit = "none"
-) |>
-check_plausibility_muac(
-flags = flag_muac,
-sex = sex,
-muac = muac
-)
-
-}
diff --git a/man/pretty_table.Rd b/man/pretty_table.Rd
index c3ef2a8..4b334e9 100644
--- a/man/pretty_table.Rd
+++ b/man/pretty_table.Rd
@@ -4,7 +4,7 @@
\alias{generate_pretty_table_mfaz}
\alias{generate_pretty_table_wfhz}
\alias{generate_pretty_table_muac}
-\title{Get a prettified formatted and presentable output table}
+\title{Get a formatted and presentable output table for the plausibility checkers}
\usage{
generate_pretty_table_mfaz(df)
@@ -13,48 +13,40 @@ generate_pretty_table_wfhz(df)
generate_pretty_table_muac(df)
}
\arguments{
-\item{df}{An output data frame returned by \code{\link[=check_plausibility_mfaz]{check_plausibility_mfaz()}},
-\code{\link[=check_plausibility_wfhz]{check_plausibility_wfhz()}} or \code{\link[=check_plausibility_muac]{check_plausibility_muac()}}.}
+\item{df}{A summary table object of class \code{data.frame} returned by the
+plausibility checkers.}
}
\value{
-An output data frame of the same size as the input, but with values
-formatted, columns renamed, and ready to share.
+A \code{data.frame} based on \code{df}. Columns are renamed, values formatted and
+ready to be shared.
}
\description{
-You may want to share the plausibility report in a table. You usually care for
-a well formatted and pretty table, with values rounded, scientific notations
-converted into conventional notations, etc. \code{generate_pretty_table_mfaz()},
-\code{generate_pretty_table_wfhz()} and \code{generate_pretty_table_muac()} does that
-for you so you already.
+Useful for getting the output returned from the plausibility checkers
+into a presentable format. It converts scientific notation to standard
+notation, rounds values and renames columns to meaningful names.
}
\examples{
-## Plausibility check on MFAZ ----
+## Check the plausibility of WFHZ data ----
anthro.01 |>
-process_age(
-svdate = "dos",
-birdate = "dob",
-age = age
-) |>
-process_muac_data(
+process_wfhz_data(
sex = sex,
-age = "age",
-muac = muac,
-.recode_sex = TRUE,
-.recode_muac = TRUE,
-unit = "cm"
+weight = weight,
+height = height,
+.recode_sex = TRUE
) |>
-check_plausibility_mfaz(
-flags = flag_mfaz,
+check_plausibility_wfhz(
sex = sex,
-muac = muac,
age = age,
+weight = weight,
+height = height,
+flags = flag_wfhz,
area = area
) |>
-generate_pretty_table_mfaz()
+generate_pretty_table_wfhz()
-## Plausibility check on absolute MUAC ----
+## Check the plausibility of MUAC data ----
anthro.01 |>
process_muac_data(
@@ -72,24 +64,29 @@ muac = muac
) |>
generate_pretty_table_muac()
-## Plausibility check on WFHZ ----
+## Check the plausibility of MFAZ data ----
anthro.01 |>
-process_wfhz_data(
+process_age(
+svdate = "dos",
+birdate = "dob",
+age = age
+) |>
+process_muac_data(
sex = sex,
-weight = weight,
-height = height,
-.recode_sex = TRUE
+age = "age",
+muac = muac,
+.recode_sex = TRUE,
+.recode_muac = TRUE,
+unit = "cm"
) |>
-check_plausibility_wfhz(
+check_plausibility_mfaz(
+flags = flag_mfaz,
sex = sex,
+muac = muac,
age = age,
-weight = weight,
-height = height,
-flags = flag_wfhz,
area = area
) |>
-generate_pretty_table_wfhz()
-
+generate_pretty_table_mfaz()
}
diff --git a/man/prevalence.Rd b/man/prevalence.Rd
new file mode 100644
index 0000000..d28b34c
--- /dev/null
+++ b/man/prevalence.Rd
@@ -0,0 +1,105 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prevalence_mfaz.R, R/prevalence_muac.R,
+% R/prevalence_wfhz.R
+\name{compute_mfaz_prevalence}
+\alias{compute_mfaz_prevalence}
+\alias{compute_muac_prevalence}
+\alias{compute_wfhz_prevalence}
+\title{Compute the prevalence estimates of wasting on the basis of WFHZ, MFAZ or MUAC}
+\usage{
+compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
+
+compute_muac_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
+
+compute_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
+}
+\arguments{
+\item{df}{An already wrangled dataset object of class \code{data.frame} to use.}
+
+\item{.wt}{A vector of class \code{double} of the final survey weights. Default is
+\code{NULL} assuming a self-weighted survey, as in the ENA for SMART software;
+otherwise, when a vector of weights is supplied, weighted analysis is computed.}
+
+\item{.edema}{A vector of class \code{character} of edema. Code should be
+"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.}
+
+\item{.summary_by}{A vector of class \code{character} of the geographical areas
+where the data was collected and for which the analysis should be performed.}
+}
+\value{
+A summarised table of class \code{data.frame} of the descriptive
+statistics about wasting.
+}
+\description{
+The prevalence is calculated in accordance with the complex sample design
+properties inherent to surveys. This includes weighting the survey data where
+applicable and applying PROBIT method estimation (for WFHZ) when the standard
+deviation is problematic. This is as in the SMART Methodology.
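# A minimal sketch of the PROBIT estimation referred to above and documented in
# probit-method.Rd further down: the GAM proportion is the area under a normal
# curve below the -2 z-score cut-off, taking the flag-free sample mean and, as
# per SMART, a standard deviation of 1. It assumes anthro.02 carries a ready-made
# wfhz column, as the weighted-analysis example suggests.
z <- remove_flags(anthro.02$wfhz, unit = "zscore")  # drop flagged z-scores
pnorm(-2, mean = mean(z, na.rm = TRUE), sd = 1)     # estimated GAM proportion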
+}
+\examples{
+
+## An example of application of `compute_muac_prevalence()` ----
+
+### When .summary_by = NULL ----
+
+x <- compute_muac_prevalence(
+df = anthro.04,
+.wt = NULL,
+.edema = edema,
+.summary_by = NULL
+)
+
+print(x)
+
+### When .summary_by is not set to NULL ----
+
+p <- compute_muac_prevalence(
+df = anthro.04,
+.wt = NULL,
+.edema = edema,
+.summary_by = province
+)
+
+print(p)
+
+## An example of application of `compute_wfhz_prevalence()` ----
+
+### When .summary_by = NULL ----
+anthro.03 |>
+process_wfhz_data(
+sex = sex,
+weight = weight,
+height = height,
+.recode_sex = TRUE
+) |>
+compute_wfhz_prevalence(
+.wt = NULL,
+.edema = edema,
+.summary_by = NULL
+)
+
+### When .summary_by is not set to NULL ----
+
+anthro.03 |>
+process_wfhz_data(
+sex = sex,
+weight = weight,
+height = height,
+.recode_sex = TRUE
+) |>
+compute_wfhz_prevalence(
+.wt = NULL,
+.edema = edema,
+.summary_by = district
+)
+
+### When a weighted analysis is needed ----
+
+anthro.02 |>
+compute_wfhz_prevalence(
+.wt = "wtfactor",
+.edema = edema,
+.summary_by = province
+)
+
+}
diff --git a/man/probit-method.Rd b/man/probit-method.Rd
new file mode 100644
index 0000000..0b1b698
--- /dev/null
+++ b/man/probit-method.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prevalence_wfhz.R
+\name{apply_probit_approach}
+\alias{apply_probit_approach}
+\alias{compute_probit_prevalence}
+\title{Compute the prevalence estimates of wasting on the basis of the PROBIT method.}
+\usage{
+apply_probit_approach(x, .status = c("gam", "sam"))
+
+compute_probit_prevalence(df, .summary_by = NULL, .for = c("wfhz", "mfaz"))
+}
+\arguments{
+\item{x}{A vector of class \code{double} of WFHZ or MFAZ values.}
+
+\item{.status}{A choice of the form of wasting for which the prevalence should
+be estimated.}
+
+\item{df}{An already wrangled dataset object of class \code{data.frame} to use.}
+
+\item{.summary_by}{A vector of class \code{character} of the geographical areas
+where the data was collected and for which the analysis should be performed.}
+
+\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index.}
+}
+\value{
+A summarised table of class \code{data.frame} of the prevalence estimates.
+No confidence intervals are yielded.
+}
+\description{
+This approach is applied when the standard deviation of WFHZ is problematic.
+The PROBIT method estimates the prevalence of wasting indirectly by calculating
+the area under the tail of the curve, from negative infinity to
+the given threshold, using the cumulative normal distribution function with
+the mean and standard deviation as inputs.
+}
diff --git a/man/process_age.Rd b/man/process_age.Rd
index 52e578b..7108b88 100644
--- a/man/process_age.Rd
+++ b/man/process_age.Rd
@@ -2,38 +2,38 @@
% Please edit documentation in R/age.R
\name{process_age}
\alias{process_age}
-\title{Transform age in months and age in days with a data frame}
+\title{Wrangle child's age}
\usage{
process_age(df, svdate = NULL, birdate = NULL, age)
}
\arguments{
-\item{df}{The input data frame.}
+\item{df}{A dataset of class \code{data.frame} to process age from.}
-\item{svdate, birdate}{Vectors containing dates. \code{svdate} refers to the day, month
-and year when the data was collected; while \code{birdate} refers to the date when the
-child was born (birth-date). By default, both arguments are \code{NULL}.
This is -makes \code{process_age()} work even in data sets where either survey date or birth- -data is not available, so the \code{process_age()} works on already given age variable.} +\item{svdate}{A vector of class \code{Date} for date of data collection. +Default is \code{NULL}.} -\item{age}{A numeric vector containing already given age in months, usually an -integer in the input data as it is estimated using local event calendars. -\code{age} will typically be available on a particular row when \code{birth_date} of -that same row is missing.} +\item{birdate}{A vector of class \code{Date} for child's date of birth. +Default is \code{NULL}.} + +\item{age}{A vector of class \code{integer} of age in months, usually estimated +using local event calendars.} } \value{ -A data frame of the same length as the input data frame, but of a -different width. If \code{svdate} or \code{birdate} are available, two new vectors are added -to the data frame: \code{age} in months with two decimal places and \code{age_day} which -is age in days with decimal two decimal places. +A \code{data.frame} based on \code{df}. The variable \code{age} that is required to be +included in \code{df} will be filled where applicable with the age in months for +each row of data in \code{df}. A new variable for \code{df} named \code{age_days} will be +created. Values for \code{age} and \code{age_days} for children less than 6.0 and greater +than or equal to 60.0 months old will be set to \code{NA}. } \description{ -\code{process_age()} helps you get the variable age in the right format and ready -to be used for downstream workflow, i.e., get z-scores, as well as exclude -age values that are out-of-range. +Wrangle child's age for downstream analysis. This includes calculating age +in months based on the date of data collection and child's date of birth and +setting to \code{NA} the age values that are less than 6.0 and greater than or equal +to 60.0 months old. } \examples{ -# Have a sample data ---- +## A sample data ---- df <- data.frame( survy_date = as.Date(c( "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), @@ -42,8 +42,12 @@ birthdate = as.Date(c( age = c(NA, 36, NA, NA, NA) ) -## Apply function ---- +## Apply the function ---- df |> -process_age(svdate = "survy_date", birdate = "birthdate", age = age) +process_age( +svdate = "survy_date", +birdate = "birthdate", +age = age +) } diff --git a/man/process_muac_data.Rd b/man/process_muac_data.Rd deleted file mode 100644 index b579ddd..0000000 --- a/man/process_muac_data.Rd +++ /dev/null @@ -1,86 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_processors.R -\name{process_muac_data} -\alias{process_muac_data} -\title{Process MUAC data a get it ready for analyses} -\usage{ -process_muac_data( - df, - sex, - muac, - age = NULL, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = c("cm", "mm", "none") -) -} -\arguments{ -\item{df}{The input data frame with variables sex, age and MUAC.} - -\item{sex}{A vector storing values about whether the child is a boy or a girl. -The variable name must be named sex, otherwise it will not work.} - -\item{muac}{A vector storing crude MUAC values.} - -\item{age}{A vector storing values about child's age in months. The variable -name must be named age, otherwise it will not work. For instance, if given as -following: age = months it will not work.} - -\item{.recode_sex}{Logical. 
It asks whether you should recode your sex variable -to the required shape to use in \code{process_muac_data()}. The default values for -sex are 1 for boys and 2 for girls. Setting \code{.recode_sex = TRUE} works on "m" -and "f" values. If your vector is in any different shape, you should put it in -"m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -SMART software, then you should leave \code{.recode_sex} at its default: \code{TRUE}.} - -\item{.recode_muac}{Logical. Choose between \code{TRUE} or \code{FALSE} if you wish or -not to recode the MUAC variable into the required format to work on.} - -\item{unit}{A choice of the units to which you wish to convert your MUAC -variable into.} -} -\value{ -A data frame of the same length as the input data, but with a -different width as explained:When \code{age} is available in the input data and -supplied, \code{process_muac_data} will return as output a data frame with two -new variables \code{mfaz} and \code{flags}. \code{mfaz} stores MUAC-for-age z-score (MFAZ) -values and \code{flags} tells you whether a given z-score is an outlier or not. -This job is done by \code{\link[=flag_outliers]{flag_outliers()}}. If age is not available in the input -data, therefore not possible to supply in this function, \code{process_muac_data} -will only return \code{flags}. This will refer to flags based on crude MUAC. -} -\description{ -\code{process_muac_data()} gets your input data ready for downstream MUAC related -analysis. -} -\examples{ - -## Have a sample data ---- - -df <- data.frame( - survey_date = as.Date(c( - "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), - birthdate = as.Date(c( - "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), - age = c(NA, 36, NA, NA, NA), - sex = c("m", "f", "m", "m", "f"), - muac = c(110, 130, 300, 123, 125) - ) - - ## Apply function ---- - df |> - process_age( - svdate = "survey_date", - birdate = "birthdate", - age = age - ) |> - process_muac_data( - sex = sex, - age = "age", - muac = muac, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = "cm" - ) - -} diff --git a/man/process_wfhz_data.Rd b/man/process_wfhz_data.Rd deleted file mode 100644 index b099af3..0000000 --- a/man/process_wfhz_data.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_processors.R -\name{process_wfhz_data} -\alias{process_wfhz_data} -\title{Process Weight-for-Height data get it ready for analyses} -\usage{ -process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE) -} -\arguments{ -\item{df}{The input data frame with variables sex, age and MUAC.} - -\item{sex}{A vector storing values about whether the child is a boy or a girl.} - -\item{weight, height}{Vectors storing weight values in kilograms and height -values in centimeters, respectively.} - -\item{.recode_sex}{Logical. It asks whether you should recode your sex variable -to the required shape to use in \code{process_wfhz_data()}. The default values for -sex are 1 = boys and 2 = girls. Setting \code{.recode_sex = TRUE} works on "m" -and "f" values. If your vector is in any different shape, you should put it in -"m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -SMART software, then you should leave \code{.recode_sex} at its default: \code{TRUE}.} -} -\value{ -A data frame of the same length as the input data, but with a different -width: two new variables \code{wfhz} and \code{flags}. 
\code{wfhz} stores weight-for-height
-z-score values with three decimal places. \code{flags} tells you whether a given
-z-score is an outlier or not. This job is done by \code{\link[=flag_outliers]{flag_outliers()}}.
-}
-\description{
-\code{process_wfhz_data()} gets your input data ready for downstream WHZ related
-analysis.
-}
-\examples{
-## Have a sample data ----
-anthro.01 |>
-process_wfhz_data(
-sex = sex,
-weight = weight,
-height = height,
-.recode_sex = TRUE
-)
-
-}
diff --git a/man/raters.Rd b/man/raters.Rd
new file mode 100644
index 0000000..43284fb
--- /dev/null
+++ b/man/raters.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/quality_raters.R
+\name{classify_percent_flagged}
+\alias{classify_percent_flagged}
+\alias{classify_sd}
+\title{Rate the acceptability of the standard deviation and the percentage of flagged
+data}
+\usage{
+classify_percent_flagged(p, type = c("mfaz", "whz", "crude"))
+
+classify_sd(sd, type = c("zscore", "crude"))
+}
+\arguments{
+\item{p}{A vector of class \code{double} of the proportions of flagged values in
+the dataset.}
+
+\item{type}{A choice between "wfhz", "mfaz" and "crude" for the basis on which
+the rating should be done.}
+
+\item{sd}{A vector of class \code{double} of the values of the standard deviation.}
+}
+\value{
+A vector of class \code{character} for the acceptability rate.
+}
+\description{
+Rate how high the standard deviation and the percentage of flagged
+data in the dataset are, hence their acceptability.
+}
+\details{
+The ranges of acceptability are: "Excellent", "Good", "Acceptable", "Problematic".
+The cut-offs for WFHZ are as in the \href{https://smartmethodology.org/}{SMART Methodology}.
+For the MFAZ and the absolute MUAC values, the maximum acceptable limit for
+outliers is 2\%, as recommended by
+\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}.
+Cut-offs for the standard deviation of the absolute MUAC values are based on the
+\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC AMN guidelines}.
+}
diff --git a/man/recode_muac.Rd b/man/recode_muac.Rd
index a53de0d..086b0da 100644
--- a/man/recode_muac.Rd
+++ b/man/recode_muac.Rd
@@ -1,39 +1,30 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data_processors.R
+% Please edit documentation in R/wranglers.R
\name{recode_muac}
\alias{recode_muac}
-\title{Recode crude MUAC variable into either centimeters or millimeters}
+\title{Convert MUAC values to either centimeters or millimeters}
\usage{
recode_muac(muac, unit = c("cm", "mm"))
}
\arguments{
-\item{muac}{A numeric vector storing values for MUAC that can be in centimeters
-or in millimeters.}
+\item{muac}{A vector of class \code{double} or \code{integer} of the absolute MUAC values.}
-\item{unit}{A choice of the units to which you wish to convert your MUAC
-variable into.}
+\item{unit}{A choice of the unit to which the MUAC values should be converted.}
}
\value{
-A transformed vector into the unit you wish to have.
+A numeric vector of the same length as \code{muac}, with values converted
+to the chosen unit.
}
\description{
-Sometimes, a vector containing MUAC values may be in centimeters or in
-millimeters. You may want to get in the right format to use with
-\link[zscorer:addWGSR]{zscorer::addWGSR} or \code{\link[nipnTK:digitPreference]{nipnTK::digitPreference()}}. \code{recode_muac()} helps you
-getting the vector in the right format for the job!
It works inside works
-inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.
+Recode the MUAC values to either centimeters or millimeters as required.
}
\examples{
-# Have an input data with muac in mm ----
-muac <- seq(90, 250, by = 4)
-# Apply recode ----
-recode_muac(muac, unit = "cm")
+## Recode from millimeters to centimeters ----
+muac <- anthro.01$muac
+muac_cm <- recode_muac(muac, unit = "cm")
-# Have an input data with muac in mm ----
-muac <- seq(9.0, 25.0, by = 0.2)
-
-# Apply recode ----
-recode_muac(muac, unit = "mm")
+## Using the `muac_cm` object to recode it back to "mm" ----
+muac_mm <- recode_muac(muac_cm, unit = "mm")
}
diff --git a/man/remove_flags.Rd b/man/remove_flags.Rd
deleted file mode 100644
index b061846..0000000
--- a/man/remove_flags.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data_processors.R
-\name{remove_flags}
-\alias{remove_flags}
-\title{Remove detected outliers}
-\usage{
-remove_flags(x, unit = c("zscore", "crude"))
-}
-\arguments{
-\item{x}{A numeric vector containing zscore or crude MUAC values.}
-
-\item{unit}{A choice of the units to which you wish remove flags on. variable into.}
-}
-\value{
-A vector of same size, with flagged data replaced by \code{NA}s.
-}
-\description{
-\code{remove_flags()} removes flags detected by \code{\link[=flag_outliers]{flag_outliers()}}. It helps you
-compute your statistics when flags needs to be removed, such as in standard
-deviation.
-}
diff --git a/man/scorer.Rd b/man/scorer.Rd
new file mode 100644
index 0000000..93590de
--- /dev/null
+++ b/man/scorer.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/quality_scorers.R
+\name{assign_penalty_points_flags_and_sd}
+\alias{assign_penalty_points_flags_and_sd}
+\alias{assign_penalty_points_age_sex_ratio}
+\alias{assign_penalty_points_skew_kurt}
+\title{Score the acceptability classification of the standard deviation and percentage
+of flagged data test results}
+\usage{
+assign_penalty_points_flags_and_sd(x)
+
+assign_penalty_points_age_sex_ratio(x)
+
+assign_penalty_points_skew_kurt(x)
+}
+\arguments{
+\item{x}{A vector of class \code{character} of acceptability classification of the
+plausibility test results.}
+}
+\value{
+A vector of class \code{integer} of the same length as \code{x} for the score.
+}
+\description{
+Attribute a penalty point based on the acceptability classification in which
+the plausibility test result falls.
+}
+\details{
+The scoring criteria are as in \href{https://smartmethodology.org/}{SMART Plausibility checks}.
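# A minimal sketch of how the scorers above feed the overall quality score: each
# acceptability label maps to penalty points and compute_quality_score() totals
# them. The point values below follow the classic SMART plausibility-check scale
# (0/5/10/20 for flagged data and the SD, 0/2/4/10 for age and sex ratios,
# 0/1/3/5 for skewness and kurtosis) and are an assumption for illustration, not
# read from this package's source.
penalties <- c(flagged = 5, sd = 0, age_ratio = 2, sex_ratio = 0, skew = 3, kurt = 1)
sum(penalties)  # overall quality (penalty) score: 11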
+} diff --git a/man/tell_muac_analysis_strategy.Rd b/man/tell_muac_analysis_strategy.Rd index 38b1dba..0e230fd 100644 --- a/man/tell_muac_analysis_strategy.Rd +++ b/man/tell_muac_analysis_strategy.Rd @@ -2,23 +2,29 @@ % Please edit documentation in R/prevalence_muac.R \name{tell_muac_analysis_strategy} \alias{tell_muac_analysis_strategy} -\title{A helper function to tell how to go about MUAC prevalence analysis based on -on the output of age ratio and standard deviation test results} +\title{A helper function to determine the MUAC prevalence analysis approach to follow} \usage{ tell_muac_analysis_strategy(age_ratio_class, sd_class) } \arguments{ -\item{age_ratio_class, sd_class}{Character vectors storing age ratio's p-values -and standard deviation's classification, respectively.} +\item{age_ratio_class}{A vector of class \code{character} of the acceptability +classification of the age ratio test result.} + +\item{sd_class}{A vector of class \code{character} of the acceptability +classification of the standard deviation analysis result.} } \value{ -A character vector of the same length containing the indication of -what to do for the MUAC prevalence analysis: "weighted", "unweighted" and -"missing". If "weighted", the CDC weighting approach is applied to correct for -age bias. If "unweighted" a normal complex sample analysis is applied, and for -the latter, NA are thrown. +A vector of class \code{character} of the same length as the input vectors, +containing values indicating the analysis approach for each analysis area: "weighted", +"unweighted" and "missing". } \description{ -A helper function to tell how to go about MUAC prevalence analysis based on -on the output of age ratio and standard deviation test results +It determines the analysis approach to follow for a given analysis area on +the basis of the rate of acceptability of the age ratio test and the standard +deviation analysis result. +} +\details{ +When "weighted", the CDC weighting approach is applied to correct for +age bias; when "unweighted" a normal complex sample analysis is applied; when +"missing" \code{NA} gets thrown. } diff --git a/man/wfhz.01.Rd b/man/wfhz.01.Rd index 061bda9..e23b80f 100644 --- a/man/wfhz.01.Rd +++ b/man/wfhz.01.Rd @@ -3,8 +3,7 @@ \docType{data} \name{wfhz.01} \alias{wfhz.01} -\title{A SMART survey data with standard deviation on weight-for-height zscores -classified as problematic} +\title{A sample SMART survey data with WFHZ standard deviation rated as problematic} \format{ A tibble with 303 rows and 6 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr @@ -16,13 +15,14 @@ A tibble with 303 rows and 6 columns.\tabular{ll}{ \emph{flag_wfhz} \tab Flagged observations. 1=flagged, 0=not flagged \cr } } +\source{ +Anonymous +} \usage{ wfhz.01 } \description{ -A SMART survey data with weight-for-height data where standard deviation is -problematic. The data is used to test that \code{compute_wfhz_prevalence()} works as -designed for when standard deviation is problematic. 
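# A minimal sketch of the decision rule that tell_muac_analysis_strategy()
# (documented above) stands for. Only the three possible outcomes and their
# meanings come from the documentation; the exact mapping below is an assumption
# for illustration.
choose_muac_approach <- function(age_ratio_class, sd_class) {
  ifelse(
    sd_class == "Problematic", "missing",                 # unusable SD: NA thrown downstream
    ifelse(age_ratio_class == "Problematic", "weighted",  # age bias: CDC weighting applied
           "unweighted")                                   # otherwise: normal complex sample analysis
  )
}
choose_muac_approach("Problematic", "Excellent")  # "weighted"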
+A sample SMART survey data with WFHZ standard deviation rated as problematic
}
\examples{
wfhz.01
diff --git a/man/wrangler.Rd b/man/wrangler.Rd
new file mode 100644
index 0000000..97df312
--- /dev/null
+++ b/man/wrangler.Rd
@@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wranglers.R
+\name{process_wfhz_data}
+\alias{process_wfhz_data}
+\alias{process_muac_data}
+\title{Wrangle weight-for-height and MUAC data}
+\usage{
+process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE)
+
+process_muac_data(
+  df,
+  sex,
+  muac,
+  age = NULL,
+  .recode_sex = TRUE,
+  .recode_muac = TRUE,
+  unit = c("cm", "mm", "none")
+)
+}
+\arguments{
+\item{df}{A dataset of class \code{data.frame} to wrangle data from.}
+
+\item{sex}{A numeric or character vector of child's sex. Code values should
+be 1 or "m" for boy and 2 or "f" for girl. The variable name must be sex,
+otherwise it will not work.}
+
+\item{weight}{A vector of class \code{double} of child's weight in kilograms.}
+
+\item{height}{A vector of class \code{double} of child's height in centimeters.}
+
+\item{.recode_sex}{Logical. Default is \code{TRUE}. Setting to \code{TRUE} assumes that
+the sex variable is a character vector of values "m" for boys and "f" for girls
+and will recode them to 1 and 2 respectively.}
+
+\item{muac}{A vector of class \code{double} or \code{integer} of the absolute MUAC values.}
+
+\item{age}{A double vector of child's age in months. It must be named age,
+otherwise it will not work.}
+
+\item{.recode_muac}{Logical. Default is \code{TRUE}. Set to \code{TRUE} if MUAC values
+should be converted to either centimeters or millimeters.}
+
+\item{unit}{A choice of the unit to which the MUAC values should be converted.
+"cm" for centimeters, "mm" for millimeters and "none" to leave as it is.}
+}
+\value{
+A data frame based on \code{df}. New variables named \code{wfhz} and
+\code{flag_wfhz}, of child's weight-for-height z-scores and flags, or \code{mfaz} and
+\code{flag_mfaz}, of child's MUAC-for-age z-scores and flags, will be created. For
+MUAC, when age is not supplied only \code{flag_muac} variable is created.
+This refers to flags based on the absolute MUAC values as recommended by
+\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}.
+}
+\description{
+This function performs data wrangling by calculating weight-for-height
+and MUAC-for-age z-scores, followed by the detection and flagging of outliers.
+For MUAC data, if age is not supplied, z-scores do not get computed. In such
+cases, outlier detection and flagging are based on the absolute MUAC values.
+}
+\details{
+The flagging criterion used for the WFHZ and MFAZ is as in
+\href{https://smartmethodology.org/}{SMART plausibility check}. A fixed flagging
+criterion is used for the absolute MUAC values. This is as recommended by
+\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B.
(2023).} +} +\examples{ + +## An example application of `process_wfhz_data()` ---- + +anthro.01 |> +process_wfhz_data( +sex = sex, +weight = weight, +height = height, +.recode_sex = TRUE +) + +## An example application of `process_muac_data()` ---- + +### Sample data ---- +df <- data.frame( + survey_date = as.Date(c( + "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), + birthdate = as.Date(c( + "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), + age = c(NA, 36, NA, NA, NA), + sex = c("m", "f", "m", "m", "f"), + muac = c(110, 130, 300, 123, 125) + ) + + ### The application of the function ---- + + df |> + process_age( + svdate = "survey_date", + birdate = "birthdate", + age = age + ) |> + process_muac_data( + sex = sex, + age = "age", + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + unit = "cm" + ) + +} diff --git a/tests/testthat/test-quality_checkers.R b/tests/testthat/test-quality_auditors.R similarity index 100% rename from tests/testthat/test-quality_checkers.R rename to tests/testthat/test-quality_auditors.R diff --git a/tests/testthat/test-classifiers.R b/tests/testthat/test-quality_raters.R similarity index 100% rename from tests/testthat/test-classifiers.R rename to tests/testthat/test-quality_raters.R diff --git a/tests/testthat/test-data_processors.R b/tests/testthat/test-wranglers.R similarity index 100% rename from tests/testthat/test-data_processors.R rename to tests/testthat/test-wranglers.R