From 045eb859ca343f1f0a13dc051240f299e05b0de0 Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Fri, 11 Oct 2024 21:20:07 +0200 Subject: [PATCH 1/9] revise functions documentation --- NAMESPACE | 7 - R/age.R | 130 +++---- R/case_definitions.R | 86 ++--- R/data_processors.R | 337 ------------------ R/pretty_tables.R | 24 +- R/{quality_checkers.R => quality_auditors.R} | 60 ++-- R/quality_classifiers.R | 246 ------------- R/quality_raters.R | 173 +++++++++ R/quality_scorers.R | 111 ++---- R/sample_size.R | 53 +-- R/wranglers.R | 317 ++++++++++++++++ man/age_ratio_test.Rd | 48 ++- man/assign_penalty_points_age_sex_ratio.Rd | 30 -- man/assign_penalty_points_flags_and_sd.Rd | 32 -- man/assign_penalty_points_skew_kurt.Rd | 30 -- man/auditor.Rd | 111 ++++++ man/case_definition.Rd | 88 +++++ man/case_definitions.Rd | 42 --- man/check_sample_size.Rd | 53 +-- man/classify_age_sex_ratio.Rd | 23 +- man/classify_overall_quality.Rd | 34 +- man/classify_percent_flagged.Rd | 40 --- man/classify_sd.Rd | 46 --- man/classify_skew_kurt.Rd | 22 +- man/classify_wasting_for_cdc_approach.Rd | 22 +- man/compute_age_in_months.Rd | 21 +- man/compute_month_to_days.Rd | 9 +- man/compute_quality_score.Rd | 32 +- man/define_wasting.Rd | 66 ---- man/flag_outliers.Rd | 47 --- man/outliers.Rd | 53 +++ man/plausibility_checkers.Rd | 116 ------ man/pretty_table.Rd | 23 +- man/process_age.Rd | 45 +-- man/process_muac_data.Rd | 86 ----- man/process_wfhz_data.Rd | 44 --- man/raters.Rd | 36 ++ man/recode_muac.Rd | 30 +- man/remove_flags.Rd | 21 -- man/scorer.Rd | 31 ++ man/wrangler.Rd | 107 ++++++ ...ity_checkers.R => test-quality_auditors.R} | 0 ...st-classifiers.R => test-quality_raters.R} | 0 ...est-data_processors.R => test-wranglers.R} | 0 44 files changed, 1255 insertions(+), 1677 deletions(-) delete mode 100644 R/data_processors.R rename R/{quality_checkers.R => quality_auditors.R} (72%) delete mode 100644 R/quality_classifiers.R create mode 100644 R/quality_raters.R create mode 100644 R/wranglers.R delete mode 100644 man/assign_penalty_points_age_sex_ratio.Rd delete mode 100644 man/assign_penalty_points_flags_and_sd.Rd delete mode 100644 man/assign_penalty_points_skew_kurt.Rd create mode 100644 man/auditor.Rd create mode 100644 man/case_definition.Rd delete mode 100644 man/case_definitions.Rd delete mode 100644 man/classify_percent_flagged.Rd delete mode 100644 man/classify_sd.Rd delete mode 100644 man/define_wasting.Rd delete mode 100644 man/flag_outliers.Rd create mode 100644 man/outliers.Rd delete mode 100644 man/plausibility_checkers.Rd delete mode 100644 man/process_muac_data.Rd delete mode 100644 man/process_wfhz_data.Rd create mode 100644 man/raters.Rd delete mode 100644 man/remove_flags.Rd create mode 100644 man/scorer.Rd create mode 100644 man/wrangler.Rd rename tests/testthat/{test-quality_checkers.R => test-quality_auditors.R} (100%) rename tests/testthat/{test-classifiers.R => test-quality_raters.R} (100%) rename tests/testthat/{test-data_processors.R => test-wranglers.R} (100%) diff --git a/NAMESPACE b/NAMESPACE index 45d8c00..02dcf27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,18 +1,11 @@ # Generated by roxygen2: do not edit by hand export(age_ratio_test) -export(assign_penalty_points_age_sex_ratio) -export(assign_penalty_points_flags_and_sd) -export(assign_penalty_points_skew_kurt) export(check_plausibility_mfaz) export(check_plausibility_muac) export(check_plausibility_wfhz) export(check_sample_size) -export(classify_age_sex_ratio) export(classify_overall_quality) -export(classify_percent_flagged) 
-export(classify_sd) -export(classify_skew_kurt) export(compute_combined_prevalence) export(compute_mfaz_prevalence) export(compute_muac_prevalence) diff --git a/R/age.R b/R/age.R index 24caefe..b1c72ef 100644 --- a/R/age.R +++ b/R/age.R @@ -1,27 +1,33 @@ #' -#' Recode age variable from months to days +#' Transform age in months to days #' -#' @param x A numeric vector containing values of age in months. +#' @param x A numeric vector containing age values in months. +#' +#' @returns A numeric vector, of the same length as the input variable, containing +#' age values in days. #' -#' @returns A numeric vector with values corresponding to age in days #' compute_month_to_days <- function(x) { x * (365.25 / 12) } #' -#' Get age in months from birth-date and the data when data was collected. +#' Calculate age in months +#' +#' @description +#' `compute_age_in_months()` calculates age in months from on the basis of +#' difference between the data collection date and the child's date of birth. +#' It works inside [dplyr::mutate()] or [base::transform()]. #' -#' `compute_age_in_months()` works inside [dplyr::mutate()] or [base::transform()] -#' It helps you to compute age in months from a pair of birth date and survey date. +#' @param surv_date A vector of class "Date" holding values corresponding to +#' the date of data collection. #' -#' @param surv_date,birth_date Vectors containing dates. `surv_date` refers to the day, -#' month and year when the data was collected; while `birth_date` refers to the date -#' when the child was born. +#' @param birth_date A vector of class "Date" holding values corresponding to +#' the child's date of birth. #' -#' @returns A vector of name `age` storing age in months, a mix of double and -#' integer and `NA` for missing value if any of the processed age in months is -#' < 6 or > 59.99 months. +#' @returns A numeric vector named `age` holding age values in months with two +#' decimal places. Any value outside the range of 6.0 to 59.99 is replaced with +#' `NA`. #' #' compute_age_in_months <- function (surv_date, birth_date) { @@ -32,33 +38,33 @@ compute_age_in_months <- function (surv_date, birth_date) { } #' -#' Transform age in months and age in days with a data frame +#' Process age #' -#' `process_age()` helps you get the variable age in the right format and ready -#' to be used for downstream workflow, i.e., get z-scores, as well as exclude -#' age values that are out-of-range. +#' @description +#' `process_age()` helps you to get the variable age in the format needed for +#' the analyses in the downstream workflow. Fundamentally, it calculates age in +#' months from on the basis of the difference between the data collection date +#' and the child's date of birth and then censors age values that are out of range. +#' +#' @param df Input data frame holding the required variables. #' -#' @param df The input data frame. +#' @param svdate A vector of class "Date" holding values corresponding to +#' the data collection date. Default is `NULL`. #' -#' @param svdate,birdate Vectors containing dates. `svdate` refers to the day, month -#' and year when the data was collected; while `birdate` refers to the date when the -#' child was born (birth-date). By default, both arguments are `NULL`. This is -#' makes `process_age()` work even in data sets where either survey date or birth- -#' data is not available, so the `process_age()` works on already given age variable. 
+#' @param birdate A vector of class "Date" holding values corresponding to +#' the child's date of birth. Default is `NULL`. #' -#' @param age A numeric vector containing already given age in months, usually an -#' integer in the input data as it is estimated using local event calendars. -#' `age` will typically be available on a particular row when `birth_date` of -#' that same row is missing. +#' @param age A numeric vector holding age values in months, usually estimated +#' using local event calendars. #' -#' @returns A data frame of the same length as the input data frame, but of a -#' different width. If `svdate` or `birdate` are available, two new vectors are added -#' to the data frame: `age` in months with two decimal places and `age_day` which -#' is age in days with decimal two decimal places. +#' @returns A data frame of the same length as the input with an additional +#' column. A new variable, `age_day`, is added to the output data frame whilst +#' the `age` variable gets filled where applicable, and then any values outside +#' the range of 6.0 to 59.99 months get replaced with `NA`. #' #' @examples #' -#' # Have a sample data ---- +#' ## A sample data ---- #' df <- data.frame( #' survy_date = as.Date(c( #' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), @@ -67,9 +73,13 @@ compute_age_in_months <- function (surv_date, birth_date) { #' age = c(NA, 36, NA, NA, NA) #' ) #' -#' ## Apply function ---- +#' ## Apply the function ---- #' df |> -#' process_age(svdate = "survy_date", birdate = "birthdate", age = age) +#' process_age( +#' svdate = "survy_date", +#' birdate = "birthdate", +#' age = age +#' ) #' #' @export #' @@ -96,41 +106,39 @@ process_age <- function(df, svdate = NULL, birdate = NULL, age) { } #' -#' Age ratio test on children aged 6:23 over 24:59 months +#' Test the proportion of children aged 24 to 59 months over 6 to 23 months old #' #' @description -#' As documented in [nipnTK::ageRatioTest()], age ratio test is an age-related -#' test of survey data quality. This includes other assessments as screenings, -#' sentinel sites, etc. Different to [nipnTK::ageRatioTest()], in `age_ratio_test()` -#' the ratio of children is calculate from children 6-23 months to the number of -#' children age 24-59 months. The ratio is then compared to the expected ratio -#' (set at 0.66). Then the difference between the observed ratio is compared to -#' the expected using a Chi-squared test. -#' -#' `age_ratio_test()` should only be used for MUAC checks. This particularly -#' useful as allows you to determine if downstream your analysis you should -#' consider adjusting your MUAC prevalence, should there be more younger children -#' than older children in your survey, screening or sentinel site data. If you -#' wish to get the age ratio for children 6-29/30-59 like in SMART Methodology, -#' then you should use [nipnTK::ageRatioTest()] NOT `age_ratio_test()`. -#' -#' @param age A vector storing values about child's age in months. -#' -#' @param .expectedP The expected proportion of children aged 24-59 months over -#' children aged 6-29 months, considered to be of 0.66 according to the -#' [SMART MUAC tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/). -#' -#' @returns A list three statistics: `p` for p-value, `observedR` for observed ratio -#' from your data, `observedP` for observed proportion of children 24-59 months -#' over the universe of your sample data. 
+#' Age ratio test of the proportion of children aged 24 to 59 months over those +#' aged 6 to 23 months old. +#' +#' @param age A numeric vector holding child's age in months. +#' +#' @param .expectedP The expected proportion of children aged 24 to 59 months +#' old over those aged 6 to 23 months old. As in the +#' [SMART MUAC tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/), +#' this is estimated at 0.66. +#' +#' @returns A vector of class "list" holding three statistics: `p` for p-value, +#' `observedR` for the observed ratio and `observedP` for the observed proportion +#' of children aged 24 to 59 months over those aged 6 to 24 months old. +#' +#' @details +#' `age_ratio_test()` should be used specifically for assessing MUAC data. For +#' age ratio tests of children ages 6 to 29 months and 30 to 59 months old, as +#' performed in the SMART plausibility checks, use [nipnTK::ageRatioTest()] instead. #' #' @examples #' -#' ## Have a sample data ---- -#' age <- seq(6,59) |> sample(300, replace = TRUE) +#' ## A sample data ---- +#' age <- seq(6,59) |> +#' sample(300, replace = TRUE) #' #' ## Apply the function ---- -#' age_ratio_test(age, .expectedP = 0.66) +#' age_ratio_test( +#' age = age, +#' .expectedP = 0.66 +#' ) #' #' @export #' diff --git a/R/case_definitions.R b/R/case_definitions.R index 0b719b1..4ef49e2 100644 --- a/R/case_definitions.R +++ b/R/case_definitions.R @@ -1,24 +1,32 @@ #' -#' Case-Definition: is an observation acutely malnourished? +#' Define if an observation is wasted on the basis of the criteria +#' of WFHZ, absolute MUAC values and combined case-definition #' -#' [define_wasting_cases_muac()], [define_wasting_cases_whz()] and -#' [define_wasting_cases_combined()] help you get through with your wasting -#' case-definition for each observation. It should be used inside dplyr::mutate() -#' or base::transform(). It was designed to be used inside [define_wasting()]. +#' @param df A data frame containing the required variables. #' -#' @param muac An integer vector containing MUAC measurements in mm. -#' @param zscore A double vector containing weight-for-height zscores with 3 -#' decimal places. -#' @param edema A character vector of "y" = Yes, "n" = No bilateral edema. -#' Default is NULL. -#' @param cases A choice of wasting case definition you wish to apply. For combined -#' acute malnutrition with [define_wasting_cases_combined()] cases options are: -#' c("cgam", "csam", "cmam"). +#' @param muac A numeric vector holding absolute MUAC values (in mm). #' -#' @returns A numeric vector of the same size as the input vector, with values ranging -#' between 1=Yes and 0=No. +#' @param zscore A numeric vector holding WFHZ values (with 3 decimal places). +#' +#' @param edema A character vector indicating if an observation has bilateral +#' edema or not. The codes are "y" for presence and "n" for absence of bilateral +#' edema. Default is `NULL`. +#' +#' @param cases A choice of the form of wasting to be defined. +#' +#' @param base A choice of the criterion which the case-definition should be based +#' on. +#' +#' @returns A numeric vector of the same length as the input vector, with dummy +#' values: 1 for yes wasted and 0 for not wasted. The meaning of the codes +#' changes depending on the form of wasting chosen. That is, if set `cases` to +#' `"sam"` the codes 1 would mean yes for severe wasting. +#' +#' @details +#' Use `define_wasting()` to add the case-definitions in your input data frame. 
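+#'
+#' @examples
+#' ## Illustrative sketch only (not run): the MUAC values in mm and the
+#' ## "y"/"n" edema codes below are made up ----
+#' \dontrun{
+#' define_wasting_cases_muac(
+#'   muac = c(112, 118, 130),
+#'   edema = c("n", "n", "y"),
+#'   cases = "sam"
+#' )
+#' }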
+#' +#' @rdname case_definition #' -#' @rdname case_definitions #' define_wasting_cases_muac <- function(muac, edema = NULL, cases = c("gam", "sam", "mam")) { @@ -46,7 +54,7 @@ define_wasting_cases_muac <- function(muac, edema = NULL, #' #' -#' @rdname case_definitions +#' @rdname case_definition #' #' define_wasting_cases_whz <- function(zscore, edema = NULL, @@ -75,7 +83,7 @@ define_wasting_cases_whz <- function(zscore, edema = NULL, #' #' -#' @rdname case_definitions +#' @rdname case_definition #' #' define_wasting_cases_combined <- function(zscore, muac, edema = NULL, @@ -104,24 +112,6 @@ define_wasting_cases_combined <- function(zscore, muac, edema = NULL, } -# Function to add new vectors with case definitions ---------------------------- -#' -#' Add acute malnutrition case-definitions to the data frame -#' -#' Use `define_wasting()` to add the case-definitions in your input data frame. -#' -#' @param df The data frame object containing the vectors with zscores, muac and -#' edema. -#' @param zscore The vector storing zscores values with 3 decimal places. -#' @param muac An integer vector containing MUAC measurements in mm. -#' @param edema A character vector of "y" = Yes, "n" = No bilateral edema. -#' Default is NULL. -#' @param base A choice of options to which your case definition should be based on. -#' -#' @returns A data frame with three vectors added to the input data frame: "gam", -#' "sam" and "mam". If base = "combined" the vector names change to "cgam", -#' "csam" and "cmam" for combined global, severe and moderate acute malnutrition -#' respectively. #' #' @examples #' # MUAC-based case-definition ---- @@ -152,6 +142,8 @@ define_wasting_cases_combined <- function(zscore, muac, edema = NULL, #' ) #' head(x) #' +#' @rdname case_definition +#' #' @export #' define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, @@ -231,23 +223,17 @@ define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, } #' -#' A helper function to classify nutritional status into SAM, MAM or not wasted -#' -#' @description -#' `classify_wasting_for_cdc_approach()` is used a helper inside -#' [apply_cdc_age_weighting()] to classify nutritional status into "sam", "mam" -#' or "not wasted" and then the vector returned is used downstream to calculate -#' the proportions of children with severe and moderate acute malnutrition. +#' Classify wasting into severe or moderate wasting for use in SMART MUAC tool +#' weighting approach #' -#' @param muac An integer vector containing MUAC values. They should be in -#' millimeters. +#' @param muac A numeric vector holding absolute MUAC values (in mm). #' -#' @param .edema Optional. Its a vector containing data on bilateral pitting -#' edema coded as "y" for yes and "n" for no. +#' @param .edema Optional. A character vector indicating if an observation has +#' bilateral edema or not. The codes are "y" for presence and "n" for absence of +#' bilateral edema. #' -#' @returns A numeric vector of the same size as the input vector with values ranging -#' between "sam", "mam" and "not wasted" for severe, moderate acute malnutrition and not -#' acutely malnourished, respectively. +#' @returns A character vector of the same length as the input indicating if a +#' child is severe or moderate wasted or not wasted. 
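+#'
+#' @examples
+#' ## Illustrative call only (not run, as this is a helper used inside
+#' ## [apply_cdc_age_weighting()]): the MUAC values in mm and the "y"/"n"
+#' ## edema codes are made up ----
+#' \dontrun{
+#' classify_wasting_for_cdc_approach(
+#'   muac = c(110, 119, 130),
+#'   .edema = c("n", "y", "n")
+#' )
+#' }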
#' #' classify_wasting_for_cdc_approach <- function(muac, .edema = NULL) { diff --git a/R/data_processors.R b/R/data_processors.R deleted file mode 100644 index a3a3b97..0000000 --- a/R/data_processors.R +++ /dev/null @@ -1,337 +0,0 @@ -#' -#' -#' Identify and flag outliers in WHZ, MFAZ, and crude MUAC datasets -#' -#' Outliers are extreme values that far away from the mean, that are unlikely to -#' be correct measurements. `flag_outliers()` helps you to identify any extreme -#' values in your dataset in two different ways. Outliers in WHZ are identified -#' based on the [SMART Methodology.](https://smartmethodology.org/). -#' MFAZ follows the same approach, while crude MUAC's approach is based on a -#' fixed range (<100mm and >200mm), based a multicountry research findings by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478) -#' -#' @param x A numeric value from the variable storing either WHZ or MFAZ or crude -#' MUAC's observations in the dataset, as applicable. -#' -#' @param type The method you wish `flag_outliers()` to identify flags on. -#' A choice between "zscore" and "crude". If you wish to get flags for WHZ or -#' MFAZ, set `method = "zscore"`. Alternatively, if your wish to get flags for -#' crude MUAC, set `method = "crude"`. The default is "zscore". If by mistake -#' a different option is supplied, an error will be thrown with a message -#' guiding you what to do. -#' -#' @return A vector of two values: 1 and 0, where 1 signifies flagged value and -#' 0 not flagged. -#' -#' @examples -#' -#' # Sample data of crude MUAC ---- -#' x <- c(90, 110, 140, 200, 119, 235) -#' -#' # Apply `flag_outliers()` with type set to "crude" ---- -#' flag_outliers(x, type = "crude") -#' -#' # Sample data of MFAZ ---- -#' x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828) -#' -#' # Apply `flag_outliers()` with type set to "zscore" ---- -#' flag_outliers(x, type = "zscore") -#' -#' @export -#' -flag_outliers <- function(x, type = c("zscore", "crude")) { - type <- match.arg(type) - - if (type == "zscore") { - mean_zscore <- mean(x, na.rm = TRUE) - flags <- ifelse((x < (mean_zscore - 3) | x > (mean_zscore + 3)), 1, 0) - flags <- ifelse(is.na(x), NA, flags) - flags - - } else { - flags <- ifelse(x < 100 | x > 200, 1, 0) - flags <- ifelse(is.na(x), NA, flags) - flags - } -} - - -#' -#' -#' Remove detected outliers -#' -#' @description -#' `remove_flags()` removes flags detected by [flag_outliers()]. It helps you -#' compute your statistics when flags needs to be removed, such as in standard -#' deviation. -#' -#' @param x A numeric vector containing zscore or crude MUAC values. -#' -#' @param unit A choice of the units to which you wish remove flags on. variable into. -#' -#' @returns A vector of same size, with flagged data replaced by `NA`s. -#' -remove_flags <- function(x, unit = c("zscore", "crude")) { - - ## Match arguments ---- - unit <- match.arg(unit) - - ## Control flow based on unit ---- - switch( - unit, - ### Remove flags when unit = "zscore" ---- - "zscore" = { - mean_x <- mean(x, na.rm = TRUE) - zs <- ifelse((x < (mean_x - 3) | x > (mean_x + 3)) | is.na(x), NA_real_, x) - }, - ### Remove flags when unit = "crude" ---- - "crude" = { - cr <- ifelse(x < 100 | x > 200 | is.na(x), NA_integer_, x) - } - ) -} - - -#' -#' -#' -#' Recode crude MUAC variable into either centimeters or millimeters -#' -#' @description -#' Sometimes, a vector containing MUAC values may be in centimeters or in -#' millimeters. 
You may want to get in the right format to use with -#' [zscorer::addWGSR] or [nipnTK::digitPreference()]. `recode_muac()` helps you -#' getting the vector in the right format for the job! It works inside works -#' inside [dplyr::mutate()] or [base::transform()]. -#' -#' @param muac A numeric vector storing values for MUAC that can be in centimeters -#' or in millimeters. -#' -#' @param unit A choice of the units to which you wish to convert your MUAC -#' variable into. -#' -#' @returns A transformed vector into the unit you wish to have. -#' -#' @examples -#' # Have an input data with muac in mm ---- -#' muac <- seq(90, 250, by = 4) -#' -#' # Apply recode ---- -#' recode_muac(muac, unit = "cm") -#' -#' # Have an input data with muac in mm ---- -#' muac <- seq(9.0, 25.0, by = 0.2) -#' -#' # Apply recode ---- -#' recode_muac(muac, unit = "mm") -#' -#' @export -#' -recode_muac <- function(muac, unit = c("cm", "mm")) { - - ## Check if unit's arguments match ---- - stopifnot(unit %in% c("cm", "mm")) - - ## Recode muac conditionally ---- - switch( - unit, - ### Recode to millimeters ---- - "mm" = {muac <- muac * 10}, - ### Recode to centimeters ---- - "cm" = {muac <- muac / 10}, - stop("Invalid 'units' argument. Please choose either 'cm' or 'mm'.") - ) -} - - -#' -#' -#' -#' Process MUAC data a get it ready for analyses -#' -#' @description -#' `process_muac_data()` gets your input data ready for downstream MUAC related -#' analysis. -#' -#' @param df The input data frame with variables sex, age and MUAC. -#' -#' @param sex A vector storing values about whether the child is a boy or a girl. -#' The variable name must be named sex, otherwise it will not work. -#' -#' @param muac A vector storing crude MUAC values. -#' -#' @param age A vector storing values about child's age in months. The variable -#' name must be named age, otherwise it will not work. For instance, if given as -#' following: age = months it will not work. -#' -#' @param .recode_sex Logical. It asks whether you should recode your sex variable -#' to the required shape to use in `process_muac_data()`. The default values for -#' sex are 1 for boys and 2 for girls. Setting `.recode_sex = TRUE` works on "m" -#' and "f" values. If your vector is in any different shape, you should put it in -#' "m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -#' SMART software, then you should leave `.recode_sex` at its default: `TRUE`. -#' -#' @param .recode_muac Logical. Choose between `TRUE` or `FALSE` if you wish or -#' not to recode the MUAC variable into the required format to work on. -#' -#' @param unit A choice of the units to which you wish to convert your MUAC -#' variable into. -#' -#' @returns A data frame of the same length as the input data, but with a -#' different width as explained:When `age` is available in the input data and -#' supplied, `process_muac_data` will return as output a data frame with two -#' new variables `mfaz` and `flags`. `mfaz` stores MUAC-for-age z-score (MFAZ) -#' values and `flags` tells you whether a given z-score is an outlier or not. -#' This job is done by [flag_outliers()]. If age is not available in the input -#' data, therefore not possible to supply in this function, `process_muac_data` -#' will only return `flags`. This will refer to flags based on crude MUAC. 
-#' -#' @examples -#' -#' ## Have a sample data ---- -#' -#' df <- data.frame( -#' survey_date = as.Date(c( -#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), -#' birthdate = as.Date(c( -#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), -#' age = c(NA, 36, NA, NA, NA), -#' sex = c("m", "f", "m", "m", "f"), -#' muac = c(110, 130, 300, 123, 125) -#' ) -#' -#' ## Apply function ---- -#' df |> -#' process_age( -#' svdate = "survey_date", -#' birdate = "birthdate", -#' age = age -#' ) |> -#' process_muac_data( -#' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" -#' ) -#' -#' @export -#' -process_muac_data <- function(df, - sex, muac, age = NULL, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = c("cm", "mm", "none")) { - unit <- match.arg(unit) - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - rec_muac <- quote( - if (.recode_muac && unit == "cm") { - muac <- recode_muac({{ muac }}, unit = "cm") - } else if (.recode_muac && unit == "mm") { - muac <- recode_muac({{ muac }}, unit = "mm") - } else { - {{ muac }} - } - ) - - if (!is.null({{ age }})) { - df <- df |> - mutate( - muac = !!rec_muac, - sex = !!recode_sex, - ) |> - addWGSR( - sex = "sex", - firstPart = "muac", - secondPart = "age_days", - index = "mfa", - digits = 3 - )|> - mutate( - flag_mfaz = do.call(flag_outliers, list(.data$mfaz, type = "zscore")) - ) - } else { - df <- df |> - mutate( - sex = !!recode_sex, - flag_muac = do.call(flag_outliers, list({{ muac }}, type = "crude")) - ) - } - tibble::as_tibble(df) -} - - -# Function to process Weight-for-height data ----------------------------------- - -#' -#' Process Weight-for-Height data get it ready for analyses -#' -#' `process_wfhz_data()` gets your input data ready for downstream WHZ related -#' analysis. -#' -#' @param df The input data frame with variables sex, age and MUAC. -#' -#' @param sex A vector storing values about whether the child is a boy or a girl. -#' -#' @param weight,height Vectors storing weight values in kilograms and height -#' values in centimeters, respectively. -#' -#' @param .recode_sex Logical. It asks whether you should recode your sex variable -#' to the required shape to use in `process_wfhz_data()`. The default values for -#' sex are 1 = boys and 2 = girls. Setting `.recode_sex = TRUE` works on "m" -#' and "f" values. If your vector is in any different shape, you should put it in -#' "m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -#' SMART software, then you should leave `.recode_sex` at its default: `TRUE`. -#' -#' @returns A data frame of the same length as the input data, but with a different -#' width: two new variables `wfhz` and `flags`. `wfhz` stores weight-for-height -#' z-score values with three decimal places. `flags` tells you whether a given -#' z-score is an outlier or not. This job is done by [flag_outliers()]. 
-#' -#' @examples -#' ## Have a sample data ---- -#' anthro.01 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) -#' -#' @export -#' -process_wfhz_data <- function(df, sex, weight, height, .recode_sex = TRUE) { - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - df <- df |> - mutate( - sex = !!recode_sex - ) |> - addWGSR( - sex = {{ "sex" }}, - firstPart = {{ "weight" }}, - secondPart = {{ "height" }}, - index = "wfh", - digits = 3 - ) |> - mutate( - flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) - ) - tibble::as_tibble(df) -} diff --git a/R/pretty_tables.R b/R/pretty_tables.R index 623c5ed..cfdbc6e 100644 --- a/R/pretty_tables.R +++ b/R/pretty_tables.R @@ -1,20 +1,20 @@ -#' Get a prettified formatted and presentable output table +#' Get a formatted and presentable output table for the plausibility auditors #' -#' You may want to share the plausibility report in a table. You usually care for -#' a well formatted and pretty table, with values rounded, scientific notations -#' converted into conventional notations, etc. `generate_pretty_table_mfaz()`, -#' `generate_pretty_table_wfhz()` and `generate_pretty_table_muac()` does that -#' for you so you already. +#' @description +#' `generate_pretty_table_mfaz()`, `generate_pretty_table_wfhz()` and +#' `generate_pretty_table_muac()` are useful to getting the output returned from +#' the plausibility auditors into a presentable format. They convert scientific +#' notation, round values and rename columns to meaningful names. #' -#' @param df An output data frame returned by [check_plausibility_mfaz()], +#' @param df The table returned by [check_plausibility_mfaz()], #' [check_plausibility_wfhz()] or [check_plausibility_muac()]. #' -#' @returns An output data frame of the same size as the input, but with values -#' formatted, columns renamed, and ready to share. +#' @returns An output table of the same size as the input, with values +#' formatted, columns renamed, and ready to be shared. #' #' @examples #' -#' ## Plausibility check on MFAZ ---- +#' ## Audit the plausibility of MFAZ data ---- #' #' anthro.01 |> #' process_age( @@ -39,7 +39,7 @@ #' ) |> #' generate_pretty_table_mfaz() #' -#' ## Plausibility check on absolute MUAC ---- +#' ## Audit the plausibility of absolute MUAC values ---- #' #' anthro.01 |> #' process_muac_data( @@ -57,7 +57,7 @@ #' ) |> #' generate_pretty_table_muac() #' -#' ## Plausibility check on WFHZ ---- +#' ## Audit the plausibility of WFHZ data ---- #' #' anthro.01 |> #' process_wfhz_data( diff --git a/R/quality_checkers.R b/R/quality_auditors.R similarity index 72% rename from R/quality_checkers.R rename to R/quality_auditors.R index eb1a463..bc1f696 100644 --- a/R/quality_checkers.R +++ b/R/quality_auditors.R @@ -1,43 +1,37 @@ #' -#' Plausibility checkers: MUAC-for-age z-scores, Weight-for-Height z-scores and -#' MUAC +#' Audit the plausibility of WFHZ, MFAZ data and absolute MUAC values #' #' @description -#' `check_plausibility_mfaz()`, `check_plausibility_wfhz()` and -#' `check_plausibility_muac()` lets you know the quality of your data, based on -#' the statistics around MUAC-for-age zscores, weight-for-height z-scores and on -#' crude MUAC, respectively. Note that `check_plausibility_wfhz()` is all about -#' WHZ only. 
If you wish to know about MUAC checks consider using either -#' `check_plausibility_mfaz()` or `check_plausibility_muac()` -#' -#' @param df A data frame object returned by [process_muac_data()] for -#' `check_plausibility_mfaz()` and `check_plausibility_muac()` and returned by -#' [process_wfhz_data()] for `check_plausibility_wfhz()`. +#' `check_plausibility_wfhz()`, `check_plausibility_mfaz()`, and +#' `check_plausibility_muac()` examines the plausibility of data through a +#' structured set of tests around sampling and measurement-related errors. #' -#' @param sex A vector telling whether a given child is a boy or girl. +#' @param df A data frame yielded from [process_muac_data()] for +#' `check_plausibility_mfaz()` and `check_plausibility_muac()`, and yielded from +#' [process_wfhz_data()] for `check_plausibility_wfhz()`. #' -#' @param age A vector containing children's age in months. +#' @param sex A vector holding codes on child's sex: 1 for boy and 2 for girl. #' -#' @param muac A vector containing MUAC measurements. +#' @param age A numeric vector holding age in months. #' -#' @param weight A vector containing weight measurements in kilograms. +#' @param muac A numeric vector holding MUAC measurements (in centimeters). #' -#' @param height A vector containing height measurements in centimeters. +#' @param weight A numeric vector holding weight measurements (in kilograms). #' -#' @param flags A character vector telling whether or not an observation is an -#' outlier. +#' @param height A numeric vector holding height measurements (in centimeters). #' -#' @param area A vector with values on where was the data collected. If you are -#' analyzing a data set with just one area, provide it anyway to -#' `check_plausibility_mfaz()` or `check_plausibility_wfhz()` +#' @param flags A character vector holding on values on flagged observations. #' -#' @returns A summarized data frame containing quality checks statistics and -#' respective classification. +#' @param area A character vector holding values on where was the data collected +#' and for which you want the analysis to be performed. If analysing data of just +#' one area, you will still have to supply the corresponding column to `area` in +#' `check_plausibility_mfaz()` or `check_plausibility_wfhz()`. #' +#' @returns A summarized table with the raw statistics and respective classification. 
#' #' @examples #' -#' ## Check Plausibility: MFAZ ---- +#' ## Audit the plausibility of MFAZ data ---- #' #' anthro.01 |> #' process_age( @@ -61,7 +55,7 @@ #' area = area #' ) #' -#' ## Check Plausibility: WFHZ ---- +#' ## Audit the plausibility of WFHZ ---- #' #' anthro.01 |> #' process_age( @@ -84,7 +78,7 @@ #' area = area #' ) #' -#' ## Check Plausibility: MUAC ---- +#' ## Audit the plausibility of the absolute MUAC values ---- #' #' anthro.01 |> #' process_muac_data( @@ -101,7 +95,7 @@ #' muac = muac #' ) #' -#' @rdname plausibility_checkers +#' @rdname auditor #' #' @export #' @@ -114,7 +108,7 @@ check_plausibility_mfaz <- function(df, sex, muac, age, flags, area) { n = n(), flagged = sum({{ flags }}, na.rm = TRUE) / n(), flagged_class = classify_percent_flagged(.data$flagged, type = "mfaz"), - sex_ratio = sexRatioTest({{ sex }}, code = c(1, 2))$p, + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), age_ratio = age_ratio_test({{ age }}, .expectedP = 0.66)$p, age_ratio_class = classify_age_sex_ratio(.data$age_ratio), @@ -148,7 +142,7 @@ check_plausibility_mfaz <- function(df, sex, muac, age, flags, area) { #' #' -#' @rdname plausibility_checkers +#' @rdname auditor #' #' @export #' @@ -162,7 +156,7 @@ check_plausibility_wfhz <- function(df, sex, age, weight, height, flags, area) { n = n(), flagged = sum({{ flags }}, na.rm = TRUE) / n(), flagged_class = classify_percent_flagged(.data$flagged, type = "whz"), - sex_ratio = sexRatioTest({{ sex }}, code = c(1, 2))$p, + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), age_ratio = ageRatioTest({{ age }}, ratio = 0.85)$p, age_ratio_class = classify_age_sex_ratio(.data$age_ratio), @@ -198,7 +192,7 @@ check_plausibility_wfhz <- function(df, sex, age, weight, height, flags, area) { #' -#' @rdname plausibility_checkers +#' @rdname auditor #' #' @export #' @@ -210,7 +204,7 @@ check_plausibility_muac <- function(df, flags, sex, muac) { n = n(), flagged = sum({{ flags }}, na.rm = TRUE) / n(), flagged_class = classify_percent_flagged(.data$flagged, type = "crude"), - sex_ratio = sexRatioTest({{ sex }}, code = c(1, 2))[["p"]], + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))[["p"]], sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), dps = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dps"]], dps_class = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dpsClass"]], diff --git a/R/quality_classifiers.R b/R/quality_classifiers.R deleted file mode 100644 index 2698dc6..0000000 --- a/R/quality_classifiers.R +++ /dev/null @@ -1,246 +0,0 @@ -#' -#' Classify how much high is the proportion of flagged data -#' -#' @description -#' `classify_percent_flagged()` tells you how much high is the proportion of -#' of flagged data in your data set, an indication of quality of data. Its a -#' reusable function for MFAZ, WHZ and crude MUAC. The cut-offs for MFAZ and -#' crude MUAC are the same with the upper limit of 2%. This is based on the -#' research findings by [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478), -#' from a multi-country analysis, found that the correlation between the mean -#' MFAZ and crude MUAC was almost perfect (r=99). As for WHZ, the cut-offs are -#' exactly those in the [SMART Methodology](https://smartmethodology.org/). 
-#' -#' @param p A numeric vector containing the proportions of flagged data -#' -#' @param type The method to which you wish to classify how much high are the -#' proportions of flagged data. A choice between "mfaz" for MFAZ, "whz" for WHZ -#' and "crude" for crude MUAC. -#' -#' @returns A character vector with the correspondent classification of the -#' amount of flagged data. The categories of classification ranges are: -#' "Excellent", "Good", "Acceptable", "Problematic". -#' -#' @examples -#' -#' ## Take a vector with the proportions of flagged data ---- -#' prop <- c(0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, -#' 0.03, 0.03, 0.04, 0.000001, 0) -#' -#' ## Apply the function setting type to "whz" for instance ---- -#' classify_percent_flagged(prop, type = "whz") -#' -#' @export -#' -classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { - - type <- match.arg(type) - - if (type == "mfaz" || type == "crude") { - - ## classify percent of outliers in MFAZ ---- - x <- cut( - x = p, - breaks = c(0, 0.01, 0.015, 0.02, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - } - - if (type == "whz") { - - ## classify percent of outliers in WHZ ---- - x <- cut( - x = p, - breaks = c(0, 0.025, 0.05, 0.075, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - } - x -} - - -#' -#' Classify how much high is the difference in age ration and in sex ratio -#' -#' -#' @description -#' `classify_age_sex_ratio()` works on the results yielded by [nipnTK::ageRatioTest()]. -#' It helps you know how much high is the statistical difference between children -#' age 6-29 months of those age 30-59 months. Likewise, with regard to sex, -#' function works on the results yielded by [nipnTK::sexRatioTest()] to know -#' how much high is the difference between boy and girls in your sample data. -#' -#' @param p A numeric vector containing the test p-values. -#' -#' @returns A character vector with the correspondent classification. -#' -#' @examples -#' -#' ## Have a numeric vector storing p-values ---- -#' pvalues <- c(0, 0, 0.01, 0.011, 0.2, 0.015, 0.016, 0.017, -#' 0.05, 0.06,0.03, 0.03, 0.04, 0.000001, 0.07 -#' ) -#' -#' ## Apply the function ---- -#' classify_age_sex_ratio(pvalues) -#' -#' @export -#' -classify_age_sex_ratio <- function(p) { - case_when( - p > 0.1 ~ "Excellent", - p > 0.05 ~ "Good", - p > 0.001 ~ "Acceptable", - TRUE ~ "Problematic" - ) -} - - -#' -#' Classify how much high is the value of standard deviation -#' -#' -#' @description -#' `classify_sd()` helps you to know the magnitude of the data's standard -#' deviation. You can use this function for either WHZ, MFAZ or crude MUAC. -#' Cut-offs for WHZ are based on the [SMART Methodology](https://smartmethodology.org/). -#' Cut-offs for MFAZ are also based on SMART, but informed by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). -#' For crude MUAC, the cut-offs are based on the -#' [IPC AMN guidelines](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/) -#' -#' @param sd A numeric vector containing values for standard deviation of the -#' method you wish the work on. -#' -#' @param type The method to which you wish to classify how much high is the -#' value of standard deviation. A choice between "zscore" MFAZ or WHZ and -#' "crude" for crude MUAC. -#' -#' @returns A character vector with the correspondent classification. 
-#' -#' @examples -#' -#' ## Have a vector with standard deviation ---- -#' sdvalues <- seq(0.7, 1.3, by = 0.001) |> -#' sample(size = 9, replace = TRUE) -#' -#' ## Apply the function with `type = "zscore` ---- -#' classify_sd(sdvalues, type = "zscore") -#' -#' ## Using `type = "crude"` ---- -#' ### Create sample data ---- -#' sdvalues <- seq(9, 30, by = 2) |> -#' sample(size = 20, replace = TRUE) -#' -#' ### Apply the function with `type = "crude"` ---- -#' classify_sd(sdvalues, type = "crude") -#' -#' @export -#' -classify_sd <- function(sd, type = c("zscore", "crude")) { - - type <- match.arg(type) - - if (type == "zscore") { - - ## Classify WHZ and MFAZ-based standard deviation ---- - x <- case_when( - sd > 0.9 & sd < 1.1 ~ "Excellent", - sd > 0.85 & sd < 1.15 ~ "Good", - sd > 0.8 & sd < 1.20 ~ "Acceptable", - TRUE ~ "Problematic" - ) - } - - if (type == "crude") { - - ## Classify crude MUAC-based standard deviation ---- - x <- cut( - x = sd, - breaks = c(-Inf, 13, 14, 15, Inf), - labels = c("Excellent", "Acceptable", "Poor", "Problematic"), - include.lowest = FALSE, - right = FALSE - ) - } - x -} - - -#' -#' Classify how much high is the value of Skewness and Kurtosis -#' -#' -#' @description -#' `classify_skew_kurt()` helps you to know the magnitude of the Skewness and -#' Kurtosis from your data. This is only useful for WHZ and MFAZ. The function -#' works on the results yielded by [nipnTK::skewKurt()]. -#' Cut-offs for WHZ are based on the [SMART Methodology](https://smartmethodology.org/). -#' -#' @param sk A numeric vector containing values of either Skewness or Kurtosis. -#' -#' @returns A character vector with the correspondent classification. -#' -#' @examples -#' -#' #Have a numeric vector storing values for skewness or kurtosis ---- -#' sk <- seq(-5, 1, by = 0.05) |> sample(size = 20, replace = TRUE) -#' -#' # Apply function -#' classify_skew_kurt(sk) -#' -#' @export -#' -classify_skew_kurt <- function(sk) { - cut( - x = sk, - breaks = c(-Inf, 0.2, 0.4, 0.6, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = FALSE, - right = FALSE - ) -} - -#' -#' Get the overall data quality classification -#' -#' -#' @description -#' `classify_overall_quality()` helps you in knowing the overall status of your -#' data quality. It classifies the overall score generated by -#' [compute_quality_score()] into four categories, as it is done in the -#' [SMART Methodology](https://smartmethodology.org/), -#' namely: "Excellent", "Good", "Acceptable" and "Problematic". Beware that -#' the overall classification should be used as an indication to further -#' scrutinize of data before taking the decision to validate or invalidate the -#' results. -#' -#' @param df A data frame containing a vector with the quality scores generated by -#' [compute_quality_score()]. -#' -#' @returns A character vector of the same length, but a different width as the -#' input `df` is returned with a new column called `quality_class`. 
-#' -#' -#' @export -#' -classify_overall_quality <- function(df) { - - qclass <- with( - df, - data.frame( - quality_class <- cut( - x = quality_score, - breaks = c(0, 9, 14, 24, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - ) - ) - qclass$quality_class -} diff --git a/R/quality_raters.R b/R/quality_raters.R new file mode 100644 index 0000000..688e1ac --- /dev/null +++ b/R/quality_raters.R @@ -0,0 +1,173 @@ +#' +#' Rate the proportion of flagged values in the data and the magnitude of the +#' standard deviation +#' +#' @description +#' `classify_percent_flagged()` rates how much high is the proportion of +#' of flagged data in your data set, as well as the magnitude of the standard +#' deviation. It applies for the WFHZ, the MFAZ and absolute MUAC values. +#' +#' @param p A numeric vector containing the proportions of flagged values +#' +#' @param sd A numeric vector containing values for standard deviation. +#' +#' @param type The indicator to be used for the rating. A choice between "mfaz" +#' for MFAZ, "whz" for WFHZ and "crude" for crude MUAC. +#' +#' @returns A character vector with the rating results. +#' +#' @details +#' The rating categories are: "Excellent", "Good", "Acceptable", "Problematic". +#' The cut-offs of the WFHZ are as in the [ +#' SMART Methodology](https://smartmethodology.org/). As for the MFAZ and the +#' absolute MUAC values, the maximum acceptable limit is at 2%, as recommended +#' by [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). +#' Cut-offs for crude MUAC are based on the +#' [IPC AMN guidelines](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). +#' +#' +#' @rdname raters +#' +classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { + + type <- match.arg(type) + + if (type == "mfaz" || type == "crude") { + + ## classify percent of outliers in MFAZ ---- + x <- cut( + x = p, + breaks = c(0, 0.01, 0.015, 0.02, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE + ) + } + + if (type == "whz") { + + ## classify percent of outliers in WHZ ---- + x <- cut( + x = p, + breaks = c(0, 0.025, 0.05, 0.075, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE + ) + } + x +} + +#' +#' +#' @rdname raters +#' +classify_sd <- function(sd, type = c("zscore", "crude")) { + + type <- match.arg(type) + + if (type == "zscore") { + + ## Classify WHZ and MFAZ-based standard deviation ---- + x <- case_when( + sd > 0.9 & sd < 1.1 ~ "Excellent", + sd > 0.85 & sd < 1.15 ~ "Good", + sd > 0.8 & sd < 1.20 ~ "Acceptable", + TRUE ~ "Problematic" + ) + } + + if (type == "crude") { + + ## Classify crude MUAC-based standard deviation ---- + x <- cut( + x = sd, + breaks = c(-Inf, 13, 14, 15, Inf), + labels = c("Excellent", "Acceptable", "Poor", "Problematic"), + include.lowest = FALSE, + right = FALSE + ) + } + x +} + + +#' +#' Rate the p-values of the age and sex ratio test +#' +#' @param p A numeric vector containing the test p-values. +#' +#' @returns A character vector with the rating results. +#' +#' +classify_age_sex_ratio <- function(p) { + case_when( + p > 0.1 ~ "Excellent", + p > 0.05 ~ "Good", + p > 0.001 ~ "Acceptable", + TRUE ~ "Problematic" + ) +} + + +#' +#' Rate the magnitude of skewness and kurtosis test results +#' +#' @param sk A numeric vector containing values of either skewness or kurtosis. 
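+#'
+#' @examples
+#' ## An illustrative call (not run): the skewness/kurtosis values are made up ----
+#' \dontrun{
+#' classify_skew_kurt(sk = c(0.1, 0.25, 0.45, 0.7))
+#' }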
+#' +#' @returns A character vector with the rating results. +#' +#' +classify_skew_kurt <- function(sk) { + cut( + x = sk, + breaks = c(-Inf, 0.2, 0.4, 0.6, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = FALSE, + right = FALSE + ) +} + +#' +#' +#' Rate the overall data quality +#' +#' @description +#' `classify_overall_quality()` informs you about the overall quality of the data +#' by rating the overall quality score in "Excellent", "Good", "Acceptable" and +#' "Problematic". +#' +#' @param df A data frame containing a vector with the quality scores yielded +#' from [compute_quality_score()]. +#' +#' @returns A character vector of the same length with a new column called +#' `quality_class`. +#' +#' @examples +#' ## A sample data ---- +#' +#' df <- data.frame( +#' quality_score = 29 +#' ) +#' +#' ## Apply the function ---- +#' classify_overall_quality(df) +#' +#' @export +#' +classify_overall_quality <- function(df) { + + qclass <- with( + df, + data.frame( + quality_class <- cut( + x = quality_score, + breaks = c(0, 9, 14, 24, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE + ) + ) + ) + qclass$quality_class +} diff --git a/R/quality_scorers.R b/R/quality_scorers.R index 35f2f70..1314629 100644 --- a/R/quality_scorers.R +++ b/R/quality_scorers.R @@ -1,28 +1,22 @@ #' -#' Assign a penalty point for the amount of proportion flagged data and standard deviation +#' Score the rating of proportion of flagged data, the magnitude of the standard +#' deviation, skewness, kurtosis and the p-values sex and age ratio test #' #' @description -#' The function assigns a penalty score for a given category of test classification. -#' The score range varies between 0 (when "Excellent") to 20 (when "Problematic") for -#' both flagged data and standard deviation. This was borrowed from the -#' [ENA for SMART software](https://smartmethodology.org/) -#' In the SMART Methodology, flagged data and standard deviation are tho test -#' criteria that gets the highest penalty scores, so it is here. +#' `assign_penalty_points_flags_and_sd()` ranks the proportion of the flagged +#' values in the data and the magnitude of standard deviation based on the SMART +#' scoring criteria. #' -#' @param x A character vector containing the test classifications of proportion -#' of flagged data and the value of standard deviation. +#' @param x A character vector holding the test classifications for the proportion +#' of flagged data, the magnitude of the standard deviation, the p-values of the +#' age and sex ratio tests, as well as the results of skewness and kurtosis tests. #' -#' @returns A numeric vector with the corresponding penalty points (scores) according -#' to the classification. +#' @returns A numeric vector with the corresponding score. #' -#' @examples -#' -#' ## Sample data ---- -#' x <- c("Excellent", "Problematic", "Acceptable", "Good") -#' ## Apply the function ---- -#' assign_penalty_points_flags_and_sd(x) +#' @details +#' The ranking is as in [SMART Plausibility checks](https://smartmethodology.org/). #' -#' @export +#' @rdname scorer #' assign_penalty_points_flags_and_sd <- function(x) { case_when( @@ -33,29 +27,10 @@ assign_penalty_points_flags_and_sd <- function(x) { ) } + #' -#' Assign a penalty point for the amount of selection biases in age and sex ratios -#' -#' @description -#' The function assigns a penalty score for a age and sex ratio's test classification. 
-#' The score range varies between 0 (when "Excellent") to 10 (when "Problematic") for -#' both, according to the [ENA for SMART software](https://smartmethodology.org/). -#' -#' @param x A numeric vector containing p-values from either age or sex ratio -#' test results. -#' -#' @returns A numeric vector with the corresponding penalty points (scores) according -#' to the classification. -#' -#' @examples -#' -#' ## A vector storing age ratio or sex ratio p-values' classification ---- -#' x <- c("Excellent", "Problematic", "Acceptable", "Good") -#' -#' ## Apply the function ---- -#' assign_penalty_points_age_sex_ratio(x) #' -#' @export +#' @rdname scorer #' assign_penalty_points_age_sex_ratio <- function(x) { case_when( @@ -68,28 +43,7 @@ assign_penalty_points_age_sex_ratio <- function(x) { #' #' -#' Assign a penalty point for the amount of issues in Skweness and Kurtosis -#' -#' @description -#' The function assigns a penalty score for a Skewness and Kurtosis test classification. -#' The score range varies between 0 (when "Excellent") to 5 (when "Problematic") for -#' both, according to the [ENA for SMART software](https://smartmethodology.org/). -#' -#' @param x A numeric vector containing Skewness or Kurtosis test results classification. -#' -#' @returns A numeric vector with the corresponding penalty points (scores) according -#' to the classification. -#' -#' @examples -#' -#' ## A vector storing Skewness or Kurtosis test classification ---- -#' -#' x <- c("Excellent", "Problematic", "Acceptable", "Good") -#' -#' ## Apply the function ---- -#' assign_penalty_points_skew_kurt(x) -#' -#' @export +#' @rdname scorer #' assign_penalty_points_skew_kurt <- function(x) { case_when( @@ -101,33 +55,24 @@ assign_penalty_points_skew_kurt <- function(x) { } #' -#' Get the overall WHZ or MFAZ's quality score #' +#' Get the overall quality score for WFHZ and MFAZ #' #' @description -#' `compute_quality_score()` provides the overall quality score of either WHZ or MFAZ, -#' by adding up the scores across each test criteria. This is an input to -#' [classify_overall_quality()]. -#' -#' @param df A data frame containing the scores. If you wish the get the overall -#' quality score for MFAZ, the input data frame must have seven (7) required -#' columns containing test classification of flagged data, sex ratio, age ratio, -#' standard deviation, skewness, kurtosis, crude MUAC's digit preference. -#' Alternatively, if you wish to get the quality score of WHZ, then the input -#' data frame must have the exact same columns in the plausibility report of the -#' ENA for SMART software. +#' `compute_quality_score()` calculates the overall score of the quality of the +#' data for both WFHZ and MFAZ. +#' +#' @param df A data frame containing individual test quality scores. #' #' @param type The method you wish to get the overall quality score for. -#' A choice between "mfaz" and "whz". If you wish to know the overall survey -#' score of your WHZ data, set `type = whz`, otherwise set `type = mfaz` for -#' MFAZ. If by mistake a different input choice is given, an error will be -#' thrown with a message guiding how to go about. +#' A choice between "mfaz" and "wfhz". +#' +#' @returns A vector named `"quality_score"` with the overall quality score. #' -#' @returns A vector (named `"quality_score"`) with the overall quality scores. 
#' #' @examples -#' # example code -#' ## Create a `df` object ---- +#' +#' ## A sample data ---- #' #' df <- data.frame( #' flagged_class = "Excellent", @@ -139,13 +84,9 @@ assign_penalty_points_skew_kurt <- function(x) { #' kurt_class = "Acceptable" #' ) #' -#' ## Apply function ---- +#' ## Apply the function ---- #' compute_quality_score(df, type = "mfaz") #' -#' # You can also choose to chain the functions with a pipe operator ---- -#' df |> -#' compute_quality_score(type = "mfaz") -#' #' @export #' compute_quality_score <- function(df, type = c("mfaz", "whz")) { diff --git a/R/sample_size.R b/R/sample_size.R index 85267fd..18b206c 100644 --- a/R/sample_size.R +++ b/R/sample_size.R @@ -1,50 +1,29 @@ #' -#' Check IPC AMN Sample Size Requirements +#' Check if the IPC AMN sample size requirement were met #' #' @description -#' Evidence used in [IPC](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/) -#' comes from different sources, collected in different ways, -#' namely: representative surveys, screenings or even data from community-based -#' surveillance system - the sentinel sites. IPC AMN protocols have set minimum -#' sampling a sample size requirements for each. For cluster-based -#' representative surveys, there must be at least 25 primary sampling unit (PSUs). -#' On screening, there ware two ways: i. exhaustive screening (door-to-door) or -#' ii. sampled screening. For this, there should be at least three sites (i.e., -#' villages or communities, etc). `check_sample_size()` checks the -#' on sampled screening. -#' -#' `check_sample_size()` helps you know if your data meets the at least -#' IPC AMN minimum requirements. This function should be used before proceeding -#' to checking the quality of measurements. Doing this saves you from avoid -#' working on data that do not meet the minimum requirements, as it will not be -#' used in any IPC analysis. +#' `check_sample_size()` verifies if the minimum sample size requirements of the +#' IPC Acute Malnutrition protocols are met in a given area of analysis. #' #' @param df A data frame containing the required variables. #' -#' @param .group A vector containing the ID's of the primary sampling unit. -#' Usually and ideally a numeric vector, but sometimes this variables may come as -#' a character vector. Either way, `check_sample_size()` will execute -#' the task accordingly. +#' @param .group A vector containing the primary sampling unit (PSU) ID's. Usually and +#' ideally a numeric vector, but sometimes this may present itself as a character. +#' Either way, `check_sample_size()` will work accordingly. +#' +#' @param .data_type A choice between "survey" for survey data, "screening" for +#' screening data or "ssite" for community-based sentinel site data. #' -#' @param .data_type The data collection method: survey, screening or sentinel sites. -#' If you wish to check IPC AMN requirements on surveys were met, set -#' method = "survey"; for screening set method = "screening" and for sentinel -#' sites set method = "ssite". If by mistake a different parameter is given, -#' an error will be thrown and the function will stop, but with a guidance on -#' how to go about. +#' @returns By default, a summary table of one row and three additional columns +#' are returned. Column `groups` and `n_obs` hold the total number of unique +#' PSU's and children respectively, and `meet_ipc` tells whether the IPC AMN +#' sample size requirements were met. 
#' -#' @returns `check_sample_size()` returns an output of the same type -#' as the input (data frame), but of a different size. By default, the function -#' returns a summary of length 1 (one row), but with three new columns added to -#' the input data frame: `groups` (for survey), or sites (for screening or sentinel -#' sites) `n_obs` and `meet_ipc`. The first will store the total number of PSUs -#' in the sample. `n_obs` will store the total number of rows/observations and -#' `meet_ipc` is a logical vector to say whether or not the IPC AMN minimum -#' criteria for sample size was met. This is flexible according to the method you -#' select with `.data_type = " "`. +#' @details +#' Use dplyr::group_by() before `check_sample_size()` to get a summary for each +#' unique survey or screening location from your data. #' #' @examples -#' # Have an input data frame -------------------------------------------------- #' check_sample_size(anthro.01, .group = cluster, .data_type = "survey") #' #' @export diff --git a/R/wranglers.R b/R/wranglers.R new file mode 100644 index 0000000..9b4d049 --- /dev/null +++ b/R/wranglers.R @@ -0,0 +1,317 @@ +#' +#' +#' Identify and flag outliers +#' +#' @description +#' Outliers are extreme values that deviate remarkably from the mean, making +#' them unlikely to be accurate measurements. `flag_outliers()` helps you to +#' identify them whether in the WFHZ, the MFAZ or the absolute MUAC values. +#' +#' @param x A numeric vector holding either the WFHZ, the MFAZ values, or the +#' absolute MUAC values (in millimeters). +#' +#' @param type The method you wish `flag_outliers()` to identify flag outliers +#' in the data. A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for +#' absolute MUAC values). +#' +#' @param unit A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for +#' absolute MUAC values). +#' +#' @return A vector of the same length as input holding dummy values: 1 for is +#' a flag and 0 is not a flag. +#' +#' @details +#' The flagging criteria for the WFHZ is as in +#' [SMART plausibility check](https://smartmethodology.org/). As for the MFAZ, it +#' uses the same criteria as WFHZ, whilst a fixed flagging criteria is used for +#' absolute MUAC values. This is as recommended by +#' [Bilukha, O., & Kianian, B. 
(2023).](https://doi.org/10.1111/mcn.13478) +#' +#' +#' @examples +#' +#' ## Sample data for absolute MUAC values ---- +#' x <- c(90, 110, 140, 200, 119, 235) +#' +#' ## Apply `flag_outliers()` with type set to "crude" ---- +#' flag_outliers(x, type = "crude") +#' +#' ## Sample data for MFAZ or for WFHZ values ---- +#' x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828) +#' +#' # Apply `flag_outliers()` with type set to "zscore" ---- +#' flag_outliers(x, type = "zscore") +#' +#' @rdname outliers +#' @export +#' +flag_outliers <- function(x, type = c("zscore", "crude")) { + type <- match.arg(type) + + if (type == "zscore") { + mean_zscore <- mean(x, na.rm = TRUE) + flags <- ifelse((x < (mean_zscore - 3) | x > (mean_zscore + 3)), 1, 0) + flags <- ifelse(is.na(x), NA, flags) + flags + + } else { + flags <- ifelse(x < 100 | x > 200, 1, 0) + flags <- ifelse(is.na(x), NA, flags) + flags + } +} + + +#' +#' +#' Remove outliers +#' +#' @rdname outliers +#' +remove_flags <- function(x, unit = c("zscore", "crude")) { + + ## Match arguments ---- + unit <- match.arg(unit) + + ## Control flow based on unit ---- + switch( + unit, + ### Remove flags when unit = "zscore" ---- + "zscore" = { + mean_x <- mean(x, na.rm = TRUE) + zs <- ifelse((x < (mean_x - 3) | x > (mean_x + 3)) | is.na(x), NA_real_, x) + }, + ### Remove flags when unit = "crude" ---- + "crude" = { + cr <- ifelse(x < 100 | x > 200 | is.na(x), NA_integer_, x) + } + ) +} + + +#' +#' +#' +#' Convert MUAC values to either centimeters or millimeters +#' +#' @description +#' Recode the MUAC values into either centimeters or millimeters as required. +#' `recode_muac()` works inside [dplyr::mutate()] or [base::transform()]. +#' +#' @param muac A numeric vector holding the absolute MUAC values. +#' +#' @param unit A choice of the unit to which you wish to convert the MUAC +#' values into. +#' +#' @returns A numeric vector of the same length as input, with values converted +#' into your chosen unit. +#' +#' @examples +#' +#' ## A sample of MUAC data in millimeters ---- +#' muac <- seq(90, 250, by = 4) +#' +#' ## Apply the function ---- +#' recode_muac(muac, unit = "cm") +#' +#' ## A sample of MUAC data in centimeters ---- +#' muac <- seq(9.0, 25.0, by = 0.2) +#' +#' # Apply the function ---- +#' recode_muac(muac, unit = "mm") +#' +#' @export +#' +recode_muac <- function(muac, unit = c("cm", "mm")) { + + ## Check if unit's arguments match ---- + stopifnot(unit %in% c("cm", "mm")) + + ## Recode muac conditionally ---- + switch( + unit, + ### Recode to millimeters ---- + "mm" = {muac <- muac * 10}, + ### Recode to centimeters ---- + "cm" = {muac <- muac / 10}, + stop("Invalid 'units' argument. Please choose either 'cm' or 'mm'.") + ) +} + + +#' +#' +#' Process and censor weight-for-height and MUAC data +#' +#' @description +#' This is the job of `process_wfhz_data` and `process_muac_data()`. They are +#' responsible for computing the weight-for-height and the muac-for-age z-scores +#' respectively, and censor the data by flagging outliers based on the SMART flags. +#' For the latter, if age is not supplied, the function censors the absolute MUAC +#' values. +#' +#' @param df The input data frame with the required variables. +#' +#' @param sex A numeric or character vector of child's sex. Code values should +#' either be 1 or "m" for boy and 2 or "f" for girl. The variable name must be +#' sex, otherwise it will not work. +#' +#' @param .recode_sex Logical. It asks whether sex should be recoded. 
In the end, +#' the variable sex have values coded as 1 for boy and 2 for girl. Setting +#' `.recode_sex = TRUE` works over "m" and "f" values. If your vector is coded +#' differently, make sure to put it in "m" and "f" or in 1 or 2 right away. +#' +#' @param muac A numeric vector holding the absolute MUAC values. +#' +#' @param .recode_muac Logical. Choose between `TRUE` if you wish to recode +#' the MUAC values into either centimeters or millimeters. +#' +#' @param unit A choice of the unit to which you wish to convert the MUAC +#' variable into. Choose "cm" for centimeters, "mm" for millimeters and "none" +#' to leave as it is. +#' +#' @param age A numeric vector of child's age in months. It must be named age, +#' otherwise it will not work. For instance, if given as following: age = months +#' it will not work. +#' +#' @param weight A numeric vector holding the weight values of the child in +#' kilograms. +#' +#' @param height A numeric vector holding the height values of the child in +#' centimeters. +#' +#' @returns A data frame of the same length as the input with additional +#' columns: one named `wfhz` or `mfaz` that holds the zscore values, and the other +#' holding dummy values: 1 (is a flag) and 0 (is not a flag). For the +#' `process_muac_data` function, when age is not supplied, only `flag_muac` is +#' added. This refers to flags based based on absolute MUAC values as recommended by +#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). +#' +#' @examples +#' +#' ## An example application of `process_wfhz_data()` ---- +#' +#' anthro.01 |> +#' process_wfhz_data( +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) +#' +#' ## An example application of `process_muac_data()` ---- +#' +#' ### Sample data ---- +#' df <- data.frame( +#' survey_date = as.Date(c( +#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), +#' birthdate = as.Date(c( +#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), +#' age = c(NA, 36, NA, NA, NA), +#' sex = c("m", "f", "m", "m", "f"), +#' muac = c(110, 130, 300, 123, 125) +#' ) +#' +#' ### The application of the function ---- +#' df |> +#' process_age( +#' svdate = "survey_date", +#' birdate = "birthdate", +#' age = age +#' ) |> +#' process_muac_data( +#' sex = sex, +#' age = "age", +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' unit = "cm" +#' ) +#' +#' @rdname wrangler +#' +#' @export +#' +process_muac_data <- function(df, + sex, muac, age = NULL, + .recode_sex = TRUE, + .recode_muac = TRUE, + unit = c("cm", "mm", "none")) { + unit <- match.arg(unit) + + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else { + {{ sex }} + } + ) + + rec_muac <- quote( + if (.recode_muac && unit == "cm") { + muac <- recode_muac({{ muac }}, unit = "cm") + } else if (.recode_muac && unit == "mm") { + muac <- recode_muac({{ muac }}, unit = "mm") + } else { + {{ muac }} + } + ) + + if (!is.null({{ age }})) { + df <- df |> + mutate( + muac = !!rec_muac, + sex = !!recode_sex, + ) |> + addWGSR( + sex = "sex", + firstPart = "muac", + secondPart = "age_days", + index = "mfa", + digits = 3 + )|> + mutate( + flag_mfaz = do.call(flag_outliers, list(.data$mfaz, type = "zscore")) + ) + } else { + df <- df |> + mutate( + sex = !!recode_sex, + flag_muac = do.call(flag_outliers, list({{ muac }}, type = "crude")) + ) + } + tibble::as_tibble(df) +} + + +#' +#' @rdname wrangler +#' +#' @export +#' +process_wfhz_data <- function(df, sex, weight, 
height, .recode_sex = TRUE) { + + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else { + {{ sex }} + } + ) + + df <- df |> + mutate( + sex = !!recode_sex + ) |> + addWGSR( + sex = {{ "sex" }}, + firstPart = {{ "weight" }}, + secondPart = {{ "height" }}, + index = "wfh", + digits = 3 + ) |> + mutate( + flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) + ) + tibble::as_tibble(df) +} diff --git a/man/age_ratio_test.Rd b/man/age_ratio_test.Rd index bc6c521..a0f3ccd 100644 --- a/man/age_ratio_test.Rd +++ b/man/age_ratio_test.Rd @@ -2,44 +2,42 @@ % Please edit documentation in R/age.R \name{age_ratio_test} \alias{age_ratio_test} -\title{Age ratio test on children aged 6:23 over 24:59 months} +\title{Test the proportion of children aged 24 to 59 months over 6 to 23 months old} \usage{ age_ratio_test(age, .expectedP = 0.66) } \arguments{ -\item{age}{A vector storing values about child's age in months.} +\item{age}{A numeric vector holding child's age in months.} -\item{.expectedP}{The expected proportion of children aged 24-59 months over -children aged 6-29 months, considered to be of 0.66 according to the -\href{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}{SMART MUAC tool}.} +\item{.expectedP}{The expected proportion of children aged 24 to 59 months +old over those aged 6 to 23 months old. As in the +\href{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}{SMART MUAC tool}, +this is estimated at 0.66.} } \value{ -A list three statistics: \code{p} for p-value, \code{observedR} for observed ratio -from your data, \code{observedP} for observed proportion of children 24-59 months -over the universe of your sample data. +A vector of class "list" holding three statistics: \code{p} for p-value, +\code{observedR} for the observed ratio and \code{observedP} for the observed proportion +of children aged 24 to 59 months over those aged 6 to 24 months old. + +@details +\code{age_ratio_test()} should be used specifically for assessing MUAC data. For +age ratio tests of children ages 6 to 29 months and 30 to 59 months old, as +performed in the SMART plausibility checks, use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} instead. } \description{ -As documented in \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}}, age ratio test is an age-related -test of survey data quality. This includes other assessments as screenings, -sentinel sites, etc. Different to \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}}, in \code{age_ratio_test()} -the ratio of children is calculate from children 6-23 months to the number of -children age 24-59 months. The ratio is then compared to the expected ratio -(set at 0.66). Then the difference between the observed ratio is compared to -the expected using a Chi-squared test. - -\code{age_ratio_test()} should only be used for MUAC checks. This particularly -useful as allows you to determine if downstream your analysis you should -consider adjusting your MUAC prevalence, should there be more younger children -than older children in your survey, screening or sentinel site data. If you -wish to get the age ratio for children 6-29/30-59 like in SMART Methodology, -then you should use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} NOT \code{age_ratio_test()}. +Age ratio test of the proportion of children aged 24 to 59 months over those +aged 6 to 23 months old. 
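The list structure documented under \value above can be unpacked by element name; a brief sketch, using only the elements the man page names (`p`, `observedR`, `observedP`) and the same sample data as the example below:

## A sample age vector ----
age <- seq(6, 59) |> sample(300, replace = TRUE)

## Keep the result and inspect its documented elements ----
result <- age_ratio_test(age, .expectedP = 0.66)
result$p         # p-value of the test
result$observedP # observed proportion of children aged 24 to 59 months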
} \examples{ -## Have a sample data ---- -age <- seq(6,59) |> sample(300, replace = TRUE) +## A sample data ---- +age <- seq(6,59) |> +sample(300, replace = TRUE) ## Apply the function ---- -age_ratio_test(age, .expectedP = 0.66) +age_ratio_test( +age = age, +.expectedP = 0.66 +) } diff --git a/man/assign_penalty_points_age_sex_ratio.Rd b/man/assign_penalty_points_age_sex_ratio.Rd deleted file mode 100644 index b4f5704..0000000 --- a/man/assign_penalty_points_age_sex_ratio.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_scorers.R -\name{assign_penalty_points_age_sex_ratio} -\alias{assign_penalty_points_age_sex_ratio} -\title{Assign a penalty point for the amount of selection biases in age and sex ratios} -\usage{ -assign_penalty_points_age_sex_ratio(x) -} -\arguments{ -\item{x}{A numeric vector containing p-values from either age or sex ratio -test results.} -} -\value{ -A numeric vector with the corresponding penalty points (scores) according -to the classification. -} -\description{ -The function assigns a penalty score for a age and sex ratio's test classification. -The score range varies between 0 (when "Excellent") to 10 (when "Problematic") for -both, according to the \href{https://smartmethodology.org/}{ENA for SMART software}. -} -\examples{ - -## A vector storing age ratio or sex ratio p-values' classification ---- -x <- c("Excellent", "Problematic", "Acceptable", "Good") - -## Apply the function ---- -assign_penalty_points_age_sex_ratio(x) - -} diff --git a/man/assign_penalty_points_flags_and_sd.Rd b/man/assign_penalty_points_flags_and_sd.Rd deleted file mode 100644 index 429dea5..0000000 --- a/man/assign_penalty_points_flags_and_sd.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_scorers.R -\name{assign_penalty_points_flags_and_sd} -\alias{assign_penalty_points_flags_and_sd} -\title{Assign a penalty point for the amount of proportion flagged data and standard deviation} -\usage{ -assign_penalty_points_flags_and_sd(x) -} -\arguments{ -\item{x}{A character vector containing the test classifications of proportion -of flagged data and the value of standard deviation.} -} -\value{ -A numeric vector with the corresponding penalty points (scores) according -to the classification. -} -\description{ -The function assigns a penalty score for a given category of test classification. -The score range varies between 0 (when "Excellent") to 20 (when "Problematic") for -both flagged data and standard deviation. This was borrowed from the -\href{https://smartmethodology.org/}{ENA for SMART software} -In the SMART Methodology, flagged data and standard deviation are tho test -criteria that gets the highest penalty scores, so it is here. 
-}
-\examples{
-
-## Sample data ----
-x <- c("Excellent", "Problematic", "Acceptable", "Good")
-## Apply the function ----
-assign_penalty_points_flags_and_sd(x)
-
-}
diff --git a/man/assign_penalty_points_skew_kurt.Rd b/man/assign_penalty_points_skew_kurt.Rd
deleted file mode 100644
index d8b456e..0000000
--- a/man/assign_penalty_points_skew_kurt.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quality_scorers.R
-\name{assign_penalty_points_skew_kurt}
-\alias{assign_penalty_points_skew_kurt}
-\title{Assign a penalty point for the amount of issues in Skweness and Kurtosis}
-\usage{
-assign_penalty_points_skew_kurt(x)
-}
-\arguments{
-\item{x}{A numeric vector containing Skewness or Kurtosis test results classification.}
-}
-\value{
-A numeric vector with the corresponding penalty points (scores) according
-to the classification.
-}
-\description{
-The function assigns a penalty score for a Skewness and Kurtosis test classification.
-The score range varies between 0 (when "Excellent") to 5 (when "Problematic") for
-both, according to the \href{https://smartmethodology.org/}{ENA for SMART software}.
-}
-\examples{
-
-## A vector storing Skewness or Kurtosis test classification ----
-
-x <- c("Excellent", "Problematic", "Acceptable", "Good")
-
-## Apply the function ----
-assign_penalty_points_skew_kurt(x)
-
-}
diff --git a/man/auditor.Rd b/man/auditor.Rd
new file mode 100644
index 0000000..b755618
--- /dev/null
+++ b/man/auditor.Rd
@@ -0,0 +1,111 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/quality_auditors.R
+\name{check_plausibility_mfaz}
+\alias{check_plausibility_mfaz}
+\alias{check_plausibility_wfhz}
+\alias{check_plausibility_muac}
+\title{Audit the plausibility of WFHZ, MFAZ data and absolute MUAC values}
+\usage{
+check_plausibility_mfaz(df, sex, muac, age, flags, area)
+
+check_plausibility_wfhz(df, sex, age, weight, height, flags, area)
+
+check_plausibility_muac(df, flags, sex, muac)
+}
+\arguments{
+\item{df}{A data frame yielded from \code{\link[=process_muac_data]{process_muac_data()}} for
+\code{check_plausibility_mfaz()} and \code{check_plausibility_muac()}, and yielded from
+\code{\link[=process_wfhz_data]{process_wfhz_data()}} for \code{check_plausibility_wfhz()}.}
+
+\item{sex}{A vector holding codes on child's sex: 1 for boy and 2 for girl.}
+
+\item{muac}{A numeric vector holding MUAC measurements (in centimeters).}
+
+\item{age}{A numeric vector holding age in months.}
+
+\item{flags}{A vector holding values on flagged observations.}
+
+\item{area}{A character vector holding values on where the data was collected
+and for which you want the analysis to be performed. If analysing data from just
+one area, you will still have to supply the corresponding column to \code{area} in
+\code{check_plausibility_mfaz()} or \code{check_plausibility_wfhz()}.}
+
+\item{weight}{A numeric vector holding weight measurements (in kilograms).}
+
+\item{height}{A numeric vector holding height measurements (in centimeters).}
+}
+\value{
+A summarized table with the raw statistics and respective classification.
+}
+\description{
+\code{check_plausibility_wfhz()}, \code{check_plausibility_mfaz()}, and
+\code{check_plausibility_muac()} examine the plausibility of the data through a
+structured set of tests around sampling and measurement-related errors. 
+} +\examples{ + +## Audit the plausibility of MFAZ data ---- + +anthro.01 |> +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_muac_data( +sex = sex, +age = "age", +muac = muac, +.recode_sex = TRUE, +.recode_muac = TRUE, +unit = "cm" +) |> +check_plausibility_mfaz( +flags = flag_mfaz, +sex = sex, +muac = muac, +age = age, +area = area +) + +## Audit the plausibility of WFHZ ---- + +anthro.01 |> +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_wfhz_data( +sex = sex, +weight = weight, +height = height, +.recode_sex = TRUE +) |> +check_plausibility_wfhz( +sex = sex, +age = age, +weight = weight, +height = height, +flags = flag_wfhz, +area = area +) + +## Audit the plausibility of the absolute MUAC values ---- + +anthro.01 |> +process_muac_data( +sex = sex, +muac = muac, +age = NULL, +.recode_sex = TRUE, +.recode_muac = FALSE, +unit = "none" +) |> +check_plausibility_muac( +flags = flag_muac, +sex = sex, +muac = muac +) + +} diff --git a/man/case_definition.Rd b/man/case_definition.Rd new file mode 100644 index 0000000..2c750cf --- /dev/null +++ b/man/case_definition.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/case_definitions.R +\name{define_wasting_cases_muac} +\alias{define_wasting_cases_muac} +\alias{define_wasting_cases_whz} +\alias{define_wasting_cases_combined} +\alias{define_wasting} +\title{Define if an observation is wasted on the basis of the criteria +of WFHZ, absolute MUAC values and combined case-definition} +\usage{ +define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam")) + +define_wasting_cases_whz(zscore, edema = NULL, cases = c("gam", "sam", "mam")) + +define_wasting_cases_combined( + zscore, + muac, + edema = NULL, + cases = c("cgam", "csam", "cmam") +) + +define_wasting( + df, + zscore = NULL, + muac = NULL, + edema = NULL, + base = c("wfhz", "muac", "combined") +) +} +\arguments{ +\item{muac}{A numeric vector holding absolute MUAC values (in mm).} + +\item{edema}{A character vector indicating if an observation has bilateral +edema or not. The codes are "y" for presence and "n" for absence of bilateral +edema. Default is \code{NULL}.} + +\item{cases}{A choice of the form of wasting to be defined.} + +\item{zscore}{A numeric vector holding WFHZ values (with 3 decimal places).} + +\item{df}{A data frame containing the required variables.} + +\item{base}{A choice of the criterion which the case-definition should be based +on.} +} +\value{ +A numeric vector of the same length as the input vector, with dummy +values: 1 for yes wasted and 0 for not wasted. The meaning of the codes +changes depending on the form of wasting chosen. That is, if set \code{cases} to +\code{"sam"} the codes 1 would mean yes for severe wasting. +} +\description{ +Define if an observation is wasted on the basis of the criteria +of WFHZ, absolute MUAC values and combined case-definition +} +\details{ +Use \code{define_wasting()} to add the case-definitions in your input data frame. 
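A possible follow-up to `define_wasting()`: the previous version of this man page (removed further down in this patch) documents `gam`, `sam` and `mam` as the columns added for the WFHZ- and MUAC-based definitions. Assuming those column names are unchanged, the case-definitions can be summarised as proportions; a hedged sketch, not part of the package examples:

library(dplyr)

## Proportion of children meeting each WFHZ-based case-definition ----
## (columns gam, sam and mam are assumed to be 1/0 dummies) ----
anthro.02 |>
  define_wasting(zscore = wfhz, edema = edema, base = "wfhz") |>
  summarise(
    gam = mean(gam, na.rm = TRUE),
    sam = mean(sam, na.rm = TRUE),
    mam = mean(mam, na.rm = TRUE)
  )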
+} +\examples{ +# MUAC-based case-definition ---- +x <- anthro.02 |> +define_wasting( +muac = muac, +edema = edema, +base = "muac" +) +head(x) + +# Weight-for-height based case-definition ---- +x <- anthro.02 |> +define_wasting( +zscore = wfhz, +edema = edema, +base = "wfhz" +) +head(x) + +# Combined case-definition ---- +x <- anthro.02 |> +define_wasting( +zscore = wfhz, +muac = muac, +edema = edema, +base = "combined" +) +head(x) + +} diff --git a/man/case_definitions.Rd b/man/case_definitions.Rd deleted file mode 100644 index 5c7c6c0..0000000 --- a/man/case_definitions.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_definitions.R -\name{define_wasting_cases_muac} -\alias{define_wasting_cases_muac} -\alias{define_wasting_cases_whz} -\alias{define_wasting_cases_combined} -\title{Case-Definition: is an observation acutely malnourished?} -\usage{ -define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam")) - -define_wasting_cases_whz(zscore, edema = NULL, cases = c("gam", "sam", "mam")) - -define_wasting_cases_combined( - zscore, - muac, - edema = NULL, - cases = c("cgam", "csam", "cmam") -) -} -\arguments{ -\item{muac}{An integer vector containing MUAC measurements in mm.} - -\item{edema}{A character vector of "y" = Yes, "n" = No bilateral edema. -Default is NULL.} - -\item{cases}{A choice of wasting case definition you wish to apply. For combined -acute malnutrition with \code{\link[=define_wasting_cases_combined]{define_wasting_cases_combined()}} cases options are: -c("cgam", "csam", "cmam").} - -\item{zscore}{A double vector containing weight-for-height zscores with 3 -decimal places.} -} -\value{ -A numeric vector of the same size as the input vector, with values ranging -between 1=Yes and 0=No. -} -\description{ -\code{\link[=define_wasting_cases_muac]{define_wasting_cases_muac()}}, \code{\link[=define_wasting_cases_whz]{define_wasting_cases_whz()}} and -\code{\link[=define_wasting_cases_combined]{define_wasting_cases_combined()}} help you get through with your wasting -case-definition for each observation. It should be used inside dplyr::mutate() -or base::transform(). It was designed to be used inside \code{\link[=define_wasting]{define_wasting()}}. -} diff --git a/man/check_sample_size.Rd b/man/check_sample_size.Rd index abffacb..fa7a5c6 100644 --- a/man/check_sample_size.Rd +++ b/man/check_sample_size.Rd @@ -2,56 +2,35 @@ % Please edit documentation in R/sample_size.R \name{check_sample_size} \alias{check_sample_size} -\title{Check IPC AMN Sample Size Requirements} +\title{Check if the IPC AMN sample size requirement were met} \usage{ check_sample_size(df, .group, .data_type = c("survey", "screening", "ssite")) } \arguments{ \item{df}{A data frame containing the required variables.} -\item{.group}{A vector containing the ID's of the primary sampling unit. -Usually and ideally a numeric vector, but sometimes this variables may come as -a character vector. Either way, \code{check_sample_size()} will execute -the task accordingly.} +\item{.group}{A vector containing the primary sampling unit (PSU) ID's. Usually and +ideally a numeric vector, but sometimes this may present itself as a character. +Either way, \code{check_sample_size()} will work accordingly.} -\item{.data_type}{The data collection method: survey, screening or sentinel sites. 
-If you wish to check IPC AMN requirements on surveys were met, set -method = "survey"; for screening set method = "screening" and for sentinel -sites set method = "ssite". If by mistake a different parameter is given, -an error will be thrown and the function will stop, but with a guidance on -how to go about.} +\item{.data_type}{A choice between "survey" for survey data, "screening" for +screening data or "ssite" for community-based sentinel site data.} } \value{ -\code{check_sample_size()} returns an output of the same type -as the input (data frame), but of a different size. By default, the function -returns a summary of length 1 (one row), but with three new columns added to -the input data frame: \code{groups} (for survey), or sites (for screening or sentinel -sites) \code{n_obs} and \code{meet_ipc}. The first will store the total number of PSUs -in the sample. \code{n_obs} will store the total number of rows/observations and -\code{meet_ipc} is a logical vector to say whether or not the IPC AMN minimum -criteria for sample size was met. This is flexible according to the method you -select with \code{.data_type = " "}. +By default, a summary table of one row and three additional columns +are returned. Column \code{groups} and \code{n_obs} hold the total number of unique +PSU's and children respectively, and \code{meet_ipc} tells whether the IPC AMN +sample size requirements were met. } \description{ -Evidence used in \href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC} -comes from different sources, collected in different ways, -namely: representative surveys, screenings or even data from community-based -surveillance system - the sentinel sites. IPC AMN protocols have set minimum -sampling a sample size requirements for each. For cluster-based -representative surveys, there must be at least 25 primary sampling unit (PSUs). -On screening, there ware two ways: i. exhaustive screening (door-to-door) or -ii. sampled screening. For this, there should be at least three sites (i.e., -villages or communities, etc). \code{check_sample_size()} checks the -on sampled screening. - -\code{check_sample_size()} helps you know if your data meets the at least -IPC AMN minimum requirements. This function should be used before proceeding -to checking the quality of measurements. Doing this saves you from avoid -working on data that do not meet the minimum requirements, as it will not be -used in any IPC analysis. +\code{check_sample_size()} verifies if the minimum sample size requirements of the +IPC Acute Malnutrition protocols are met in a given area of analysis. +} +\details{ +Use dplyr::group_by() before \code{check_sample_size()} to get a summary for each +unique survey or screening location from your data. 
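The \details note above can be made concrete with a small sketch; it assumes `anthro.01` carries an `area` column, as the plausibility examples elsewhere in this patch suggest:

library(dplyr)

## One summary row per area: groups, n_obs and meet_ipc ----
anthro.01 |>
  group_by(area) |>
  check_sample_size(.group = cluster, .data_type = "survey")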
} \examples{ -# Have an input data frame -------------------------------------------------- check_sample_size(anthro.01, .group = cluster, .data_type = "survey") } diff --git a/man/classify_age_sex_ratio.Rd b/man/classify_age_sex_ratio.Rd index 9b6cb0f..333ef72 100644 --- a/man/classify_age_sex_ratio.Rd +++ b/man/classify_age_sex_ratio.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R +% Please edit documentation in R/quality_raters.R \name{classify_age_sex_ratio} \alias{classify_age_sex_ratio} -\title{Classify how much high is the difference in age ration and in sex ratio} +\title{Rate the p-values of the age and sex ratio test} \usage{ classify_age_sex_ratio(p) } @@ -10,23 +10,8 @@ classify_age_sex_ratio(p) \item{p}{A numeric vector containing the test p-values.} } \value{ -A character vector with the correspondent classification. +A character vector with the rating results. } \description{ -\code{classify_age_sex_ratio()} works on the results yielded by \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}}. -It helps you know how much high is the statistical difference between children -age 6-29 months of those age 30-59 months. Likewise, with regard to sex, -function works on the results yielded by \code{\link[nipnTK:sexRatioTest]{nipnTK::sexRatioTest()}} to know -how much high is the difference between boy and girls in your sample data. -} -\examples{ - -## Have a numeric vector storing p-values ---- -pvalues <- c(0, 0, 0.01, 0.011, 0.2, 0.015, 0.016, 0.017, -0.05, 0.06,0.03, 0.03, 0.04, 0.000001, 0.07 -) - -## Apply the function ---- -classify_age_sex_ratio(pvalues) - +Rate the p-values of the age and sex ratio test } diff --git a/man/classify_overall_quality.Rd b/man/classify_overall_quality.Rd index 96e827a..8657a65 100644 --- a/man/classify_overall_quality.Rd +++ b/man/classify_overall_quality.Rd @@ -1,26 +1,32 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R +% Please edit documentation in R/quality_raters.R \name{classify_overall_quality} \alias{classify_overall_quality} -\title{Get the overall data quality classification} +\title{Rate the overall data quality} \usage{ classify_overall_quality(df) } \arguments{ -\item{df}{A data frame containing a vector with the quality scores generated by -\code{\link[=compute_quality_score]{compute_quality_score()}}.} +\item{df}{A data frame containing a vector with the quality scores yielded +from \code{\link[=compute_quality_score]{compute_quality_score()}}.} } \value{ -A character vector of the same length, but a different width as the -input \code{df} is returned with a new column called \code{quality_class}. +A character vector of the same length with a new column called +\code{quality_class}. } \description{ -\code{classify_overall_quality()} helps you in knowing the overall status of your -data quality. It classifies the overall score generated by -\code{\link[=compute_quality_score]{compute_quality_score()}} into four categories, as it is done in the -\href{https://smartmethodology.org/}{SMART Methodology}, -namely: "Excellent", "Good", "Acceptable" and "Problematic". Beware that -the overall classification should be used as an indication to further -scrutinize of data before taking the decision to validate or invalidate the -results. 
+\code{classify_overall_quality()} informs you about the overall quality of the data +by rating the overall quality score in "Excellent", "Good", "Acceptable" and +"Problematic". +} +\examples{ +## A sample data ---- + +df <- data.frame( +quality_score = 29 +) + +## Apply the function ---- +classify_overall_quality(df) + } diff --git a/man/classify_percent_flagged.Rd b/man/classify_percent_flagged.Rd deleted file mode 100644 index 6cda3bb..0000000 --- a/man/classify_percent_flagged.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R -\name{classify_percent_flagged} -\alias{classify_percent_flagged} -\title{Classify how much high is the proportion of flagged data} -\usage{ -classify_percent_flagged(p, type = c("mfaz", "whz", "crude")) -} -\arguments{ -\item{p}{A numeric vector containing the proportions of flagged data} - -\item{type}{The method to which you wish to classify how much high are the -proportions of flagged data. A choice between "mfaz" for MFAZ, "whz" for WHZ -and "crude" for crude MUAC.} -} -\value{ -A character vector with the correspondent classification of the -amount of flagged data. The categories of classification ranges are: -"Excellent", "Good", "Acceptable", "Problematic". -} -\description{ -\code{classify_percent_flagged()} tells you how much high is the proportion of -of flagged data in your data set, an indication of quality of data. Its a -reusable function for MFAZ, WHZ and crude MUAC. The cut-offs for MFAZ and -crude MUAC are the same with the upper limit of 2\%. This is based on the -research findings by \href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}, -from a multi-country analysis, found that the correlation between the mean -MFAZ and crude MUAC was almost perfect (r=99). As for WHZ, the cut-offs are -exactly those in the \href{https://smartmethodology.org/}{SMART Methodology}. -} -\examples{ - -## Take a vector with the proportions of flagged data ---- -prop <- c(0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, -0.03, 0.03, 0.04, 0.000001, 0) - -## Apply the function setting type to "whz" for instance ---- -classify_percent_flagged(prop, type = "whz") - -} diff --git a/man/classify_sd.Rd b/man/classify_sd.Rd deleted file mode 100644 index 7ff76c2..0000000 --- a/man/classify_sd.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R -\name{classify_sd} -\alias{classify_sd} -\title{Classify how much high is the value of standard deviation} -\usage{ -classify_sd(sd, type = c("zscore", "crude")) -} -\arguments{ -\item{sd}{A numeric vector containing values for standard deviation of the -method you wish the work on.} - -\item{type}{The method to which you wish to classify how much high is the -value of standard deviation. A choice between "zscore" MFAZ or WHZ and -"crude" for crude MUAC.} -} -\value{ -A character vector with the correspondent classification. -} -\description{ -\code{classify_sd()} helps you to know the magnitude of the data's standard -deviation. You can use this function for either WHZ, MFAZ or crude MUAC. -Cut-offs for WHZ are based on the \href{https://smartmethodology.org/}{SMART Methodology}. -Cut-offs for MFAZ are also based on SMART, but informed by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. 
-For crude MUAC, the cut-offs are based on the -\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC AMN guidelines} -} -\examples{ - -## Have a vector with standard deviation ---- -sdvalues <- seq(0.7, 1.3, by = 0.001) |> -sample(size = 9, replace = TRUE) - -## Apply the function with `type = "zscore` ---- -classify_sd(sdvalues, type = "zscore") - -## Using `type = "crude"` ---- -### Create sample data ---- -sdvalues <- seq(9, 30, by = 2) |> -sample(size = 20, replace = TRUE) - -### Apply the function with `type = "crude"` ---- -classify_sd(sdvalues, type = "crude") - -} diff --git a/man/classify_skew_kurt.Rd b/man/classify_skew_kurt.Rd index 58c0006..9fcdb9a 100644 --- a/man/classify_skew_kurt.Rd +++ b/man/classify_skew_kurt.Rd @@ -1,29 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_classifiers.R +% Please edit documentation in R/quality_raters.R \name{classify_skew_kurt} \alias{classify_skew_kurt} -\title{Classify how much high is the value of Skewness and Kurtosis} +\title{Rate the magnitude of skewness and kurtosis test results} \usage{ classify_skew_kurt(sk) } \arguments{ -\item{sk}{A numeric vector containing values of either Skewness or Kurtosis.} +\item{sk}{A numeric vector containing values of either skewness or kurtosis.} } \value{ -A character vector with the correspondent classification. +A character vector with the rating results. } \description{ -\code{classify_skew_kurt()} helps you to know the magnitude of the Skewness and -Kurtosis from your data. This is only useful for WHZ and MFAZ. The function -works on the results yielded by \code{\link[nipnTK:skewKurt]{nipnTK::skewKurt()}}. -Cut-offs for WHZ are based on the \href{https://smartmethodology.org/}{SMART Methodology}. -} -\examples{ - -#Have a numeric vector storing values for skewness or kurtosis ---- -sk <- seq(-5, 1, by = 0.05) |> sample(size = 20, replace = TRUE) - -# Apply function -classify_skew_kurt(sk) - +Rate the magnitude of skewness and kurtosis test results } diff --git a/man/classify_wasting_for_cdc_approach.Rd b/man/classify_wasting_for_cdc_approach.Rd index 8135317..780fcd1 100644 --- a/man/classify_wasting_for_cdc_approach.Rd +++ b/man/classify_wasting_for_cdc_approach.Rd @@ -2,25 +2,23 @@ % Please edit documentation in R/case_definitions.R \name{classify_wasting_for_cdc_approach} \alias{classify_wasting_for_cdc_approach} -\title{A helper function to classify nutritional status into SAM, MAM or not wasted} +\title{Classify wasting into severe or moderate wasting for use in SMART MUAC tool +weighting approach} \usage{ classify_wasting_for_cdc_approach(muac, .edema = NULL) } \arguments{ -\item{muac}{An integer vector containing MUAC values. They should be in -millimeters.} +\item{muac}{A numeric vector holding absolute MUAC values (in mm).} -\item{.edema}{Optional. Its a vector containing data on bilateral pitting -edema coded as "y" for yes and "n" for no.} +\item{.edema}{Optional. A character vector indicating if an observation has +bilateral edema or not. The codes are "y" for presence and "n" for absence of +bilateral edema.} } \value{ -A numeric vector of the same size as the input vector with values ranging -between "sam", "mam" and "not wasted" for severe, moderate acute malnutrition and not -acutely malnourished, respectively. +A character vector of the same length as the input indicating if a +child is severe or moderate wasted or not wasted. 
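This man page ships without an example; a minimal, hypothetical call based only on the usage shown above (MUAC in millimetres, edema coded "y"/"n"):

## Hypothetical MUAC (mm) and edema values ----
muac <- c(123, 110, 114, 125, 130)
edema <- c("n", "n", "y", "n", "n")

classify_wasting_for_cdc_approach(muac = muac, .edema = edema)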
} \description{ -\code{classify_wasting_for_cdc_approach()} is used a helper inside -\code{\link[=apply_cdc_age_weighting]{apply_cdc_age_weighting()}} to classify nutritional status into "sam", "mam" -or "not wasted" and then the vector returned is used downstream to calculate -the proportions of children with severe and moderate acute malnutrition. +Classify wasting into severe or moderate wasting for use in SMART MUAC tool +weighting approach } diff --git a/man/compute_age_in_months.Rd b/man/compute_age_in_months.Rd index a592857..e378361 100644 --- a/man/compute_age_in_months.Rd +++ b/man/compute_age_in_months.Rd @@ -2,21 +2,24 @@ % Please edit documentation in R/age.R \name{compute_age_in_months} \alias{compute_age_in_months} -\title{Get age in months from birth-date and the data when data was collected.} +\title{Calculate age in months} \usage{ compute_age_in_months(surv_date, birth_date) } \arguments{ -\item{surv_date, birth_date}{Vectors containing dates. \code{surv_date} refers to the day, -month and year when the data was collected; while \code{birth_date} refers to the date -when the child was born.} +\item{surv_date}{A vector of class "Date" holding values corresponding to +the date of data collection.} + +\item{birth_date}{A vector of class "Date" holding values corresponding to +the child's date of birth.} } \value{ -A vector of name \code{age} storing age in months, a mix of double and -integer and \code{NA} for missing value if any of the processed age in months is -< 6 or > 59.99 months. +A numeric vector named \code{age} holding age values in months with two +decimal places. Any value outside the range of 6.0 to 59.99 is replaced with +\code{NA}. } \description{ -\code{compute_age_in_months()} works inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}} -It helps you to compute age in months from a pair of birth date and survey date. +\code{compute_age_in_months()} calculates age in months from on the basis of +difference between the data collection date and the child's date of birth. +It works inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}. } diff --git a/man/compute_month_to_days.Rd b/man/compute_month_to_days.Rd index b3aaa20..3d3cc83 100644 --- a/man/compute_month_to_days.Rd +++ b/man/compute_month_to_days.Rd @@ -2,16 +2,17 @@ % Please edit documentation in R/age.R \name{compute_month_to_days} \alias{compute_month_to_days} -\title{Recode age variable from months to days} +\title{Transform age in months to days} \usage{ compute_month_to_days(x) } \arguments{ -\item{x}{A numeric vector containing values of age in months.} +\item{x}{A numeric vector containing age values in months.} } \value{ -A numeric vector with values corresponding to age in days +A numeric vector, of the same length as the input variable, containing +age values in days. } \description{ -Recode age variable from months to days +Transform age in months to days } diff --git a/man/compute_quality_score.Rd b/man/compute_quality_score.Rd index 1ed4ee3..a367a19 100644 --- a/man/compute_quality_score.Rd +++ b/man/compute_quality_score.Rd @@ -2,36 +2,26 @@ % Please edit documentation in R/quality_scorers.R \name{compute_quality_score} \alias{compute_quality_score} -\title{Get the overall WHZ or MFAZ's quality score} +\title{Get the overall quality score for WFHZ and MFAZ} \usage{ compute_quality_score(df, type = c("mfaz", "whz")) } \arguments{ -\item{df}{A data frame containing the scores. 
If you wish the get the overall -quality score for MFAZ, the input data frame must have seven (7) required -columns containing test classification of flagged data, sex ratio, age ratio, -standard deviation, skewness, kurtosis, crude MUAC's digit preference. -Alternatively, if you wish to get the quality score of WHZ, then the input -data frame must have the exact same columns in the plausibility report of the -ENA for SMART software.} +\item{df}{A data frame containing individual test quality scores.} \item{type}{The method you wish to get the overall quality score for. -A choice between "mfaz" and "whz". If you wish to know the overall survey -score of your WHZ data, set \code{type = whz}, otherwise set \code{type = mfaz} for -MFAZ. If by mistake a different input choice is given, an error will be -thrown with a message guiding how to go about.} +A choice between "mfaz" and "wfhz".} } \value{ -A vector (named \code{"quality_score"}) with the overall quality scores. +A vector named \code{"quality_score"} with the overall quality score. } \description{ -\code{compute_quality_score()} provides the overall quality score of either WHZ or MFAZ, -by adding up the scores across each test criteria. This is an input to -\code{\link[=classify_overall_quality]{classify_overall_quality()}}. +\code{compute_quality_score()} calculates the overall score of the quality of the +data for both WFHZ and MFAZ. } \examples{ -# example code -## Create a `df` object ---- + +## A sample data ---- df <- data.frame( flagged_class = "Excellent", @@ -43,11 +33,7 @@ skew_class = "Good", kurt_class = "Acceptable" ) -## Apply function ---- +## Apply the function ---- compute_quality_score(df, type = "mfaz") -# You can also choose to chain the functions with a pipe operator ---- -df |> -compute_quality_score(type = "mfaz") - } diff --git a/man/define_wasting.Rd b/man/define_wasting.Rd deleted file mode 100644 index d62fab8..0000000 --- a/man/define_wasting.Rd +++ /dev/null @@ -1,66 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_definitions.R -\name{define_wasting} -\alias{define_wasting} -\title{Add acute malnutrition case-definitions to the data frame} -\usage{ -define_wasting( - df, - zscore = NULL, - muac = NULL, - edema = NULL, - base = c("wfhz", "muac", "combined") -) -} -\arguments{ -\item{df}{The data frame object containing the vectors with zscores, muac and -edema.} - -\item{zscore}{The vector storing zscores values with 3 decimal places.} - -\item{muac}{An integer vector containing MUAC measurements in mm.} - -\item{edema}{A character vector of "y" = Yes, "n" = No bilateral edema. -Default is NULL.} - -\item{base}{A choice of options to which your case definition should be based on.} -} -\value{ -A data frame with three vectors added to the input data frame: "gam", -"sam" and "mam". If base = "combined" the vector names change to "cgam", -"csam" and "cmam" for combined global, severe and moderate acute malnutrition -respectively. -} -\description{ -Use \code{define_wasting()} to add the case-definitions in your input data frame. 
-} -\examples{ -# MUAC-based case-definition ---- -x <- anthro.02 |> -define_wasting( -muac = muac, -edema = edema, -base = "muac" -) -head(x) - -# Weight-for-height based case-definition ---- -x <- anthro.02 |> -define_wasting( -zscore = wfhz, -edema = edema, -base = "wfhz" -) -head(x) - -# Combined case-definition ---- -x <- anthro.02 |> -define_wasting( -zscore = wfhz, -muac = muac, -edema = edema, -base = "combined" -) -head(x) - -} diff --git a/man/flag_outliers.Rd b/man/flag_outliers.Rd deleted file mode 100644 index 7bbe8b3..0000000 --- a/man/flag_outliers.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_processors.R -\name{flag_outliers} -\alias{flag_outliers} -\title{Identify and flag outliers in WHZ, MFAZ, and crude MUAC datasets} -\usage{ -flag_outliers(x, type = c("zscore", "crude")) -} -\arguments{ -\item{x}{A numeric value from the variable storing either WHZ or MFAZ or crude -MUAC's observations in the dataset, as applicable.} - -\item{type}{The method you wish \code{flag_outliers()} to identify flags on. -A choice between "zscore" and "crude". If you wish to get flags for WHZ or -MFAZ, set \code{method = "zscore"}. Alternatively, if your wish to get flags for -crude MUAC, set \code{method = "crude"}. The default is "zscore". If by mistake -a different option is supplied, an error will be thrown with a message -guiding you what to do.} -} -\value{ -A vector of two values: 1 and 0, where 1 signifies flagged value and -0 not flagged. -} -\description{ -Outliers are extreme values that far away from the mean, that are unlikely to -be correct measurements. \code{flag_outliers()} helps you to identify any extreme -values in your dataset in two different ways. Outliers in WHZ are identified -based on the \href{https://smartmethodology.org/}{SMART Methodology.}. -MFAZ follows the same approach, while crude MUAC's approach is based on a -fixed range (<100mm and >200mm), based a multicountry research findings by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).} -} -\examples{ - -# Sample data of crude MUAC ---- -x <- c(90, 110, 140, 200, 119, 235) - -# Apply `flag_outliers()` with type set to "crude" ---- -flag_outliers(x, type = "crude") - -# Sample data of MFAZ ---- -x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828) - -# Apply `flag_outliers()` with type set to "zscore" ---- -flag_outliers(x, type = "zscore") - -} diff --git a/man/outliers.Rd b/man/outliers.Rd new file mode 100644 index 0000000..8c9a56d --- /dev/null +++ b/man/outliers.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wranglers.R +\name{flag_outliers} +\alias{flag_outliers} +\alias{remove_flags} +\title{Identify and flag outliers} +\usage{ +flag_outliers(x, type = c("zscore", "crude")) + +remove_flags(x, unit = c("zscore", "crude")) +} +\arguments{ +\item{x}{A numeric vector holding either the WFHZ, the MFAZ values, or the +absolute MUAC values (in millimeters).} + +\item{type}{The method you wish \code{flag_outliers()} to identify flag outliers +in the data. A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for +absolute MUAC values).} + +\item{unit}{A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for +absolute MUAC values).} +} +\value{ +A vector of the same length as input holding dummy values: 1 for is +a flag and 0 is not a flag. 
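`remove_flags()` shares this man page but has no example, and its roxygen block in R/wranglers.R carries no @export tag, so it may only be reachable internally. A sketch of the behaviour its source defines for z-scores (values outside the mean plus or minus 3 window become NA):

## MFAZ or WFHZ values with implausible entries ----
x <- c(-2.27, -5.28, -0.72, -2.26, 12.9, -4.45, -2.26, -1.83)

## Out-of-range values are replaced with NA; the rest are returned unchanged ----
remove_flags(x, unit = "zscore")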
+} +\description{ +Outliers are extreme values that deviate remarkably from the mean, making +them unlikely to be accurate measurements. \code{flag_outliers()} helps you to +identify them whether in the WFHZ, the MFAZ or the absolute MUAC values. +} +\details{ +The flagging criteria for the WFHZ is as in +\href{https://smartmethodology.org/}{SMART plausibility check}. As for the MFAZ, it +uses the same criteria as WFHZ, whilst a fixed flagging criteria is used for +absolute MUAC values. This is as recommended by +\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).} +} +\examples{ + +## Sample data for absolute MUAC values ---- +x <- c(90, 110, 140, 200, 119, 235) + +## Apply `flag_outliers()` with type set to "crude" ---- +flag_outliers(x, type = "crude") + +## Sample data for MFAZ or for WFHZ values ---- +x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828) + +# Apply `flag_outliers()` with type set to "zscore" ---- +flag_outliers(x, type = "zscore") + +} diff --git a/man/plausibility_checkers.Rd b/man/plausibility_checkers.Rd deleted file mode 100644 index 5a32f12..0000000 --- a/man/plausibility_checkers.Rd +++ /dev/null @@ -1,116 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_checkers.R -\name{check_plausibility_mfaz} -\alias{check_plausibility_mfaz} -\alias{check_plausibility_wfhz} -\alias{check_plausibility_muac} -\title{Plausibility checkers: MUAC-for-age z-scores, Weight-for-Height z-scores and -MUAC} -\usage{ -check_plausibility_mfaz(df, sex, muac, age, flags, area) - -check_plausibility_wfhz(df, sex, age, weight, height, flags, area) - -check_plausibility_muac(df, flags, sex, muac) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} for -\code{check_plausibility_mfaz()} and \code{check_plausibility_muac()} and returned by -\code{\link[=process_wfhz_data]{process_wfhz_data()}} for \code{check_plausibility_wfhz()}.} - -\item{sex}{A vector telling whether a given child is a boy or girl.} - -\item{muac}{A vector containing MUAC measurements.} - -\item{age}{A vector containing children's age in months.} - -\item{flags}{A character vector telling whether or not an observation is an -outlier.} - -\item{area}{A vector with values on where was the data collected. If you are -analyzing a data set with just one area, provide it anyway to -\code{check_plausibility_mfaz()} or \code{check_plausibility_wfhz()}} - -\item{weight}{A vector containing weight measurements in kilograms.} - -\item{height}{A vector containing height measurements in centimeters.} -} -\value{ -A summarized data frame containing quality checks statistics and -respective classification. -} -\description{ -\code{check_plausibility_mfaz()}, \code{check_plausibility_wfhz()} and -\code{check_plausibility_muac()} lets you know the quality of your data, based on -the statistics around MUAC-for-age zscores, weight-for-height z-scores and on -crude MUAC, respectively. Note that \code{check_plausibility_wfhz()} is all about -WHZ only. 
If you wish to know about MUAC checks consider using either -\code{check_plausibility_mfaz()} or \code{check_plausibility_muac()} -} -\examples{ - -## Check Plausibility: MFAZ ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_muac_data( -sex = sex, -age = "age", -muac = muac, -.recode_sex = TRUE, -.recode_muac = TRUE, -unit = "cm" -) |> -check_plausibility_mfaz( -flags = flag_mfaz, -sex = sex, -muac = muac, -age = age, -area = area -) - -## Check Plausibility: WFHZ ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -check_plausibility_wfhz( -sex = sex, -age = age, -weight = weight, -height = height, -flags = flag_wfhz, -area = area -) - -## Check Plausibility: MUAC ---- - -anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> -check_plausibility_muac( -flags = flag_muac, -sex = sex, -muac = muac -) - -} diff --git a/man/pretty_table.Rd b/man/pretty_table.Rd index c3ef2a8..92f47f0 100644 --- a/man/pretty_table.Rd +++ b/man/pretty_table.Rd @@ -4,7 +4,7 @@ \alias{generate_pretty_table_mfaz} \alias{generate_pretty_table_wfhz} \alias{generate_pretty_table_muac} -\title{Get a prettified formatted and presentable output table} +\title{Get a formatted and presentable output table for the plausibility auditors} \usage{ generate_pretty_table_mfaz(df) @@ -13,23 +13,22 @@ generate_pretty_table_wfhz(df) generate_pretty_table_muac(df) } \arguments{ -\item{df}{An output data frame returned by \code{\link[=check_plausibility_mfaz]{check_plausibility_mfaz()}}, +\item{df}{The table returned by \code{\link[=check_plausibility_mfaz]{check_plausibility_mfaz()}}, \code{\link[=check_plausibility_wfhz]{check_plausibility_wfhz()}} or \code{\link[=check_plausibility_muac]{check_plausibility_muac()}}.} } \value{ -An output data frame of the same size as the input, but with values -formatted, columns renamed, and ready to share. +An output table of the same size as the input, with values +formatted, columns renamed, and ready to be shared. } \description{ -You may want to share the plausibility report in a table. You usually care for -a well formatted and pretty table, with values rounded, scientific notations -converted into conventional notations, etc. \code{generate_pretty_table_mfaz()}, -\code{generate_pretty_table_wfhz()} and \code{generate_pretty_table_muac()} does that -for you so you already. +\code{generate_pretty_table_mfaz()}, \code{generate_pretty_table_wfhz()} and +\code{generate_pretty_table_muac()} are useful to getting the output returned from +the plausibility auditors into a presentable format. They convert scientific +notation, round values and rename columns to meaningful names. 
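Since the formatted table is meant to be shared, it can be written straight to a file once generated; a sketch assuming the WFHZ chain from the examples below and that the returned object is a plain data frame (the file name is made up):

ptab <- anthro.01 |>
  process_wfhz_data(sex = sex, weight = weight, height = height, .recode_sex = TRUE) |>
  check_plausibility_wfhz(
    sex = sex, age = age, weight = weight, height = height,
    flags = flag_wfhz, area = area
  ) |>
  generate_pretty_table_wfhz()

## Write the formatted plausibility report out for sharing ----
write.csv(ptab, "wfhz_plausibility_report.csv", row.names = FALSE)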
} \examples{ -## Plausibility check on MFAZ ---- +## Audit the plausibility of MFAZ data ---- anthro.01 |> process_age( @@ -54,7 +53,7 @@ area = area ) |> generate_pretty_table_mfaz() -## Plausibility check on absolute MUAC ---- +## Audit the plausibility of absolute MUAC values ---- anthro.01 |> process_muac_data( @@ -72,7 +71,7 @@ muac = muac ) |> generate_pretty_table_muac() -## Plausibility check on WFHZ ---- +## Audit the plausibility of WFHZ data ---- anthro.01 |> process_wfhz_data( diff --git a/man/process_age.Rd b/man/process_age.Rd index 52e578b..4cb2e76 100644 --- a/man/process_age.Rd +++ b/man/process_age.Rd @@ -2,38 +2,37 @@ % Please edit documentation in R/age.R \name{process_age} \alias{process_age} -\title{Transform age in months and age in days with a data frame} +\title{Process age} \usage{ process_age(df, svdate = NULL, birdate = NULL, age) } \arguments{ -\item{df}{The input data frame.} +\item{df}{Input data frame holding the required variables.} -\item{svdate, birdate}{Vectors containing dates. \code{svdate} refers to the day, month -and year when the data was collected; while \code{birdate} refers to the date when the -child was born (birth-date). By default, both arguments are \code{NULL}. This is -makes \code{process_age()} work even in data sets where either survey date or birth- -data is not available, so the \code{process_age()} works on already given age variable.} +\item{svdate}{A vector of class "Date" holding values corresponding to +the data collection date. Default is \code{NULL}.} -\item{age}{A numeric vector containing already given age in months, usually an -integer in the input data as it is estimated using local event calendars. -\code{age} will typically be available on a particular row when \code{birth_date} of -that same row is missing.} +\item{birdate}{A vector of class "Date" holding values corresponding to +the child's date of birth. Default is \code{NULL}.} + +\item{age}{A numeric vector holding age values in months, usually estimated +using local event calendars.} } \value{ -A data frame of the same length as the input data frame, but of a -different width. If \code{svdate} or \code{birdate} are available, two new vectors are added -to the data frame: \code{age} in months with two decimal places and \code{age_day} which -is age in days with decimal two decimal places. +A data frame of the same length as the input with an additional +column. A new variable, \code{age_day}, is added to the output data frame whilst +the \code{age} variable gets filled where applicable, and then any values outside +the range of 6.0 to 59.99 months get replaced with \code{NA}. } \description{ -\code{process_age()} helps you get the variable age in the right format and ready -to be used for downstream workflow, i.e., get z-scores, as well as exclude -age values that are out-of-range. +\code{process_age()} helps you to get the variable age in the format needed for +the analyses in the downstream workflow. Fundamentally, it calculates age in +months from on the basis of the difference between the data collection date +and the child's date of birth and then censors age values that are out of range. 
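`process_age()` appears to do the same date arithmetic as `compute_age_in_months()`, which is documented earlier in this patch as working inside `dplyr::mutate()`; a small sketch of that lower-level use, with made-up dates:

library(dplyr)

## Hypothetical collection and birth dates ----
x <- data.frame(
  dos = as.Date(c("2023-06-15", "2023-06-15")),
  dob = as.Date(c("2022-01-10", "2017-01-10"))
)

## Age in months; values outside 6.0 to 59.99 months become NA ----
x |>
  mutate(age = compute_age_in_months(surv_date = dos, birth_date = dob))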
} \examples{ -# Have a sample data ---- +## A sample data ---- df <- data.frame( survy_date = as.Date(c( "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), @@ -42,8 +41,12 @@ birthdate = as.Date(c( age = c(NA, 36, NA, NA, NA) ) -## Apply function ---- +## Apply the function ---- df |> -process_age(svdate = "survy_date", birdate = "birthdate", age = age) +process_age( +svdate = "survy_date", +birdate = "birthdate", +age = age +) } diff --git a/man/process_muac_data.Rd b/man/process_muac_data.Rd deleted file mode 100644 index b579ddd..0000000 --- a/man/process_muac_data.Rd +++ /dev/null @@ -1,86 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_processors.R -\name{process_muac_data} -\alias{process_muac_data} -\title{Process MUAC data a get it ready for analyses} -\usage{ -process_muac_data( - df, - sex, - muac, - age = NULL, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = c("cm", "mm", "none") -) -} -\arguments{ -\item{df}{The input data frame with variables sex, age and MUAC.} - -\item{sex}{A vector storing values about whether the child is a boy or a girl. -The variable name must be named sex, otherwise it will not work.} - -\item{muac}{A vector storing crude MUAC values.} - -\item{age}{A vector storing values about child's age in months. The variable -name must be named age, otherwise it will not work. For instance, if given as -following: age = months it will not work.} - -\item{.recode_sex}{Logical. It asks whether you should recode your sex variable -to the required shape to use in \code{process_muac_data()}. The default values for -sex are 1 for boys and 2 for girls. Setting \code{.recode_sex = TRUE} works on "m" -and "f" values. If your vector is in any different shape, you should put it in -"m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -SMART software, then you should leave \code{.recode_sex} at its default: \code{TRUE}.} - -\item{.recode_muac}{Logical. Choose between \code{TRUE} or \code{FALSE} if you wish or -not to recode the MUAC variable into the required format to work on.} - -\item{unit}{A choice of the units to which you wish to convert your MUAC -variable into.} -} -\value{ -A data frame of the same length as the input data, but with a -different width as explained:When \code{age} is available in the input data and -supplied, \code{process_muac_data} will return as output a data frame with two -new variables \code{mfaz} and \code{flags}. \code{mfaz} stores MUAC-for-age z-score (MFAZ) -values and \code{flags} tells you whether a given z-score is an outlier or not. -This job is done by \code{\link[=flag_outliers]{flag_outliers()}}. If age is not available in the input -data, therefore not possible to supply in this function, \code{process_muac_data} -will only return \code{flags}. This will refer to flags based on crude MUAC. -} -\description{ -\code{process_muac_data()} gets your input data ready for downstream MUAC related -analysis. 
-} -\examples{ - -## Have a sample data ---- - -df <- data.frame( - survey_date = as.Date(c( - "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), - birthdate = as.Date(c( - "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), - age = c(NA, 36, NA, NA, NA), - sex = c("m", "f", "m", "m", "f"), - muac = c(110, 130, 300, 123, 125) - ) - - ## Apply function ---- - df |> - process_age( - svdate = "survey_date", - birdate = "birthdate", - age = age - ) |> - process_muac_data( - sex = sex, - age = "age", - muac = muac, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = "cm" - ) - -} diff --git a/man/process_wfhz_data.Rd b/man/process_wfhz_data.Rd deleted file mode 100644 index b099af3..0000000 --- a/man/process_wfhz_data.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_processors.R -\name{process_wfhz_data} -\alias{process_wfhz_data} -\title{Process Weight-for-Height data get it ready for analyses} -\usage{ -process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE) -} -\arguments{ -\item{df}{The input data frame with variables sex, age and MUAC.} - -\item{sex}{A vector storing values about whether the child is a boy or a girl.} - -\item{weight, height}{Vectors storing weight values in kilograms and height -values in centimeters, respectively.} - -\item{.recode_sex}{Logical. It asks whether you should recode your sex variable -to the required shape to use in \code{process_wfhz_data()}. The default values for -sex are 1 = boys and 2 = girls. Setting \code{.recode_sex = TRUE} works on "m" -and "f" values. If your vector is in any different shape, you should put it in -"m" and "f" or right away to 1 or 2. If you are using data exported from ENA for -SMART software, then you should leave \code{.recode_sex} at its default: \code{TRUE}.} -} -\value{ -A data frame of the same length as the input data, but with a different -width: two new variables \code{wfhz} and \code{flags}. \code{wfhz} stores weight-for-height -z-score values with three decimal places. \code{flags} tells you whether a given -z-score is an outlier or not. This job is done by \code{\link[=flag_outliers]{flag_outliers()}}. -} -\description{ -\code{process_wfhz_data()} gets your input data ready for downstream WHZ related -analysis. -} -\examples{ -## Have a sample data ---- -anthro.01 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) - -} diff --git a/man/raters.Rd b/man/raters.Rd new file mode 100644 index 0000000..8c22ee1 --- /dev/null +++ b/man/raters.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_raters.R +\name{classify_percent_flagged} +\alias{classify_percent_flagged} +\alias{classify_sd} +\title{Rate the proportion of flagged values in the data and the magnitude of the +standard deviation} +\usage{ +classify_percent_flagged(p, type = c("mfaz", "whz", "crude")) + +classify_sd(sd, type = c("zscore", "crude")) +} +\arguments{ +\item{p}{A numeric vector containing the proportions of flagged values} + +\item{type}{The indicator to be used for the rating. A choice between "mfaz" +for MFAZ, "whz" for WFHZ and "crude" for crude MUAC.} + +\item{sd}{A numeric vector containing values for standard deviation.} +} +\value{ +A character vector with the rating results. 
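+
+For quick orientation, a possible call is sketched below (a sketch only, not a
+definitive usage: it assumes a wrangled data frame \code{df} holding \code{mfaz}
+and \code{flag_mfaz} columns):
+
+\preformatted{
+p <- mean(df$flag_mfaz, na.rm = TRUE)  # proportion of flagged values
+classify_percent_flagged(p, type = "mfaz")
+classify_sd(sd(df$mfaz, na.rm = TRUE), type = "zscore")
+}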
+}
+\description{
+\code{classify_percent_flagged()} rates how high the proportion of flagged values
+is in your data set, while \code{classify_sd()} rates the magnitude of the standard
+deviation. Both apply to the WFHZ, the MFAZ and the absolute MUAC values.
+}
+\details{
+The rating categories are: "Excellent", "Good", "Acceptable", "Problematic".
+The cut-offs of the WFHZ are as in the \href{https://smartmethodology.org/}{SMART Methodology}. As for the MFAZ and the
+absolute MUAC values, the maximum acceptable limit is 2\%, as recommended
+by \href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}.
+Cut-offs for crude MUAC are based on the
+\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC AMN guidelines}.
+}
diff --git a/man/recode_muac.Rd
index a53de0d..3ca438a 100644
--- a/man/recode_muac.Rd
+++ b/man/recode_muac.Rd
@@ -1,39 +1,37 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data_processors.R
+% Please edit documentation in R/wranglers.R
\name{recode_muac}
\alias{recode_muac}
-\title{Recode crude MUAC variable into either centimeters or millimeters}
+\title{Convert MUAC values to either centimeters or millimeters}
\usage{
recode_muac(muac, unit = c("cm", "mm"))
}
\arguments{
-\item{muac}{A numeric vector storing values for MUAC that can be in centimeters
-or in millimeters.}
+\item{muac}{A numeric vector holding the absolute MUAC values.}

-\item{unit}{A choice of the units to which you wish to convert your MUAC
-variable into.}
+\item{unit}{A choice of the unit you wish to convert the MUAC
+values into.}
}
\value{
-A transformed vector into the unit you wish to have.
+A numeric vector of the same length as the input, with values converted
+into your chosen unit.
}
\description{
-Sometimes, a vector containing MUAC values may be in centimeters or in
-millimeters. You may want to get in the right format to use with
-\link[zscorer:addWGSR]{zscorer::addWGSR} or \code{\link[nipnTK:digitPreference]{nipnTK::digitPreference()}}. \code{recode_muac()} helps you
-getting the vector in the right format for the job! It works inside works
-inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.
+Recode the MUAC values into either centimeters or millimeters as required.
+\code{recode_muac()} works inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.
}
\examples{
-# Have an input data with muac in mm ----
+
+## A sample of MUAC data in millimeters ----
muac <- seq(90, 250, by = 4)

-# Apply recode ----
+## Apply the function ----
recode_muac(muac, unit = "cm")

-# Have an input data with muac in mm ----
+## A sample of MUAC data in centimeters ----
muac <- seq(9.0, 25.0, by = 0.2)

-# Apply recode ----
+## Apply the function ----
recode_muac(muac, unit = "mm")

}
diff --git a/man/remove_flags.Rd
deleted file mode 100644
index b061846..0000000
--- a/man/remove_flags.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data_processors.R
-\name{remove_flags}
-\alias{remove_flags}
-\title{Remove detected outliers}
-\usage{
-remove_flags(x, unit = c("zscore", "crude"))
-}
-\arguments{
-\item{x}{A numeric vector containing zscore or crude MUAC values.}
-
-\item{unit}{A choice of the units to which you wish remove flags on. variable into.}
-}
-\value{
-A vector of same size, with flagged data replaced by \code{NA}s.
-}
-\description{
-\code{remove_flags()} removes flags detected by \code{\link[=flag_outliers]{flag_outliers()}}. It helps you
-compute your statistics when flags needs to be removed, such as in standard
-deviation.
-}
diff --git a/man/scorer.Rd
new file mode 100644
index 0000000..060ef04
--- /dev/null
+++ b/man/scorer.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/quality_scorers.R
+\name{assign_penalty_points_flags_and_sd}
+\alias{assign_penalty_points_flags_and_sd}
+\alias{assign_penalty_points_age_sex_ratio}
+\alias{assign_penalty_points_skew_kurt}
+\title{Score the rating of the proportion of flagged data, the magnitude of the standard
+deviation, skewness, kurtosis and the p-values of the sex and age ratio tests}
+\usage{
+assign_penalty_points_flags_and_sd(x)
+
+assign_penalty_points_age_sex_ratio(x)
+
+assign_penalty_points_skew_kurt(x)
+}
+\arguments{
+\item{x}{A character vector holding the test classifications for the proportion
+of flagged data, the magnitude of the standard deviation, the p-values of the
+age and sex ratio tests, as well as the results of skewness and kurtosis tests.}
+}
+\value{
+A numeric vector with the corresponding score.
+}
+\description{
+\code{assign_penalty_points_flags_and_sd()} scores the rating of the proportion of flagged
+values and of the standard deviation; the other two do the same for the age and sex
+ratio tests and for skewness and kurtosis, as per the SMART scoring criteria.
+}
+\details{
+The ranking is as in \href{https://smartmethodology.org/}{SMART Plausibility checks}.
+}
diff --git a/man/wrangler.Rd
new file mode 100644
index 0000000..2f0ea6e
--- /dev/null
+++ b/man/wrangler.Rd
@@ -0,0 +1,107 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wranglers.R
+\name{process_muac_data}
+\alias{process_muac_data}
+\alias{process_wfhz_data}
+\title{Process and censor weight-for-height and MUAC data}
+\usage{
+process_muac_data(
+  df,
+  sex,
+  muac,
+  age = NULL,
+  .recode_sex = TRUE,
+  .recode_muac = TRUE,
+  unit = c("cm", "mm", "none")
+)
+
+process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE)
+}
+\arguments{
+\item{df}{The input data frame with the required variables.}

+\item{sex}{A numeric or character vector of child's sex. Coded values should
+either be 1 or "m" for boy and 2 or "f" for girl. The variable name must be
+sex, otherwise it will not work.}

+\item{muac}{A numeric vector holding the absolute MUAC values.}

+\item{age}{A numeric vector of child's age in months. It must be named age,
+otherwise it will not work. For instance, if supplied as age = months,
+it will not work.}

+\item{.recode_sex}{Logical. It asks whether sex should be recoded. In the end,
+the variable sex will have values coded as 1 for boy and 2 for girl. Setting
+\code{.recode_sex = TRUE} works on "m" and "f" values. If your vector is coded
+differently, make sure to recode it to "m" and "f" or to 1 and 2 beforehand.}

+\item{.recode_muac}{Logical. Set to \code{TRUE} if you wish to recode the MUAC
+values into either centimeters or millimeters; otherwise set to \code{FALSE}.}

+\item{unit}{A choice of the unit you wish to convert the MUAC
Choose "cm" for centimeters, "mm" for millimeters and "none" +to leave as it is.} + +\item{weight}{A numeric vector holding the weight values of the child in +kilograms.} + +\item{height}{A numeric vector holding the height values of the child in +centimeters.} +} +\value{ +A data frame of the same length as the input with additional +columns: one named \code{wfhz} or \code{mfaz} that holds the zscore values, and the other +holding dummy values: 1 (is a flag) and 0 (is not a flag). For the +\code{process_muac_data} function, when age is not supplied, only \code{flag_muac} is +added. This refers to flags based based on absolute MUAC values as recommended by +\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. +} +\description{ +This is the job of \code{process_wfhz_data} and \code{process_muac_data()}. They are +responsible for computing the weight-for-height and the muac-for-age z-scores +respectively, and censor the data by flagging outliers based on the SMART flags. +For the latter, if age is not supplied, the function censors the absolute MUAC +values. +} +\examples{ + +## An example application of `process_wfhz_data()` ---- + +anthro.01 |> +process_wfhz_data( +sex = sex, +weight = weight, +height = height, +.recode_sex = TRUE +) + +## An example application of `process_muac_data()` ---- + +### Sample data ---- +df <- data.frame( + survey_date = as.Date(c( + "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), + birthdate = as.Date(c( + "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), + age = c(NA, 36, NA, NA, NA), + sex = c("m", "f", "m", "m", "f"), + muac = c(110, 130, 300, 123, 125) + ) + + ### The application of the function ---- + df |> + process_age( + svdate = "survey_date", + birdate = "birthdate", + age = age + ) |> + process_muac_data( + sex = sex, + age = "age", + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + unit = "cm" + ) + +} diff --git a/tests/testthat/test-quality_checkers.R b/tests/testthat/test-quality_auditors.R similarity index 100% rename from tests/testthat/test-quality_checkers.R rename to tests/testthat/test-quality_auditors.R diff --git a/tests/testthat/test-classifiers.R b/tests/testthat/test-quality_raters.R similarity index 100% rename from tests/testthat/test-classifiers.R rename to tests/testthat/test-quality_raters.R diff --git a/tests/testthat/test-data_processors.R b/tests/testthat/test-wranglers.R similarity index 100% rename from tests/testthat/test-data_processors.R rename to tests/testthat/test-wranglers.R From 3144f119211dabf6dc5ba9e465dd07d5e9689256 Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Sat, 12 Oct 2024 00:39:45 +0200 Subject: [PATCH 2/9] revise function docs 2 --- NAMESPACE | 1 - R/case_definitions.R | 2 +- R/prevalence_combined.R | 72 +++-- R/prevalence_mfaz.R | 85 +----- R/prevalence_muac.R | 67 +---- R/prevalence_wfhz.R | 327 ++++++++++------------- R/quality_auditors.R | 2 +- man/apply_probit_approach.Rd | 23 -- man/auditor.Rd | 2 +- man/case_definition.Rd | 2 +- man/combined_prevalence.Rd | 61 ++--- man/compute_mfaz_prevalence.Rd | 69 ----- man/compute_muac_prevalence.Rd | 69 ----- man/compute_pps_based_mfaz_prevalence.Rd | 33 --- man/compute_pps_based_muac_prevalence.Rd | 38 --- man/compute_pps_based_wfhz_prevalence.Rd | 33 --- man/compute_probit_prevalence.Rd | 30 --- man/compute_wfhz_prevalence.Rd | 81 ------ man/prevalence.Rd | 105 ++++++++ man/probit-method.Rd | 38 +++ 20 files changed, 362 insertions(+), 778 deletions(-) delete mode 100644 
man/apply_probit_approach.Rd
delete mode 100644 man/compute_mfaz_prevalence.Rd
delete mode 100644 man/compute_muac_prevalence.Rd
delete mode 100644 man/compute_pps_based_mfaz_prevalence.Rd
delete mode 100644 man/compute_pps_based_muac_prevalence.Rd
delete mode 100644 man/compute_pps_based_wfhz_prevalence.Rd
delete mode 100644 man/compute_probit_prevalence.Rd
delete mode 100644 man/compute_wfhz_prevalence.Rd
create mode 100644 man/prevalence.Rd
create mode 100644 man/probit-method.Rd
diff --git a/NAMESPACE
index 02dcf27..b3fa0d8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,7 +7,6 @@ export(check_plausibility_wfhz)
export(check_sample_size)
export(classify_overall_quality)
export(compute_combined_prevalence)
-export(compute_mfaz_prevalence)
export(compute_muac_prevalence)
export(compute_quality_score)
export(compute_wfhz_prevalence)
diff --git a/R/case_definitions.R
index 4ef49e2..988ef09 100644
--- a/R/case_definitions.R
+++ b/R/case_definitions.R
@@ -23,7 +23,7 @@
#' `"sam"` the codes 1 would mean yes for severe wasting.
#'
#' @details
-#' Use `define_wasting()` to add the case-definitions in your input data frame.
+#' Use `define_wasting()` to add the case-definitions into your data frame.
#'
#' @rdname case_definition
#'
diff --git a/R/prevalence_combined.R
index 149a086..9a1efe0 100644
--- a/R/prevalence_combined.R
+++ b/R/prevalence_combined.R
@@ -63,50 +63,46 @@ compute_pps_based_combined_prevalence <- function(df,
#'
#'
-#' Compute combined prevalence of acute malnutrition
+#' Compute prevalence of wasting on the basis of the combined case-definition
#'
#' @description
-#' `compute_combined_prevalence()` is handy function to compute the combined prevalence of
-#' acute malnutrition using the WFHZ and the absolute values of MUAC and edema for case
-#' definition. Under the hood, before prevalence computations begin, it first evaluates the
-#' status of WFHZ, MFAZ's standard deviation and age ratio test, as documented in
-#' [compute_wfhz_prevalence()] and [compute_muac_prevalence()]. Then, it decides on the
-#' appropriate analysis approach to employ depending on the outcome of the aforementioned
-#' checks: (i) if either WFHZ, MFAZ standard deviation as well as age ratio test are not
-#' simultaneously problematic, a complex sample-based prevalence analysis (for a two-stage
-#' PPS cluster sampling) is computed; (ii) all other possibilities will involve either one
-#' of the z-scores or the age ratio test being problematic, thus NA (for Not Applicable)
-#' get thrown to output table.
-#'
-#' A concept of "combined flags" is introduced here. This consists on creating a new vector
-#' (cflags) of the same length as the input vectors (wfhz_flags and mfaz_flags) and assesses
-#' if any element of either input vector is a flag (1), then that element is labelled as
-#' flag (1) in the "cflags" vector, otherwise is not flag (0). This ensures that all
-#' flagged observations in the WFHZ data and in MFAZ data are excluded for the combined
-#' prevalence analysis.
+#' `compute_combined_prevalence()` is a handy function for calculating the
+#' combined prevalence of wasting in accordance with the complex sample design
+#' properties inherent to surveys.
#'
#' @param df A data frame object returned by [process_muac_data()] and [process_wfhz_data()].
-#' The process_***_data function will have to used both to prepare the input data to be used
-#' in the `compute_combined_prevalence()`.
The order of which comes first does not matter,
+#' Both wranglers need to be used to prepare the data to be used in
+#' `compute_combined_prevalence()`. The order of which comes first does not matter,
+#' however, since the muac data processor transforms MUAC values into centimeters, those
#' need to be put back into millimeter. This can be achieved my using [recode_muac()] inside
-#' [dplyr::mutate()] or [base::transform()] (see example number 3 below).
+#' [dplyr::mutate()] or [base::transform()].
+#'
+#' @param .wt A numeric vector holding final survey weights. When set to `NULL`,
+#' the function assumes a self-weighted survey, as in the ENA for SMART software;
+#' otherwise, when supplied, a weighted analysis is computed.
+#'
+#' @param .edema A character vector indicating if an observation has bilateral
+#' edema or not. The codes are "y" for presence and "n" for absence of bilateral
+#' edema. Default is `NULL`.
+#'
+#' @param .summary_by A character vector containing data on the geographical areas
+#' where the data was collected and for which the analysis should be performed at.
+#'
+#' @returns A table with the descriptive statistics about wasting.
+#'
+#' @details
+#' The concept of "combined flags" is introduced in this function. It consists of
+#' taking the `flag_wfhz` and `flag_mfaz` vectors, generated from the MUAC and
+#' WFHZ wranglers, and checking if any value in either vector is flagged. If flagged,
+#' the value is marked as a flag in the "cflags" vector; otherwise, it is not flagged
+#' (see table below). This ensures that all flagged observations from both WFHZ
+#' and MFAZ data are excluded from the combined prevalence analysis.
+#'
+#' | **flag_wfhz** | **flag_mfaz** | **cflags** |
+#' | :---: | :---: | :---: |
+#' | 1 | 0 | 1 |
+#' | 0 | 1 | 1 |
+#' | 0 | 0 | 0 |
#'
#' @examples
#'
diff --git a/R/prevalence_mfaz.R
index 454f068..185bd32 100644
--- a/R/prevalence_mfaz.R
+++ b/R/prevalence_mfaz.R
@@ -1,29 +1,4 @@
#'
-#' Compute a MUAC-for-age z-score based prevalence estimates of data collected from a two-stage
-#' cluster survey sample design, with the first stage sampling done with Probability
-#' Proportional to the size of population
-#'
-#' @description
-#' Create a survey design object using the [srvyr::as_survey_design()] and then calculate
-#' the survey means as well the sum of positive cases.
-#'
-#' @param df A data frame object returned by [process_muac_data()].
-#' this will contain the wrangled vectors that are read inside the function.
-#'
-#' @param .wt A numeric vector containing survey weights.
If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. -#' -#' @returns A tibble of size depending on the number of groups of the vector given to -#' `.summary_by` or if set to NULL, and of length 17. #' #' compute_pps_based_mfaz_prevalence <- function(df, @@ -82,67 +57,9 @@ compute_pps_based_mfaz_prevalence <- function(df, } #' -#' Compute acute malnutrition prevalence based on MUAC-for-age z-scores (MFAZ) -#' -#' @description -#' `compute_mfaz_prevalence()` is a handy function designed to dynamically compute acute -#' malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -#' WFHZ's standard deviation (SD) after removing flags, and then it decides on the -#' appropriate prevalence analysis approach to follow: if SD is anything between excellent -#' and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -#' cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -#' with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -#' was also designed to work around survey weights. -#' The function also super handy to work on large data sets with multiple survey areas. For -#' this, the aforementioned conditionals are checked for each survey area in a summarized -#' data frame and prevalence get computed according to each row's scenario. -#' -#' @param df A data frame object returned by [process_muac_data()]. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. If you are working on a single survey data, set -#' .summary_by = NULL (default). -#' -#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -#' areas in the data set) x 17. 
-#' -#' @examples -#' -#' ## When .summary_by = NULL ---- -#' compute_mfaz_prevalence( -#' df = anthro.04, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' ## When .summary_by is not set to NULL ---- -#' compute_mfaz_prevalence( -#' df = anthro.04, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = province -#' ) #' -#' ## When a weighted analysis is needed ---- -#' ### This example uses a different data set with survey weights ---- -#' compute_mfaz_prevalence( -#' df = anthro.02, -#' .wt = "wtfactor", -#' .edema = edema, -#' .summary_by = province -#' ) +#' @rdname prevalence #' -#' @export #' compute_mfaz_prevalence <- function(df, .wt = NULL, diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R index 51a11f4..bacde30 100644 --- a/R/prevalence_muac.R +++ b/R/prevalence_muac.R @@ -149,31 +149,6 @@ compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { #' #' -#' Compute MUAC based prevalence estimates of data collected from a two-stage cluster -#' survey sample design, with the first stage sampling done with Probability Proportional -#' to the size of population -#' -#' @description -#' Create a survey design object using the [srvyr::as_survey_design()] and then calculate -#' the survey means as well the sum of positive cases. -#' -#' @param df A data frame object returned by [process_muac_data()]. -#' this will contain the wrangled vectors that are read inside the function. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. -#' -#' @returns A tibble of size depending on the number of groups of the vector given to -#' `.summary_by` or if set to NULL, and of length 17. -#' #' #' compute_pps_based_muac_prevalence <- function(df, @@ -226,44 +201,13 @@ compute_pps_based_muac_prevalence <- function(df, #' #' +#' @rdname prevalence #' -#' Compute acute malnutrition prevalence based on MUAC (the absolute values) -#' -#' @description -#' `compute_muac_prevalence()` is a handy function designed to dynamically compute acute -#' malnutrition's prevalence using the absolute values of MUAC, however using the MFAZ for -#' quality checks before advancing to prevalence computations. Under the hood, the function -#' first checks the status of MFAZ's standard deviation (SD) after removing flags, and -#' the status of age ratio among children aged 6:23 vs 24:59 months. Then it decides on the -#' appropriate prevalence analysis approach to follow: (i) if SD & age ratio are both not -#' problematic, a complex sample-based prevalence analysis (for a two-stage PPS -#' cluster sampling) is computed; (ii) if MFAZ's SD is not problematic, but age ratio test -#' is, the CDC/SMART MUAC tool weighting approach is used to compute the prevalence; (iii) -#' lastly, if MFAZ's SD is problematic even if age ratio test is not, no prevalence -#' analysis is computed and NA (of Not Applicable) are thrown. 
-#' The function also super handy to work on large data sets with multiple survey areas. For
-#' this, the aforementioned conditionals are checked for each survey areas in a summarized
-#' data frame and prevalence get computed according to each row's scenario.
-#'
-#' @param df A data frame object returned by [process_muac_data()].
-#'
-#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and
-#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the
-#' weighted analysis will be computed with weighted population returned.
-#'
-#' @param .edema A character vector containing child's status on edema with "n" for no
-#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned
-#' codes.
-#' @param .summary_by A character vector containing data on the geographical areas where
-#' the data was collected. If you are working on a single survey data, set
-#' .summary_by = NULL (default). If this argument is not used, the function will error.
+#' @examples
#'
-#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of
-#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical
-#' areas in the data set) x 17.
+#' ## An example of application of `compute_muac_prevalence()` ----
#'
-#' @examples
-#' ## When .summary.by = NULL ----
+#' ### When .summary_by = NULL ----
#'
#' x <- compute_muac_prevalence(
#' df = anthro.04,
@@ -274,7 +218,7 @@ compute_pps_based_muac_prevalence <- function(df,
#'
#' print(x)
#'
-#' ## When .summary_by is not set to NULL ----
+#' ### When .summary_by is not set to NULL ----
#'
#' p <- compute_muac_prevalence(
#' df = anthro.04,
@@ -285,7 +229,6 @@ compute_pps_based_muac_prevalence <- function(df,
#'
#' print(p)
#'
-#'
#' @export
#'
compute_muac_prevalence <- function(df,
diff --git a/R/prevalence_wfhz.R
index 0997a22..0fb41fb 100644
--- a/R/prevalence_wfhz.R
+++ b/R/prevalence_wfhz.R
@@ -1,29 +1,132 @@
#'
-#' Compute a weight-for-height based prevalence estimates of data collected from a two-stage
-#' cluster survey sample design, with the first stage sampling done with Probability
-#' Proportional to the size of population
+#' Compute the prevalence of wasting on the basis of WFHZ or MFAZ or MUAC
#'
#' @description
-#' Create a survey design object using the [srvyr::as_survey_design()] and then calculate
-#' the survey means as well the sum of positive cases.
+#' The prevalence is calculated in accordance with the complex sample design
+#' properties inherent to surveys. This includes weighting the survey data where
+#' applicable and applying PROBIT method estimation of prevalence (for WFHZ and
+#' MFAZ) when standard deviation is problematic. This is as in the SMART Methodology.
#'
-#' @param df A data frame object returned by [process_wfhz_data()].
-#' this will contain the wrangled vectors that are read inside the function.
+#' @param df A data frame object with the required variables already wrangled.
#'
-#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and
-#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the
-#' weighted analysis will be computed with weighted population returned.
+#' @param .wt A numeric vector holding final survey weights. When set to `NULL`,
+#' the function assumes a self-weighted survey, as in the ENA for SMART software;
+#' otherwise, when supplied, a weighted analysis is computed.
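+#'
+#' As a rough illustration of what the weighted, design-based analysis involves
+#' (a sketch under assumptions, not this package's internal code: `cluster` and
+#' `gam` are hypothetical column names, while `wtfactor` is borrowed from the
+#' examples further down):
+#'
+#' # design <- srvyr::as_survey_design(df, ids = cluster, weights = wtfactor)
+#' # design |> srvyr::summarise(gam_p = srvyr::survey_mean(gam, vartype = "ci"))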
#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. +#' @param .edema A character vector indicating if an observation has bilateral +#' edema or not. The codes are "y" for presence and "n" for absence of bilateral +#' edema. Default is `NULL`. +#' +#' @param .summary_by A character vector containing data on the geographical areas +#' where the data was collected and for which the analysis should be performed at. +#' +#' @returns A table with the descriptive statistics about wasting. +#' +#' @examples +#' ## An example of application of `compute_wfhz_prevalence()` ---- +#' +#' ### When .summary_by = NULL ---- +#' anthro.03 |> +#' process_wfhz_data( +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) |> +#' compute_wfhz_prevalence( +#' .wt = NULL, +#' .edema = edema, +#' .summary_by = NULL +#' ) +#' +#' ### When .summary_by is not set to NULL ---- +#' +#' anthro.03 |> +#' process_wfhz_data( +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) |> +#' compute_wfhz_prevalence( +#' .wt = NULL, +#' .edema = edema, +#' .summary_by = district +#' ) +#' +#' ### When a weighted analysis is needed ---- +#' +#' anthro.02 |> +#' compute_wfhz_prevalence( +#' .wt = "wtfactor", +#' .edema = edema, +#' .summary_by = province +#' ) +#' +#' @rdname prevalence +#' +#' @export +#' +compute_wfhz_prevalence <- function(df, + .wt = NULL, + .edema = NULL, + .summary_by = NULL) { + + ## Difuse argument .summary_by ---- + .summary_by <- rlang::enquo(.summary_by) + + ## An empty vector type list ---- + results <- list() + + if (!rlang::quo_is_null(.summary_by)) { + ## Grouped summary of standard deviation classification ---- + x <- summarise( + df, + std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)), + .by = !!.summary_by + ) + } else { + ## Non-grouped summary ---- + x <- summarise( + df, + std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)) + ) + } + + ## Iterate over data frame to compute prevalence according to the SD ---- + for (i in seq_len(nrow(x))) { + if (!rlang::quo_is_null(.summary_by)) { + area <- dplyr::pull(x, !!.summary_by)[i] + data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) + } else { + data <- df + } + + std <- x$std[i] + if (std != "Problematic") { + ### Compute standard complex sample based prevalence analysis ---- + result <- compute_pps_based_wfhz_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) + } else { + ### Compute grouped PROBIT based prevalence ---- + if (!rlang::quo_is_null(.summary_by)) { + result <- compute_probit_prevalence(data, !!.summary_by, .for = "wfhz") + } else { + ### Compute non-grouped PROBIT based prevalence ---- + result <- compute_probit_prevalence(data, .for = "wfhz") + } + } + results[[i]] <- result + } + dplyr::bind_rows(results) |> + dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> + dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> + dplyr::relocate(.data$mam_p, .after = .data$mam_n) +} + + + #' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. 
#'
-#' @param .summary_by A character vector containing data on the geographical areas where
-#' the data was collected. This is to group the survey design object into different
-#' geographical areas in the data and allow for summaries to be computed for each of them.
-#'
-#' @returns A tibble of size depending on the number of groups of the vector given to
-#' `.summary_by` or if set to NULL, and of length 17.
#'
#'
compute_pps_based_wfhz_prevalence <- function(df,
@@ -84,19 +187,34 @@ compute_pps_based_wfhz_prevalence <- function(df,
#'
#'
-#' Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach.
+#' Compute the prevalence of wasting on the basis of the PROBIT method.
#'
#' @description
-#' This approach is only applied for when WFHZ standard deviation's is problematic. The
-#' PROBIT approach estimates the prevalence of acute malnutrition indirectly by computing
-#' the area under the tail of the curve from negative infinitive to the given threshold
-#' through the cumulative normal distribution function using the mean and standard deviation.
+#' This approach is applied when the standard deviation of WFHZ is problematic.
+#' The PROBIT method estimates the prevalence of wasting indirectly by calculating
+#' the area under the tail of the curve, from negative infinity to
+#' the given threshold, using the cumulative normal distribution function with
+#' the mean and standard deviation as inputs.
+#'
+#' @param df A data frame object with the required variables already wrangled.
+#'
+#' @param x A numeric vector holding WFHZ or MFAZ values.
+#'
+#' @param .status A choice of the form of wasting for which the prevalence should
+#' be calculated.
+#'
+#' @param .summary_by A character vector containing data on the geographical areas where
+#' the data was collected. This is to group the survey design object into different
+#' geographical areas in the data and allow for summaries to be computed for each of them.
+#' Default is NULL.
+#'
+#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index you wish
+#' to compute PROBIT prevalence on.
#'
-#' @param x A double vector containing the z-score values
-#' @param .status A choice on the nutritional status you wish to apply the PROBIT approach
-#' on. Default is "gam" for global acute malnutrition.
+#' @returns A data frame with the prevalence. No confidence intervals are
+#' yielded.
#'
-#' @returns A numeric value (double) corresponding to the point prevalence estimate.
+#' @rdname probit-method
#'
#'
apply_probit_approach <- function(x, .status = c("gam", "sam")) {
@@ -111,30 +229,10 @@ apply_probit_approach <- function(x, .status = c("gam", "sam")) {
}

+
#'
#'
-#' Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach
-#'
-#' @description
-#' This function is a helper function used inside [compute_wfhz_prevalence()] and
-#' [compute_mfaz_prevalence()]. It is used to compute PROBIT based prevalence depending
-#' on the status of standard deviation. For more details, check the documentation of the
-#' aforementioned functions.
-#'
-#' @param df A data frame object returned by [process_wfhz_data()] or by [process_muac_data()]
-#' They will contain the wrangled vectors that are read inside the function.
-#'
-#' @param .summary_by A character vector containing data on the geographical areas where
-#' the data was collected. This is to group the survey design object into different
-#' geographical areas in the data and allow for summaries to be computed for each of them.
-#' Default is NULL.
-#'
-#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index you wish
-#' to compute PROBIT prevalence on.
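+#'
+#' A sketch of the idea behind the PROBIT estimate (illustrative only; the -2 and
+#' -3 z-score cut-offs and the object names are assumptions, not the package's
+#' internal code):
+#'
+#' # x: WFHZ or MFAZ values with flagged values already removed
+#' # gam <- pnorm(-2, mean = mean(x, na.rm = TRUE), sd = 1)
+#' # sam <- pnorm(-3, mean = mean(x, na.rm = TRUE), sd = 1)
+#' # mam <- gam - sam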
-#' -#' @returns A tibble with the PROBIT based point prevalence for global, severe and moderate -#' acute malnutrition. -#' +#' @rdname probit-method #' compute_probit_prevalence <- function(df, .summary_by = NULL, @@ -206,134 +304,3 @@ compute_probit_prevalence <- function(df, } ) } - -#' -#' Compute acute malnutrition prevalence based on weight-for-height z-scores (WFHZ), -#' MUAC-for-age z-scores (MFAZ), MUAC and combined -#' -#' @description -#' `compute_wfhz_prevalence()` is a handy function designed to dynamically compute acute -#' malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -#' WFHZ's standard deviation (SD) after removing flags, and then it decides on the -#' appropriate prevalence analysis approach to follow: if SD is anything between excellent -#' and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -#' cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -#' with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -#' was also designed to work around survey weights. -#' The function also super handy to work on large data sets with multiple survey areas. For -#' this, the aforementioned conditionals are checked for each survey areas in a summarized -#' data frame and prevalence get computed according to each row's scenario. -#' -#' @param df A data frame object returned by [process_wfhz_data()]. -#' -#' @param .wt A numeric vector containing survey weights. If set to NULL (default) and -#' the function will assume self weighted, like in ENA for SMART, otherwise if given, the -#' weighted analysis will be computed with weighted population returned. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. If you are working on a single survey data, set -#' .summary_by = NULL (default). -#' -#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -#' areas in the data set) x 17. 
-#' -#' @examples -#' -#' ## When .summary_by = NULL ---- -#' anthro.03 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' compute_wfhz_prevalence( -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' ## When .summary_by is not set to NULL ---- -#' anthro.03 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' compute_wfhz_prevalence( -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = district -#' ) -#' -#' ## When a weighted analysis is needed ---- -#' anthro.02 |> -#' compute_wfhz_prevalence( -#' .wt = "wtfactor", -#' .edema = edema, -#' .summary_by = province -#' ) -#' -#' @export -#' -compute_wfhz_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - - ## Difuse argument .summary_by ---- - .summary_by <- rlang::enquo(.summary_by) - - ## An empty vector type list ---- - results <- list() - - if (!rlang::quo_is_null(.summary_by)) { - ## Grouped summary of standard deviation classification ---- - x <- summarise( - df, - std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)), - .by = !!.summary_by - ) - } else { - ## Non-grouped summary ---- - x <- summarise( - df, - std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)) - ) - } - - ## Iterate over data frame to compute prevalence according to the SD ---- - for (i in seq_len(nrow(x))) { - if (!rlang::quo_is_null(.summary_by)) { - area <- dplyr::pull(x, !!.summary_by)[i] - data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) - } else { - data <- df - } - - std <- x$std[i] - if (std != "Problematic") { - ### Compute standard complex sample based prevalence analysis ---- - result <- compute_pps_based_wfhz_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) - } else { - ### Compute grouped PROBIT based prevalence ---- - if (!rlang::quo_is_null(.summary_by)) { - result <- compute_probit_prevalence(data, !!.summary_by, .for = "wfhz") - } else { - ### Compute non-grouped PROBIT based prevalence ---- - result <- compute_probit_prevalence(data, .for = "wfhz") - } - } - results[[i]] <- result - } - dplyr::bind_rows(results) |> - dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> - dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> - dplyr::relocate(.data$mam_p, .after = .data$mam_n) -} - diff --git a/R/quality_auditors.R b/R/quality_auditors.R index bc1f696..1ecd6ce 100644 --- a/R/quality_auditors.R +++ b/R/quality_auditors.R @@ -27,7 +27,7 @@ #' one area, you will still have to supply the corresponding column to `area` in #' `check_plausibility_mfaz()` or `check_plausibility_wfhz()`. #' -#' @returns A summarized table with the raw statistics and respective classification. +#' @returns A summary table of statistics with respective classification. 
#'
#' @examples
#'
diff --git a/man/apply_probit_approach.Rd
deleted file mode 100644
index 9e45a5a..0000000
--- a/man/apply_probit_approach.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/prevalence_wfhz.R
-\name{apply_probit_approach}
-\alias{apply_probit_approach}
-\title{Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach.}
-\usage{
-apply_probit_approach(x, .status = c("gam", "sam"))
-}
-\arguments{
-\item{x}{A double vector containing the z-score values}
-
-\item{.status}{A choice on the nutritional status you wish to apply the PROBIT approach
-on. Default is "gam" for global acute malnutrition.}
-}
-\value{
-A numeric value (double) corresponding to the point prevalence estimate.
-}
-\description{
-This approach is only applied for when WFHZ standard deviation's is problematic. The
-PROBIT approach estimates the prevalence of acute malnutrition indirectly by computing
-the area under the tail of the curve from negative infinitive to the given threshold
-through the cumulative normal distribution function using the mean and standard deviation.
-}
diff --git a/man/auditor.Rd
index b755618..4868ca4 100644
--- a/man/auditor.Rd
+++ b/man/auditor.Rd
@@ -35,7 +35,7 @@ one area, you will still have to supply the corresponding column to \code{area}
\item{height}{A numeric vector holding height measurements (in centimeters).}
}
\value{
-A summarized table with the raw statistics and respective classification.
+A summary table of statistics with respective classification.
}
\description{
\code{check_plausibility_wfhz()}, \code{check_plausibility_mfaz()}, and
diff --git a/man/case_definition.Rd
index 2c750cf..d50b238 100644
--- a/man/case_definition.Rd
+++ b/man/case_definition.Rd
@@ -54,7 +54,7 @@ Define if an observation is wasted on the basis of the criteria of WFHZ,
absolute MUAC values and combined case-definition
}
\details{
-Use \code{define_wasting()} to add the case-definitions in your input data frame.
+Use \code{define_wasting()} to add the case-definitions into your data frame.
}
\examples{
# MUAC-based case-definition ----
diff --git a/man/combined_prevalence.Rd
index 0103eeb..b6bdc4f 100644
--- a/man/combined_prevalence.Rd
+++ b/man/combined_prevalence.Rd
@@ -16,48 +16,43 @@ compute_combined_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
}
\arguments{
\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} and \code{\link[=process_wfhz_data]{process_wfhz_data()}}.
-The process_***_data function will have to used both to prepare the input data to be used
-in the \code{compute_combined_prevalence()}. The order of which comes first does not matter,
+Both wranglers need to be used to prepare the data to be used in
+\code{compute_combined_prevalence()}. The order of which comes first does not matter,
however, since the muac data processor transforms MUAC values into centimeters, those
need to be put back into millimeter. This can be achieved my using \code{\link[=recode_muac]{recode_muac()}} inside
-\code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}} (see example number 3 below).}
+\code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.}

-\item{.wt}{A numeric vector containing survey weights.
If set to NULL (default)
-the function will assume self weights, like in ENA for SMART, if otherwise given, the
-weighted analysis will be computed.}
+\item{.wt}{A numeric vector holding final survey weights. When set to \code{NULL},
+the function assumes a self-weighted survey, as in the ENA for SMART software;
+otherwise, when supplied, a weighted analysis is computed.}

-\item{.edema}{A character vector containing child's status on edema with "n" for no
-edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned
-codes.}
+\item{.edema}{A character vector indicating if an observation has bilateral
+edema or not. The codes are "y" for presence and "n" for absence of bilateral
+edema. Default is \code{NULL}.}

-\item{.summary_by}{A character vector containing data on the geographical areas where
-the data was collected. If you are working on a single survey data, set
-.summary_by = NULL (default). If this argument is not used, the function will error.}
+\item{.summary_by}{A character vector containing data on the geographical areas
+where the data was collected and for which the analysis should be performed at.}
}
\value{
-A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of
-1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical
-areas in the data set) x 17.
+A table with the descriptive statistics about wasting.
}
\description{
-\code{compute_combined_prevalence()} is handy function to compute the combined prevalence of
-acute malnutrition using the WFHZ and the absolute values of MUAC and edema for case
-definition. Under the hood, before prevalence computations begin, it first evaluates the
-status of WFHZ, MFAZ's standard deviation and age ratio test, as documented in
-\code{\link[=compute_wfhz_prevalence]{compute_wfhz_prevalence()}} and \code{\link[=compute_muac_prevalence]{compute_muac_prevalence()}}. Then, it decides on the
-appropriate analysis approach to employ depending on the outcome of the aforementioned
-checks: (i) if either WFHZ, MFAZ standard deviation as well as age ratio test are not
-simultaneously problematic, a complex sample-based prevalence analysis (for a two-stage
-PPS cluster sampling) is computed; (ii) all other possibilities will involve either one
-of the z-scores or the age ratio test being problematic, thus NA (for Not Applicable)
-get thrown to output table.
-
-A concept of "combined flags" is introduced here. This consists on creating a new vector
-(cflags) of the same length as the input vectors (wfhz_flags and mfaz_flags) and assesses
-if any element of either input vector is a flag (1), then that element is labelled as
-flag (1) in the "cflags" vector, otherwise is not flag (0). This ensures that all
-flagged observations in the WFHZ data and in MFAZ data are excluded for the combined
-prevalence analysis.
+\code{compute_combined_prevalence()} is a handy function for calculating the
+combined prevalence of wasting in accordance with the complex sample design
+properties inherent to surveys.
+}
+\details{
+The concept of "combined flags" is introduced in this function. It consists of
+taking the \code{flag_wfhz} and \code{flag_mfaz} vectors, generated from the MUAC and
+WFHZ wranglers, and checking if any value in either vector is flagged. If flagged,
+the value is marked as a flag in the "cflags" vector; otherwise, it is not flagged
+(see table below).
This ensures that all flagged observations from both WFHZ +and MFAZ data are excluded from the combined prevalence analysis.\tabular{ccc}{ + \strong{flag_wfhz} \tab \strong{flag_mfaz} \tab \strong{cflags} \cr + 1 \tab 0 \tab 1 \cr + 0 \tab 1 \tab 1 \cr + 0 \tab 0 \tab 0 \cr +} } \examples{ diff --git a/man/compute_mfaz_prevalence.Rd b/man/compute_mfaz_prevalence.Rd deleted file mode 100644 index 177c303..0000000 --- a/man/compute_mfaz_prevalence.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_mfaz.R -\name{compute_mfaz_prevalence} -\alias{compute_mfaz_prevalence} -\title{Compute acute malnutrition prevalence based on MUAC-for-age z-scores (MFAZ)} -\usage{ -compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default).} -} -\value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. -} -\description{ -\code{compute_mfaz_prevalence()} is a handy function designed to dynamically compute acute -malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -WFHZ's standard deviation (SD) after removing flags, and then it decides on the -appropriate prevalence analysis approach to follow: if SD is anything between excellent -and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -was also designed to work around survey weights. -The function also super handy to work on large data sets with multiple survey areas. For -this, the aforementioned conditionals are checked for each survey area in a summarized -data frame and prevalence get computed according to each row's scenario. 
-} -\examples{ - -## When .summary_by = NULL ---- -compute_mfaz_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -## When .summary_by is not set to NULL ---- -compute_mfaz_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = province -) - -## When a weighted analysis is needed ---- -### This example uses a different data set with survey weights ---- -compute_mfaz_prevalence( -df = anthro.02, -.wt = "wtfactor", -.edema = edema, -.summary_by = province -) - -} diff --git a/man/compute_muac_prevalence.Rd b/man/compute_muac_prevalence.Rd deleted file mode 100644 index a9e104b..0000000 --- a/man/compute_muac_prevalence.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{compute_muac_prevalence} -\alias{compute_muac_prevalence} -\title{Compute acute malnutrition prevalence based on MUAC (the absolute values)} -\usage{ -compute_muac_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default). If this argument is not used, the function will error.} -} -\value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. -} -\description{ -\code{compute_muac_prevalence()} is a handy function designed to dynamically compute acute -malnutrition's prevalence using the absolute values of MUAC, however using the MFAZ for -quality checks before advancing to prevalence computations. Under the hood, the function -first checks the status of MFAZ's standard deviation (SD) after removing flags, and -the status of age ratio among children aged 6:23 vs 24:59 months. Then it decides on the -appropriate prevalence analysis approach to follow: (i) if SD & age ratio are both not -problematic, a complex sample-based prevalence analysis (for a two-stage PPS -cluster sampling) is computed; (ii) if MFAZ's SD is not problematic, but age ratio test -is, the CDC/SMART MUAC tool weighting approach is used to compute the prevalence; (iii) -lastly, if MFAZ's SD is problematic even if age ratio test is not, no prevalence -analysis is computed and NA (of Not Applicable) are thrown. -The function also super handy to work on large data sets with multiple survey areas. For -this, the aforementioned conditionals are checked for each survey areas in a summarized -data frame and prevalence get computed according to each row's scenario. 
-} -\examples{ -## When .summary.by = NULL ---- - -x <- compute_muac_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -print(x) - -## When .summary_by is not set to NULL ---- - -p <- compute_muac_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = province -) - -print(p) - - -} diff --git a/man/compute_pps_based_mfaz_prevalence.Rd b/man/compute_pps_based_mfaz_prevalence.Rd deleted file mode 100644 index 26642b5..0000000 --- a/man/compute_pps_based_mfaz_prevalence.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_mfaz.R -\name{compute_pps_based_mfaz_prevalence} -\alias{compute_pps_based_mfaz_prevalence} -\title{Compute a MUAC-for-age z-score based prevalence estimates of data collected from a two-stage -cluster survey sample design, with the first stage sampling done with Probability -Proportional to the size of population} -\usage{ -compute_pps_based_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}. -this will contain the wrangled vectors that are read inside the function.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. - -@returns A tibble of size depending on the number of groups of the vector given to -\code{.summary_by} or if set to NULL, and of length 17.} -} -\description{ -Create a survey design object using the \code{\link[srvyr:as_survey_design]{srvyr::as_survey_design()}} and then calculate -the survey means as well the sum of positive cases. -} diff --git a/man/compute_pps_based_muac_prevalence.Rd b/man/compute_pps_based_muac_prevalence.Rd deleted file mode 100644 index 4df63df..0000000 --- a/man/compute_pps_based_muac_prevalence.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{compute_pps_based_muac_prevalence} -\alias{compute_pps_based_muac_prevalence} -\title{Compute MUAC based prevalence estimates of data collected from a two-stage cluster -survey sample design, with the first stage sampling done with Probability Proportional -to the size of population} -\usage{ -compute_pps_based_muac_prevalence( - df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL -) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}}. -this will contain the wrangled vectors that are read inside the function.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. 
Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. - -@returns A tibble of size depending on the number of groups of the vector given to -\code{.summary_by} or if set to NULL, and of length 17.} -} -\description{ -Create a survey design object using the \code{\link[srvyr:as_survey_design]{srvyr::as_survey_design()}} and then calculate -the survey means as well the sum of positive cases. -} diff --git a/man/compute_pps_based_wfhz_prevalence.Rd b/man/compute_pps_based_wfhz_prevalence.Rd deleted file mode 100644 index 626dcd8..0000000 --- a/man/compute_pps_based_wfhz_prevalence.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{compute_pps_based_wfhz_prevalence} -\alias{compute_pps_based_wfhz_prevalence} -\title{Compute a weight-for-height based prevalence estimates of data collected from a two-stage -cluster survey sample design, with the first stage sampling done with Probability -Proportional to the size of population} -\usage{ -compute_pps_based_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_wfhz_data]{process_wfhz_data()}}. -this will contain the wrangled vectors that are read inside the function.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. - -@returns A tibble of size depending on the number of groups of the vector given to -\code{.summary_by} or if set to NULL, and of length 17.} -} -\description{ -Create a survey design object using the \code{\link[srvyr:as_survey_design]{srvyr::as_survey_design()}} and then calculate -the survey means as well the sum of positive cases. -} diff --git a/man/compute_probit_prevalence.Rd b/man/compute_probit_prevalence.Rd deleted file mode 100644 index ae539b4..0000000 --- a/man/compute_probit_prevalence.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{compute_probit_prevalence} -\alias{compute_probit_prevalence} -\title{Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach} -\usage{ -compute_probit_prevalence(df, .summary_by = NULL, .for = c("wfhz", "mfaz")) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_wfhz_data]{process_wfhz_data()}} or by \code{\link[=process_muac_data]{process_muac_data()}} -They will contain the wrangled vectors that are read inside the function.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. 
This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them. -Default is NULL.} - -\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index you wish -to compute PROBIT prevalence on.} -} -\value{ -A tibble with the PROBIT based point prevalence for global, severe and moderate -acute malnutrition. -} -\description{ -This function is a helper function used inside \code{\link[=compute_wfhz_prevalence]{compute_wfhz_prevalence()}} and -\code{\link[=compute_mfaz_prevalence]{compute_mfaz_prevalence()}}. It is used to compute PROBIT based prevalence depending -on the status of standard deviation. For more details, check the documentation of the -aforementioned functions. -} diff --git a/man/compute_wfhz_prevalence.Rd b/man/compute_wfhz_prevalence.Rd deleted file mode 100644 index 7a8adb7..0000000 --- a/man/compute_wfhz_prevalence.Rd +++ /dev/null @@ -1,81 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{compute_wfhz_prevalence} -\alias{compute_wfhz_prevalence} -\title{Compute acute malnutrition prevalence based on weight-for-height z-scores (WFHZ), -MUAC-for-age z-scores (MFAZ), MUAC and combined} -\usage{ -compute_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{A data frame object returned by \code{\link[=process_wfhz_data]{process_wfhz_data()}}.} - -\item{.wt}{A numeric vector containing survey weights. If set to NULL (default) and -the function will assume self weighted, like in ENA for SMART, otherwise if given, the -weighted analysis will be computed with weighted population returned.} - -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} - -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. If you are working on a single survey data, set -.summary_by = NULL (default).} -} -\value{ -A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of -1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical -areas in the data set) x 17. -} -\description{ -\code{compute_wfhz_prevalence()} is a handy function designed to dynamically compute acute -malnutrition's prevalence using WFHZ. Under the hood, it first checks the status of -WFHZ's standard deviation (SD) after removing flags, and then it decides on the -appropriate prevalence analysis approach to follow: if SD is anything between excellent -and acceptable, a complex sample-based prevalence analysis (for a two-stage PPS -cluster sampling) is computed, otherwise, a re-calculated prevalence using PROBIT method -with a sample mean and a SD = 1 is computed. On the former analysis approach, the function -was also designed to work around survey weights. -The function also super handy to work on large data sets with multiple survey areas. For -this, the aforementioned conditionals are checked for each survey areas in a summarized -data frame and prevalence get computed according to each row's scenario. 
-} -\examples{ - -## When .summary_by = NULL ---- -anthro.03 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -compute_wfhz_prevalence( -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -## When .summary_by is not set to NULL ---- -anthro.03 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -compute_wfhz_prevalence( -.wt = NULL, -.edema = edema, -.summary_by = district -) - -## When a weighted analysis is needed ---- -anthro.02 |> -compute_wfhz_prevalence( -.wt = "wtfactor", -.edema = edema, -.summary_by = province -) - -} diff --git a/man/prevalence.Rd b/man/prevalence.Rd new file mode 100644 index 0000000..6a2b1ad --- /dev/null +++ b/man/prevalence.Rd @@ -0,0 +1,105 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prevalence_mfaz.R, R/prevalence_muac.R, +% R/prevalence_wfhz.R +\name{compute_mfaz_prevalence} +\alias{compute_mfaz_prevalence} +\alias{compute_muac_prevalence} +\alias{compute_wfhz_prevalence} +\title{Compute the prevalence of wasting on the basis of WFHZ or MFAZ or MUAC} +\usage{ +compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) + +compute_muac_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) + +compute_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) +} +\arguments{ +\item{df}{A data frame object with the required variables already wrangled.} + +\item{.wt}{A numeric vector holding final survey weights. When set to \code{NULL}, +the function assumes self weighted survey, as in the ENA for SMART software; +Otherwise when supplied, weighted analysis is computed.} + +\item{.edema}{A character vector indicating if an observation has bilateral +edema or not. The codes are "y" for presence and "n" for absence of bilateral +edema. Default is \code{NULL}.} + +\item{.summary_by}{A character vector containing data on the geographical areas +where the data was collected and for which the analysis should be performed at.} +} +\value{ +A table with the descriptive statistics about wasting. +} +\description{ +The prevalence is calculated in accordance with the complex sample design +properties inherent to surveys. This includes weighting the survey data where +applicable and applying PROBIT method estimation of prevalence (for WFHZ and +MFAZ) when standard deviation is problematic. This is as in the SMART Methodology. 
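For the design-based branch, the internal helpers documented elsewhere in this series build a survey design object with `srvyr::as_survey_design()` and then take survey means of the case indicators. A rough sketch of that idea on `anthro.02`, assuming the columns documented for it (`cluster`, `wtfactor`, `province`, `wfhz`, `flag_wfhz`, `edema`); the inline GAM case definition and the output column names are illustrative, not the package's own code:

library(dplyr)
library(srvyr)

anthro.02 |>
  filter(flag_wfhz == 0) |>                                 # drop flagged records first
  mutate(gam = ifelse(wfhz < -2 | edema == "y", 1, 0)) |>   # illustrative case definition
  as_survey_design(ids = cluster, weights = wtfactor) |>
  group_by(province) |>
  summarise(
    gam_p = survey_mean(gam, vartype = "ci", na.rm = TRUE), # weighted prevalence + 95% CI
    gam_n = survey_total(gam, na.rm = TRUE)                 # weighted number of cases
  )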
+}
+\examples{
+
+## An example of application of `compute_muac_prevalence()` ----
+
+### When .summary_by = NULL ----
+
+x <- compute_muac_prevalence(
+df = anthro.04,
+.wt = NULL,
+.edema = edema,
+.summary_by = NULL
+)
+
+print(x)
+
+### When .summary_by is not set to NULL ----
+
+p <- compute_muac_prevalence(
+df = anthro.04,
+.wt = NULL,
+.edema = edema,
+.summary_by = province
+)
+
+print(p)
+
+## An example of application of `compute_wfhz_prevalence()` ----
+
+### When .summary_by = NULL ----
+anthro.03 |>
+process_wfhz_data(
+sex = sex,
+weight = weight,
+height = height,
+.recode_sex = TRUE
+) |>
+compute_wfhz_prevalence(
+.wt = NULL,
+.edema = edema,
+.summary_by = NULL
+)
+
+### When .summary_by is not set to NULL ----
+
+anthro.03 |>
+process_wfhz_data(
+sex = sex,
+weight = weight,
+height = height,
+.recode_sex = TRUE
+) |>
+compute_wfhz_prevalence(
+.wt = NULL,
+.edema = edema,
+.summary_by = district
+)
+
+### When a weighted analysis is needed ----
+
+anthro.02 |>
+compute_wfhz_prevalence(
+.wt = "wtfactor",
+.edema = edema,
+.summary_by = province
+)
+
+}
diff --git a/man/probit-method.Rd b/man/probit-method.Rd
new file mode 100644
index 0000000..1e13c26
--- /dev/null
+++ b/man/probit-method.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prevalence_wfhz.R
+\name{apply_probit_approach}
+\alias{apply_probit_approach}
+\alias{compute_probit_prevalence}
+\title{Compute the prevalence of wasting on the basis of the PROBIT method.}
+\usage{
+apply_probit_approach(x, .status = c("gam", "sam"))
+
+compute_probit_prevalence(df, .summary_by = NULL, .for = c("wfhz", "mfaz"))
+}
+\arguments{
+\item{x}{A numeric vector holding WFHZ or MFAZ values.}
+
+\item{.status}{A choice of the form of wasting for which the prevalence should
+be calculated.}
+
+\item{df}{A data frame object with the required variables already wrangled.}
+
+\item{.summary_by}{A character vector containing data on the geographical areas where
+the data was collected. This is to group the survey design object into different
+geographical areas in the data and allow for summaries to be computed for each of them.
+Default is NULL.}
+
+\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index you wish
+to compute PROBIT prevalence on.}
+}
+\value{
+A data frame with the prevalence. No confidence intervals are
+yielded.
+}
+\description{
+This approach is applied when the standard deviation of WFHZ is problematic.
+The PROBIT method estimates the prevalence of wasting indirectly by calculating
+the area under the tail of the curve, from negative infinity to
+the given threshold, using the cumulative normal distribution function with
+the mean and standard deviation as inputs. 
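A back-of-the-envelope illustration of that calculation (not the package's implementation): the tail area is read off the cumulative normal with `pnorm()`. The -2 and -3 z-score cut-offs are the conventional GAM and SAM thresholds, the standard deviation is fixed at 1 as described for the WFHZ workflow earlier in this series, and the function name and input vector are assumptions:

probit_prevalence <- function(zscores) {
  mu  <- mean(zscores, na.rm = TRUE)
  gam <- pnorm(-2, mean = mu, sd = 1)  # area under the tail below -2 (GAM)
  sam <- pnorm(-3, mean = mu, sd = 1)  # area under the tail below -3 (SAM)
  c(gam = gam, sam = sam, mam = gam - sam)
}

# e.g. probit_prevalence(rnorm(500, mean = -0.4, sd = 1.1))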
+} From 1a1b1e0a5908af6708adb843aff465a7f71f20cd Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Sat, 12 Oct 2024 09:25:28 +0200 Subject: [PATCH 3/9] revise function doc 3 --- R/data.R | 89 ++++++++++++------------- R/prevalence_muac.R | 102 ++++++++++++++--------------- inst/WORDLIST | 16 +---- man/anthro.01.Rd | 11 ++-- man/anthro.02.Rd | 17 ++--- man/anthro.03.Rd | 17 +++-- man/anthro.04.Rd | 25 +++---- man/apply_cdc_age_weighting.Rd | 35 +++++----- man/compute_weighted_prevalence.Rd | 34 ++++------ man/mfaz.01.Rd | 4 +- man/mfaz.02.Rd | 7 +- man/tell_muac_analysis_strategy.Rd | 27 ++++---- man/wfhz.01.Rd | 7 +- 13 files changed, 184 insertions(+), 207 deletions(-) diff --git a/R/data.R b/R/data.R index 56c0647..52fc30f 100644 --- a/R/data.R +++ b/R/data.R @@ -1,12 +1,12 @@ #' -#' Raw data of a district level representative survey +#' A sample data of district level SMART surveys with location anonymised #' #' @description -#' #' `anthro.01` is about a two-stage and PPS cluster sampling survey data -#' conducted in two district following the SMART survey methodology in two -#' livelihood zones. The location information was anonymized for confidentiality. +#' `anthro.01` is a two-stage cluster-based survey with probability of the +#' selection of the clusters proportional to the size of the population. The +#' survey employed the SMART methodology. Data was anonymised for confidentiality. #' -#' @format A tibble with 1191 rows and 11 columns. +#' @format A tibble of 1,191 rows and 11 columns. #' #' | **Variable** | **Description** | #' | :--- | :--- | @@ -25,28 +25,30 @@ #' @examples #' anthro.01 #' +#' "anthro.01" #' -#' -#' Province representative survey conducted in Mozambique +#' A sample of an already wrangled survey data #' #' @description #' `anthro.02` is about a household budget survey conducted in Mozambique in -#' 2019/2020, known as IOF (*Inquérito ao Orçamento Familiar* in portuguese). +#' 2019/2020, known as IOF (*Inquérito ao Orçamento Familiar* in Portuguese). #' The data is publicly available [here](https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access). #' The survey had a module on nutrition with anthropometric measurements taken #' from children age 0-59 months for weight-for-height and 6-59 months for MUAC. -#' *IOF* is a cluster and PPS-based, survey, with sampling done in two stages, -#' designed to give representative estimates at province level. Its data -#' collection spans for a period of 12 months, with anthropometric measurements +#' +#' *IOF* is a two-stage cluster-based survey, representative at +#' province level (admin 2), with probability of the selection of the clusters +#' proportional to the size of the population. Its data collection spans for a +#' period of 12 months, with anthropometric measurements #' taken during that period too. Read the [Bureau of Statistic's website on #' IOF](https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling) for #' more details. #' -#' `anthro.02` has been processed for this package's purpose. +#' `anthro.02` has already been wrangled using this package's utilities. #' -#' @format A tibble with 2267 rows and 14 columns. +#' @format A tibble of 2,267 rows and 14 columns. 
#' #' |**Variable** | **Description** | #' | :--- | :---| @@ -68,20 +70,22 @@ #' @examples #' anthro.02 #' -#' "anthro.02" #' -#' District level SMART surveys conducted in four district in Mozambique +#' A sample data of district level SMART surveys conducted in Mozambique #' #' @description -#' This example data contains survey data of four districts. Two of them have their WFHZ -#' standard deviation classified as problematic, and the are other two within range of -#' acceptable standard deviation. The data is used to test the performance of WFHZ based -#' prevalence when used on a data set with multiple survey areas that may or not have -#' different classification for standard deviation that may warrant different analysis -#' approach, as the function is designed for. +#' `anthro.03` contains survey data of four districts. Each district's dataset +#' presents distinct data quality scenarios that requires tailored prevalence +#' analysis approach. Two districts show a problematic WFHZ standard deviation +#' whilst the remaining are all within range. +#' +#' This sample data demonstrates the use of prevalence functions on multi-area +#' survey data, where variations in the standard deviation ratings exist. +#' As a result, different analytical approaches are required for each area +#' to ensure accurate estimation. #' #' @format A tibble of 943 x 9. #' @@ -105,24 +109,27 @@ #' -#' MUAC data from a community-based sentinel site from an anonymized location +#' +#' A sample data of a community-based sentinel site from an anonymized location #' #' @description -#' Data in `anthro.04` was generated from a community-based sentinel site of three provinces. -#' Each province data set holds different scenarios that informs the appropriate analysis -#' approach to follow. One province (province 3) has its MFAZ standard deviation and age -#' ratio tests classified as problematic. Another province (province 2) has its age ratio -#' classified as problematic, but with a within range standard deviation. Lastly, province 1 -#' has both tests falling within range of nor problematic. The data is used to test the -#' performance of `[compute_muac_prevalence()]` based when used on a multiple survey areas -#' data that may or not have on the aforementioned test that may then warrant a different -#' analysis approach, as the function is designed for. +#' `anthro.04` was generated from a community-based sentinel site survey +#' conducted across three provinces. Each province's dataset presents distinct +#' data quality scenarios, requiring tailored prevalence analysis. +#' "Province 3" has problematic MFAZ standard deviation and age ratio tests. +#' "Province 2" shows a problematic age ratio but acceptable MFAZ standard +#' deviation. Lastly, "Province 1" has both tests within acceptable ranges. +#' +#' This sample data demonstrates the use of prevalence functions on multi-area +#' survey data, where variations in the standard deviation ratings exist. +#' As a result, different analytical approaches are required for each area +#' to ensure accurate interpretation. #' #' @format A tibble of 3,002 x 8. 
#' #' |**Variable** | **Description** | #' | :--- | :---| -#' | *province* | +#' | *province* | location where data was collected | #' | *cluster* | Primary sampling unit | #' | *sex* | Sex, "m" = boys, "f" = girls | #' | *age* | calculated age in months with two decimal places | @@ -134,17 +141,12 @@ #' @examples #' anthro.04 #' +#' "anthro.04" #' -#' A SMART survey data with standard deviation on weight-for-height zscores -#' classified as problematic -#' -#' @description -#' A SMART survey data with weight-for-height data where standard deviation is -#' problematic. The data is used to test that `compute_wfhz_prevalence()` works as -#' designed for when standard deviation is problematic. +#' A sample SMART survey data with WFHZ standard deviation rated as problematic #' #' @format A tibble with 303 rows and 6 columns. #' @@ -165,7 +167,7 @@ #' -#' A MUAC screening data from an anonymized setting +#' A sample MUAC screening data from an anonymized setting #' #' @format A tibble with 661 rows and 4 columns. #' @@ -181,13 +183,8 @@ #' "mfaz.01" -#' A SMART survey data with MUAC #' -#' @description -#' A SMART survey data collected in an anonymized location. This data has -#' mfaz standard deviation and age ratio within range for a normal prevalence -#' analysis. It is, thus, used to check if `compute_muac_prevalence()` performs -#' as designed. +#' A sample SMART survey data with MUAC #' #' @format A tibble with 303 rows and 7 columns. #' diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R index bacde30..516dedb 100644 --- a/R/prevalence_muac.R +++ b/R/prevalence_muac.R @@ -1,15 +1,24 @@ #' -#' A helper function to tell how to go about MUAC prevalence analysis based on -#' on the output of age ratio and standard deviation test results +#' A helper function to identify the MUAC prevalence analysis approach on the +#' basis of age ratio and standard deviation test results #' -#' @param age_ratio_class,sd_class Character vectors storing age ratio's p-values -#' and standard deviation's classification, respectively. +#' @description +#' This is a helper function that gives instruction to the main prevalence +#' analysis function on the analysis approach to follow in a given area of +#' analysis on the basis of the quality of the age ratio test and the standard +#' deviation. +#' +#' @param age_ratio_class A character vector returned from the plausibility +#' auditors holding the rating of the age ratio test results. +#' +#' @param sd_class A character vector returned from the plausibility auditors +#' holding the rating of the standard deviation test results. #' -#' @returns A character vector of the same length containing the indication of -#' what to do for the MUAC prevalence analysis: "weighted", "unweighted" and -#' "missing". If "weighted", the CDC weighting approach is applied to correct for -#' age bias. If "unweighted" a normal complex sample analysis is applied, and for -#' the latter, NA are thrown. +#' @returns A character vector of the same length as the input holding analysis +#' approach to be followed in a given area of analysis: "weighted", "unweighted" and +#' "missing". When "weighted", the CDC weighting approach is applied to correct for +#' age bias; "unweighted" a normal complex sample analysis is applied; when +#' "missing" `NA` gets thrown, so no prevalence computed. 
#' #' tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { @@ -24,35 +33,31 @@ tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { #' #' -#' Correct the observed MUAC prevalence when there is an unbalanced sample -#' between children under 2 and over two years old +#' Apply weighting to the MUAC prevalence when sample distribution is unbalanced +#' between children aged 6 to 23 months and those aged 24 to 59 months old #' #' @description -#' As documented in the SMART MUAC tool and in the literature, MUAC shows a known -#' bias towards younger children. In a balanced sample, it is expected to have -#' nearly two thirds of the sample to be of children over two years old. If too -#' few older children are included in the sample, the weighted tool should be used. +#' `apply_cdc_age_weighting()` calculates a weighted proportion by adding the +#' proportion of children under 2 years to twice the proportion of children over 2 +#' and then dividing by 3. #' -#' `apply_cdc_age_weighting()` does that. It takes the proportion of children -#' under 2 and adds to the product of 2 times the proportion of children over two, -#' then divided by 3. The use of this function is informed by the output of -#' [age_ratio_test()]. There is difference between this function and that in the -#' SMART plausibility check. Consider reading the documentation before use. +#' @param muac A numeric vector holding MUAC values (in mm). #' -#' @param muac An integer vector containing MUAC measurements in mm. +#' @param age A numeric vector holding child's age in months. #' -#' @param age A double vector containing age in months with at least 2 decimal -#' places. +#' @param .edema Optional. If given, it should be a character vector of "y" +#' for presence and "n" for absence of bilateral edema. #' -#' @param .edema Optional. If given, it should be a character vector of "y" = Yes, -#' "n" = No bilateral edema. -#' -#' @param status If you wish to get the prevalence/proportions of severe or -#' moderate acute malnutrition. Set `status = "sam" or status = "mam"` for the -#' former or latter, respectively. +#' @param status A choice between "sam" and "mam" for the form of wasting. #' #' @returns A numeric vector of length and size 1. #' +#' @details +#' This function is informed by the output of [age_ratio_test()]. +#' Note that this method differs from the approach used in the SMART plausibility +#' check. Please refer to the documentation for further details. +#' +#' apply_cdc_age_weighting <- function(muac, age, .edema = NULL, status = c("sam", "mam")) { @@ -86,31 +91,22 @@ apply_cdc_age_weighting <- function(muac, age, #' #' -#' A wrapper function to compute of `apply_cdc_age_weighting()` that allows to work on -#' a data frame +#' Apply weighting to the MUAC prevalence when sample distribution is unbalanced +#' between children aged 6 to 23 months and those aged 24 to 59 months old #' -#' @description -#' `compute_weighted_prevalence()` is the main function use to compute age adjusted MUAC -#' prevalence where there are excess of children 6:23 over 24:59 months. It allows the -#' computations to be done on a data frame. The function is used inside the main and -#' exported function to compute MUAC based prevalence. Before computing the prevalence, -#' the function first removed the flagged data so the computations are performed on -#' non-flagged observations. 
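The arithmetic behind `apply_cdc_age_weighting()` described above can be sketched with made-up numbers; the MUAC values and the 115 mm cut-off are illustrative only, and the 24-month split mirrors the 6 to 23 versus 24 to 59 month groups named in the title:

age  <- c(8, 14, 20, 30, 41, 52)          # age in months
muac <- c(110, 118, 112, 121, 109, 125)   # MUAC in mm
sam  <- muac < 115                        # illustrative severe wasting cut-off

p_under2 <- mean(sam[age < 24])           # proportion among the 6-23 month group
p_over2  <- mean(sam[age >= 24])          # proportion among the 24-59 month group
(p_under2 + 2 * p_over2) / 3              # age-weighted proportion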
-#' -#' @param df A data frame object returned by [process_muac_data()] this will contain the -#' wrangled vectors that are read inside the function. -#' -#' @param .edema A character vector containing child's status on edema with "n" for no -#' edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -#' codes. -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. -#' -#' @returns A tibble with length and size varying according to use of `.summary_by`. -#' If set to NULL, a tibble of 1 x 3 is returned, otherwise the size of the tibble with be -#' corresponding to the number of groups/areas in the vector given to `.summary_by`, but -#' with the same length. +#' @param df A data frame object with the required variables already wrangled. +#' +#' @param .edema A character vector indicating if an observation has bilateral +#' edema or not. The codes are "y" for presence and "n" for absence of bilateral +#' edema. Default is `NULL`. +#' +#' @param .summary_by A character vector containing data of the geographical areas +#' where the data was collected and for which the analysis should be performed at. +#' +#' @returns A tibble with dimensions that vary based on the use of `.summary_by`. +#' If set to `NULL`, a 1 x 3 tibble is returned. Otherwise, the number of rows +#' will match the number of groups or areas provided in `.summary_by`, +#' while the number of columns will remain the same. #' #' compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { diff --git a/inst/WORDLIST b/inst/WORDLIST index aab8cc6..77fc839 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -13,22 +13,18 @@ IPC Inquérito Kianian Lifecycle -MAM MFAZ MFAZ's MUAC -MUAC's Maravia Metuge ORCID Oftentimes Orçamento -PSUs +PSU +PSU's R's -Skweness WFHZ -WFHZ's -WHZ WIP analyzing anthro @@ -36,21 +32,16 @@ ao callout centimeters cflags -cgam -cmam -csam dob dplyr edema etc -gam ipc mam mfaz millimeter millimeters muac -multicountry mwana mwana’s nipnTK @@ -58,11 +49,9 @@ nutriverse obs offs plausibile -portuguese requeriments sam ssite -tho tibble undernutrition wfhz @@ -70,5 +59,4 @@ whz wtfactor zscore zscorer -zscores ’s diff --git a/man/anthro.01.Rd b/man/anthro.01.Rd index d744cac..6611489 100644 --- a/man/anthro.01.Rd +++ b/man/anthro.01.Rd @@ -3,9 +3,9 @@ \docType{data} \name{anthro.01} \alias{anthro.01} -\title{Raw data of a district level representative survey} +\title{A sample data of district level SMART surveys with location anonymised} \format{ -A tibble with 1191 rows and 11 columns.\tabular{ll}{ +A tibble of 1,191 rows and 11 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr \emph{area} \tab Location where the survey took place \cr \emph{dos} \tab Survey date \cr @@ -24,12 +24,13 @@ A tibble with 1191 rows and 11 columns.\tabular{ll}{ anthro.01 } \description{ -#' \code{anthro.01} is about a two-stage and PPS cluster sampling survey data -conducted in two district following the SMART survey methodology in two -livelihood zones. The location information was anonymized for confidentiality. +\code{anthro.01} is a two-stage cluster-based survey with probability of the +selection of the clusters proportional to the size of the population. The +survey employed the SMART methodology. Data was anonymised for confidentiality. 
} \examples{ anthro.01 + } \keyword{datasets} diff --git a/man/anthro.02.Rd b/man/anthro.02.Rd index 8f64088..45af271 100644 --- a/man/anthro.02.Rd +++ b/man/anthro.02.Rd @@ -3,9 +3,9 @@ \docType{data} \name{anthro.02} \alias{anthro.02} -\title{Province representative survey conducted in Mozambique} +\title{A sample of an already wrangled survey data} \format{ -A tibble with 2267 rows and 14 columns.\tabular{ll}{ +A tibble of 2,267 rows and 14 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr \emph{province} \tab The administrative unit (admin 1) where data was collected. \cr \emph{strata} \tab Rural and Urban \cr @@ -28,21 +28,22 @@ anthro.02 } \description{ \code{anthro.02} is about a household budget survey conducted in Mozambique in -2019/2020, known as IOF (\emph{Inquérito ao Orçamento Familiar} in portuguese). +2019/2020, known as IOF (\emph{Inquérito ao Orçamento Familiar} in Portuguese). The data is publicly available \href{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access}{here}. The survey had a module on nutrition with anthropometric measurements taken from children age 0-59 months for weight-for-height and 6-59 months for MUAC. -\emph{IOF} is a cluster and PPS-based, survey, with sampling done in two stages, -designed to give representative estimates at province level. Its data -collection spans for a period of 12 months, with anthropometric measurements + +\emph{IOF} is a two-stage cluster-based survey, representative at +province level (admin 2), with probability of the selection of the clusters +proportional to the size of the population. Its data collection spans for a +period of 12 months, with anthropometric measurements taken during that period too. Read the \href{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling}{Bureau of Statistic's website on IOF} for more details. -\code{anthro.02} has been processed for this package's purpose. +\code{anthro.02} has already been wrangled using this package's utilities. } \examples{ anthro.02 - } \keyword{datasets} diff --git a/man/anthro.03.Rd b/man/anthro.03.Rd index dc5ae52..68b7a38 100644 --- a/man/anthro.03.Rd +++ b/man/anthro.03.Rd @@ -3,7 +3,7 @@ \docType{data} \name{anthro.03} \alias{anthro.03} -\title{District level SMART surveys conducted in four district in Mozambique} +\title{A sample data of district level SMART surveys conducted in Mozambique} \format{ A tibble of 943 x 9.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr @@ -22,12 +22,15 @@ A tibble of 943 x 9.\tabular{ll}{ anthro.03 } \description{ -This example data contains survey data of four districts. Two of them have their WFHZ -standard deviation classified as problematic, and the are other two within range of -acceptable standard deviation. The data is used to test the performance of WFHZ based -prevalence when used on a data set with multiple survey areas that may or not have -different classification for standard deviation that may warrant different analysis -approach, as the function is designed for. +\code{anthro.03} contains survey data of four districts. Each district's dataset +presents distinct data quality scenarios that requires tailored prevalence +analysis approach. Two districts show a problematic WFHZ standard deviation +whilst the remaining are all within range. + +This sample data demonstrates the use of prevalence functions on multi-area +survey data, where variations in the standard deviation ratings exist. 
+As a result, different analytical approaches are required for each area +to ensure accurate estimation. } \examples{ anthro.03 diff --git a/man/anthro.04.Rd b/man/anthro.04.Rd index db29727..1414a0e 100644 --- a/man/anthro.04.Rd +++ b/man/anthro.04.Rd @@ -3,11 +3,11 @@ \docType{data} \name{anthro.04} \alias{anthro.04} -\title{MUAC data from a community-based sentinel site from an anonymized location} +\title{A sample data of a community-based sentinel site from an anonymized location} \format{ A tibble of 3,002 x 8.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr - \emph{province} \tab \cr + \emph{province} \tab location where data was collected \cr \emph{cluster} \tab Primary sampling unit \cr \emph{sex} \tab Sex, "m" = boys, "f" = girls \cr \emph{age} \tab calculated age in months with two decimal places \cr @@ -21,18 +21,21 @@ A tibble of 3,002 x 8.\tabular{ll}{ anthro.04 } \description{ -Data in \code{anthro.04} was generated from a community-based sentinel site of three provinces. -Each province data set holds different scenarios that informs the appropriate analysis -approach to follow. One province (province 3) has its MFAZ standard deviation and age -ratio tests classified as problematic. Another province (province 2) has its age ratio -classified as problematic, but with a within range standard deviation. Lastly, province 1 -has both tests falling within range of nor problematic. The data is used to test the -performance of \verb{[compute_muac_prevalence()]} based when used on a multiple survey areas -data that may or not have on the aforementioned test that may then warrant a different -analysis approach, as the function is designed for. +\code{anthro.04} was generated from a community-based sentinel site survey +conducted across three provinces. Each province's dataset presents distinct +data quality scenarios, requiring tailored prevalence analysis. +"Province 3" has problematic MFAZ standard deviation and age ratio tests. +"Province 2" shows a problematic age ratio but acceptable MFAZ standard +deviation. Lastly, "Province 1" has both tests within acceptable ranges. + +This sample data demonstrates the use of prevalence functions on multi-area +survey data, where variations in the standard deviation ratings exist. +As a result, different analytical approaches are required for each area +to ensure accurate interpretation. } \examples{ anthro.04 + } \keyword{datasets} diff --git a/man/apply_cdc_age_weighting.Rd b/man/apply_cdc_age_weighting.Rd index 9ceddd0..1692652 100644 --- a/man/apply_cdc_age_weighting.Rd +++ b/man/apply_cdc_age_weighting.Rd @@ -2,36 +2,31 @@ % Please edit documentation in R/prevalence_muac.R \name{apply_cdc_age_weighting} \alias{apply_cdc_age_weighting} -\title{Correct the observed MUAC prevalence when there is an unbalanced sample -between children under 2 and over two years old} +\title{Apply weighting to the MUAC prevalence when sample distribution is unbalanced +between children aged 6 to 23 months and those aged 24 to 59 months old} \usage{ apply_cdc_age_weighting(muac, age, .edema = NULL, status = c("sam", "mam")) } \arguments{ -\item{muac}{An integer vector containing MUAC measurements in mm.} +\item{muac}{A numeric vector holding MUAC values (in mm).} -\item{age}{A double vector containing age in months with at least 2 decimal -places.} +\item{age}{A numeric vector holding child's age in months.} -\item{.edema}{Optional. If given, it should be a character vector of "y" = Yes, -"n" = No bilateral edema.} +\item{.edema}{Optional. 
If given, it should be a character vector of "y" +for presence and "n" for absence of bilateral edema.} -\item{status}{If you wish to get the prevalence/proportions of severe or -moderate acute malnutrition. Set \verb{status = "sam" or status = "mam"} for the -former or latter, respectively.} +\item{status}{A choice between "sam" and "mam" for the form of wasting.} } \value{ A numeric vector of length and size 1. } \description{ -As documented in the SMART MUAC tool and in the literature, MUAC shows a known -bias towards younger children. In a balanced sample, it is expected to have -nearly two thirds of the sample to be of children over two years old. If too -few older children are included in the sample, the weighted tool should be used. - -\code{apply_cdc_age_weighting()} does that. It takes the proportion of children -under 2 and adds to the product of 2 times the proportion of children over two, -then divided by 3. The use of this function is informed by the output of -\code{\link[=age_ratio_test]{age_ratio_test()}}. There is difference between this function and that in the -SMART plausibility check. Consider reading the documentation before use. +\code{apply_cdc_age_weighting()} calculates a weighted proportion by adding the +proportion of children under 2 years to twice the proportion of children over 2 +and then dividing by 3. +} +\details{ +This function is informed by the output of \code{\link[=age_ratio_test]{age_ratio_test()}}. +Note that this method differs from the approach used in the SMART plausibility +check. Please refer to the documentation for further details. } diff --git a/man/compute_weighted_prevalence.Rd b/man/compute_weighted_prevalence.Rd index 609ef7f..65109df 100644 --- a/man/compute_weighted_prevalence.Rd +++ b/man/compute_weighted_prevalence.Rd @@ -2,34 +2,28 @@ % Please edit documentation in R/prevalence_muac.R \name{compute_weighted_prevalence} \alias{compute_weighted_prevalence} -\title{A wrapper function to compute of \code{apply_cdc_age_weighting()} that allows to work on -a data frame} +\title{Apply weighting to the MUAC prevalence when sample distribution is unbalanced +between children aged 6 to 23 months and those aged 24 to 59 months old} \usage{ compute_weighted_prevalence(df, .edema = NULL, .summary_by = NULL) } \arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} this will contain the -wrangled vectors that are read inside the function.} +\item{df}{A data frame object with the required variables already wrangled.} -\item{.edema}{A character vector containing child's status on edema with "n" for no -edema, "y" = yes edema. Should you data be coded differently, re-code it to aforementioned -codes.} +\item{.edema}{A character vector indicating if an observation has bilateral +edema or not. The codes are "y" for presence and "n" for absence of bilateral +edema. Default is \code{NULL}.} -\item{.summary_by}{A character vector containing data on the geographical areas where -the data was collected. This is to group the survey design object into different -geographical areas in the data and allow for summaries to be computed for each of them.} +\item{.summary_by}{A character vector containing data of the geographical areas +where the data was collected and for which the analysis should be performed at.} } \value{ -A tibble with length and size varying according to use of \code{.summary_by}. 
-If set to NULL, a tibble of 1 x 3 is returned, otherwise the size of the tibble with be -corresponding to the number of groups/areas in the vector given to \code{.summary_by}, but -with the same length. +A tibble with dimensions that vary based on the use of \code{.summary_by}. +If set to \code{NULL}, a 1 x 3 tibble is returned. Otherwise, the number of rows +will match the number of groups or areas provided in \code{.summary_by}, +while the number of columns will remain the same. } \description{ -\code{compute_weighted_prevalence()} is the main function use to compute age adjusted MUAC -prevalence where there are excess of children 6:23 over 24:59 months. It allows the -computations to be done on a data frame. The function is used inside the main and -exported function to compute MUAC based prevalence. Before computing the prevalence, -the function first removed the flagged data so the computations are performed on -non-flagged observations. +Apply weighting to the MUAC prevalence when sample distribution is unbalanced +between children aged 6 to 23 months and those aged 24 to 59 months old } diff --git a/man/mfaz.01.Rd b/man/mfaz.01.Rd index a0cd322..855bdc4 100644 --- a/man/mfaz.01.Rd +++ b/man/mfaz.01.Rd @@ -3,7 +3,7 @@ \docType{data} \name{mfaz.01} \alias{mfaz.01} -\title{A MUAC screening data from an anonymized setting} +\title{A sample MUAC screening data from an anonymized setting} \format{ A tibble with 661 rows and 4 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr @@ -17,7 +17,7 @@ A tibble with 661 rows and 4 columns.\tabular{ll}{ mfaz.01 } \description{ -A MUAC screening data from an anonymized setting +A sample MUAC screening data from an anonymized setting } \examples{ mfaz.01 diff --git a/man/mfaz.02.Rd b/man/mfaz.02.Rd index d83da35..529df23 100644 --- a/man/mfaz.02.Rd +++ b/man/mfaz.02.Rd @@ -3,7 +3,7 @@ \docType{data} \name{mfaz.02} \alias{mfaz.02} -\title{A SMART survey data with MUAC} +\title{A sample SMART survey data with MUAC} \format{ A tibble with 303 rows and 7 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr @@ -19,10 +19,7 @@ A tibble with 303 rows and 7 columns.\tabular{ll}{ mfaz.02 } \description{ -A SMART survey data collected in an anonymized location. This data has -mfaz standard deviation and age ratio within range for a normal prevalence -analysis. It is, thus, used to check if \code{compute_muac_prevalence()} performs -as designed. 
+A sample SMART survey data with MUAC } \examples{ mfaz.02 diff --git a/man/tell_muac_analysis_strategy.Rd b/man/tell_muac_analysis_strategy.Rd index 38b1dba..2a4e5f2 100644 --- a/man/tell_muac_analysis_strategy.Rd +++ b/man/tell_muac_analysis_strategy.Rd @@ -2,23 +2,28 @@ % Please edit documentation in R/prevalence_muac.R \name{tell_muac_analysis_strategy} \alias{tell_muac_analysis_strategy} -\title{A helper function to tell how to go about MUAC prevalence analysis based on -on the output of age ratio and standard deviation test results} +\title{A helper function to identify the MUAC prevalence analysis approach on the +basis of age ratio and standard deviation test results} \usage{ tell_muac_analysis_strategy(age_ratio_class, sd_class) } \arguments{ -\item{age_ratio_class, sd_class}{Character vectors storing age ratio's p-values -and standard deviation's classification, respectively.} +\item{age_ratio_class}{A character vector returned from the plausibility +auditors holding the rating of the age ratio test results.} + +\item{sd_class}{A character vector returned from the plausibility auditors +holding the rating of the standard deviation test results.} } \value{ -A character vector of the same length containing the indication of -what to do for the MUAC prevalence analysis: "weighted", "unweighted" and -"missing". If "weighted", the CDC weighting approach is applied to correct for -age bias. If "unweighted" a normal complex sample analysis is applied, and for -the latter, NA are thrown. +A character vector of the same length as the input holding analysis +approach to be followed in a given area of analysis: "weighted", "unweighted" and +"missing". When "weighted", the CDC weighting approach is applied to correct for +age bias; "unweighted" a normal complex sample analysis is applied; when +"missing" \code{NA} gets thrown, so no prevalence computed. } \description{ -A helper function to tell how to go about MUAC prevalence analysis based on -on the output of age ratio and standard deviation test results +This is a helper function that gives instruction to the main prevalence +analysis function on the analysis approach to follow in a given area of +analysis on the basis of the quality of the age ratio test and the standard +deviation. } diff --git a/man/wfhz.01.Rd b/man/wfhz.01.Rd index 061bda9..9cf5cc5 100644 --- a/man/wfhz.01.Rd +++ b/man/wfhz.01.Rd @@ -3,8 +3,7 @@ \docType{data} \name{wfhz.01} \alias{wfhz.01} -\title{A SMART survey data with standard deviation on weight-for-height zscores -classified as problematic} +\title{A sample SMART survey data with WFHZ standard deviation rated as problematic} \format{ A tibble with 303 rows and 6 columns.\tabular{ll}{ \strong{Variable} \tab \strong{Description} \cr @@ -20,9 +19,7 @@ A tibble with 303 rows and 6 columns.\tabular{ll}{ wfhz.01 } \description{ -A SMART survey data with weight-for-height data where standard deviation is -problematic. The data is used to test that \code{compute_wfhz_prevalence()} works as -designed for when standard deviation is problematic. 
+A sample SMART survey data with WFHZ standard deviation rated as problematic } \examples{ wfhz.01 From a1ce73bd4a8b0a8b9bb64e674468375f121e585d Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Sat, 12 Oct 2024 16:31:23 +0200 Subject: [PATCH 4/9] revise function doc 4 --- .Rbuildignore | 1 + R/age.R | 2 +- R/case_definitions.R | 7 +++-- R/data.R | 57 ++++++++++++++++++++++------------------ R/prevalence_combined.R | 15 +++++------ R/prevalence_muac.R | 4 +-- R/prevalence_wfhz.R | 2 +- R/sysdata.rda | Bin 0 -> 10415 bytes data-raw/DATASET.R | 2 ++ 9 files changed, 49 insertions(+), 41 deletions(-) create mode 100644 R/sysdata.rda create mode 100644 data-raw/DATASET.R diff --git a/.Rbuildignore b/.Rbuildignore index 0f0c78d..ca1efbd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^pkgdown$ ^doc$ ^Meta$ +^data-raw$ diff --git a/R/age.R b/R/age.R index b1c72ef..0028653 100644 --- a/R/age.R +++ b/R/age.R @@ -38,7 +38,7 @@ compute_age_in_months <- function (surv_date, birth_date) { } #' -#' Process age +#' Wrangle age #' #' @description #' `process_age()` helps you to get the variable age in the format needed for diff --git a/R/case_definitions.R b/R/case_definitions.R index 988ef09..4c63e79 100644 --- a/R/case_definitions.R +++ b/R/case_definitions.R @@ -1,6 +1,5 @@ #' -#' Define if an observation is wasted on the basis of the criteria -#' of WFHZ, absolute MUAC values and combined case-definition +#' Wasting case-definition based on WFHZ, MFAZ, MUAC and Combined criteria #' #' @param df A data frame containing the required variables. #' @@ -223,8 +222,8 @@ define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, } #' -#' Classify wasting into severe or moderate wasting for use in SMART MUAC tool -#' weighting approach +#' Classify wasting into severe or moderate wasting to be used in the +#' SMART MUAC tool weighting approach #' #' @param muac A numeric vector holding absolute MUAC values (in mm). #' diff --git a/R/data.R b/R/data.R index 52fc30f..bcb1be3 100644 --- a/R/data.R +++ b/R/data.R @@ -2,9 +2,9 @@ #' A sample data of district level SMART surveys with location anonymised #' #' @description -#' `anthro.01` is a two-stage cluster-based survey with probability of the -#' selection of the clusters proportional to the size of the population. The -#' survey employed the SMART methodology. Data was anonymised for confidentiality. +#' `anthro.01` is a two-stage cluster-based survey with probability of selection +#' of clusters proportional to the size of the population. The survey employed +#' the SMART methodology. #' #' @format A tibble of 1,191 rows and 11 columns. #' @@ -22,6 +22,8 @@ #' | *edema* | Edema, "n" = no, "y" = yes | #' | *muac* | Mid-upper arm circumference (mm) | #' +#' @source Anonymous +#' #' @examples #' anthro.01 #' @@ -33,20 +35,10 @@ #' #' @description #' `anthro.02` is about a household budget survey conducted in Mozambique in -#' 2019/2020, known as IOF (*Inquérito ao Orçamento Familiar* in Portuguese). -#' The data is publicly available [here](https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access). -#' The survey had a module on nutrition with anthropometric measurements taken -#' from children age 0-59 months for weight-for-height and 6-59 months for MUAC. -#' -#' *IOF* is a two-stage cluster-based survey, representative at -#' province level (admin 2), with probability of the selection of the clusters -#' proportional to the size of the population. 
Its data collection spans for a -#' period of 12 months, with anthropometric measurements -#' taken during that period too. Read the [Bureau of Statistic's website on -#' IOF](https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling) for -#' more details. -#' -#' `anthro.02` has already been wrangled using this package's utilities. +#' 2019/2020, known as IOF (*Inquérito ao Orçamento Familiar* in Portuguese).*IOF* +#' is a two-stage cluster-based survey, representative at province level (admin 2), +#' with probability of the selection of the clusters proportional to the size of +#' the population. Its data collection spans for a period of 12 months. #' #' @format A tibble of 2,267 rows and 14 columns. #' @@ -67,6 +59,11 @@ #' | *mfaz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_mfaz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Mozambique National Institute of Statistics. The data is publicly +#' available at . +#' Data was wrangled using this package's wranglers. Details about survey design +#' can be gotten from: +#' #' @examples #' anthro.02 #' @@ -79,12 +76,12 @@ #' @description #' `anthro.03` contains survey data of four districts. Each district's dataset #' presents distinct data quality scenarios that requires tailored prevalence -#' analysis approach. Two districts show a problematic WFHZ standard deviation +#' analysis approach: two districts show a problematic WFHZ standard deviation #' whilst the remaining are all within range. #' #' This sample data demonstrates the use of prevalence functions on multi-area -#' survey data, where variations in the standard deviation ratings exist. -#' As a result, different analytical approaches are required for each area +#' survey data, where there is variations in the standard deviation rating. +#' As a result, different analyses approaches are required for each area #' to ensure accurate estimation. #' #' @format A tibble of 943 x 9. @@ -101,6 +98,8 @@ #' | *edema* | Edema, "n" = no, "y" = yes | #' | *muac* | Mid-upper arm circumference (mm) | #' +#' @source Anonymous +#' #' @examples #' anthro.03 #' @@ -113,12 +112,12 @@ #' A sample data of a community-based sentinel site from an anonymized location #' #' @description -#' `anthro.04` was generated from a community-based sentinel site survey -#' conducted across three provinces. Each province's dataset presents distinct -#' data quality scenarios, requiring tailored prevalence analysis. -#' "Province 3" has problematic MFAZ standard deviation and age ratio tests. +#' `anthro.04` was generated from a community-based sentinel site conducted +#' across three provinces. Each province's dataset presents distinct +#' data quality scenarios, requiring tailored prevalence analysis: +#' "Province 3" has problematic MFAZ standard deviation and age ratio tests; #' "Province 2" shows a problematic age ratio but acceptable MFAZ standard -#' deviation. Lastly, "Province 1" has both tests within acceptable ranges. +#' deviation; lastly, "Province 1" has both tests within acceptable ranges. #' #' This sample data demonstrates the use of prevalence functions on multi-area #' survey data, where variations in the standard deviation ratings exist. @@ -138,6 +137,8 @@ #' | *mfaz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_mfaz* | Flagged observations. 
1=flagged, 0=not flagged | #' +#' @source Anonymous +#' #' @examples #' anthro.04 #' @@ -159,6 +160,8 @@ #' | *wfhz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_wfhz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Anonymous +#' #' @examples #' wfhz.01 #' @@ -178,6 +181,8 @@ #' | *edema* | Edema, "n" = no, "y" = yes | #' | *muac* | Mid-upper arm circumference (mm) | #' +#' @source Anonymous +#' #' @examples #' mfaz.01 #' @@ -197,6 +202,8 @@ #' | *mfaz* | MUAC-for-age z-scores with 3 decimal places | #' | *flag_mfaz* | Flagged observations. 1=flagged, 0=not flagged | #' +#' @source Anonymous +#' #' @examples #' mfaz.02 #' diff --git a/R/prevalence_combined.R b/R/prevalence_combined.R index 9a1efe0..a2b4009 100644 --- a/R/prevalence_combined.R +++ b/R/prevalence_combined.R @@ -1,5 +1,5 @@ #' -#' Compute combined prevalence of acute malnutrition +#' Compute combined prevalence of wasting #' #' @rdname combined_prevalence #' @@ -66,14 +66,13 @@ compute_pps_based_combined_prevalence <- function(df, #' Compute prevalence of wasting on the basis of the combined case-definition #' #' @description -#' `compute_combined_prevalence()` is a handy function for calculating the -#' combined prevalence of wasting also in with the complex sample design -#' properties inherent to surveys. +#' `compute_combined_prevalence()` is a handy function for calculating the prevalence +#' of combined wasting in accordance with the complex sample design properties +#' inherent to surveys. #' #' @param df A data frame object returned by [process_muac_data()] and [process_wfhz_data()]. -#' Both wranglers need to be used to prepare data to be used -#' `compute_combined_prevalence()`. The order of which comes first does not matter, -#' however, since the muac data processor transforms MUAC values into centimeters, those +#' Both wranglers need to be used sequentially. The order of use does not matter, +#' however, since muac wrangler transforms MUAC values into centimeters, those #' need to be put back into millimeter. This can be achieved my using [recode_muac()] inside #' [dplyr::mutate()] or [base::transform()]. #' @@ -91,7 +90,7 @@ compute_pps_based_combined_prevalence <- function(df, #' @returns A table with the descriptive statistics about wasting. #' #' @details -#' The concept of "combined flags" is introduced in this function. It consists of +#' A concept of "combined flags" is introduced in this function. It consists of #' taking the `flag_wfhz` and `flag_mfaz` vectors, generated from the MUAC and #' WFHZ wranglers, and checking if any value in either vector is flagged. If flagged, #' the value is marked as a flag in the "cflags" vector; otherwise, it is not flagged diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R index 516dedb..0a4591d 100644 --- a/R/prevalence_muac.R +++ b/R/prevalence_muac.R @@ -3,8 +3,8 @@ #' basis of age ratio and standard deviation test results #' #' @description -#' This is a helper function that gives instruction to the main prevalence -#' analysis function on the analysis approach to follow in a given area of +#' This is a helper function that gives instruction, to the main MUAC prevalence +#' analysis function, on the analysis approach to follow in a given area of #' analysis on the basis of the quality of the age ratio test and the standard #' deviation. 
#' diff --git a/R/prevalence_wfhz.R b/R/prevalence_wfhz.R index 0fb41fb..e72de0c 100644 --- a/R/prevalence_wfhz.R +++ b/R/prevalence_wfhz.R @@ -1,5 +1,5 @@ #' -#' Compute the prevalence of wasting on the basis of WFHZ or MFAZ or MUAC +#' Compute the prevalence of wasting on the basis of WFHZ, MFAZ and MUAC #' #' @description #' The prevalence is calculated in accordance with the complex sample design diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 0000000000000000000000000000000000000000..d77481812f5a1e40b4ec15f2c4ad768dc3822f5c GIT binary patch literal 10415 zcmaiubx;(J1MH9VjuMXU<2a6#?mqJ92I+24kd%^cknT7-1e9*18$23R1SF)Szew|b zGjHbo`}U8W*`3eqtelCH7$3qIu4ha~e$We0d-44L)h2skKR^Um^yX9`JTj9i?RrUb z&%0C?^F|WGzn|y^0C?Gf5sK0D_Y16-aNJeBUg%UJVXaiLVoe^Ez6qdH-;zqITYmpD z<9IFScpn5)fNMaD${%tyqUAL(YSNuJLMol5P!s8LZk3CZMFafKiIuD=bG2>@2tFNL8L%^d-WfA~ODGvih<8%=! zDEL3#3H^U@EJS4(3P1w@$Ymhqs;a-~Ta7P1!YkoP)fdjF|7em9oduk==9I<>NhhSR zWNC0KK}F?KN()O;KrE8|JZmh_T^=@HDFGaNu2l+qfR7JLUR{=2{{d80Y6T}A7miB@ zRW7;|ht;IR;p&MwxanXA$b5iZt{}5g=p5?me~+BT7vwc?Wt|8KG#JVj1^IAY1PN*Q z?XVC~1tw6&@VJ!**+aUJl-`hPNFMvKQq2t}jkDH7tv_yJ&0W62qw4!i$^J4a4QjoM zT#cqYx5z8NT&@2wD@sT>c0y7K><^!#Vn4`p($c@sf?X83&J*{NF>=tgKu*5or7!K< zuFVL|&D8lCR;z=4q;|N<<*d1?!}3#x>2)1a2bF9%pcV?AT(I%^cj_vsbWk8NvFEco zl!-u%uOdx{fa*8b7Ear+)s#JT1ef9ZtF{;9Wkj;xjU@baVZD%{SVj4%D9E{dJb}A> zSdNkni-7zW^^Tg>Z1N_3Vhw?hO6H@g^^uXE=(+Y%#nk1}nE30)5)@y_^K)f08+(XG z64$XSnX>;tOqz0Su^QZNQ|d$ z1CH?hBpgH!0knf(`>d;Feec@5zQAALy|?ldcrvY9ODVZ%)pVlRIYx}A>GaQ59cg|=fohIK#$KbC}u zj$`=EPI4lI&XzuSDBd^Q%X5wM%qw@6q9SmQthDLaHtk(j^}e$w<;|@$ZHib-_bU4>#W3Fci8$5_ zp_L+JVxg-a~$fI0pMhDnJMEaR9tF`ZF4IVi{EZ)tli_a$Iy_cD!O7HTrd5?4uql zU}PE44nr#bPE3~ZM0W0QG0!j-q&Y5hUl#GY`4tu~-)re#xjzUINd}g!iKDnyW&wrk zvHpB3s1I~0;!Y=k(2l8GOwq03aa<~6W*9z^)=O}caU4rQ)l0xc?4_)+4h9|76gHZ- ze~_z;qRAIf6sNzW6nxeBaQ(Q5{ZoQ^b=sT^fM`<3N@KBz$ZJF>pu+;z z=kj}BZJ}s^WC%^Iw$0e;$9)dwKU)^74p20pprs}ah^~jM6#Exe?iFd}o_wQTp$$3l zeWb5HD)Kz44(@jG%kc~a)n1dqVx^`H$! 
zu>V~Ov~?bD5*UVw2|ZMSmXzm;F{^HYBFksNWEspUL~_P)k-#l#4r&D`)X7!oO>=Xe z`ig;p3lR4Q3sl#A66j4cfp~jiS~Ea zq5cPcaP1@-PE{Ohy;I%dE`FEEB^b0x~Hy24gM}Ifl6Qqap1FdNVxDF zUut*k*I@h<$j-D(h|?s#bs{_IS4l>o9A7m7pU;xteiGsDmy8+L;o2&?X_)=6t6fn8 zs*?~Ck4JLy6d^O>(W_0nzHbdec~kEv1HgGHsU};iBrKSAG=5ma1OW=2;*XP(3CmbV zeWorUKO8r9`UY16KMsNzwQ7)H{37O5CKZAlhC1KgsgG82=F}UWJk8AUqaQRI>I^*J zrwN>idn?2fsYC$|P)sUH=aF09Uu6=GM;Zp<5BEHJ$Ia^}?Oi^mg@FyVOrr(<9e7)V zH|J~dC@;Rcv38|c4=F+Spn`waevF18N^-+Ub08!~S0xQhpsoyM(h|IXmTR-V6;9zkI;KjUDuDF6s+Vk9teS1Ww z+FSz`;RIB%V5jj2Zv3w#g;Rd$%l1hQY>q*kzb|#;a z^1ulH9V-=foAwhulCN7=WU-3sG&1mJ6Th8FfPGVce+Mo;N$4(!Hr11+K5*sA z>-V-!e7z~QoE#Sx6(KzQ-{XG;eA}C12T5JyRkWk3i|}dSGNGP<^wUE3nQWSZ-84zs zS=Wg+=J->4`J2_{O4YS{x`fE7$kvL$c&{3%+(hZm_uJ&Yg?7FgOcc2N|5k~a&NLvM zQW1a+CFDgK*qS#b+iRG*gK`=}VTjVwfqO&rSAhLAABp4SL0 zG23KT!Zr?XvpV`l+^R0G&b`O!w!dF>Avs!Fe&*7oV5H!*w;p(buM9qsl00`mq4|D= zGelJM_(-<}c9TTdJ{$jYc-~c=@{4~~q2Z@|@Nm!Na>}>X7Vgaq)0R`>fv!pthFegx&F!t`0$$y-ty2FnoH!$^iAiL(asb2 zelAJ!Y=!79E~GjrU}xH$>S%yyya&qQa@-RjQ$-(6E~Fy!etFc89!h_GIgFZ zj1he`KJ_^goNi`J%+lAu(zG0I<%s2@=U&H~NB3tf^z_ET3V);ce?mj|pVRt2hWgO| zmTCgOEKKJ8I~eIw6r+E&#%VX3^Qs=-%dds$>$?;C70d9g4#NA`KaxCZ3!y)Lp*sG% zlgY3TVv4r850YE*o(>*35&;cryD!xVeG;QkUyMW)E_7q9d8Ws8QC zmRd*CMCU*uD0p0KPJ9{_q&8eN#KM-3jFebMfkhpA*Z@17SbaiUe}NyDlwj)D7e_(q zpcd0!#mQ{^{nsx+pSZAf#kXb#jN1ACBwBFm+_#H`ik?b#>po~`DhyUrshfB2#;~m# zZ~j)sO8yvCE#W3{gk7I><53rR5sOZs#=EhW9k!dZcGMSitz!_>86YW!MMi=lK`;8k z5>ot~?eMfnP4y#UHxxnKtPAOR6T_Z*_OK>fSV~fT#Yi0&lS`6(#TPf@6p5BvIqZXY z7r{0>@grs!L`KY7!oJp*EF4nVi%T$X4bYb4F1;uCFY_Jiz8^s?1KbY36>!D{ajM$D zAnTU^Fh|R%hogwLBM0`@1^kN9h<$v#kO}naxWO(EdatFU%OePkUrtgBM_@ zeH&vubK2YEVqqAp@s!MnUv2O5Jl}HGV@swQ}UThL!ExFr`4ONvcJW zOGvyn&V*f3loK5RG=X3qyd8&Qc3mEsOl9TCR3yiZQ$XXmL(75RP-Z|p4Wm$WP4M>4gxsbYeP4kUav~2gYiik5E5k#}OKNe$ zGf?FvB^54_sO@I#*m4t^S{f6Mc#dip7m@M~q&69j0dQECgoRv#Q*It@fUQFalGn#T z&g(8=BS;N2APQ0B@~RoyWEEslWF?9qgrZ;t3&8|0dO;rt1kOm(=3_$YF39uFXJG4- zBjiW-xYIgcanB0;70DNNQ-*ZX*~#uGcDtL`*wz1dtC8=-l`W-Zv2U-h_nvZHe8;F_ z+kBV%{Tf+=@2}>we7G`ER(1qwqy%!}OmL~O;Of;}g4bX=^B4^@$s$`#4PKuM|2$I* za$oW8tNsMrFXeRYbB-rkBM8utbmyX1@3XGuWN#V*y3eFRW!##o%doUC1X&Mexa+q^sW7cz`N2%pJabh zf5WN*;W?KD!BUd4i#I?1D0!|sOmlpPj=xcEb;;~Sx|ME5+j!X64R)CX)BtUq7Qcdx z|KQ6_QWvHNoMU$Vm$s*@X;@K@zr=g75LEDbiFi~)*RtTF+l67mH#~{hPt?CTA=+@# z)%>@l-_=LQ-L>)=61sTQ^k6QvjV}5pi!7uQ1f%|W`Zpyp^(?en4(O;Yxw>5{)sHg< zvfqs6c?x}oN8;j5ogQ*s+qm<5Uh6mOqFcnQUm|RYm*#}iC#HKgNSyXom6bqqxr%KX z`&Xpj$7SuRT;B^QMrXN=+O4qrr~cg8VpmXVRVE-Hyz>9z@qzsQGJT1Qi>i zl17D?fU?{+;3cm=pM#R)5ndBmO0PTQ^Wp6-mQ_$+$WR+GZu#|lixnmmKPxR-t7gYX zxzqd6G3&YY%a*kByevDBD}zOcg_S9c00-y3 zF!`+uF2mt2h#X^b)+KLNt?jvZ`p}_Y)u|817Hh&6>*2GP1oUrB72J|Jz$0stptr=k)5PTj|YKoH+1%cf=8qDkBeU9wbYx$ z39ZfI_Y!|KRmOYb6va3;P@37ebQ~}IzHZKXf*mQVq)Wl={;g6b%y+4gqLZ1Y+rjPYw*y!Zf6!K%Cz<1F* zig_dv-I_J}9%+LdS@<#` zOilh*C1rFct)=35w7O_)$^>15St_78+mp92bNGPnfQB+4V)tYWQ+aOI1NM1ElO>q5 z(UOaIZ+{z80pH*hVd&z2#NzAcwQwm;|;`1lXf7|U(Vd&xGMXh!9W^Lir%0Jun zv#SLx-NimGGlm_NfRVm1sM|0L8)Xk@2iNLG`NNRJV!1>+hk`7&1H6SYMrh zJh*7*259pB$!&CeIA(BOmd<#|^+Y6W-Iz+GB+iVEqN36mYOqJsnAK|Ec0>m>)g3ca z?I!IcwJ#`Hgpw?h$Kt+o4ONDF#0c|e^qqylA{q@Edg_>~89%Cx49!b}ZW~0_Nhg!T zko;WRx#1j1*f1!MCgJoR*?dpT18d8)tJ&lcA(VgBRB%why5}(DbL|s%;xpDofY~Mq z3sd~1-G&3jz!uuamoYDVTz_HSxnP=Lakj~YPwbgy!Mar*CWqs$pU4VCl)s~kXc7HN zOTF{(Hrc;(mI%IXJ3T+JN7fkzX`B1n?UwD6UC!draXRdMiHDJ{&mCq=x)I%q>iYWe z>de^KxjL~`2oS$q2W$`KtAQg4&OLdwz6}#2)hY-Y;w`etJ!2mWOE&Rz=x{frrqf-2 zOgEb_l*q1kKaNPzk=ncOOgy>zPqfA=MSLE$Ng}9J4Fg^siY?~u)l?HQ4PNe4vixYp z=i<})i$SEfVJV}&$IQVjlqh(PTXOb(1W!4kVW-@Rg)#vt;uKMC<(}K>q1o2t{ALZ< zd0u!Ds6OyE&|NM zLRd;pAH~5r)YWHLA8-C{W2TTmR?>gfJiT;#J2B=(*ZFOlSS&N@$#VL!PZziKOP9`q 
zH61@Js-g?|7-+47Rlt!@t?Bt4Lx>Q&(9+c^HW zt?z2gCZ!tbjRvOV-rL-U(V`i{B@h z8hv?Q(`a|}VD>xBuRLzML-4F9}p(^Mhtd<%n&JJKSZ_8BB;TC_voD#X;7y( zgIR-VeC3f0XMOyVfiuUVxvXq!RAEb`P?nYdYh6OAe%T~16{_>fAtsJb1<{ttnH8U6XTJpgnV<&I(+YytTfVzFSNyjGlY9XOJR}NgZtiWG{s~ zz_3mNeY+|jhR)(3*?-Ciqx%d+tI_j{D%j4-q6gsRZMhpoFGXx+jr0eBe-Akwaj?KW zkz$9NlRsT9L|(nw%82iHUPqW8yAQqH+GZU+*8|+@7}RT3=>Np}M#KjKuSo+Ls4GPs zTq~7QQ!}Hx40CPPfM0UsorZ|TU}y@8#Pa;T2gsIi`xE8Rm!uKwb8+EjEfn>bL_}EK zmf`!qraHG?+Gk-nsT~D-%>t~FM5~Zj_ij%wgDgGX^0N$Y9i4Ii7k6NPNZ|VYdU}YJ z;Zii%rt*D%OCnQBK)vPX;0>9Vl!2bqA!#8-H?~oj?8FC^akwuOcG6%?mIZUOMQbNY z(3m|XBid&PcL!zTRSC*u$w)}oM$%(6b~LxaVxVi`c1(7s#6BBwHcM=zoV^x8rObArk%Mt-KU%h)SR1Z`U_8xF{d&QQV{DIC_Zg)rCcZ z#??7wILA9B0$n?vrPul0UC~T4_qZtPu_Kwct%&ex^>+F5Q8@Ci6~uM03C?d9m!kOs z8+CDPbG-a!k3stS9gpb?Yz?>Ls&CigMx9Ud92+E64nf$V%7cT#fxWvNUv52b3_oHQ z40s-R3g+YsQsJF=-LnICnqb*ag$x4YG6@E$;dVE5l43p^WNn#?#h>$6PodZ;%67c9Karh(-)g3FQNjamMQ(Y+tNp z&k5VKv3T2sri|PBqc#3^{RXewwFV(pO8;Sf!}VRFZnunYzEBY~AM>SOdZo3%)cm^= zcM=t^uPB%iBNTt|Ftc1VH(l)Mq?4Iz&|3|{H|W_Y$90NdAA0YOqd~V-@!hK2!<&hd z5U-3nFL!H7fUI5BHXKiTes=d}J$pbTWyk959T=I?KV{acBo|pM7F2b>R$b3hYgjmY z8@kCWBM}j!Xj~ruN5PMW1juA2wKV{_?lt|JydP$TuY;xhY3U8YwmVgD_ge^#pm?h1 z`v*NmK0MEDr;}S(31JCK@YkG8pKKb=ykGn__jf^(k7qrqW#2pU=*(Fqn5D+P4Tgj4 ze~qukOw4`%eE@Oqz}jbN<}(!t^%#`v`kfh>`}sjky}kO~X26N)@{^7GlGJSPm~#G% zVRv@fr0WnqqQxJ|w^0+HHGAp&fge^I8MGrLI6$}-{>N)UE`}|B$tWENw>bQQ6_l_V zbGr&B|MW#XsthfXcHH{U3`-Aetp96;Zv$W8L`K|FW>!jDm+|7Kx*IStdmn_ljl;m- z;q^hd4y>?yoV(6oj*#{05EKt#<1z}r{7EiNRm-VSYpncV8%#iKEV&Xh@$3?H5R z;&iDd+q(R92egC;cvd<7?K&&EhAgIm#^o&Q-_o*g)0$tp;zz@;Z5sdVQLW$CeCoYC}@2A;&j9zs}#d`MXI{RcwJRIr)f}x;5o2vf+lut z%TS~ENAD|eYA3Gii?NAUZ{eH6)!MZ6+KG~(?^3PCg&Z{KI<+=Y;$&T)DH&R#qFs&f z=A}bbZ$lr%)jkKIme2C+lpNeTe>&dSRte^S04ZLhju9pQ!*Y2dv4^*8jN?1H4U z)rZmMP-~iBC7(FlD#IABo30cRMQl7>>buH;2X!b5vWw$ob`eus{heR2$fA}>*+wCw zR_$}Bce38dWzx(a!BdcFPvdMpy`f})yj8A!oY?Qxj(3{L0vyX-1@&(@L7ML>wYg`~ zMUDHRp4JWCVCC_Wg6j4}2O7|(Mrs&HT!oES*^c}3@G@@5cc6XFxWY(uty>l5kF@Ia zx=uYSe#41R)9st}edaty4KbAXkpuRQw6hKFFZ8k9IM{c1WdrjuiVtNoBt!{2(&VMn ztX583Sf{qyc8XZ9KV|W%Kex>Jl0ab*>kVn{{ zEA=izZoT*t>~&jSv1ct|blRueawzY3#(%S`K!}z!#wL*O->CVhJ1cf^#14e6NWo(U z3wPALwp)PIA#B|&p<$*qp?gNeY<462bk`DjAR05jd9d`%Bht4)e}hJqma^G7^R*1H zt8~gL528lyERG@aO78V7`}<_mfgIJC~yyQZ>~` zYNYG|Egi%&@ouavEa#^sX9C~a3bfN|R#a;n`k3>mXW5n@mFF!&wV9UO+~EC5fJnan zZI{4|?Poz3uc0#DjGUPTYG(yw8fSZleUW024Aqtg@1~!5X!PRfnhkmHDRnfJ!~Fn; zj>d|?$<7;V$ghy^1e@=5QecM{8-#S58h!8Bbh(ry>|3zGPmF2ZESKhuzk zJ5J!9p)miHxWH8Uq-B=+N6Rd|>t1<*S1~3}4Ki4kSXI|PbLIJUpp9wM9tBHuuZ zE5DRXog8!w2pQuq9P;SJB`rd}^Yy6*7JQvD!+!mx047Syi_1HHu&YQ7rjS!>IqDBP z-Vm19TeaHY)ka*(w~TjF(je1dyZlL3KvFgW`GV>(hC7?0?hv7)boagoQA6f^m$ChrYBo*jf)?gAm)-J6s4W`AL$WQx{u%nt=jwFwTOA}i! 
zFQ$9WGXF58@8^#Xe)z|!jh%EE zOiihF=DoNy$X3Jt-zqhUp3yzb|E0vDKVd97Pb)rVu$_@K$KJ`nNnk zz+xHutNfmxqOXN#_Z{Q9pO!^&y725%J>8 z;olN+@L#)s!o9(Q$rA(L%xOkE{!o>^o085t<1c2!clvQi{Wr2Or*0zeW#_%FG3WyG&1!*j;Omc3JR9LnT9(%x|WGhNs0Jm~2e z`7VVIV&_M_<%Kho^g1C^m4RLa%Pr?(tcgQ7$R97AiR#y%HIm3^o|WQ3$*BpM#0mV1 zwn(HbTJk;S?(0P5w>!B&+RaLDdyd{P#88V$L@;{T_no%e@pi*CG*}$sJ#RZrkyW zvnG9SBbi1B*IzGg7)7i4Jr7C8N3{WH1@$qn0LBw+4IPf{vP|@s&wlo6qu=Z@c5;O& z1p>|~ys3&4FX$+NUG0MY-!d)folg9SWzE`^zT%i(-->JkbmBDlL0xli-)XE>FkO%u zlg$x5$hEDQJ|-!QLQ(oBd^~%+dE{2c*ff--Y_+Cssc6U>pq>u{cV&9Tn0f7z`djuR X(ziA_KczjN`@!_0Ko|oS`RM-xuee** literal 0 HcmV?d00001 diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R new file mode 100644 index 0000000..482b9a2 --- /dev/null +++ b/data-raw/DATASET.R @@ -0,0 +1,2 @@ +# Internal data ---- +usethis::use_data(wfhz.01, mfaz.01, mfaz.02, internal = TRUE, overwrite = TRUE) From 8fd6f70850f6788c5ebd74814eb21a467c16f51c Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Thu, 17 Oct 2024 14:32:10 +0200 Subject: [PATCH 5/9] doc: revise function doc (1) --- R/age.R | 99 +++++------ R/quality_raters.R | 59 +++---- R/quality_scorers.R | 34 ++-- R/sample_size.R | 32 ++-- R/wranglers.R | 205 ++++++++++++----------- man/age_ratio_test.Rd | 36 ++-- man/anthro.01.Rd | 9 +- man/anthro.02.Rd | 23 ++- man/anthro.03.Rd | 9 +- man/anthro.04.Rd | 13 +- man/case_definition.Rd | 6 +- man/check_sample_size.Rd | 31 ++-- man/classify_age_sex_ratio.Rd | 9 +- man/classify_overall_quality.Rd | 13 +- man/classify_skew_kurt.Rd | 9 +- man/classify_wasting_for_cdc_approach.Rd | 8 +- man/combined_prevalence.Rd | 15 +- man/compute_age_in_months.Rd | 17 +- man/compute_month_to_days.Rd | 9 +- man/compute_quality_score.Rd | 15 +- man/mfaz.01.Rd | 3 + man/mfaz.02.Rd | 3 + man/outliers.Rd | 39 +++-- man/prevalence.Rd | 2 +- man/process_age.Rd | 31 ++-- man/raters.Rd | 31 ++-- man/recode_muac.Rd | 27 ++- man/scorer.Rd | 18 +- man/tell_muac_analysis_strategy.Rd | 4 +- man/wfhz.01.Rd | 3 + man/wrangler.Rd | 71 ++++---- 31 files changed, 445 insertions(+), 438 deletions(-) diff --git a/R/age.R b/R/age.R index 0028653..3771d0d 100644 --- a/R/age.R +++ b/R/age.R @@ -1,33 +1,30 @@ #' -#' Transform age in months to days +#' Calculate child's age in days #' -#' @param x A numeric vector containing age values in months. +#' @param x A double vector of child's age in months. #' -#' @returns A numeric vector, of the same length as the input variable, containing -#' age values in days. +#' @returns A double vector of the same length as `x` of age in days. #' #' compute_month_to_days <- function(x) { x * (365.25 / 12) } + + + #' -#' Calculate age in months +#' Calculate child's age in months #' #' @description -#' `compute_age_in_months()` calculates age in months from on the basis of -#' difference between the data collection date and the child's date of birth. -#' It works inside [dplyr::mutate()] or [base::transform()]. +#' Calculate child's age in months based on date of birth and the data collection date. #' -#' @param surv_date A vector of class "Date" holding values corresponding to -#' the date of data collection. +#' @param surv_date A vector of class `Date` for data collection date. #' -#' @param birth_date A vector of class "Date" holding values corresponding to -#' the child's date of birth. +#' @param birth_date A vector of class `Date` for child's date of birth. #' -#' @returns A numeric vector named `age` holding age values in months with two -#' decimal places. Any value outside the range of 6.0 to 59.99 is replaced with -#' `NA`. 
+#' @returns A vector of class `double` for child's age in months with two decimal places.
+#' Any value less than 6.0 or greater than or equal to 60.0 months will be set to `NA`.
#'
#'
compute_age_in_months <- function (surv_date, birth_date) {
@@ -37,30 +34,34 @@ compute_age_in_months <- function (surv_date, birth_date) {
age_mo <- ifelse(age_mo < 6.0 | age_mo >= 60.0, NA, age_mo)
}
+
+
+
#'
-#' Wrangle age
+#' Wrangle child's age
#'
#' @description
-#' `process_age()` helps you to get the variable age in the format needed for
-#' the analyses in the downstream workflow. Fundamentally, it calculates age in
-#' months from on the basis of the difference between the data collection date
-#' and the child's date of birth and then censors age values that are out of range.
+#' Wrangle child's age for downstream analysis. This includes calculating age
+#' in months based on the date of data collection and child's date of birth and
+#' setting to `NA` the age values that are less than 6.0 or greater than or equal
+#' to 60.0 months old.
#'
-#' @param df Input data frame holding the required variables.
+#' @param df A dataset of class `data.frame` to process age from.
#'
-#' @param svdate A vector of class "Date" holding values corresponding to
-#' the data collection date. Default is `NULL`.
+#' @param svdate A vector of class `Date` for date of data collection.
+#' Default is `NULL`.
#'
-#' @param birdate A vector of class "Date" holding values corresponding to
-#' the child's date of birth. Default is `NULL`.
+#' @param birdate A vector of class `Date` for child's date of birth.
+#' Default is `NULL`.
#'
-#' @param age A numeric vector holding age values in months, usually estimated
+#' @param age A vector of class `integer` of age in months, usually estimated
#' using local event calendars.
#'
-#' @returns A data frame of the same length as the input with an additional
-#' column. A new variable, `age_day`, is added to the output data frame whilst
-#' the `age` variable gets filled where applicable, and then any values outside
-#' the range of 6.0 to 59.99 months get replaced with `NA`.
+#' @returns A `data.frame` based on `df`. The variable `age` that is required to be
+#' included in `df` will be filled where applicable with the age in months for
+#' each row of data in `df`. A new variable for `df` named `age_days` will be
+#' created. Values for `age` and `age_days` for children less than 6.0 or greater
+#' than or equal to 60.0 months old will be set to `NA`.
#'
#' @examples
#'
@@ -105,38 +106,38 @@ process_age <- function(df, svdate = NULL, birdate = NULL, age) {
tibble::as_tibble(df)
}
+
+
#'
-#' Test the proportion of children aged 24 to 59 months over 6 to 23 months old
+#' Test for statistical difference between the proportion of children aged 24 to
+#' 59 months old over those aged 6 to 23 months old
#'
#' @description
-#' Age ratio test of the proportion of children aged 24 to 59 months over those
-#' aged 6 to 23 months old.
+#' Calculate the observed age ratio of children aged 24 to 59 months old over
+#' those aged 6 to 23 months old and test if there is a statistical difference
+#' between the observed and the expected.
#'
-#' @param age A numeric vector holding child's age in months.
+#' @param age A double vector of age in months.
#'
#' @param .expectedP The expected proportion of children aged 24 to 59 months
-#' old over those aged 6 to 23 months old.
As in the
-#' [SMART MUAC tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/),
-#' this is estimated at 0.66.
+#' old over those aged 6 to 23 months old. This is estimated to be 0.66 as in the
+#' [SMART MUAC tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/).
#'
-#' @returns A vector of class "list" holding three statistics: `p` for p-value,
-#' `observedR` for the observed ratio and `observedP` for the observed proportion
-#' of children aged 24 to 59 months over those aged 6 to 24 months old.
+#' @returns A vector of class `list` of three statistics: `p` for p-value of the
+#' statistical difference between the observed and the expected proportion of
+#' children aged 24 to 59 months old over those aged 6 to 23 months old;
+#' `observedR` and `observedP` for the observed ratio and proportion respectively.
#'
#' @details
-#' `age_ratio_test()` should be used specifically for assessing MUAC data. For
-#' age ratio tests of children ages 6 to 29 months and 30 to 59 months old, as
-#' performed in the SMART plausibility checks, use [nipnTK::ageRatioTest()] instead.
+#' This function should be used specifically for assessing MUAC data. For
+#' age ratio tests of children aged 6 to 29 months old over 30 to 59 months old, as
+#' performed in the SMART plausibility check, use [nipnTK::ageRatioTest()] instead.
#'
#' @examples
#'
-#' ## A sample data ----
-#' age <- seq(6,59) |>
-#' sample(300, replace = TRUE)
-#'
-#' ## Apply the function ----
+#' ## An example of application using `anthro.02` dataset ----
#' age_ratio_test(
-#' age = age,
+#' age = anthro.02$age,
#' .expectedP = 0.66
#' )
#'
diff --git a/R/quality_raters.R b/R/quality_raters.R
index 688e1ac..74fe00e 100644
--- a/R/quality_raters.R
+++ b/R/quality_raters.R
@@ -1,28 +1,28 @@
#'
-#' Rate the proportion of flagged values in the data and the magnitude of the
-#' standard deviation
+#' Rate the acceptability of the standard deviation and the percentage of flagged
+#' data
#'
#' @description
-#' `classify_percent_flagged()` rates how much high is the proportion of
-#' of flagged data in your data set, as well as the magnitude of the standard
-#' deviation. It applies for the WFHZ, the MFAZ and absolute MUAC values.
+#' Rate how high the standard deviation and the percentage of flagged
+#' data in the dataset are, and hence their acceptability.
#'
-#' @param p A numeric vector containing the proportions of flagged values
+#' @param p A vector of class `double` of the proportions of flagged values in
+#' the dataset.
#'
-#' @param sd A numeric vector containing values for standard deviation.
+#' @param sd A vector of class `double` of the values of the standard deviation.
#'
-#' @param type The indicator to be used for the rating. A choice between "mfaz"
-#' for MFAZ, "whz" for WFHZ and "crude" for crude MUAC.
+#' @param type A choice between "wfhz", "mfaz" and "crude" for the basis on which
+#' the rating should be done.
#'
-#' @returns A character vector with the rating results.
+#' @returns A vector of class `character` for the acceptability rate.
#'
#' @details
-#' The rating categories are: "Excellent", "Good", "Acceptable", "Problematic".
-#' The cut-offs of the WFHZ are as in the [
-#' SMART Methodology](https://smartmethodology.org/). As for the MFAZ and the
-#' absolute MUAC values, the maximum acceptable limit is at 2%, as recommended
-#' by [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478).
-#' Cut-offs for crude MUAC are based on the +#' The ranges of acceptability are: "Excellent", "Good", "Acceptable", "Problematic". +#' The cut-offs for WFHZ are as in the [SMART Methodology](https://smartmethodology.org/). +#' For the MFAZ and the absolute MUAC values, the maximum acceptable limit for +#' outliers is 2%, as recommended by +#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). +#' Cut-offs for the standard deviation of the absolute MUAC values are based on the #' [IPC AMN guidelines](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). #' #' @@ -93,11 +93,12 @@ classify_sd <- function(sd, type = c("zscore", "crude")) { #' -#' Rate the p-values of the age and sex ratio test +#' Rate the acceptability of the age and sex ratio test p-values #' -#' @param p A numeric vector containing the test p-values. +#' @param p A vector of class `double` of the age or sex ratio test p-values. #' -#' @returns A character vector with the rating results. +#' @returns A vector of class `character` of the same length as `p` for the +#' acceptability rate. #' #' classify_age_sex_ratio <- function(p) { @@ -111,11 +112,12 @@ classify_age_sex_ratio <- function(p) { #' -#' Rate the magnitude of skewness and kurtosis test results +#' Rate the acceptability of the skewness and kurtosis test results #' -#' @param sk A numeric vector containing values of either skewness or kurtosis. +#' @param sk A vector of class `double` for skewness or kurtosis test results. #' -#' @returns A character vector with the rating results. +#' @returns A vector of class `character` of the same length as `sk` for the +#' acceptability rate. #' #' classify_skew_kurt <- function(sk) { @@ -130,18 +132,17 @@ classify_skew_kurt <- function(sk) { #' #' -#' Rate the overall data quality +#' Rate the overall acceptability score #' #' @description -#' `classify_overall_quality()` informs you about the overall quality of the data -#' by rating the overall quality score in "Excellent", "Good", "Acceptable" and +#' Rate the overall acceptability score into "Excellent", "Good", "Acceptable" and #' "Problematic". #' -#' @param df A data frame containing a vector with the quality scores yielded -#' from [compute_quality_score()]. +#' @param df A dataset of class `data.frame` containing a vector of the overall +#' acceptability score as yielded from [compute_quality_score()]. #' -#' @returns A character vector of the same length with a new column called -#' `quality_class`. +#' @returns A `data.frame` based on `df`. A new column `quality_class` for the +#' overall acceptability rate is created and added to `df`. #' #' @examples #' ## A sample data ---- diff --git a/R/quality_scorers.R b/R/quality_scorers.R index 1314629..58e3490 100644 --- a/R/quality_scorers.R +++ b/R/quality_scorers.R @@ -1,20 +1,18 @@ #' -#' Score the rating of proportion of flagged data, the magnitude of the standard -#' deviation, skewness, kurtosis and the p-values sex and age ratio test +#' Score the acceptability classification of the standard deviation and percentage +#' of flagged data test results #' #' @description -#' `assign_penalty_points_flags_and_sd()` ranks the proportion of the flagged -#' values in the data and the magnitude of standard deviation based on the SMART -#' scoring criteria. +#' Attribute a penalty point based on the acceptability classification in which +#' the plausibility test result falls. 
#'
-#' @param x A character vector holding the test classifications for the proportion
-#' of flagged data, the magnitude of the standard deviation, the p-values of the
-#' age and sex ratio tests, as well as the results of skewness and kurtosis tests.
+#' @param x A vector of class `character` of the acceptability classification of the
+#' plausibility test results.
#'
-#' @returns A numeric vector with the corresponding score.
+#' @returns A vector of class `integer` of the same length as `x` for the score.
#'
#' @details
-#' The ranking is as in [SMART Plausibility checks](https://smartmethodology.org/).
+#' The scoring criteria are as in [SMART Plausibility checks](https://smartmethodology.org/).
#'
#' @rdname scorer
#'
@@ -56,19 +54,19 @@ assign_penalty_points_skew_kurt <- function(x) {
#'
#'
-#' Get the overall quality score for WFHZ and MFAZ
+#' Get the overall acceptability score from the acceptability classification scores
#'
#' @description
-#' `compute_quality_score()` calculates the overall score of the quality of the
-#' data for both WFHZ and MFAZ.
+#' Calculate the total number of penalty points based on each plausibility test
+#' result acceptability classification for WFHZ and MFAZ.
#'
-#' @param df A data frame containing individual test quality scores.
+#' @param df A dataset object of class `data.frame` to calculate from.
#'
-#' @param type The method you wish to get the overall quality score for.
-#' A choice between "mfaz" and "wfhz".
-#'
-#' @returns A vector named `"quality_score"` with the overall quality score.
+#' @param type A choice between "wfhz" and "mfaz" for the basis on which the
+#' calculations should be made.
#'
+#' @returns A `data.frame` based on `df` with a new column named `"quality_score"`
+#' for the overall acceptability (of quality) score.
#'
#' @examples
#'
diff --git a/R/sample_size.R b/R/sample_size.R
index 18b206c..a1645dd 100644
--- a/R/sample_size.R
+++ b/R/sample_size.R
@@ -1,30 +1,34 @@
#'
-#' Check if the IPC AMN sample size requirement were met
+#' Check whether the IPC Acute Malnutrition sample size requirements were met
#'
#' @description
-#' `check_sample_size()` verifies if the minimum sample size requirements of the
-#' IPC Acute Malnutrition protocols are met in a given area of analysis.
+#' Verify whether the minimum sample size requirements for the area of analysis
+#' were met, in accordance with the IPC Acute Malnutrition (IPC AMN) protocols.
#'
-#' @param df A data frame containing the required variables.
+#' @param df A dataset of class `data.frame` to check.
#'
-#' @param .group A vector containing the primary sampling unit (PSU) ID's. Usually and
-#' ideally a numeric vector, but sometimes this may present itself as a character.
-#' Either way, `check_sample_size()` will work accordingly.
+#' @param .group A vector of class `integer` of the cluster ID's for survey,
+#' screening or site ID's for screenings and sentinel sites.
#'
#' @param .data_type A choice between "survey" for survey data, "screening" for
#' screening data or "ssite" for community-based sentinel site data.
#'
-#' @returns By default, a summary table of one row and three additional columns
-#' are returned. Column `groups` and `n_obs` hold the total number of unique
-#' PSU's and children respectively, and `meet_ipc` tells whether the IPC AMN
-#' sample size requirements were met.
+#' @returns A summarised table of three columns: `groups` for the total number
+#' of unique cluster or screening or site IDs; `n_obs` for the respective total
+#' number of children; and `meet_ipc` for whether the IPC AMN requirements were met.
#'
#' @details
-#' Use dplyr::group_by() before `check_sample_size()` to get a summary for each
-#' unique survey or screening location from your data.
+#' [The IPC Manual](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/).
+#'
#'
#' @examples
-#' check_sample_size(anthro.01, .group = cluster, .data_type = "survey")
+#'
+#' anthro.01 |>
+#' dplyr::group_by(area) |>
+#' check_sample_size(
+#' .group = cluster,
+#' .data_type = "survey"
+#' )
#'
#' @export
#'
diff --git a/R/wranglers.R b/R/wranglers.R
index 9b4d049..8026225 100644
--- a/R/wranglers.R
+++ b/R/wranglers.R
@@ -3,43 +3,42 @@
#' Identify and flag outliers
#'
#' @description
-#' Outliers are extreme values that deviate remarkably from the mean, making
-#' them unlikely to be accurate measurements. `flag_outliers()` helps you to
-#' identify them whether in the WFHZ, the MFAZ or the absolute MUAC values.
+#' Outliers are extreme values that deviate remarkably from the survey mean, making
+#' them unlikely to be accurate measurements. This function detects and signals
+#' them based on a criterion set for the WFHZ, the MFAZ and for the absolute MUAC
+#' values.
#'
-#' @param x A numeric vector holding either the WFHZ, the MFAZ values, or the
-#' absolute MUAC values (in millimeters).
+#' @param x A vector of class `double` of WFHZ or MFAZ or absolute MUAC values.
+#' The latter should be in millimeters.
#'
-#' @param type The method you wish `flag_outliers()` to identify flag outliers
-#' in the data. A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for
-#' absolute MUAC values).
+#' @param type A choice between `zscore` and `crude` for where outliers should be
+#' detected and flagged from.
#'
-#' @param unit A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for
-#' absolute MUAC values).
+#' @param unit A choice between `zscore` and `crude` for where outliers should be
+#' detected and flagged from.
#'
-#' @return A vector of the same length as input holding dummy values: 1 for is
-#' a flag and 0 is not a flag.
+#' @return A vector of the same length as `x` indicating which observations are
+#' outliers: 1 if the observation is a flag and 0 if it is not.
#'
#' @details
-#' The flagging criteria for the WFHZ is as in
-#' [SMART plausibility check](https://smartmethodology.org/). As for the MFAZ, it
-#' uses the same criteria as WFHZ, whilst a fixed flagging criteria is used for
-#' absolute MUAC values. This is as recommended by
+#' The flagging criterion used for the WFHZ and the MFAZ is as in
+#' [SMART plausibility check](https://smartmethodology.org/). A fixed flagging
+#' criterion is used for the absolute MUAC values. This is as recommended by
+#' [Bilukha, O., & Kianian, B.
(2023).](https://doi.org/10.1111/mcn.13478)
#'
#'
#' @examples
#'
#' ## Sample data for absolute MUAC values ----
-#' x <- c(90, 110, 140, 200, 119, 235)
+#' x <- anthro.01$muac
#'
-#' ## Apply `flag_outliers()` with type set to "crude" ----
+#' ## Apply the function with type set to "crude" ----
#' flag_outliers(x, type = "crude")
#'
#' ## Sample data for MFAZ or for WFHZ values ----
-#' x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828)
+#' x <- anthro.02$mfaz
#'
-#' # Apply `flag_outliers()` with type set to "zscore" ----
+#' # Apply the function with type set to "zscore" ----
#' flag_outliers(x, type = "zscore")
#'
#' @rdname outliers
@@ -95,30 +94,23 @@ remove_flags <- function(x, unit = c("zscore", "crude")) {
#' Convert MUAC values to either centimeters or millimeters
#'
#' @description
-#' Recode the MUAC values into either centimeters or millimeters as required.
-#' `recode_muac()` works inside [dplyr::mutate()] or [base::transform()].
+#' Recode the MUAC values to either centimeters or millimeters as required.
#'
-#' @param muac A numeric vector holding the absolute MUAC values.
+#' @param muac A vector of class `double` or `integer` of the absolute MUAC values.
#'
-#' @param unit A choice of the unit to which you wish to convert the MUAC
-#' values into.
+#' @param unit A choice of the unit to which the MUAC values should be converted.
#'
-#' @returns A numeric vector of the same length as input, with values converted
-#' into your chosen unit.
+#' @returns A numeric vector of the same length as `muac`, with values converted
+#' to the chosen unit.
#'
#' @examples
#'
-#' ## A sample of MUAC data in millimeters ----
-#' muac <- seq(90, 250, by = 4)
+#' ## Recode from millimeters to centimeters ----
+#' muac <- anthro.01$muac
+#' muac_cm <- recode_muac(muac, unit = "cm")
#'
-#' ## Apply the function ----
-#' recode_muac(muac, unit = "cm")
-#'
-#' ## A sample of MUAC data in centimeters ----
-#' muac <- seq(9.0, 25.0, by = 0.2)
+#' ## Using the `muac_cm` object to recode it back to "mm" ----
+#' muac_mm <- recode_muac(muac_cm, unit = "mm")
#'
#' @export
#'
@@ -141,52 +133,52 @@ recode_muac <- function(muac, unit = c("cm", "mm")) {
#'
#'
-#' Process and censor weight-for-height and MUAC data
+#' Wrangle weight-for-height and MUAC data
#'
#' @description
-#' This is the job of `process_wfhz_data` and `process_muac_data()`. They are
-#' responsible for computing the weight-for-height and the muac-for-age z-scores
-#' respectively, and censor the data by flagging outliers based on the SMART flags.
-#' For the latter, if age is not supplied, the function censors the absolute MUAC
-#' values.
+#' This function performs data wrangling by calculating weight-for-height
+#' and MUAC-for-age z-scores, followed by the detection and flagging of outliers.
+#' For MUAC data, if age is not supplied, z-scores are not computed. In such
+#' cases, outlier detection and flagging are based on the absolute MUAC values.
#'
-#' @param df The input data frame with the required variables.
+#' @param df A dataset of class `data.frame` to wrangle data from.
#'
#' @param sex A numeric or character vector of child's sex. Code values should
-#' either be 1 or "m" for boy and 2 or "f" for girl. The variable name must be
-#' sex, otherwise it will not work.
+#' be 1 or "m" for boy and 2 or "f" for girl. The variable name must be sex,
+#' otherwise it will not work.
#'
-#' @param .recode_sex Logical.
It asks whether sex should be recoded. In the end,
-#' the variable sex have values coded as 1 for boy and 2 for girl. Setting
-#' `.recode_sex = TRUE` works over "m" and "f" values. If your vector is coded
-#' differently, make sure to put it in "m" and "f" or in 1 or 2 right away.
+#' @param .recode_sex Logical. Default is `TRUE`. Setting to `TRUE` assumes that
+#' the sex variable is a character vector of values "m" for boys and "f" for girls
+#' and will recode them to 1 and 2 respectively.
#'
-#' @param muac A numeric vector holding the absolute MUAC values.
+#' @param muac A vector of class `double` or `integer` of the absolute MUAC values.
#'
-#' @param .recode_muac Logical. Choose between `TRUE` if you wish to recode
-#' the MUAC values into either centimeters or millimeters.
+#' @param .recode_muac Logical. Default is `TRUE`. Set to `TRUE` if MUAC values
+#' should be converted to either centimeters or millimeters.
#'
-#' @param unit A choice of the unit to which you wish to convert the MUAC
-#' variable into. Choose "cm" for centimeters, "mm" for millimeters and "none"
-#' to leave as it is.
+#' @param unit A choice of the unit to which the MUAC values should be converted.
+#' "cm" for centimeters, "mm" for millimeters and "none" to leave as it is.
#'
-#' @param age A numeric vector of child's age in months. It must be named age,
-#' otherwise it will not work. For instance, if given as following: age = months
-#' it will not work.
+#' @param age A double vector of child's age in months. It must be named age,
+#' otherwise it will not work.
#'
-#' @param weight A numeric vector holding the weight values of the child in
-#' kilograms.
+#' @param weight A vector of class `double` of child's weight in kilograms.
#'
-#' @param height A numeric vector holding the height values of the child in
-#' centimeters.
+#' @param height A vector of class `double` of child's height in centimeters.
#'
-#' @returns A data frame of the same length as the input with additional
-#' columns: one named `wfhz` or `mfaz` that holds the zscore values, and the other
-#' holding dummy values: 1 (is a flag) and 0 (is not a flag). For the
-#' `process_muac_data` function, when age is not supplied, only `flag_muac` is
-#' added. This refers to flags based based on absolute MUAC values as recommended by
+#' @returns A data frame based on `df`. New variables named `wfhz` and
+#' `flag_wfhz`, of child's weight-for-height z-scores and flags, or `mfaz` and
+#' `flag_mfaz`, of child's MUAC-for-age z-scores and flags, will be created. For
+#' MUAC, when age is not supplied, only the `flag_muac` variable is created.
+#' This refers to flags based on the absolute MUAC values as recommended by
#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478).
#'
+#' @details
+#' The flagging criterion used for the WFHZ and MFAZ is as in
+#' [SMART plausibility check](https://smartmethodology.org/). A fixed flagging
+#' criterion is used for the absolute MUAC values. This is as recommended by
+#' [Bilukha, O., & Kianian, B.
(2023).](https://doi.org/10.1111/mcn.13478) +#' #' @examples #' #' ## An example application of `process_wfhz_data()` ---- @@ -213,6 +205,7 @@ recode_muac <- function(muac, unit = c("cm", "mm")) { #' ) #' #' ### The application of the function ---- +#' #' df |> #' process_age( #' svdate = "survey_date", @@ -227,13 +220,54 @@ recode_muac <- function(muac, unit = c("cm", "mm")) { #' .recode_muac = TRUE, #' unit = "cm" #' ) +#' +#' @rdname wrangler +#' +#' @export +#' + +process_wfhz_data <- function(df, + sex, + weight, + height, + .recode_sex = TRUE) { + + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else { + {{ sex }} + } + ) + + df <- df |> + mutate( + sex = !!recode_sex + ) |> + addWGSR( + sex = {{ "sex" }}, + firstPart = {{ "weight" }}, + secondPart = {{ "height" }}, + index = "wfh", + digits = 3 + ) |> + mutate( + flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) + ) + tibble::as_tibble(df) +} + + + #' #' @rdname wrangler #' #' @export #' process_muac_data <- function(df, - sex, muac, age = NULL, + sex, + muac, + age = NULL, .recode_sex = TRUE, .recode_muac = TRUE, unit = c("cm", "mm", "none")) { @@ -282,36 +316,3 @@ process_muac_data <- function(df, } tibble::as_tibble(df) } - - -#' -#' @rdname wrangler -#' -#' @export -#' -process_wfhz_data <- function(df, sex, weight, height, .recode_sex = TRUE) { - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - df <- df |> - mutate( - sex = !!recode_sex - ) |> - addWGSR( - sex = {{ "sex" }}, - firstPart = {{ "weight" }}, - secondPart = {{ "height" }}, - index = "wfh", - digits = 3 - ) |> - mutate( - flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) - ) - tibble::as_tibble(df) -} diff --git a/man/age_ratio_test.Rd b/man/age_ratio_test.Rd index a0f3ccd..9b8fe1c 100644 --- a/man/age_ratio_test.Rd +++ b/man/age_ratio_test.Rd @@ -2,41 +2,39 @@ % Please edit documentation in R/age.R \name{age_ratio_test} \alias{age_ratio_test} -\title{Test the proportion of children aged 24 to 59 months over 6 to 23 months old} +\title{Test for statistical difference between the proportion of children aged 24 to +59 months old over those aged 6 to 23 months old} \usage{ age_ratio_test(age, .expectedP = 0.66) } \arguments{ -\item{age}{A numeric vector holding child's age in months.} +\item{age}{A double vector of age in months.} \item{.expectedP}{The expected proportion of children aged 24 to 59 months -old over those aged 6 to 23 months old. As in the -\href{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}{SMART MUAC tool}, -this is estimated at 0.66.} +old over those aged 6 to 23 months old. This is estimated to be 0.66 as in the +\href{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}{SMART MUAC tool}.} } \value{ -A vector of class "list" holding three statistics: \code{p} for p-value, -\code{observedR} for the observed ratio and \code{observedP} for the observed proportion -of children aged 24 to 59 months over those aged 6 to 24 months old. +A vector of class \code{list} of three statistics: \code{p} for p-value of the +statistical difference between the observed and the expected proportion of +children aged 24 to 59 months old over those aged 6 to 23 months old; +\code{observedR} and \code{observedP} for the observed ratio and proportion respectively. @details -\code{age_ratio_test()} should be used specifically for assessing MUAC data. 
For -age ratio tests of children ages 6 to 29 months and 30 to 59 months old, as -performed in the SMART plausibility checks, use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} instead. +This function should be used specifically for assessing MUAC data. For +age ratio tests of children aged 6 to 29 months old over 30 to 59 months old, as +performed in the SMART plausibility check, use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} instead. } \description{ -Age ratio test of the proportion of children aged 24 to 59 months over those -aged 6 to 23 months old. +Calculate the observed age ratio of children aged 24 to 59 months old over +those aged 6 to 23 months old and test if there is a statistical difference +between the observed and the expected. } \examples{ -## A sample data ---- -age <- seq(6,59) |> -sample(300, replace = TRUE) - -## Apply the function ---- +## An example of application using `anthro.02` dataset ---- age_ratio_test( -age = age, +age = anthro.02$age, .expectedP = 0.66 ) diff --git a/man/anthro.01.Rd b/man/anthro.01.Rd index 6611489..158e764 100644 --- a/man/anthro.01.Rd +++ b/man/anthro.01.Rd @@ -20,13 +20,16 @@ A tibble of 1,191 rows and 11 columns.\tabular{ll}{ \emph{muac} \tab Mid-upper arm circumference (mm) \cr } } +\source{ +Anonymous +} \usage{ anthro.01 } \description{ -\code{anthro.01} is a two-stage cluster-based survey with probability of the -selection of the clusters proportional to the size of the population. The -survey employed the SMART methodology. Data was anonymised for confidentiality. +\code{anthro.01} is a two-stage cluster-based survey with probability of selection +of clusters proportional to the size of the population. The survey employed +the SMART methodology. } \examples{ anthro.01 diff --git a/man/anthro.02.Rd b/man/anthro.02.Rd index 45af271..c5b4861 100644 --- a/man/anthro.02.Rd +++ b/man/anthro.02.Rd @@ -23,24 +23,21 @@ A tibble of 2,267 rows and 14 columns.\tabular{ll}{ \emph{flag_mfaz} \tab Flagged observations. 1=flagged, 0=not flagged \cr } } +\source{ +Mozambique National Institute of Statistics. The data is publicly +available at \url{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access}. +Data was wrangled using this package's wranglers. Details about survey design +can be gotten from: \url{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling} +} \usage{ anthro.02 } \description{ \code{anthro.02} is about a household budget survey conducted in Mozambique in -2019/2020, known as IOF (\emph{Inquérito ao Orçamento Familiar} in Portuguese). -The data is publicly available \href{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-data_access}{here}. -The survey had a module on nutrition with anthropometric measurements taken -from children age 0-59 months for weight-for-height and 6-59 months for MUAC. - -\emph{IOF} is a two-stage cluster-based survey, representative at -province level (admin 2), with probability of the selection of the clusters -proportional to the size of the population. Its data collection spans for a -period of 12 months, with anthropometric measurements -taken during that period too. Read the \href{https://mozdata.ine.gov.mz/index.php/catalog/88#metadata-sampling}{Bureau of Statistic's website on IOF} for -more details. - -\code{anthro.02} has already been wrangled using this package's utilities. 
+2019/2020, known as IOF (\emph{Inquérito ao Orçamento Familiar} in Portuguese). \emph{IOF}
+is a two-stage cluster-based survey, representative at province level (admin 2),
+with probability of the selection of the clusters proportional to the size of
+the population. Its data collection spans a period of 12 months.
}
\examples{
anthro.02
}
diff --git a/man/anthro.03.Rd b/man/anthro.03.Rd
index 68b7a38..1d95f94 100644
--- a/man/anthro.03.Rd
+++ b/man/anthro.03.Rd
@@ -18,18 +18,21 @@ A tibble of 943 x 9.\tabular{ll}{
\emph{muac} \tab Mid-upper arm circumference (mm) \cr
}
}
+\source{
+Anonymous
+}
\usage{
anthro.03
}
\description{
\code{anthro.03} contains survey data of four districts. Each district's dataset
presents distinct data quality scenarios that requires tailored prevalence
-analysis approach. Two districts show a problematic WFHZ standard deviation
+analysis approach: two districts show a problematic WFHZ standard deviation
whilst the remaining are all within range.

This sample data demonstrates the use of prevalence functions on multi-area
-survey data, where variations in the standard deviation ratings exist.
-As a result, different analytical approaches are required for each area
+survey data, where there are variations in the standard deviation rating.
+As a result, different analysis approaches are required for each area
to ensure accurate estimation.
}
\examples{
diff --git a/man/anthro.04.Rd b/man/anthro.04.Rd
index 1414a0e..fd961a2 100644
--- a/man/anthro.04.Rd
+++ b/man/anthro.04.Rd
@@ -17,16 +17,19 @@ A tibble of 3,002 x 8.\tabular{ll}{
\emph{flag_mfaz} \tab Flagged observations. 1=flagged, 0=not flagged \cr
}
}
+\source{
+Anonymous
+}
\usage{
anthro.04
}
\description{
-\code{anthro.04} was generated from a community-based sentinel site survey
-conducted across three provinces. Each province's dataset presents distinct
-data quality scenarios, requiring tailored prevalence analysis.
-"Province 3" has problematic MFAZ standard deviation and age ratio tests.
+\code{anthro.04} was generated from community-based sentinel sites
+across three provinces. Each province's dataset presents distinct
+data quality scenarios, requiring tailored prevalence analysis:
+"Province 3" has problematic MFAZ standard deviation and age ratio tests;
"Province 2" shows a problematic age ratio but acceptable MFAZ standard
-deviation. Lastly, "Province 1" has both tests within acceptable ranges.
+deviation; lastly, "Province 1" has both tests within acceptable ranges.

This sample data demonstrates the use of prevalence functions on multi-area
survey data, where variations in the standard deviation ratings exist.
diff --git a/man/case_definition.Rd b/man/case_definition.Rd
index d50b238..4cd4dbf 100644
--- a/man/case_definition.Rd
+++ b/man/case_definition.Rd
@@ -5,8 +5,7 @@
\alias{define_wasting_cases_whz}
\alias{define_wasting_cases_combined}
\alias{define_wasting}
-\title{Define if an observation is wasted on the basis of the criteria
-of WFHZ, absolute MUAC values and combined case-definition}
+\title{Wasting case-definition based on WFHZ, MFAZ, MUAC and Combined criteria}
\usage{
define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam"))
@@ -50,8 +49,7 @@ changes depending on the form of wasting chosen. That is, if set \code{cases}
to \code{"sam"} the codes 1 would mean yes for severe wasting.
} \description{ -Define if an observation is wasted on the basis of the criteria -of WFHZ, absolute MUAC values and combined case-definition +Wasting case-definition based on WFHZ, MFAZ, MUAC and Combined criteria } \details{ Use \code{define_wasting()} to add the case-definitions into data frame. diff --git a/man/check_sample_size.Rd b/man/check_sample_size.Rd index fa7a5c6..f2cb5d9 100644 --- a/man/check_sample_size.Rd +++ b/man/check_sample_size.Rd @@ -2,35 +2,38 @@ % Please edit documentation in R/sample_size.R \name{check_sample_size} \alias{check_sample_size} -\title{Check if the IPC AMN sample size requirement were met} +\title{Check whether the IPC Acute Malnutrition sample size requirements were met} \usage{ check_sample_size(df, .group, .data_type = c("survey", "screening", "ssite")) } \arguments{ -\item{df}{A data frame containing the required variables.} +\item{df}{A dataset of class \code{data.frame} to check.} -\item{.group}{A vector containing the primary sampling unit (PSU) ID's. Usually and -ideally a numeric vector, but sometimes this may present itself as a character. -Either way, \code{check_sample_size()} will work accordingly.} +\item{.group}{A vector of class \code{integer} of the cluster ID's for survey, +screening or site ID's for screenings and sentinel sites.} \item{.data_type}{A choice between "survey" for survey data, "screening" for screening data or "ssite" for community-based sentinel site data.} } \value{ -By default, a summary table of one row and three additional columns -are returned. Column \code{groups} and \code{n_obs} hold the total number of unique -PSU's and children respectively, and \code{meet_ipc} tells whether the IPC AMN -sample size requirements were met. +A summarised table of three columns: \code{groups} for the total number +of unique cluster or screening or site IDs; \code{n_obs} for the respective total +number of children; and \code{meet_ipc} for whether the IPC AMN requirements were met. } \description{ -\code{check_sample_size()} verifies if the minimum sample size requirements of the -IPC Acute Malnutrition protocols are met in a given area of analysis. +Verify whether the minimum sample size requirements for the area of analysis +were met, in accordance with the IPC Acute Malnutrition (IPC AMN) protocols. } \details{ -Use dplyr::group_by() before \code{check_sample_size()} to get a summary for each -unique survey or screening location from your data. +\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{The IPC Manual}. } \examples{ -check_sample_size(anthro.01, .group = cluster, .data_type = "survey") + +anthro.01 |> +dplyr::group_by(area) |> +check_sample_size( +.group = cluster, +.data_type = "survey" +) } diff --git a/man/classify_age_sex_ratio.Rd b/man/classify_age_sex_ratio.Rd index 333ef72..ffb2aaa 100644 --- a/man/classify_age_sex_ratio.Rd +++ b/man/classify_age_sex_ratio.Rd @@ -2,16 +2,17 @@ % Please edit documentation in R/quality_raters.R \name{classify_age_sex_ratio} \alias{classify_age_sex_ratio} -\title{Rate the p-values of the age and sex ratio test} +\title{Rate the acceptability of the age and sex ratio test p-values} \usage{ classify_age_sex_ratio(p) } \arguments{ -\item{p}{A numeric vector containing the test p-values.} +\item{p}{A vector of class \code{double} of the age or sex ratio test p-values.} } \value{ -A character vector with the rating results. +A vector of class \code{character} of the same length as \code{p} for the +acceptability rate. 
} \description{ -Rate the p-values of the age and sex ratio test +Rate the acceptability of the age and sex ratio test p-values } diff --git a/man/classify_overall_quality.Rd b/man/classify_overall_quality.Rd index 8657a65..7a61406 100644 --- a/man/classify_overall_quality.Rd +++ b/man/classify_overall_quality.Rd @@ -2,21 +2,20 @@ % Please edit documentation in R/quality_raters.R \name{classify_overall_quality} \alias{classify_overall_quality} -\title{Rate the overall data quality} +\title{Rate the overall acceptability score} \usage{ classify_overall_quality(df) } \arguments{ -\item{df}{A data frame containing a vector with the quality scores yielded -from \code{\link[=compute_quality_score]{compute_quality_score()}}.} +\item{df}{A dataset of class \code{data.frame} containing a vector of the overall +acceptability score as yielded from \code{\link[=compute_quality_score]{compute_quality_score()}}.} } \value{ -A character vector of the same length with a new column called -\code{quality_class}. +A \code{data.frame} based on \code{df}. A new column \code{quality_class} for the +overall acceptability rate is created and added to \code{df}. } \description{ -\code{classify_overall_quality()} informs you about the overall quality of the data -by rating the overall quality score in "Excellent", "Good", "Acceptable" and +Rate the overall acceptability score into "Excellent", "Good", "Acceptable" and "Problematic". } \examples{ diff --git a/man/classify_skew_kurt.Rd b/man/classify_skew_kurt.Rd index 9fcdb9a..f8a08ae 100644 --- a/man/classify_skew_kurt.Rd +++ b/man/classify_skew_kurt.Rd @@ -2,16 +2,17 @@ % Please edit documentation in R/quality_raters.R \name{classify_skew_kurt} \alias{classify_skew_kurt} -\title{Rate the magnitude of skewness and kurtosis test results} +\title{Rate the acceptability of the skewness and kurtosis test results} \usage{ classify_skew_kurt(sk) } \arguments{ -\item{sk}{A numeric vector containing values of either skewness or kurtosis.} +\item{sk}{A vector of class \code{double} for skewness or kurtosis test results.} } \value{ -A character vector with the rating results. +A vector of class \code{character} of the same length as \code{sk} for the +acceptability rate. } \description{ -Rate the magnitude of skewness and kurtosis test results +Rate the acceptability of the skewness and kurtosis test results } diff --git a/man/classify_wasting_for_cdc_approach.Rd b/man/classify_wasting_for_cdc_approach.Rd index 780fcd1..a709c95 100644 --- a/man/classify_wasting_for_cdc_approach.Rd +++ b/man/classify_wasting_for_cdc_approach.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/case_definitions.R \name{classify_wasting_for_cdc_approach} \alias{classify_wasting_for_cdc_approach} -\title{Classify wasting into severe or moderate wasting for use in SMART MUAC tool -weighting approach} +\title{Classify wasting into severe or moderate wasting to be used in the +SMART MUAC tool weighting approach} \usage{ classify_wasting_for_cdc_approach(muac, .edema = NULL) } @@ -19,6 +19,6 @@ A character vector of the same length as the input indicating if a child is severe or moderate wasted or not wasted. 
} \description{ -Classify wasting into severe or moderate wasting for use in SMART MUAC tool -weighting approach +Classify wasting into severe or moderate wasting to be used in the +SMART MUAC tool weighting approach } diff --git a/man/combined_prevalence.Rd b/man/combined_prevalence.Rd index b6bdc4f..17a5cbb 100644 --- a/man/combined_prevalence.Rd +++ b/man/combined_prevalence.Rd @@ -3,7 +3,7 @@ \name{compute_pps_based_combined_prevalence} \alias{compute_pps_based_combined_prevalence} \alias{compute_combined_prevalence} -\title{Compute combined prevalence of acute malnutrition} +\title{Compute combined prevalence of wasting} \usage{ compute_pps_based_combined_prevalence( df, @@ -16,9 +16,8 @@ compute_combined_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) } \arguments{ \item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} and \code{\link[=process_wfhz_data]{process_wfhz_data()}}. -Both wranglers need to be used to prepare data to be used -\code{compute_combined_prevalence()}. The order of which comes first does not matter, -however, since the muac data processor transforms MUAC values into centimeters, those +Both wranglers need to be used sequentially. The order of use does not matter, +however, since muac wrangler transforms MUAC values into centimeters, those need to be put back into millimeter. This can be achieved my using \code{\link[=recode_muac]{recode_muac()}} inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.} @@ -37,12 +36,12 @@ where the data was collected and for which the analysis should be performed at.} A table with the descriptive statistics about wasting. } \description{ -\code{compute_combined_prevalence()} is a handy function for calculating the -combined prevalence of wasting also in with the complex sample design -properties inherent to surveys. +\code{compute_combined_prevalence()} is a handy function for calculating the prevalence +of combined wasting in accordance with the complex sample design properties +inherent to surveys. } \details{ -The concept of "combined flags" is introduced in this function. It consists of +A concept of "combined flags" is introduced in this function. It consists of taking the \code{flag_wfhz} and \code{flag_mfaz} vectors, generated from the MUAC and WFHZ wranglers, and checking if any value in either vector is flagged. If flagged, the value is marked as a flag in the "cflags" vector; otherwise, it is not flagged diff --git a/man/compute_age_in_months.Rd b/man/compute_age_in_months.Rd index e378361..4e96d75 100644 --- a/man/compute_age_in_months.Rd +++ b/man/compute_age_in_months.Rd @@ -2,24 +2,19 @@ % Please edit documentation in R/age.R \name{compute_age_in_months} \alias{compute_age_in_months} -\title{Calculate age in months} +\title{Calculate child's age in months} \usage{ compute_age_in_months(surv_date, birth_date) } \arguments{ -\item{surv_date}{A vector of class "Date" holding values corresponding to -the date of data collection.} +\item{surv_date}{A vector of class \code{Date} for data collection date.} -\item{birth_date}{A vector of class "Date" holding values corresponding to -the child's date of birth.} +\item{birth_date}{A vector of class \code{Date} for child's date of birth.} } \value{ -A numeric vector named \code{age} holding age values in months with two -decimal places. Any value outside the range of 6.0 to 59.99 is replaced with -\code{NA}. 
+A vector of class \code{double} for child's age in months with two decimal places.
+Any value less than 6.0 or greater than or equal to 60.0 months will be set to \code{NA}.
}
\description{
-\code{compute_age_in_months()} calculates age in months from on the basis of
-difference between the data collection date and the child's date of birth.
-It works inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.
+Calculate child's age in months based on date of birth and the data collection date.
}
diff --git a/man/compute_month_to_days.Rd b/man/compute_month_to_days.Rd
index 3d3cc83..790fd25 100644
--- a/man/compute_month_to_days.Rd
+++ b/man/compute_month_to_days.Rd
@@ -2,17 +2,16 @@
% Please edit documentation in R/age.R
\name{compute_month_to_days}
\alias{compute_month_to_days}
-\title{Transform age in months to days}
+\title{Calculate child's age in days}
\usage{
compute_month_to_days(x)
}
\arguments{
-\item{x}{A numeric vector containing age values in months.}
+\item{x}{A double vector of child's age in months.}
}
\value{
-A numeric vector, of the same length as the input variable, containing
-age values in days.
+A double vector of the same length as \code{x} of age in days.
}
\description{
-Transform age in months to days
+Calculate child's age in days
}
diff --git a/man/compute_quality_score.Rd b/man/compute_quality_score.Rd
index a367a19..ebe4266 100644
--- a/man/compute_quality_score.Rd
+++ b/man/compute_quality_score.Rd
@@ -2,22 +2,23 @@
% Please edit documentation in R/quality_scorers.R
\name{compute_quality_score}
\alias{compute_quality_score}
-\title{Get the overall quality score for WFHZ and MFAZ}
+\title{Get the overall acceptability score from the acceptability classification scores}
\usage{
compute_quality_score(df, type = c("mfaz", "whz"))
}
\arguments{
-\item{df}{A data frame containing individual test quality scores.}
+\item{df}{A dataset object of class \code{data.frame} to calculate from.}

-\item{type}{The method you wish to get the overall quality score for.
-A choice between "mfaz" and "wfhz".}
+\item{type}{A choice between "wfhz" and "mfaz" for the basis on which the
+calculations should be made.}
}
\value{
-A vector named \code{"quality_score"} with the overall quality score.
+A \code{data.frame} based on \code{df} with a new column named \code{"quality_score"}
+for the overall acceptability (of quality) score.
}
\description{
-\code{compute_quality_score()} calculates the overall score of the quality of the
-data for both WFHZ and MFAZ.
+Calculate the total number of penalty points based on each plausibility test
+result acceptability classification for WFHZ and MFAZ.
}
\examples{
diff --git a/man/mfaz.01.Rd b/man/mfaz.01.Rd
index 855bdc4..cce5e02 100644
--- a/man/mfaz.01.Rd
+++ b/man/mfaz.01.Rd
@@ -13,6 +13,9 @@ A tibble with 661 rows and 4 columns.\tabular{ll}{
\emph{muac} \tab Mid-upper arm circumference (mm) \cr
}
}
+\source{
+Anonymous
+}
\usage{
mfaz.01
}
diff --git a/man/mfaz.02.Rd b/man/mfaz.02.Rd
index 529df23..655c3bb 100644
--- a/man/mfaz.02.Rd
+++ b/man/mfaz.02.Rd
@@ -15,6 +15,9 @@ A tibble with 303 rows and 7 columns.\tabular{ll}{
\emph{flag_mfaz} \tab Flagged observations.
1=flagged, 0=not flagged \cr
}
}
+\source{
+Anonymous
+}
\usage{
mfaz.02
}
diff --git a/man/outliers.Rd b/man/outliers.Rd
index 8c9a56d..43c063c 100644
--- a/man/outliers.Rd
+++ b/man/outliers.Rd
@@ -10,44 +10,43 @@ flag_outliers(x, type = c("zscore", "crude"))
remove_flags(x, unit = c("zscore", "crude"))
}
\arguments{
-\item{x}{A numeric vector holding either the WFHZ, the MFAZ values, or the
-absolute MUAC values (in millimeters).}
+\item{x}{A vector of class \code{double} of WFHZ or MFAZ or absolute MUAC values.
+The latter should be in millimeters.}

-\item{type}{The method you wish \code{flag_outliers()} to identify flag outliers
-in the data. A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for
-absolute MUAC values).}
+\item{type}{A choice between \code{zscore} and \code{crude} for where outliers should be
+detected and flagged from.}

-\item{unit}{A choice between "zscore" (for WFHZ and MFAZ), and "crude" (for
-absolute MUAC values).}
+\item{unit}{A choice between \code{zscore} and \code{crude} for where outliers should be
+detected and flagged from.}
}
\value{
-A vector of the same length as input holding dummy values: 1 for is
-a flag and 0 is not a flag.
+A vector of the same length as \code{x} indicating which observations are
+outliers: 1 if the observation is a flag and 0 if it is not.
}
\description{
-Outliers are extreme values that deviate remarkably from the mean, making
-them unlikely to be accurate measurements. \code{flag_outliers()} helps you to
-identify them whether in the WFHZ, the MFAZ or the absolute MUAC values.
+Outliers are extreme values that deviate remarkably from the survey mean, making
+them unlikely to be accurate measurements. This function detects and signals
+them based on a criterion set for the WFHZ, the MFAZ and for the absolute MUAC
+values.
}
\details{
-The flagging criteria for the WFHZ is as in
-\href{https://smartmethodology.org/}{SMART plausibility check}. As for the MFAZ, it
-uses the same criteria as WFHZ, whilst a fixed flagging criteria is used for
-absolute MUAC values. This is as recommended by
+The flagging criterion used for the WFHZ and the MFAZ is as in
+\href{https://smartmethodology.org/}{SMART plausibility check}. A fixed flagging
+criterion is used for the absolute MUAC values. This is as recommended by
\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B.
(2023).}
}
\examples{
## Sample data for absolute MUAC values ----
-x <- c(90, 110, 140, 200, 119, 235)
+x <- anthro.01$muac

-## Apply `flag_outliers()` with type set to "crude" ----
+## Apply the function with type set to "crude" ----
flag_outliers(x, type = "crude")

## Sample data for MFAZ or for WFHZ values ----
-x <- c(-2.265, -5.275, -0.72, -2.261, -2.264, -4.451, -2.261, -1.828)
+x <- anthro.02$mfaz

-# Apply `flag_outliers()` with type set to "zscore" ----
+# Apply the function with type set to "zscore" ----
flag_outliers(x, type = "zscore")
}
diff --git a/man/prevalence.Rd b/man/prevalence.Rd
index 6a2b1ad..e0b8a6b 100644
--- a/man/prevalence.Rd
+++ b/man/prevalence.Rd
@@ -5,7 +5,7 @@
\alias{compute_mfaz_prevalence}
\alias{compute_muac_prevalence}
\alias{compute_wfhz_prevalence}
-\title{Compute the prevalence of wasting on the basis of WFHZ or MFAZ or MUAC}
+\title{Compute the prevalence of wasting on the basis of WFHZ, MFAZ and MUAC}
\usage{
compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
diff --git a/man/process_age.Rd b/man/process_age.Rd
index 4cb2e76..7108b88 100644
--- a/man/process_age.Rd
+++ b/man/process_age.Rd
@@ -2,33 +2,34 @@
% Please edit documentation in R/age.R
\name{process_age}
\alias{process_age}
-\title{Process age}
+\title{Wrangle child's age}
\usage{
process_age(df, svdate = NULL, birdate = NULL, age)
}
\arguments{
-\item{df}{Input data frame holding the required variables.}
+\item{df}{A dataset of class \code{data.frame} to process age from.}

-\item{svdate}{A vector of class "Date" holding values corresponding to
-the data collection date. Default is \code{NULL}.}
+\item{svdate}{A vector of class \code{Date} for date of data collection.
+Default is \code{NULL}.}

-\item{birdate}{A vector of class "Date" holding values corresponding to
-the child's date of birth. Default is \code{NULL}.}
+\item{birdate}{A vector of class \code{Date} for child's date of birth.
+Default is \code{NULL}.}

-\item{age}{A numeric vector holding age values in months, usually estimated
+\item{age}{A vector of class \code{integer} of age in months, usually estimated
using local event calendars.}
}
\value{
-A data frame of the same length as the input with an additional
-column. A new variable, \code{age_day}, is added to the output data frame whilst
-the \code{age} variable gets filled where applicable, and then any values outside
-the range of 6.0 to 59.99 months get replaced with \code{NA}.
+A \code{data.frame} based on \code{df}. The variable \code{age} that is required to be
+included in \code{df} will be filled where applicable with the age in months for
+each row of data in \code{df}. A new variable for \code{df} named \code{age_days} will be
+created. Values for \code{age} and \code{age_days} for children less than 6.0 or greater
+than or equal to 60.0 months old will be set to \code{NA}.
}
\description{
-\code{process_age()} helps you to get the variable age in the format needed for
-the analyses in the downstream workflow. Fundamentally, it calculates age in
-months from on the basis of the difference between the data collection date
-and the child's date of birth and then censors age values that are out of range.
+Wrangle child's age for downstream analysis. This includes calculating age
+in months based on the date of data collection and child's date of birth and
+setting to \code{NA} the age values that are less than 6.0 or greater than or equal
+to 60.0 months old.
} \examples{ diff --git a/man/raters.Rd b/man/raters.Rd index 8c22ee1..43284fb 100644 --- a/man/raters.Rd +++ b/man/raters.Rd @@ -3,34 +3,35 @@ \name{classify_percent_flagged} \alias{classify_percent_flagged} \alias{classify_sd} -\title{Rate the proportion of flagged values in the data and the magnitude of the -standard deviation} +\title{Rate the acceptability of the standard deviation and the percentage of flagged +data} \usage{ classify_percent_flagged(p, type = c("mfaz", "whz", "crude")) classify_sd(sd, type = c("zscore", "crude")) } \arguments{ -\item{p}{A numeric vector containing the proportions of flagged values} +\item{p}{A vector of class \code{double} of the proportions of flagged values in +the dataset.} -\item{type}{The indicator to be used for the rating. A choice between "mfaz" -for MFAZ, "whz" for WFHZ and "crude" for crude MUAC.} +\item{type}{A choice between "mfaz", "whz" and "crude" for the basis on which +the rating should be done.} -\item{sd}{A numeric vector containing values for standard deviation.} +\item{sd}{A vector of class \code{double} of the values of the standard deviation.} } \value{ -A character vector with the rating results. +A vector of class \code{character} of the acceptability rating. } \description{ -\code{classify_percent_flagged()} rates how much high is the proportion of -of flagged data in your data set, as well as the magnitude of the standard -deviation. It applies for the WFHZ, the MFAZ and absolute MUAC values. +Rate how high the standard deviation and the percentage of flagged +data in the dataset are, and hence their acceptability. } \details{ -The rating categories are: "Excellent", "Good", "Acceptable", "Problematic". -The cut-offs of the WFHZ are as in the \href{https://smartmethodology.org/}{ SMART Methodology}. As for the MFAZ and the -absolute MUAC values, the maximum acceptable limit is at 2\%, as recommended -by \href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. -Cut-offs for crude MUAC are based on the +The ranges of acceptability are: "Excellent", "Good", "Acceptable", "Problematic". +The cut-offs for WFHZ are as in the \href{https://smartmethodology.org/}{SMART Methodology}. +For the MFAZ and the absolute MUAC values, the maximum acceptable limit for +outliers is 2\%, as recommended by +\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. +Cut-offs for the standard deviation of the absolute MUAC values are based on the \href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC AMN guidelines}. } diff --git a/man/recode_muac.Rd b/man/recode_muac.Rd index 3ca438a..086b0da 100644 --- a/man/recode_muac.Rd +++ b/man/recode_muac.Rd @@ -7,31 +7,24 @@ recode_muac(muac, unit = c("cm", "mm")) } \arguments{ -\item{muac}{A numeric vector holding the absolute MUAC values.} +\item{muac}{A vector of class \code{double} or \code{integer} of the absolute MUAC values.} -\item{unit}{A choice of the unit to which you wish to convert the MUAC -values into.} +\item{unit}{A choice of the unit to which the MUAC values should be converted.} } \value{ -A numeric vector of the same length as input, with values converted -into your chosen unit. +A numeric vector of the same length as \code{muac}, with values converted +to the chosen unit. } \description{ -Recode the MUAC values into either centimeters or millimeters as required. -\code{recode_muac()} works inside \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.
+Recode the MUAC values to either centimeters or millimeters as required. } \examples{ -## A sample of MUAC data in millimeters ---- -muac <- seq(90, 250, by = 4) +## Recode from millimeters to centimeters ---- +muac <- anthro.01$muac +muac_cm <- recode_muac(muac, unit = "cm") -## Apply the function ---- -recode_muac(muac, unit = "cm") - -## A sample of MUAC data in centimeters ---- -muac <- seq(9.0, 25.0, by = 0.2) - -# Apply the function ---- -recode_muac(muac, unit = "mm") +## Using the `muac_cm` object to recode it back to "mm" ---- +muac_mm <- recode_muac(muac_cm, unit = "mm") } diff --git a/man/scorer.Rd b/man/scorer.Rd index 060ef04..93590de 100644 --- a/man/scorer.Rd +++ b/man/scorer.Rd @@ -4,8 +4,8 @@ \alias{assign_penalty_points_flags_and_sd} \alias{assign_penalty_points_age_sex_ratio} \alias{assign_penalty_points_skew_kurt} -\title{Score the rating of proportion of flagged data, the magnitude of the standard -deviation, skewness, kurtosis and the p-values sex and age ratio test} +\title{Score the acceptability classification of the standard deviation and percentage +of flagged data test results} \usage{ assign_penalty_points_flags_and_sd(x) @@ -14,18 +14,16 @@ assign_penalty_points_age_sex_ratio(x) assign_penalty_points_skew_kurt(x) } \arguments{ -\item{x}{A character vector holding the test classifications for the proportion -of flagged data, the magnitude of the standard deviation, the p-values of the -age and sex ratio tests, as well as the results of skewness and kurtosis tests.} +\item{x}{A vector of class \code{character} of acceptability classification of the +plausibility test results.} } \value{ -A numeric vector with the corresponding score. +A vector of class \code{integer} of the same length as \code{x} for the score. } \description{ -\code{assign_penalty_points_flags_and_sd()} ranks the proportion of the flagged -values in the data and the magnitude of standard deviation based on the SMART -scoring criteria. +Attribute a penalty point based on the acceptability classification in which +the plausibility test result falls. } \details{ -The ranking is as in \href{https://smartmethodology.org/}{SMART Plausibility checks}. +The scoring criteria is as in \href{https://smartmethodology.org/}{SMART Plausibility checks}. } diff --git a/man/tell_muac_analysis_strategy.Rd b/man/tell_muac_analysis_strategy.Rd index 2a4e5f2..cad60d1 100644 --- a/man/tell_muac_analysis_strategy.Rd +++ b/man/tell_muac_analysis_strategy.Rd @@ -22,8 +22,8 @@ age bias; "unweighted" a normal complex sample analysis is applied; when "missing" \code{NA} gets thrown, so no prevalence computed. } \description{ -This is a helper function that gives instruction to the main prevalence -analysis function on the analysis approach to follow in a given area of +This is a helper function that gives instruction, to the main MUAC prevalence +analysis function, on the analysis approach to follow in a given area of analysis on the basis of the quality of the age ratio test and the standard deviation. } diff --git a/man/wfhz.01.Rd b/man/wfhz.01.Rd index 9cf5cc5..e23b80f 100644 --- a/man/wfhz.01.Rd +++ b/man/wfhz.01.Rd @@ -15,6 +15,9 @@ A tibble with 303 rows and 6 columns.\tabular{ll}{ \emph{flag_wfhz} \tab Flagged observations. 
1=flagged, 0=not flagged \cr } } +\source{ +Anonymous +} \usage{ wfhz.01 } diff --git a/man/wrangler.Rd b/man/wrangler.Rd index 2f0ea6e..97df312 100644 --- a/man/wrangler.Rd +++ b/man/wrangler.Rd @@ -1,10 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/wranglers.R -\name{process_muac_data} -\alias{process_muac_data} +\name{process_wfhz_data} \alias{process_wfhz_data} -\title{Process and censor weight-for-height and MUAC data} +\alias{process_muac_data} +\title{Wrangle weight-for-height and MUAC data} \usage{ +process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE) + process_muac_data( df, sex, @@ -14,54 +16,52 @@ process_muac_data( .recode_muac = TRUE, unit = c("cm", "mm", "none") ) - -process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE) } \arguments{ -\item{df}{The input data frame with the required variables.} +\item{df}{A dataset of class \code{data.frame} to wrangle data from.} \item{sex}{A numeric or character vector of child's sex. Code values should -either be 1 or "m" for boy and 2 or "f" for girl. The variable name must be -sex, otherwise it will not work.} +be 1 or "m" for boy and 2 or "f" for girl. The variable name must be sex, +otherwise it will not work.} -\item{muac}{A numeric vector holding the absolute MUAC values.} +\item{weight}{A vector of class \code{double} of child's weight in kilograms.} -\item{age}{A numeric vector of child's age in months. It must be named age, -otherwise it will not work. For instance, if given as following: age = months -it will not work.} +\item{height}{A vector of class \code{double} of child's height in centimeters.} -\item{.recode_sex}{Logical. It asks whether sex should be recoded. In the end, -the variable sex have values coded as 1 for boy and 2 for girl. Setting -\code{.recode_sex = TRUE} works over "m" and "f" values. If your vector is coded -differently, make sure to put it in "m" and "f" or in 1 or 2 right away.} +\item{.recode_sex}{Logical. Default is \code{TRUE}. Setting to \code{TRUE} assumes that +the sex variable is a character vector of values "m" for boys and "f" for girls +and will recode them to 1 and 2 respectively.} -\item{.recode_muac}{Logical. Choose between \code{TRUE} if you wish to recode -the MUAC values into either centimeters or millimeters.} +\item{muac}{A vector of class \code{double} or \code{integer} of the absolute MUAC values.} -\item{unit}{A choice of the unit to which you wish to convert the MUAC -variable into. Choose "cm" for centimeters, "mm" for millimeters and "none" -to leave as it is.} +\item{age}{A double vector of child's age in months. It must be named age, +otherwise it will not work.} -\item{weight}{A numeric vector holding the weight values of the child in -kilograms.} +\item{.recode_muac}{Logical. Default is \code{TRUE}. Set to \code{TRUE} if MUAC values +should be converted to either centimeters or millimeters.} -\item{height}{A numeric vector holding the height values of the child in -centimeters.} +\item{unit}{A choice of the unit to which the MUAC values should be converted. +"cm" for centimeters, "mm" for millimeters and "none" to leave as it is.} } \value{ -A data frame of the same length as the input with additional -columns: one named \code{wfhz} or \code{mfaz} that holds the zscore values, and the other -holding dummy values: 1 (is a flag) and 0 (is not a flag). For the -\code{process_muac_data} function, when age is not supplied, only \code{flag_muac} is -added.
This refers to flags based based on absolute MUAC values as recommended by +A data frame based on \code{df}. New variables named \code{wfhz} and +\code{flag_wfhz}, of child's weight-for-height z-scores and flags, or \code{mfaz} and +\code{flag_mfaz}, of child's MUAC-for-age z-scores and flags, will be created. For +MUAC, when age is not supplied only \code{flag_muac} variable is created. +This refers to flags based on the absolute MUAC values as recommended by \href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. } \description{ -This is the job of \code{process_wfhz_data} and \code{process_muac_data()}. They are -responsible for computing the weight-for-height and the muac-for-age z-scores -respectively, and censor the data by flagging outliers based on the SMART flags. -For the latter, if age is not supplied, the function censors the absolute MUAC -values. +This function performs data wrangling by calculating weight-for-height +and MUAC-for-age z-scores, followed by the detection and flagging of outliers. +For MUAC data, if age is not supplies, z-scores do not get computed. In such +cases, outlier detection and flagging are based on the absolute MUAC values. +} +\details{ +The flagging criterion used for the WFHZ and MFAZ is as in +\href{https://smartmethodology.org/}{SMART plausibility check}. A fixed flagging +criterion is used for the absolute MUAC values. This is as recommended by +\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).} } \examples{ @@ -89,6 +89,7 @@ df <- data.frame( ) ### The application of the function ---- + df |> process_age( svdate = "survey_date", From 29721913f69f0d1d38852172cea18d5cc9d44b53 Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Thu, 17 Oct 2024 17:11:04 +0200 Subject: [PATCH 6/9] doc: revise doc (plausibility) --- R/pretty_tables.R | 72 ++++++++++++------------- R/quality_auditors.R | 79 +++++++++++++-------------- man/auditor.Rd | 111 -------------------------------------- man/plausibility-check.Rd | 108 +++++++++++++++++++++++++++++++++++++ man/pretty_table.Rd | 72 ++++++++++++------------- 5 files changed, 216 insertions(+), 226 deletions(-) delete mode 100644 man/auditor.Rd create mode 100644 man/plausibility-check.Rd diff --git a/R/pretty_tables.R b/R/pretty_tables.R index cfdbc6e..4e64e64 100644 --- a/R/pretty_tables.R +++ b/R/pretty_tables.R @@ -1,45 +1,38 @@ -#' Get a formatted and presentable output table for the plausibility auditors +#' Get a formatted and presentable output table for the plausibility checkers #' #' @description -#' `generate_pretty_table_mfaz()`, `generate_pretty_table_wfhz()` and -#' `generate_pretty_table_muac()` are useful to getting the output returned from -#' the plausibility auditors into a presentable format. They convert scientific -#' notation, round values and rename columns to meaningful names. +#' Useful to getting the output returned from the plausibility checkers +#' into a presentable format. It converts scientific notation to standard +#' notations, round values and rename columns to meaningful names. #' -#' @param df The table returned by [check_plausibility_mfaz()], -#' [check_plausibility_wfhz()] or [check_plausibility_muac()]. +#' @param df A summary table object of class `data.frame` returned by the +#' plausibility checkers. #' -#' @returns An output table of the same size as the input, with values -#' formatted, columns renamed, and ready to be shared. 
+#' @returns A `data.frame` as `df`, columns columns renamed, values formatted and +#' ready to be shared. #' #' @examples #' -#' ## Audit the plausibility of MFAZ data ---- +#' ## Check the plausibility of WFHZ data ---- #' #' anthro.01 |> -#' process_age( -#' svdate = "dos", -#' birdate = "dob", -#' age = age -#' ) |> -#' process_muac_data( +#' process_wfhz_data( #' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE #' ) |> -#' check_plausibility_mfaz( -#' flags = flag_mfaz, +#' check_plausibility_wfhz( #' sex = sex, -#' muac = muac, #' age = age, +#' weight = weight, +#' height = height, +#' flags = flag_wfhz, #' area = area #' ) |> -#' generate_pretty_table_mfaz() +#' generate_pretty_table_wfhz() #' -#' ## Audit the plausibility of absolute MUAC values ---- +#' ## Check the plausibility of the absolute MUAC values ---- #' #' anthro.01 |> #' process_muac_data( @@ -57,25 +50,30 @@ #' ) |> #' generate_pretty_table_muac() #' -#' ## Audit the plausibility of WFHZ data ---- +#' ## Check the plausibility of MFAZ data ---- #' #' anthro.01 |> -#' process_wfhz_data( +#' process_age( +#' svdate = "dos", +#' birdate = "dob", +#' age = age +#' ) |> +#' process_muac_data( #' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE +#' age = "age", +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' unit = "cm" #' ) |> -#' check_plausibility_wfhz( +#' check_plausibility_mfaz( +#' flags = flag_mfaz, #' sex = sex, +#' muac = muac, #' age = age, -#' weight = weight, -#' height = height, -#' flags = flag_wfhz, #' area = area #' ) |> -#' generate_pretty_table_wfhz() -#' +#' generate_pretty_table_mfaz() #' #' @rdname pretty_table #' diff --git a/R/quality_auditors.R b/R/quality_auditors.R index 1ecd6ce..0d901d9 100644 --- a/R/quality_auditors.R +++ b/R/quality_auditors.R @@ -1,37 +1,34 @@ #' -#' Audit the plausibility of WFHZ, MFAZ data and absolute MUAC values +#' Check the plausibility of the data #' #' @description -#' `check_plausibility_wfhz()`, `check_plausibility_mfaz()`, and -#' `check_plausibility_muac()` examines the plausibility of data through a -#' structured set of tests around sampling and measurement-related errors. +#' Verify the overall acceptability of the data through a set of +#' structured tests around sampling and measurement-related biases in the data. #' -#' @param df A data frame yielded from [process_muac_data()] for -#' `check_plausibility_mfaz()` and `check_plausibility_muac()`, and yielded from -#' [process_wfhz_data()] for `check_plausibility_wfhz()`. +#' @param df A dataset object of class `data.frame` to check. It should have been +#' wrangled using this package's wranglers. #' -#' @param sex A vector holding codes on child's sex: 1 for boy and 2 for girl. +#' @param sex A vector of class `numeric` of child's sex: 1 for boy and 2 for girl. #' -#' @param age A numeric vector holding age in months. +#' @param age A vector of class `double` of child's age in months. #' -#' @param muac A numeric vector holding MUAC measurements (in centimeters). +#' @param muac A vector of class `double` of child's MUAC in centimeters. #' -#' @param weight A numeric vector holding weight measurements (in kilograms). +#' @param weight A vector of class `double` of child's weight in kilograms. #' -#' @param height A numeric vector holding height measurements (in centimeters). 
+#' @param height A vector of class `double` of child's height in centimeters. #' -#' @param flags A character vector holding on values on flagged observations. +#' @param flags A vector of class `numeric` of flagged observations. #' -#' @param area A character vector holding values on where was the data collected -#' and for which you want the analysis to be performed. If analysing data of just -#' one area, you will still have to supply the corresponding column to `area` in -#' `check_plausibility_mfaz()` or `check_plausibility_wfhz()`. +#' @param area A vector of class `character` of the geographical location where +#' data was collected and to which the analysis should be aggregated at. #' -#' @returns A summary table of statistics with respective classification. +#' @returns A summarised `data.frame` of plausibility test results and their +#' respective acceptability ratings. #' #' @examples #' -#' ## Audit the plausibility of MFAZ data ---- +#' ## Check the plausibility of WFHZ data ---- #' #' anthro.01 |> #' process_age( @@ -39,23 +36,22 @@ #' birdate = "dob", #' age = age #' ) |> -#' process_muac_data( +#' process_wfhz_data( #' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE #' ) |> -#' check_plausibility_mfaz( -#' flags = flag_mfaz, +#' check_plausibility_wfhz( #' sex = sex, -#' muac = muac, #' age = age, +#' weight = weight, +#' height = height, +#' flags = flag_wfhz, #' area = area #' ) #' -#' ## Audit the plausibility of WFHZ ---- +#' ## Check the plausibility of MFAZ data ---- #' #' anthro.01 |> #' process_age( @@ -63,22 +59,23 @@ #' birdate = "dob", #' age = age #' ) |> -#' process_wfhz_data( +#' process_muac_data( #' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE +#' age = "age", +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' unit = "cm" #' ) |> -#' check_plausibility_wfhz( +#' check_plausibility_mfaz( +#' flags = flag_mfaz, #' sex = sex, +#' muac = muac, #' age = age, -#' weight = weight, -#' height = height, -#' flags = flag_wfhz, #' area = area #' ) #' -#' ## Audit the plausibility of the absolute MUAC values ---- +#' ## Check the plausibility of the absolute MUAC values ---- #' #' anthro.01 |> #' process_muac_data( @@ -95,7 +92,7 @@ #' muac = muac #' ) #' -#' @rdname auditor +#' @rdname plausibility-check #' #' @export #' @@ -142,7 +139,7 @@ check_plausibility_mfaz <- function(df, sex, muac, age, flags, area) { #' #' -#' @rdname auditor +#' @rdname plausibility-check #' #' @export #' @@ -192,7 +189,7 @@ check_plausibility_wfhz <- function(df, sex, age, weight, height, flags, area) { #' -#' @rdname auditor +#' @rdname plausibility-check #' #' @export #' diff --git a/man/auditor.Rd b/man/auditor.Rd deleted file mode 100644 index 4868ca4..0000000 --- a/man/auditor.Rd +++ /dev/null @@ -1,111 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_auditors.R -\name{check_plausibility_mfaz} -\alias{check_plausibility_mfaz} -\alias{check_plausibility_wfhz} -\alias{check_plausibility_muac} -\title{Audit the plausibility of WFHZ, MFAZ data and absolute MUAC values} -\usage{ -check_plausibility_mfaz(df, sex, muac, age, flags, area) - -check_plausibility_wfhz(df, sex, age, weight, height, flags, area) - -check_plausibility_muac(df, flags, sex, muac) -} -\arguments{ -\item{df}{A data frame yielded from \code{\link[=process_muac_data]{process_muac_data()}} for 
-\code{check_plausibility_mfaz()} and \code{check_plausibility_muac()}, and yielded from -\code{\link[=process_wfhz_data]{process_wfhz_data()}} for \code{check_plausibility_wfhz()}.} - -\item{sex}{A vector holding codes on child's sex: 1 for boy and 2 for girl.} - -\item{muac}{A numeric vector holding MUAC measurements (in centimeters).} - -\item{age}{A numeric vector holding age in months.} - -\item{flags}{A character vector holding on values on flagged observations.} - -\item{area}{A character vector holding values on where was the data collected -and for which you want the analysis to be performed. If analysing data of just -one area, you will still have to supply the corresponding column to \code{area} in -\code{check_plausibility_mfaz()} or \code{check_plausibility_wfhz()}.} - -\item{weight}{A numeric vector holding weight measurements (in kilograms).} - -\item{height}{A numeric vector holding height measurements (in centimeters).} -} -\value{ -A summary table of statistics with respective classification. -} -\description{ -\code{check_plausibility_wfhz()}, \code{check_plausibility_mfaz()}, and -\code{check_plausibility_muac()} examines the plausibility of data through a -structured set of tests around sampling and measurement-related errors. -} -\examples{ - -## Audit the plausibility of MFAZ data ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_muac_data( -sex = sex, -age = "age", -muac = muac, -.recode_sex = TRUE, -.recode_muac = TRUE, -unit = "cm" -) |> -check_plausibility_mfaz( -flags = flag_mfaz, -sex = sex, -muac = muac, -age = age, -area = area -) - -## Audit the plausibility of WFHZ ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -check_plausibility_wfhz( -sex = sex, -age = age, -weight = weight, -height = height, -flags = flag_wfhz, -area = area -) - -## Audit the plausibility of the absolute MUAC values ---- - -anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> -check_plausibility_muac( -flags = flag_muac, -sex = sex, -muac = muac -) - -} diff --git a/man/plausibility-check.Rd b/man/plausibility-check.Rd new file mode 100644 index 0000000..1c93e9d --- /dev/null +++ b/man/plausibility-check.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_auditors.R +\name{check_plausibility_mfaz} +\alias{check_plausibility_mfaz} +\alias{check_plausibility_wfhz} +\alias{check_plausibility_muac} +\title{Check the plausibility of the data} +\usage{ +check_plausibility_mfaz(df, sex, muac, age, flags, area) + +check_plausibility_wfhz(df, sex, age, weight, height, flags, area) + +check_plausibility_muac(df, flags, sex, muac) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to check. 
It should have been +wrangled using this package's wranglers.} + +\item{sex}{A vector of class \code{numeric} of child's sex: 1 for boy and 2 for girl.} + +\item{muac}{A vector of class \code{double} of child's MUAC in centimeters.} + +\item{age}{A vector of class \code{double} of child's age in months.} + +\item{flags}{A vector of class \code{numeric} of flagged observations.} + +\item{area}{A vector of class \code{character} of the geographical location where +data was collected and to which the analysis should be aggregated at.} + +\item{weight}{A vector of class \code{double} of child's weight in kilograms.} + +\item{height}{A vector of class \code{double} of child's height in centimeters.} +} +\value{ +A summarised \code{data.frame} of plausibility test results and their +respective acceptability ratings. +} +\description{ +Verify the overall acceptability of the data through a set of +structured tests around sampling and measurement-related biases in the data. +} +\examples{ + +## Check the plausibility of WFHZ data ---- + +anthro.01 |> +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_wfhz_data( +sex = sex, +weight = weight, +height = height, +.recode_sex = TRUE +) |> +check_plausibility_wfhz( +sex = sex, +age = age, +weight = weight, +height = height, +flags = flag_wfhz, +area = area +) + +## Check the plausibility of MFAZ data ---- + +anthro.01 |> +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_muac_data( +sex = sex, +age = "age", +muac = muac, +.recode_sex = TRUE, +.recode_muac = TRUE, +unit = "cm" +) |> +check_plausibility_mfaz( +flags = flag_mfaz, +sex = sex, +muac = muac, +age = age, +area = area +) + +## Check the plausibility of the absolute MUAC values ---- + +anthro.01 |> +process_muac_data( +sex = sex, +muac = muac, +age = NULL, +.recode_sex = TRUE, +.recode_muac = FALSE, +unit = "none" +) |> +check_plausibility_muac( +flags = flag_muac, +sex = sex, +muac = muac +) + +} diff --git a/man/pretty_table.Rd b/man/pretty_table.Rd index 92f47f0..e4fab27 100644 --- a/man/pretty_table.Rd +++ b/man/pretty_table.Rd @@ -4,7 +4,7 @@ \alias{generate_pretty_table_mfaz} \alias{generate_pretty_table_wfhz} \alias{generate_pretty_table_muac} -\title{Get a formatted and presentable output table for the plausibility auditors} +\title{Get a formatted and presentable output table for the plausibility checkers} \usage{ generate_pretty_table_mfaz(df) @@ -13,47 +13,40 @@ generate_pretty_table_wfhz(df) generate_pretty_table_muac(df) } \arguments{ -\item{df}{The table returned by \code{\link[=check_plausibility_mfaz]{check_plausibility_mfaz()}}, -\code{\link[=check_plausibility_wfhz]{check_plausibility_wfhz()}} or \code{\link[=check_plausibility_muac]{check_plausibility_muac()}}.} +\item{df}{A summary table object of class \code{data.frame} returned by the +plausibility checkers.} } \value{ -An output table of the same size as the input, with values -formatted, columns renamed, and ready to be shared. +A \code{data.frame} as \code{df}, columns columns renamed, values formatted and +ready to be shared. } \description{ -\code{generate_pretty_table_mfaz()}, \code{generate_pretty_table_wfhz()} and -\code{generate_pretty_table_muac()} are useful to getting the output returned from -the plausibility auditors into a presentable format. They convert scientific -notation, round values and rename columns to meaningful names. +Useful to getting the output returned from the plausibility checkers +into a presentable format. 
It converts scientific notation to standard +notations, round values and rename columns to meaningful names. } \examples{ -## Audit the plausibility of MFAZ data ---- +## Check the plausibility of WFHZ data ---- anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_muac_data( +process_wfhz_data( sex = sex, -age = "age", -muac = muac, -.recode_sex = TRUE, -.recode_muac = TRUE, -unit = "cm" +weight = weight, +height = height, +.recode_sex = TRUE ) |> -check_plausibility_mfaz( -flags = flag_mfaz, +check_plausibility_wfhz( sex = sex, -muac = muac, age = age, +weight = weight, +height = height, +flags = flag_wfhz, area = area ) |> -generate_pretty_table_mfaz() +generate_pretty_table_wfhz() -## Audit the plausibility of absolute MUAC values ---- +## Check the plausibility of the absolute MUAC values ---- anthro.01 |> process_muac_data( @@ -71,24 +64,29 @@ muac = muac ) |> generate_pretty_table_muac() -## Audit the plausibility of WFHZ data ---- +## Check the plausibility of MFAZ data ---- anthro.01 |> -process_wfhz_data( +process_age( +svdate = "dos", +birdate = "dob", +age = age +) |> +process_muac_data( sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE +age = "age", +muac = muac, +.recode_sex = TRUE, +.recode_muac = TRUE, +unit = "cm" ) |> -check_plausibility_wfhz( +check_plausibility_mfaz( +flags = flag_mfaz, sex = sex, +muac = muac, age = age, -weight = weight, -height = height, -flags = flag_wfhz, area = area ) |> -generate_pretty_table_wfhz() - +generate_pretty_table_mfaz() } From fe30649ef184ba3df596d015f590786ba6ee617f Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Thu, 17 Oct 2024 17:30:58 +0200 Subject: [PATCH 7/9] doc: revise doc (case-definition) --- R/case_definitions.R | 50 ++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/R/case_definitions.R b/R/case_definitions.R index 4c63e79..9947254 100644 --- a/R/case_definitions.R +++ b/R/case_definitions.R @@ -1,28 +1,24 @@ #' -#' Wasting case-definition based on WFHZ, MFAZ, MUAC and Combined criteria +#' Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria #' -#' @param df A data frame containing the required variables. +#' @param df A dataset object of class `data.frame` to use. #' -#' @param muac A numeric vector holding absolute MUAC values (in mm). +#' @param muac A vector of class `integer` of MUAC values in millimeters. #' -#' @param zscore A numeric vector holding WFHZ values (with 3 decimal places). +#' @param zscore A vector of class `double` of WFHZ values (with 3 decimal places). #' -#' @param edema A character vector indicating if an observation has bilateral -#' edema or not. The codes are "y" for presence and "n" for absence of bilateral -#' edema. Default is `NULL`. +#' @param edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' #' @param cases A choice of the form of wasting to be defined. #' -#' @param base A choice of the criterion which the case-definition should be based -#' on. +#' @param base A choice of the criterion on which the case-definition should be based. #' -#' @returns A numeric vector of the same length as the input vector, with dummy -#' values: 1 for yes wasted and 0 for not wasted. The meaning of the codes -#' changes depending on the form of wasting chosen. That is, if set `cases` to -#' `"sam"` the codes 1 would mean yes for severe wasting. 
+#' @returns A vector of class `numeric` of dummy values: 1 for case and 0 +#' for not case. #' #' @details -#' Use `define_wasting()` to add the case-definitions into data frame. +#' Use `define_wasting()` to add the case-definitions to data frame. #' #' @rdname case_definition #' @@ -113,25 +109,26 @@ define_wasting_cases_combined <- function(zscore, muac, edema = NULL, #' #' @examples -#' # MUAC-based case-definition ---- +#' +#' ## Weight-for-height based case-definition ---- #' x <- anthro.02 |> #' define_wasting( -#' muac = muac, +#' zscore = wfhz, #' edema = edema, -#' base = "muac" +#' base = "wfhz" #' ) #' head(x) #' -#' # Weight-for-height based case-definition ---- +#' ## MUAC-based case-definition ---- #' x <- anthro.02 |> #' define_wasting( -#' zscore = wfhz, +#' muac = muac, #' edema = edema, -#' base = "wfhz" +#' base = "muac" #' ) #' head(x) #' -#' # Combined case-definition ---- +#' ## Combined case-definition ---- #' x <- anthro.02 |> #' define_wasting( #' zscore = wfhz, @@ -225,14 +222,13 @@ define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, #' Classify wasting into severe or moderate wasting to be used in the #' SMART MUAC tool weighting approach #' -#' @param muac A numeric vector holding absolute MUAC values (in mm). +#' @param muac A vector of class `integer` of MUAC values in millimeters. #' -#' @param .edema Optional. A character vector indicating if an observation has -#' bilateral edema or not. The codes are "y" for presence and "n" for absence of -#' bilateral edema. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @returns A character vector of the same length as the input indicating if a -#' child is severe or moderate wasted or not wasted. +#' @returns A vector of class `character` of the same length as `muac` and `.edema` +#' indicating if a child is severe or moderately wasted or not wasted. #' #' classify_wasting_for_cdc_approach <- function(muac, .edema = NULL) { From f5d1afd5118e8edfb0d34a4c92abfb3f34d0d4cb Mon Sep 17 00:00:00 2001 From: tomaszaba Date: Thu, 17 Oct 2024 22:02:53 +0200 Subject: [PATCH 8/9] doc: revise doc (prevalence) --- R/case_definitions.R | 4 ++ R/pretty_tables.R | 6 +- R/prevalence_combined.R | 44 +++++++------- R/prevalence_muac.R | 76 +++++++++++------------- R/prevalence_wfhz.R | 51 ++++++++-------- R/quality_auditors.R | 2 +- man/apply_cdc_age_weighting.Rd | 23 ++++--- man/case_definition.Rd | 42 +++++++------ man/classify_wasting_for_cdc_approach.Rd | 11 ++-- man/combined_prevalence.Rd | 45 +++++++------- man/compute_weighted_prevalence.Rd | 23 +++---- man/plausibility-check.Rd | 2 +- man/pretty_table.Rd | 6 +- man/prevalence.Rd | 26 ++++---- man/probit-method.Rd | 25 ++++---- man/tell_muac_analysis_strategy.Rd | 31 +++++----- 16 files changed, 200 insertions(+), 217 deletions(-) diff --git a/R/case_definitions.R b/R/case_definitions.R index 9947254..2696f6f 100644 --- a/R/case_definitions.R +++ b/R/case_definitions.R @@ -1,6 +1,10 @@ #' #' Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria #' +#' @description +#' Define if a given observation in the dataset is wasted or not, on the basis of +#' WFHZ, MFAZ, MUAC and the combined criteria. +#' #' @param df A dataset object of class `data.frame` to use. #' #' @param muac A vector of class `integer` of MUAC values in millimeters. 
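[Editorial sketch, not part of the patch] The case-definition helpers documented above ship with examples except `classify_wasting_for_cdc_approach()`. A minimal usage sketch based only on the signature shown in this patch; it assumes the package is attached, that the function is callable by the user (its export is not shown in the NAMESPACE excerpt), and that `anthro.02$muac` is in millimeters with `edema` coded "y"/"n", as in the `define_wasting()` examples:

# Severity classification feeding the SMART MUAC tool weighting approach
x <- classify_wasting_for_cdc_approach(
  muac = anthro.02$muac,
  .edema = anthro.02$edema
)
table(x)

The resulting severity vector is what the age-weighted MUAC prevalence functions revised later in this patch build on.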
diff --git a/R/pretty_tables.R b/R/pretty_tables.R index 4e64e64..2b0c7f7 100644 --- a/R/pretty_tables.R +++ b/R/pretty_tables.R @@ -2,13 +2,13 @@ #' #' @description #' Useful to getting the output returned from the plausibility checkers -#' into a presentable format. It converts scientific notation to standard +#' into a presentable format. It converts scientific notations to standard #' notations, round values and rename columns to meaningful names. #' #' @param df A summary table object of class `data.frame` returned by the #' plausibility checkers. #' -#' @returns A `data.frame` as `df`, columns columns renamed, values formatted and +#' @returns A `data.frame` as `df`. Columns are renamed, values formatted and #' ready to be shared. #' #' @examples @@ -32,7 +32,7 @@ #' ) |> #' generate_pretty_table_wfhz() #' -#' ## Check the plausibility of the absolute MUAC values ---- +#' ## Check the plausibility of MUAC data ---- #' #' anthro.01 |> #' process_muac_data( diff --git a/R/prevalence_combined.R b/R/prevalence_combined.R index a2b4009..9cc8869 100644 --- a/R/prevalence_combined.R +++ b/R/prevalence_combined.R @@ -63,40 +63,40 @@ compute_pps_based_combined_prevalence <- function(df, #' #' -#' Compute prevalence of wasting on the basis of the combined case-definition +#' Compute the prevalence of combined wasting #' #' @description -#' `compute_combined_prevalence()` is a handy function for calculating the prevalence -#' of combined wasting in accordance with the complex sample design properties -#' inherent to surveys. +#' The prevalence is calculated in accordance with the complex sample design +#' properties inherent to surveys. This includes weighting the survey data where +#' applicable. When either the acceptability of the standard deviation of WFHZ or +#' of the age ratio test is problematic, prevalence is not calculated. #' -#' @param df A data frame object returned by [process_muac_data()] and [process_wfhz_data()]. -#' Both wranglers need to be used sequentially. The order of use does not matter, -#' however, since muac wrangler transforms MUAC values into centimeters, those -#' need to be put back into millimeter. This can be achieved my using [recode_muac()] inside -#' [dplyr::mutate()] or [base::transform()]. +#' @param df An already wrangled dataset of class `data.frame` to use. Both +#' wranglers (of WFHZ and MUAC) need to be used sequentially, regardless of the +#' order. Note that MUAC values should be converted to millimeters after using +#' the MUAC wrangler. #' -#' @param .wt A numeric vector holding final survey weights. When set to `NULL`, -#' the function assumes self weighted survey, as in the ENA for SMART software; -#' Otherwise when supplied, weighted analysis is computed. +#' @param .wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; +#' otherwise, when a vector of weights if supplied, weighted analysis is computed. #' -#' @param .edema A character vector indicating if an observation has bilateral -#' edema or not. The codes are "y" for presence and "n" for absence of bilateral -#' edema. Default is `NULL`. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @param .summary_by A character vector containing data on the geographical areas -#' where the data was collected and for which the analysis should be performed at. 
+#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. #' -#' @returns A table with the descriptive statistics about wasting. +#' @returns A summarised table of class `data.frame` of the descriptive +#' statistics about combined wasting. #' #' @details #' A concept of "combined flags" is introduced in this function. It consists of -#' taking the `flag_wfhz` and `flag_mfaz` vectors, generated from the MUAC and -#' WFHZ wranglers, and checking if any value in either vector is flagged. If flagged, -#' the value is marked as a flag in the "cflags" vector; otherwise, it is not flagged -#' (see table below). This ensures that all flagged observations from both WFHZ +#' defining as flag any observation that is flagged in either `flag_wfhz` or +#' `flag_mfaz` vectors. A new column `cflag` for combined flags is created and +#' added to `df`. This ensures that all flagged observations from both WFHZ #' and MFAZ data are excluded from the combined prevalence analysis. #' +#' *The table below shows an overview of how `cflags` are defined* #' | **flag_wfhz** | **flag_mfaz** | **cflags** | #' | :---: | :---: | :---: | #' | 1 | 0 | 1 | diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R index 0a4591d..38de555 100644 --- a/R/prevalence_muac.R +++ b/R/prevalence_muac.R @@ -1,24 +1,26 @@ #' -#' A helper function to identify the MUAC prevalence analysis approach on the -#' basis of age ratio and standard deviation test results +#' A helper function to determine the MUAC prevalence analysis approach to follow #' #' @description -#' This is a helper function that gives instruction, to the main MUAC prevalence -#' analysis function, on the analysis approach to follow in a given area of -#' analysis on the basis of the quality of the age ratio test and the standard -#' deviation. +#' It determines the analysis approach to follow for a given analysis area on +#' the basis of the rate of acceptability of the age ratio test and the standard +#' deviation analysis result. #' -#' @param age_ratio_class A character vector returned from the plausibility -#' auditors holding the rating of the age ratio test results. +#' @param age_ratio_class A vector of class `character` of the acceptability +#' classification of the age ratio test result. #' -#' @param sd_class A character vector returned from the plausibility auditors -#' holding the rating of the standard deviation test results. +#' @param sd_class A vector of class `character` of the acceptability +#' classification of the standard deviation analysis result. +#' +#' @returns A vector of class `character` of the same length as the input vectors, +#' containing values indicating the analysis approach for each analysis area: "weighted", +#' "unweighted" and "missing". +#' +#' @details +#' When "weighted", the CDC weighting approach is applied to correct for +#' age bias; when "unweighted" a normal complex sample analysis is applied; when +#' "missing" `NA` gets thrown. #' -#' @returns A character vector of the same length as the input holding analysis -#' approach to be followed in a given area of analysis: "weighted", "unweighted" and -#' "missing". When "weighted", the CDC weighting approach is applied to correct for -#' age bias; "unweighted" a normal complex sample analysis is applied; when -#' "missing" `NA` gets thrown, so no prevalence computed. 
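[Editorial sketch, not part of the patch] The combined-flag rule documented in the R/prevalence_combined.R hunk above amounts to a logical OR of the two flag columns created by the wranglers. A minimal illustration with toy vectors (the names mirror the documented columns; the code is not taken from the package):

flag_wfhz <- c(1, 0, 0, 1)
flag_mfaz <- c(0, 1, 0, 1)
cflags <- as.numeric(flag_wfhz == 1 | flag_mfaz == 1)  # 1 whenever either flag is 1
cflags
#> [1] 1 1 0 1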
#' #' tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { @@ -33,29 +35,26 @@ tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { #' #' -#' Apply weighting to the MUAC prevalence when sample distribution is unbalanced -#' between children aged 6 to 23 months and those aged 24 to 59 months old +#' Apply the CDC/SMART prevalence weighting approach on MUAC data #' #' @description -#' `apply_cdc_age_weighting()` calculates a weighted proportion by adding the -#' proportion of children under 2 years to twice the proportion of children over 2 -#' and then dividing by 3. +#' Calculate a weighted prevalence estimate of MUAC by adding the proportion of +#' children under 2 years to twice the proportion of children over 2 and then +#' dividing by 3. #' -#' @param muac A numeric vector holding MUAC values (in mm). +#' @param muac A vector of class `integer` of MUAC values (in mm). #' -#' @param age A numeric vector holding child's age in months. +#' @param age A vector of class `double` of child's age in months. #' -#' @param .edema Optional. If given, it should be a character vector of "y" -#' for presence and "n" for absence of bilateral edema. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @param status A choice between "sam" and "mam" for the form of wasting. +#' @param status A choice of the form of wasting to be defined. #' -#' @returns A numeric vector of length and size 1. +#' @returns A vector of class `numeric` of length and size 1. #' #' @details #' This function is informed by the output of [age_ratio_test()]. -#' Note that this method differs from the approach used in the SMART plausibility -#' check. Please refer to the documentation for further details. #' #' apply_cdc_age_weighting <- function(muac, age, @@ -90,23 +89,18 @@ apply_cdc_age_weighting <- function(muac, age, #' +#' Apply the CDC/SMART prevalence weighting approach on MUAC data #' -#' Apply weighting to the MUAC prevalence when sample distribution is unbalanced -#' between children aged 6 to 23 months and those aged 24 to 59 months old -#' -#' @param df A data frame object with the required variables already wrangled. +#' @param df An already wrangled dataset object of class `data.frame` to use. #' -#' @param .edema A character vector indicating if an observation has bilateral -#' edema or not. The codes are "y" for presence and "n" for absence of bilateral -#' edema. Default is `NULL`. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @param .summary_by A character vector containing data of the geographical areas -#' where the data was collected and for which the analysis should be performed at. +#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. #' -#' @returns A tibble with dimensions that vary based on the use of `.summary_by`. -#' If set to `NULL`, a 1 x 3 tibble is returned. Otherwise, the number of rows -#' will match the number of groups or areas provided in `.summary_by`, -#' while the number of columns will remain the same. +#' @returns A table of class `data.frame` of dimensions that vary based on +#' `.summary_by`, containing the results. 
#' #' compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { diff --git a/R/prevalence_wfhz.R b/R/prevalence_wfhz.R index e72de0c..b976c87 100644 --- a/R/prevalence_wfhz.R +++ b/R/prevalence_wfhz.R @@ -1,26 +1,26 @@ #' -#' Compute the prevalence of wasting on the basis of WFHZ, MFAZ and MUAC +#' Compute the prevalence estimates of wasting on the basis of WFHZ, MFAZ or MUAC #' #' @description #' The prevalence is calculated in accordance with the complex sample design #' properties inherent to surveys. This includes weighting the survey data where -#' applicable and applying PROBIT method estimation of prevalence (for WFHZ and -#' MFAZ) when standard deviation is problematic. This is as in the SMART Methodology. +#' applicable and applying PROBIT method estimation (for WFHZ) when the standard +#' deviation is problematic. This is as in the SMART Methodology. #' -#' @param df A data frame object with the required variables already wrangled. +#' @param df An already wrangled dataset object of class `data.frame` to use. #' -#' @param .wt A numeric vector holding final survey weights. When set to `NULL`, -#' the function assumes self weighted survey, as in the ENA for SMART software; -#' Otherwise when supplied, weighted analysis is computed. +#' @param .wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; +#' otherwise, when a vector of weights is supplied, weighted analysis is computed. #' -#' @param .edema A character vector indicating if an observation has bilateral -#' edema or not. The codes are "y" for presence and "n" for absence of bilateral -#' edema. Default is `NULL`. +#' @param .edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. #' -#' @param .summary_by A character vector containing data on the geographical areas -#' where the data was collected and for which the analysis should be performed at. +#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. #' -#' @returns A table with the descriptive statistics about wasting. +#' @returns A summarised table of class `data.frame` of the descriptive +#' statistics about wasting. #' #' @examples #' ## An example of application of `compute_wfhz_prevalence()` ---- @@ -187,32 +187,29 @@ compute_pps_based_wfhz_prevalence <- function(df, #' #' -#' Compute the prevalence of wasting on the basis of the PROBIT method. +#' Compute the prevalence estimates of wasting on the basis of the PROBIT method. #' #' @description #' This approach is applied when the standard deviation of WFHZ is problematic. #' The PROBIT method estimates the prevalence of wasting indirectly by calculating -#' the area under the curve the tail of the curve, from negative infinitive to +#' the area under the tail of the curve, from negative infinity to #' the given threshold, using the cumulative normal distribution function with #' the mean and standard deviation as inputs. #' -#' @param df A data frame object with the required variables already wrangled. +#' @param df An already wrangled dataset object of class `data.frame` to use. #' -#' @param x A numeric vector holding WFHZ or MFAZ values. +#' @param x A vector of class `double` of WFHZ or MFAZ values. #' -#' @param .status A choice on the form of wasting for which the prevalence should -#' be calculated for.
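[Editorial sketch, not part of the patch] The PROBIT description above estimates prevalence as the area under a normal curve below the wasting threshold. A minimal base R illustration; the -2 and -3 z-score cut-offs are the conventional GAM and SAM thresholds and are an assumption here, as is the use of the observed standard deviation (the patch does not show whether the package substitutes another value when the SD is problematic):

wfhz <- anthro.02$wfhz                           # z-scores from the package's example data
mu <- mean(wfhz, na.rm = TRUE)
sigma <- sd(wfhz, na.rm = TRUE)
gam_probit <- pnorm(-2, mean = mu, sd = sigma)   # area under the curve below -2
sam_probit <- pnorm(-3, mean = mu, sd = sigma)   # area under the curve below -3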
+#' @param .status A choice of the form of wasting for which the prevalence should +#' be estimated. #' -#' @param .summary_by A character vector containing data on the geographical areas where -#' the data was collected. This is to group the survey design object into different -#' geographical areas in the data and allow for summaries to be computed for each of them. -#' Default is NULL. +#' @param .summary_by A vector of class `character` of the geographical areas +#' where the data was collected and for which the analysis should be performed. #' -#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index you wish -#' to compute PROBIT prevalence on. +#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index. #' -#' @returns A data frame with the prevalence. No confidence intervals are -#' yielded. +#' @returns A summarised table of class `data.frame` of the prevalence estimates. +#' No confidence intervals are yielded. #' #' @rdname probit-method #' diff --git a/R/quality_auditors.R b/R/quality_auditors.R index 0d901d9..2174280 100644 --- a/R/quality_auditors.R +++ b/R/quality_auditors.R @@ -21,7 +21,7 @@ #' @param flags A vector of class `numeric` of flagged observations. #' #' @param area A vector of class `character` of the geographical location where -#' data was collected and to which the analysis should be aggregated at. +#' data was collected and for which the analysis should be aggregated. #' #' @returns A summarised `data.frame` of plausibility test results and their #' respective acceptability ratings. diff --git a/man/apply_cdc_age_weighting.Rd b/man/apply_cdc_age_weighting.Rd index 1692652..ef4bea0 100644 --- a/man/apply_cdc_age_weighting.Rd +++ b/man/apply_cdc_age_weighting.Rd @@ -2,31 +2,28 @@ % Please edit documentation in R/prevalence_muac.R \name{apply_cdc_age_weighting} \alias{apply_cdc_age_weighting} -\title{Apply weighting to the MUAC prevalence when sample distribution is unbalanced -between children aged 6 to 23 months and those aged 24 to 59 months old} +\title{Apply the CDC/SMART prevalence weighting approach on MUAC data} \usage{ apply_cdc_age_weighting(muac, age, .edema = NULL, status = c("sam", "mam")) } \arguments{ -\item{muac}{A numeric vector holding MUAC values (in mm).} +\item{muac}{A vector of class \code{integer} of MUAC values (in mm).} -\item{age}{A numeric vector holding child's age in months.} +\item{age}{A vector of class \code{double} of child's age in months.} -\item{.edema}{Optional. If given, it should be a character vector of "y" -for presence and "n" for absence of bilateral edema.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -\item{status}{A choice between "sam" and "mam" for the form of wasting.} +\item{status}{A choice of the form of wasting to be defined.} } \value{ -A numeric vector of length and size 1. +A vector of class \code{numeric} of length and size 1. } \description{ -\code{apply_cdc_age_weighting()} calculates a weighted proportion by adding the -proportion of children under 2 years to twice the proportion of children over 2 -and then dividing by 3. +Calculate a weighted prevalence estimate of MUAC by adding the proportion of +children under 2 years to twice the proportion of children over 2 and then +dividing by 3. } \details{ This function is informed by the output of \code{\link[=age_ratio_test]{age_ratio_test()}}. 
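[Editorial sketch, not part of the patch] The weighting described in the `apply_cdc_age_weighting()` documentation above reduces to a small piece of arithmetic. An illustration with made-up proportions (the numbers are not from the package or its data):

p_under2 <- 0.10                 # assumed proportion wasted, children 6-23 months
p_over2  <- 0.04                 # assumed proportion wasted, children 24-59 months
weighted_p <- (p_under2 + 2 * p_over2) / 3
weighted_p
#> [1] 0.06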
-Note that this method differs from the approach used in the SMART plausibility -check. Please refer to the documentation for further details. } diff --git a/man/case_definition.Rd b/man/case_definition.Rd index 4cd4dbf..4de9245 100644 --- a/man/case_definition.Rd +++ b/man/case_definition.Rd @@ -5,7 +5,7 @@ \alias{define_wasting_cases_whz} \alias{define_wasting_cases_combined} \alias{define_wasting} -\title{Wasting case-definition based on WFHZ, MFAZ, MUAC and Combined criteria} +\title{Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria} \usage{ define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam")) @@ -27,53 +27,51 @@ define_wasting( ) } \arguments{ -\item{muac}{A numeric vector holding absolute MUAC values (in mm).} +\item{muac}{A vector of class \code{integer} of MUAC values in millimeters.} -\item{edema}{A character vector indicating if an observation has bilateral -edema or not. The codes are "y" for presence and "n" for absence of bilateral -edema. Default is \code{NULL}.} +\item{edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} \item{cases}{A choice of the form of wasting to be defined.} -\item{zscore}{A numeric vector holding WFHZ values (with 3 decimal places).} +\item{zscore}{A vector of class \code{double} of WFHZ values (with 3 decimal places).} -\item{df}{A data frame containing the required variables.} +\item{df}{A dataset object of class \code{data.frame} to use.} -\item{base}{A choice of the criterion which the case-definition should be based -on.} +\item{base}{A choice of the criterion on which the case-definition should be based.} } \value{ -A numeric vector of the same length as the input vector, with dummy -values: 1 for yes wasted and 0 for not wasted. The meaning of the codes -changes depending on the form of wasting chosen. That is, if set \code{cases} to -\code{"sam"} the codes 1 would mean yes for severe wasting. +A vector of class \code{numeric} of dummy values: 1 for case and 0 +for not case. } \description{ -Wasting case-definition based on WFHZ, MFAZ, MUAC and Combined criteria +Define if a given observation in the dataset is wasted or not, on the basis of +WFHZ, MFAZ, MUAC and the combined criteria. } \details{ -Use \code{define_wasting()} to add the case-definitions into data frame. +Use \code{define_wasting()} to add the case-definitions to data frame. } \examples{ -# MUAC-based case-definition ---- + +## Weight-for-height based case-definition ---- x <- anthro.02 |> define_wasting( -muac = muac, +zscore = wfhz, edema = edema, -base = "muac" +base = "wfhz" ) head(x) -# Weight-for-height based case-definition ---- +## MUAC-based case-definition ---- x <- anthro.02 |> define_wasting( -zscore = wfhz, +muac = muac, edema = edema, -base = "wfhz" +base = "muac" ) head(x) -# Combined case-definition ---- +## Combined case-definition ---- x <- anthro.02 |> define_wasting( zscore = wfhz, diff --git a/man/classify_wasting_for_cdc_approach.Rd b/man/classify_wasting_for_cdc_approach.Rd index a709c95..26f11d4 100644 --- a/man/classify_wasting_for_cdc_approach.Rd +++ b/man/classify_wasting_for_cdc_approach.Rd @@ -8,15 +8,14 @@ SMART MUAC tool weighting approach} classify_wasting_for_cdc_approach(muac, .edema = NULL) } \arguments{ -\item{muac}{A numeric vector holding absolute MUAC values (in mm).} +\item{muac}{A vector of class \code{integer} of MUAC values in millimeters.} -\item{.edema}{Optional. 
A character vector indicating if an observation has -bilateral edema or not. The codes are "y" for presence and "n" for absence of -bilateral edema.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} } \value{ -A character vector of the same length as the input indicating if a -child is severe or moderate wasted or not wasted. +A vector of class \code{character} of the same length as \code{muac} and \code{.edema} +indicating if a child is severe or moderately wasted or not wasted. } \description{ Classify wasting into severe or moderate wasting to be used in the diff --git a/man/combined_prevalence.Rd b/man/combined_prevalence.Rd index 17a5cbb..497cf48 100644 --- a/man/combined_prevalence.Rd +++ b/man/combined_prevalence.Rd @@ -15,38 +15,39 @@ compute_pps_based_combined_prevalence( compute_combined_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) } \arguments{ -\item{df}{A data frame object returned by \code{\link[=process_muac_data]{process_muac_data()}} and \code{\link[=process_wfhz_data]{process_wfhz_data()}}. -Both wranglers need to be used sequentially. The order of use does not matter, -however, since muac wrangler transforms MUAC values into centimeters, those -need to be put back into millimeter. This can be achieved my using \code{\link[=recode_muac]{recode_muac()}} inside -\code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[base:transform]{base::transform()}}.} +\item{df}{An already wrangled dataset of class \code{data.frame} to use. Both +wranglers (of WFHZ and MUAC) need to be used sequentially, regardless of the +order. Note that MUAC values should be converted to millimeters after using +the MUAC wrangler.} -\item{.wt}{A numeric vector holding final survey weights. When set to \code{NULL}, -the function assumes self weighted survey, as in the ENA for SMART software; -Otherwise when supplied, weighted analysis is computed.} +\item{.wt}{A vector of class \code{double} of the final survey weights. Default is +\code{NULL} assuming a self weighted survey, as in the ENA for SMART software; +otherwise, when a vector of weights if supplied, weighted analysis is computed.} -\item{.edema}{A character vector indicating if an observation has bilateral -edema or not. The codes are "y" for presence and "n" for absence of bilateral -edema. Default is \code{NULL}.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -\item{.summary_by}{A character vector containing data on the geographical areas -where the data was collected and for which the analysis should be performed at.} +\item{.summary_by}{A vector of class \code{character} of the geographical areas +where the data was collected and for which the analysis should be performed.} } \value{ -A table with the descriptive statistics about wasting. +A summarised table of class \code{data.frame} of the descriptive +statistics about combined wasting. } \description{ -\code{compute_combined_prevalence()} is a handy function for calculating the prevalence -of combined wasting in accordance with the complex sample design properties -inherent to surveys. +The prevalence is calculated in accordance with the complex sample design +properties inherent to surveys. This includes weighting the survey data where +applicable. 
When either the acceptability of the standard deviation of WFHZ or +of the age ratio test is problematic, prevalence is not calculated. } \details{ A concept of "combined flags" is introduced in this function. It consists of -taking the \code{flag_wfhz} and \code{flag_mfaz} vectors, generated from the MUAC and -WFHZ wranglers, and checking if any value in either vector is flagged. If flagged, -the value is marked as a flag in the "cflags" vector; otherwise, it is not flagged -(see table below). This ensures that all flagged observations from both WFHZ -and MFAZ data are excluded from the combined prevalence analysis.\tabular{ccc}{ +defining as flag any observation that is flagged in either \code{flag_wfhz} or +\code{flag_mfaz} vectors. A new column \code{cflag} for combined flags is created and +added to \code{df}. This ensures that all flagged observations from both WFHZ +and MFAZ data are excluded from the combined prevalence analysis. + +\emph{The table below shows an overview of how \code{cflags} are defined}\tabular{ccc}{ \strong{flag_wfhz} \tab \strong{flag_mfaz} \tab \strong{cflags} \cr 1 \tab 0 \tab 1 \cr 0 \tab 1 \tab 1 \cr diff --git a/man/compute_weighted_prevalence.Rd b/man/compute_weighted_prevalence.Rd index 65109df..e55d415 100644 --- a/man/compute_weighted_prevalence.Rd +++ b/man/compute_weighted_prevalence.Rd @@ -2,28 +2,23 @@ % Please edit documentation in R/prevalence_muac.R \name{compute_weighted_prevalence} \alias{compute_weighted_prevalence} -\title{Apply weighting to the MUAC prevalence when sample distribution is unbalanced -between children aged 6 to 23 months and those aged 24 to 59 months old} +\title{Apply the CDC/SMART prevalence weighting approach on MUAC data} \usage{ compute_weighted_prevalence(df, .edema = NULL, .summary_by = NULL) } \arguments{ -\item{df}{A data frame object with the required variables already wrangled.} +\item{df}{An already wrangled dataset object of class \code{data.frame} to use.} -\item{.edema}{A character vector indicating if an observation has bilateral -edema or not. The codes are "y" for presence and "n" for absence of bilateral -edema. Default is \code{NULL}.} +\item{.edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -\item{.summary_by}{A character vector containing data of the geographical areas -where the data was collected and for which the analysis should be performed at.} +\item{.summary_by}{A vector of class \code{character} of the geographical areas +where the data was collected and for which the analysis should be performed.} } \value{ -A tibble with dimensions that vary based on the use of \code{.summary_by}. -If set to \code{NULL}, a 1 x 3 tibble is returned. Otherwise, the number of rows -will match the number of groups or areas provided in \code{.summary_by}, -while the number of columns will remain the same. +A table of class \code{data.frame} of dimensions that vary based on +\code{.summary_by}, containing the results. 
}
\description{
-Apply weighting to the MUAC prevalence when sample distribution is unbalanced
-between children aged 6 to 23 months and those aged 24 to 59 months old
+Apply the CDC/SMART prevalence weighting approach on MUAC data
}
diff --git a/man/plausibility-check.Rd b/man/plausibility-check.Rd
index 1c93e9d..8c78981 100644
--- a/man/plausibility-check.Rd
+++ b/man/plausibility-check.Rd
@@ -25,7 +25,7 @@ wrangled using this package's wranglers.}
\item{flags}{A vector of class \code{numeric} of flagged observations.}
\item{area}{A vector of class \code{character} of the geographical location where
-data was collected and to which the analysis should be aggregated at.}
+data was collected and for which the analysis should be aggregated.}
\item{weight}{A vector of class \code{double} of child's weight in kilograms.}
diff --git a/man/pretty_table.Rd b/man/pretty_table.Rd
index e4fab27..4b334e9 100644
--- a/man/pretty_table.Rd
+++ b/man/pretty_table.Rd
@@ -17,12 +17,12 @@ generate_pretty_table_muac(df)
plausibility checkers.}
}
\value{
-A \code{data.frame} as \code{df}, columns columns renamed, values formatted and
+A \code{data.frame} as \code{df}. Columns are renamed, values formatted and
ready to be shared.
}
\description{
Useful to getting the output returned from the plausibility checkers
-into a presentable format. It converts scientific notation to standard
+into a presentable format. It converts scientific notations to standard
notations, round values and rename columns to meaningful names.
}
\examples{
@@ -46,7 +46,7 @@ area = area
) |>
generate_pretty_table_wfhz()
-## Check the plausibility of the absolute MUAC values ----
+## Check the plausibility of MUAC data ----
anthro.01 |>
process_muac_data(
diff --git a/man/prevalence.Rd b/man/prevalence.Rd
index e0b8a6b..d28b34c 100644
--- a/man/prevalence.Rd
+++ b/man/prevalence.Rd
@@ -5,7 +5,7 @@
\alias{compute_mfaz_prevalence}
\alias{compute_muac_prevalence}
\alias{compute_wfhz_prevalence}
-\title{Compute the prevalence of wasting on the basis of WFHZ, MFAZ and MUAC}
+\title{Compute the prevalence estimates of wasting on the basis of WFHZ, MFAZ or MUAC}
\usage{
compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
@@ -14,27 +14,27 @@ compute_muac_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
compute_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
}
\arguments{
-\item{df}{A data frame object with the required variables already wrangled.}
+\item{df}{An already wrangled dataset object of class \code{data.frame} to use.}
-\item{.wt}{A numeric vector holding final survey weights. When set to \code{NULL},
-the function assumes self weighted survey, as in the ENA for SMART software;
-Otherwise when supplied, weighted analysis is computed.}
+\item{.wt}{A vector of class \code{double} of the final survey weights. Default is
+\code{NULL} assuming a self weighted survey, as in the ENA for SMART software;
+otherwise, when a vector of weights is supplied, weighted analysis is computed.}
-\item{.edema}{A character vector indicating if an observation has bilateral
-edema or not. The codes are "y" for presence and "n" for absence of bilateral
-edema. Default is \code{NULL}.}
+\item{.edema}{A vector of class \code{character} of edema. Code should be
+"y" for presence and "n" for absence of bilateral edema.
Default is \code{NULL}.}
-\item{.summary_by}{A character vector containing data on the geographical areas
-where the data was collected and for which the analysis should be performed at.}
+\item{.summary_by}{A vector of class \code{character} of the geographical areas
+where the data was collected and for which the analysis should be performed.}
}
\value{
-A table with the descriptive statistics about wasting.
+A summarised table of class \code{data.frame} of the descriptive
+statistics about wasting.
}
\description{
The prevalence is calculated in accordance with the complex sample design
properties inherent to surveys. This includes weighting the survey data where
-applicable and applying PROBIT method estimation of prevalence (for WFHZ and
-MFAZ) when standard deviation is problematic. This is as in the SMART Methodology.
+applicable and applying PROBIT method estimation (for WFHZ) when the standard
+deviation is problematic. This is as in the SMART Methodology.
}
\examples{
diff --git a/man/probit-method.Rd b/man/probit-method.Rd
index 1e13c26..0b1b698 100644
--- a/man/probit-method.Rd
+++ b/man/probit-method.Rd
@@ -3,36 +3,33 @@
\name{apply_probit_approach}
\alias{apply_probit_approach}
\alias{compute_probit_prevalence}
-\title{Compute the prevalence of wasting on the basis of the PROBIT method.}
+\title{Compute the prevalence estimates of wasting on the basis of the PROBIT method.}
\usage{
apply_probit_approach(x, .status = c("gam", "sam"))
compute_probit_prevalence(df, .summary_by = NULL, .for = c("wfhz", "mfaz"))
}
\arguments{
-\item{x}{A numeric vector holding WFHZ or MFAZ values.}
+\item{x}{A vector of class \code{double} of WFHZ or MFAZ values.}
-\item{.status}{A choice on the form of wasting for which the prevalence should
-be calculated for.}
+\item{.status}{A choice of the form of wasting for which the prevalence should
+be estimated.}
-\item{df}{A data frame object with the required variables already wrangled.}
+\item{df}{An already wrangled dataset object of class \code{data.frame} to use.}
-\item{.summary_by}{A character vector containing data on the geographical areas where
-the data was collected. This is to group the survey design object into different
-geographical areas in the data and allow for summaries to be computed for each of them.
-Default is NULL.}
+\item{.summary_by}{A vector of class \code{character} of the geographical areas
+where the data was collected and for which the analysis should be performed.}
-\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index you wish
-to compute PROBIT prevalence on.}
+\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index.}
}
\value{
-A data frame with the prevalence. No confidence intervals are
-yielded.
+A summarised table of class \code{data.frame} of the prevalence estimates.
+No confidence intervals are yielded.
}
\description{
This approach is applied when the standard deviation of WFHZ is problematic.
The PROBIT method estimates the prevalence of wasting indirectly by calculating
-the area under the curve the tail of the curve, from negative infinitive to
+the area under the tail of the curve, from negative infinity to
the given threshold, using the cumulative normal distribution function with
the mean and standard deviation as inputs.
}
diff --git a/man/tell_muac_analysis_strategy.Rd b/man/tell_muac_analysis_strategy.Rd
index cad60d1..0e230fd 100644
--- a/man/tell_muac_analysis_strategy.Rd
+++ b/man/tell_muac_analysis_strategy.Rd
@@ -2,28 +2,29 @@
% Please edit documentation in R/prevalence_muac.R
\name{tell_muac_analysis_strategy}
\alias{tell_muac_analysis_strategy}
-\title{A helper function to identify the MUAC prevalence analysis approach on the
-basis of age ratio and standard deviation test results}
+\title{A helper function to determine the MUAC prevalence analysis approach to follow}
\usage{
tell_muac_analysis_strategy(age_ratio_class, sd_class)
}
\arguments{
-\item{age_ratio_class}{A character vector returned from the plausibility
-auditors holding the rating of the age ratio test results.}
+\item{age_ratio_class}{A vector of class \code{character} of the acceptability
+classification of the age ratio test result.}
-\item{sd_class}{A character vector returned from the plausibility auditors
-holding the rating of the standard deviation test results.}
+\item{sd_class}{A vector of class \code{character} of the acceptability
+classification of the standard deviation analysis result.}
}
\value{
-A character vector of the same length as the input holding analysis
-approach to be followed in a given area of analysis: "weighted", "unweighted" and
-"missing". When "weighted", the CDC weighting approach is applied to correct for
-age bias; "unweighted" a normal complex sample analysis is applied; when
-"missing" \code{NA} gets thrown, so no prevalence computed.
+A vector of class \code{character} of the same length as the input vectors,
+containing values indicating the analysis approach for each analysis area: "weighted",
+"unweighted" and "missing".
}
\description{
-This is a helper function that gives instruction, to the main MUAC prevalence
-analysis function, on the analysis approach to follow in a given area of
-analysis on the basis of the quality of the age ratio test and the standard
-deviation.
+It determines the analysis approach to follow for a given analysis area on
+the basis of the rating of acceptability of the age ratio test and the standard
+deviation analysis result.
+}
+\details{
+When "weighted", the CDC weighting approach is applied to correct for
+age bias; when "unweighted" a normal complex sample analysis is applied; when
+"missing", \code{NA} is returned and no prevalence is computed.
+}
}

From 49077d1f5ce581bf1019c4b8367a09a6faf54c9e Mon Sep 17 00:00:00 2001
From: tomaszaba
Date: Fri, 18 Oct 2024 07:04:25 +0200
Subject: [PATCH 9/9] doc: revise doc (data)
---
 R/data.R                   | 36 +++++++++++++++++++-----------------
 R/prevalence_combined.R    | 12 ++++++------
 man/anthro.02.Rd           |  4 ++--
 man/anthro.03.Rd           | 12 ++++++------
 man/anthro.04.Rd           | 20 ++++++++++++--------
 man/combined_prevalence.Rd | 12 ++++++------
 6 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/R/data.R b/R/data.R
index bcb1be3..00b8661 100644
--- a/R/data.R
+++ b/R/data.R
@@ -34,8 +34,8 @@
#' A sample of an already wrangled survey data
#'
#' @description
-#' `anthro.02` is about a household budget survey conducted in Mozambique in
-#' 2019/2020, known as IOF (*Inquérito ao Orçamento Familiar* in Portuguese).*IOF*
+#' Data from a household budget survey conducted in Mozambique in
+#' 2019/2020, known as *IOF* (*Inquérito ao Orçamento Familiar* in Portuguese). *IOF*
#' is a two-stage cluster-based survey, representative at province level (admin 2),
#' with probability of the selection of the clusters proportional to the size of
#' the population.
Its data collection spans for a period of 12 months.
@@ -74,21 +74,21 @@
#' A sample data of district level SMART surveys conducted in Mozambique
#'
#' @description
-#' `anthro.03` contains survey data of four districts. Each district's dataset
+#' `anthro.03` contains survey data of four districts. Each district dataset
#' presents distinct data quality scenarios that requires tailored prevalence
#' analysis approach: two districts show a problematic WFHZ standard deviation
#' whilst the remaining are all within range.
#'
-#' This sample data demonstrates the use of prevalence functions on multi-area
-#' survey data, where there is variations in the standard deviation rating.
-#' As a result, different analyses approaches are required for each area
-#' to ensure accurate estimation.
+#' This sample data is useful for demonstrating the use of the prevalence functions on
+#' multi-area survey data where there can be variations in the rating of
+#' acceptability of the standard deviation, hence requiring different analysis approaches
+#' for each area to ensure accurate estimation.
#'
#' @format A tibble of 943 x 9.
#'
#' |**Variable** | **Description** |
#' | :--- | :---|
-#' | *district* | The administrative unit (admin 1) where data was collected. |
+#' | *district* | The location where data was collected |
#' | *cluster* | Primary sampling unit |
#' | *team* | Survey teams |
#' | *sex* | Sex, "m" = boys, "f" = girls |
@@ -112,17 +112,19 @@
#' A sample data of a community-based sentinel site from an anonymized location
#'
#' @description
-#' `anthro.04` was generated from a community-based sentinel site conducted
+#' Data was generated through a community-based sentinel site conducted
#' across three provinces. Each province's dataset presents distinct
#' data quality scenarios, requiring tailored prevalence analysis:
-#' "Province 3" has problematic MFAZ standard deviation and age ratio tests;
-#' "Province 2" shows a problematic age ratio but acceptable MFAZ standard
-#' deviation; lastly, "Province 1" has both tests within acceptable ranges.
-#'
-#' This sample data demonstrates the use of prevalence functions on multi-area
-#' survey data, where variations in the standard deviation ratings exist.
-#' As a result, different analytical approaches are required for each area
-#' to ensure accurate interpretation.
+#' + "Province 1" has MFAZ's standard deviation and age ratio test rating of
+#' acceptability falling within range;
+#' + "Province 2" has age ratio rated as problematic but with an acceptable
+#' standard deviation of MFAZ;
+#' + "Province 3" has both tests rated as problematic.
+#'
+#' This sample data is useful for demonstrating the use of prevalence functions on
+#' multi-area survey data where variations in the rating of acceptability of the
+#' standard deviation exist, hence requiring different analysis approaches for each
+#' area to ensure accurate estimation.
#'
#' @format A tibble of 3,002 x 8.
#'
diff --git a/R/prevalence_combined.R b/R/prevalence_combined.R
index 9cc8869..c73f7b2 100644
--- a/R/prevalence_combined.R
+++ b/R/prevalence_combined.R
@@ -67,18 +67,18 @@ compute_pps_based_combined_prevalence <- function(df,
#'
#' @description
#' The prevalence is calculated in accordance with the complex sample design
-#' properties inherent to surveys. This includes weighting the survey data where
+#' properties inherent to surveys. This includes weighting of survey data where
#' applicable.
When either the acceptability of the standard deviation of WFHZ or
#' of the age ratio test is problematic, prevalence is not calculated.
#'
#' @param df An already wrangled dataset of class `data.frame` to use. Both
-#' wranglers (of WFHZ and MUAC) need to be used sequentially, regardless of the
+#' wranglers (of WFHZ and of MUAC) need to be used sequentially, regardless of the
#' order. Note that MUAC values should be converted to millimeters after using
#' the MUAC wrangler.
#'
#' @param .wt A vector of class `double` of the final survey weights. Default is
-#' `NULL` assuming a self weighted survey, as in the ENA for SMART software;
-#' otherwise, when a vector of weights if supplied, weighted analysis is computed.
+#' `NULL` assuming a self-weighted survey, as in the ENA for SMART software;
+#' otherwise a weighted analysis is computed.
#'
#' @param .edema A vector of class `character` of edema. Code should be
#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`.
#'
#' @param .summary_by A vector of class `character` of the geographical areas
#' where the data was collected and for which the analysis should be performed.
#'
-#' @returns A summarised table of class `data.frame` of the descriptive
+#' @returns A summarised table of class `data.frame` for the descriptive
#' statistics about combined wasting.
#'
#' @details
#' A concept of "combined flags" is introduced in this function. It consists of
#' defining as flag any observation that is flagged in either `flag_wfhz` or
-#' `flag_mfaz` vectors. A new column `cflag` for combined flags is created and
+#' `flag_mfaz` vectors. A new column `cflags` for combined flags is created and
#' added to `df`. This ensures that all flagged observations from both WFHZ
#' and MFAZ data are excluded from the combined prevalence analysis.
#'
diff --git a/man/anthro.02.Rd b/man/anthro.02.Rd
index c5b4861..bc62b6e 100644
--- a/man/anthro.02.Rd
+++ b/man/anthro.02.Rd
@@ -33,8 +33,8 @@ can be gotten from: \url{https://mozdata.ine.gov.mz/index.php/catalog/88#metadat
anthro.02
}
\description{
-\code{anthro.02} is about a household budget survey conducted in Mozambique in
-2019/2020, known as IOF (\emph{Inquérito ao Orçamento Familiar} in Portuguese).\emph{IOF}
+Data from a household budget survey conducted in Mozambique in
+2019/2020, known as \emph{IOF} (\emph{Inquérito ao Orçamento Familiar} in Portuguese). \emph{IOF}
is a two-stage cluster-based survey, representative at province level (admin 2),
with probability of the selection of the clusters proportional to the size of
the population. Its data collection spans for a period of 12 months.
diff --git a/man/anthro.03.Rd b/man/anthro.03.Rd
index 1d95f94..1d5414e 100644
--- a/man/anthro.03.Rd
+++ b/man/anthro.03.Rd
@@ -7,7 +7,7 @@
\format{
A tibble of 943 x 9.\tabular{ll}{
\strong{Variable} \tab \strong{Description} \cr
- \emph{district} \tab The administrative unit (admin 1) where data was collected. \cr
+ \emph{district} \tab The location where data was collected \cr
\emph{cluster} \tab Primary sampling unit \cr
\emph{team} \tab Survey teams \cr
\emph{sex} \tab Sex, "m" = boys, "f" = girls \cr
@@ -25,15 +25,15 @@ Anonymous
anthro.03
}
\description{
-\code{anthro.03} contains survey data of four districts. Each district's dataset
+\code{anthro.03} contains survey data of four districts.
Each district dataset
presents distinct data quality scenarios that requires tailored prevalence
analysis approach: two districts show a problematic WFHZ standard deviation
whilst the remaining are all within range.
-This sample data demonstrates the use of prevalence functions on multi-area
-survey data, where there is variations in the standard deviation rating.
-As a result, different analyses approaches are required for each area
-to ensure accurate estimation.
+This sample data is useful for demonstrating the use of the prevalence functions on
+multi-area survey data where there can be variations in the rating of
+acceptability of the standard deviation, hence requiring different analysis approaches
+for each area to ensure accurate estimation.
}
\examples{
anthro.03
diff --git a/man/anthro.04.Rd b/man/anthro.04.Rd
index fd961a2..5cfae6e 100644
--- a/man/anthro.04.Rd
+++ b/man/anthro.04.Rd
@@ -24,17 +24,21 @@ Anonymous
anthro.04
}
\description{
-\code{anthro.04} was generated from a community-based sentinel site conducted
+Data was generated through a community-based sentinel site conducted
across three provinces. Each province's dataset presents distinct
data quality scenarios, requiring tailored prevalence analysis:
-"Province 3" has problematic MFAZ standard deviation and age ratio tests;
-"Province 2" shows a problematic age ratio but acceptable MFAZ standard
-deviation; lastly, "Province 1" has both tests within acceptable ranges.
+\itemize{
+\item "Province 1" has MFAZ's standard deviation and age ratio test rating of
+acceptability falling within range;
+\item "Province 2" has age ratio rated as problematic but with an acceptable
+standard deviation of MFAZ;
+\item "Province 3" has both tests rated as problematic.
+}
-This sample data demonstrates the use of prevalence functions on multi-area
-survey data, where variations in the standard deviation ratings exist.
-As a result, different analytical approaches are required for each area
-to ensure accurate interpretation.
+This sample data is useful for demonstrating the use of prevalence functions on
+multi-area survey data where variations in the rating of acceptability of the
+standard deviation exist, hence requiring different analysis approaches for each
+area to ensure accurate estimation.
}
\examples{
anthro.04
diff --git a/man/combined_prevalence.Rd b/man/combined_prevalence.Rd
index 497cf48..131e552 100644
--- a/man/combined_prevalence.Rd
+++ b/man/combined_prevalence.Rd
@@ -16,13 +16,13 @@ compute_combined_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL)
}
\arguments{
\item{df}{An already wrangled dataset of class \code{data.frame} to use. Both
-wranglers (of WFHZ and MUAC) need to be used sequentially, regardless of the
+wranglers (of WFHZ and of MUAC) need to be used sequentially, regardless of the
order. Note that MUAC values should be converted to millimeters after using
the MUAC wrangler.}
\item{.wt}{A vector of class \code{double} of the final survey weights. Default is
-\code{NULL} assuming a self weighted survey, as in the ENA for SMART software;
-otherwise, when a vector of weights if supplied, weighted analysis is computed.}
+\code{NULL} assuming a self-weighted survey, as in the ENA for SMART software;
+otherwise a weighted analysis is computed.}
\item{.edema}{A vector of class \code{character} of edema. Code should be
"y" for presence and "n" for absence of bilateral edema.
Default is \code{NULL}.} @@ -31,19 +31,19 @@ otherwise, when a vector of weights if supplied, weighted analysis is computed.} where the data was collected and for which the analysis should be performed.} } \value{ -A summarised table of class \code{data.frame} of the descriptive +A summarised table of class \code{data.frame} for the descriptive statistics about combined wasting. } \description{ The prevalence is calculated in accordance with the complex sample design -properties inherent to surveys. This includes weighting the survey data where +properties inherent to surveys. This includes weighting of survey data where applicable. When either the acceptability of the standard deviation of WFHZ or of the age ratio test is problematic, prevalence is not calculated. } \details{ A concept of "combined flags" is introduced in this function. It consists of defining as flag any observation that is flagged in either \code{flag_wfhz} or -\code{flag_mfaz} vectors. A new column \code{cflag} for combined flags is created and +\code{flag_mfaz} vectors. A new column \code{cflags} for combined flags is created and added to \code{df}. This ensures that all flagged observations from both WFHZ and MFAZ data are excluded from the combined prevalence analysis.
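As a point of orientation for the PROBIT description documented in man/probit-method.Rd above, the short base-R sketch below computes wasting prevalence as the area under a normal curve below the usual WFHZ cut-offs. It is only an illustration of the idea on simulated z-scores, not the package's apply_probit_approach() implementation; the -2 and -3 cut-offs and the use of the observed mean and standard deviation are assumptions taken from the surrounding text, and the package may, for instance, fix the standard deviation at a reference value instead.

# Illustration only: PROBIT-style estimation of wasting prevalence as the
# cumulative normal probability below a cut-off, with the sample mean and
# standard deviation as inputs (simulated WFHZ values, not package code).
set.seed(123)
wfhz <- rnorm(500, mean = -0.65, sd = 1.1)
gam <- pnorm(-2, mean = mean(wfhz, na.rm = TRUE), sd = sd(wfhz, na.rm = TRUE))
sam <- pnorm(-3, mean = mean(wfhz, na.rm = TRUE), sd = sd(wfhz, na.rm = TRUE))
mam <- gam - sam
round(c(gam = gam, sam = sam, mam = mam), 3)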