From a744fe62a75f5b66daddb3718a577f27ed9681d9 Mon Sep 17 00:00:00 2001 From: Ernest Guevarra Date: Fri, 26 Apr 2024 13:00:38 +0100 Subject: [PATCH] check and recode functions --- DESCRIPTION | 5 +++ NAMESPACE | 9 ++++ R/cod_calculate_age.R | 75 +++++++++++++++++++++++++++++++++ R/cod_check_age.R | 89 ++++++++++++++++++++++++++++++++++++++++ R/cod_check_missing.R | 0 R/cod_recode_sex.R | 37 +++++++++++++++++ R/codeditr-package.R | 4 ++ R/utils.R | 8 ++-- inst/WORDLIST | 3 ++ man/cod_calculate_age.Rd | 36 ++++++++++++++++ man/cod_check_age.Rd | 36 ++++++++++++++++ man/cod_recode_sex.Rd | 34 +++++++++++++++ 12 files changed, 332 insertions(+), 4 deletions(-) create mode 100644 R/cod_calculate_age.R create mode 100644 R/cod_check_age.R create mode 100644 R/cod_check_missing.R create mode 100644 R/cod_recode_sex.R create mode 100644 man/cod_calculate_age.Rd create mode 100644 man/cod_check_age.Rd create mode 100644 man/cod_recode_sex.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2f3724e..0deb298 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,11 @@ Description: The World Health Organization's CoDEdit electronic tool is intended License: GPL (>= 3) Depends: R (>= 2.10) +Imports: + dplyr, + methods, + rlang, + tibble Suggests: covr, spelling diff --git a/NAMESPACE b/NAMESPACE index 1a47391..8da9f43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,12 @@ # Generated by roxygen2: do not edit by hand +export(cod_calculate_age) +export(cod_check_age) +export(cod_recode_sex) export(get_age_values) +importFrom(dplyr,case_when) +importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(methods,is) +importFrom(rlang,.data) +importFrom(tibble,tibble) diff --git a/R/cod_calculate_age.R b/R/cod_calculate_age.R new file mode 100644 index 0000000..0f54084 --- /dev/null +++ b/R/cod_calculate_age.R @@ -0,0 +1,75 @@ +#' +#' Calculate age at death based on date of birth and date of death +#' +#' @param dob Date of birth. This should ideally be in standard ISO extended +#' date format of *"YYYY-MM-DD"* as specified in the default value for +#' `date_format`. +#' @param dod Date of death. This should ideally be in standard ISO extended +#' date format of *YYYY-MM_DD"* as specified in the default value for +#' `date_format`. +#' @param date_format Format for date values provided. Date formatting is +#' handled using [strptime()] hence this needs to be specified based on what +#' [strptime()] requires for its `format` argument. By default, this is set +#' to the standard ISO extended date format expressed as *"%Y-%m-%d"* which +#' corresponds to *"YYYY-MM-DD"*. +#' @param codedit Logical. Should output be based on the CoDEdit version 2 +#' coding rules. Default to TRUE. +#' +#' @returns Values for age in days, months and years. IF `codedit` +#' +#' @examples +#' # example code +#' +#' @rdname cod_calculate_age +#' @export +#' + +cod_calculate_age <- function(dob, + dod, + date_format = "%Y-%m-%d", + codedit = TRUE) { + ## Set date format ---- + if (!is(dob, "Date")) + dob <- as.Date(dob, format = date_format) + + if (!is(dod, "Date")) + dob <- as.Date(dod, format = date_format) + + ## Calculate different age values for different age types ---- + age_days <- as.numeric(dod - dob) + age_months <- as.numeric(age_days / (365.25 / 12)) + age_years <- as.numeric(age_days / 365.25) + + ## Process output based on whether codedit ---- + if (codedit) { + age_df <- tibble::tibble( + age_days = ifelse(age_days >= 28, NA, age_days), + age_months = ifelse( + age_months >= 1 & age_months < 12, age_months, + ifelse( + age_days >= 28 & age_days < 32, 1, NA + ) + ), + age_years = ifelse(age_years >= 1, age_years, NA) + ) + + age_df <- age_df |> + dplyr::mutate( + age_value = sum( + .data$age_days, .data$age_months, .data$age_years, na.rm = TRUE + ) |> + as.integer(), + age_type = dplyr::case_when( + !is.na(.data$age_days) ~ "D", + !is.na(.data$age_months) ~ "M", + !is.na(.data$age_years) ~ "Y" + ) + ) |> + dplyr::select(.data$age_value, .data$age_type) + } else { + age_df <- tibble::tibble(age_days, age_months, age_years) + } + + ## Return age_df ---- + age_df +} diff --git a/R/cod_check_age.R b/R/cod_check_age.R new file mode 100644 index 0000000..1d29f1a --- /dev/null +++ b/R/cod_check_age.R @@ -0,0 +1,89 @@ +#' +#' Check age values in cause of death data based on CoDEdit rules +#' +#' @param age_value An integer value or vector of values for age based on the +#' CoDEdit rules. +#' @param age_type A vector of values for age type based on the CoDEdit rules. +#' This should either be "D" for age in days, "M" for age +#' in months, or "Y" for age in years. If values are different from these, +#' then `age_type_code` should be specified to correspond to the day, month, +#' and year values of `age_type`. +#' @param age_type_code A character or integer vector of 3 values that indicate +#' which values are to be considered pertaining to days (first value in the +#' vector), to months (second value in the vector), or years (third value +#' in the vector). +#' +#' @returns A tibble with number of rows equal to length of `age_value` and +#' two columns for age_check_score and age_check_note. +#' +#' @examples +#' cod_check_age(120, "Y") +#' cod_check_age(28, "D") +#' cod_check_age(32, "D") +#' +#' @rdname cod_check_age +#' @export +#' + +cod_check_age <- function(age_value, + age_type, + age_type_code = c("D", "M", "Y")) { + ## Check that age_value is of the correct class ---- + if (is(age_value, "numeric")) + age_value <- as.integer(age_value) + + if (!is(age_value, "integer")) + stop( + "`age_value` should be an integer. Please check and try again.", + call. = FALSE + ) + + ## Recode age_type ---- + age_type[age_type == age_type_code[1]] <- "D" + age_type[age_type == age_type_code[2]] <- "M" + age_type[age_type == age_type_code[3]] <- "Y" + age_type[!age_type %in% age_type_code] <- NA_character_ + + ## Create age_score vector ---- + age_check <- vector(mode = "integer", length = length(age_value)) + + ## Classify errors/issues ---- + if (age_type == "D") { + age_check <- ifelse(age_value >= 28 & age_value <= 31, 1, age_check) + age_check <- ifelse(age_value > 31, 2, age_check) + } + + if (age_type == "M") { + age_check <- ifelse(age_value < 1, 3, age_check) + age_check <- ifelse(age_value >= 12, 4, age_check) + } + + if (age_type == "Y") { + age_check <- ifelse(age_value < 1, 5, age_check) + age_check <- ifelse(age_value > 125, 6, age_check) + } + + if (is.na(age_value)) age_check <- 7 + if (is.na(age_type)) age_check <- 8 + + age_check_note <- cut( + x = age_check, + breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, Inf), + labels = c( + "No issues with age value and age type", + "Should probably be age value of 1 and age type of months (M)", + "Should probably be converted to age value of age type months (M)", + "Should probably be converted to age value of age type days (D)", + "Should probably be converted to age value of age type years (Y)", + "Should probably be converted to age value of age type months (M)", + "Age value is more than 125 years which is highly unlikely", + "Missing age value", + "Missing age type" + ), + include.lowest = TRUE, right = FALSE + ) + + + ## Return age checks ---- + tibble::tibble(age_check, age_check_note) +} diff --git a/R/cod_check_missing.R b/R/cod_check_missing.R new file mode 100644 index 0000000..e69de29 diff --git a/R/cod_recode_sex.R b/R/cod_recode_sex.R new file mode 100644 index 0000000..f8afaf4 --- /dev/null +++ b/R/cod_recode_sex.R @@ -0,0 +1,37 @@ +#' +#' Recode cause of death data on sex +#' +#' @param sex_value A character or integer value or vector of values signifying +#' the sex. +#' @param sex_code A character or integer vector of 2 values that indicate +#' which values are to be considered pertaining to males (first value in the +#' vector) or to females (second value in the vector). +#' @param codedit Logical. Should output be based on the CoDEdit version 2 +#' coding rules. Default to TRUE. +#' +#' @returns An integer value or vector of values containing either 1 for males +#' or 2 for females. If `codedit = TRUE`, values not equal to the `sex_code` +#' values are coded as 9 (integer). Otherwise, it is coded as NA_integer_. +#' +#' @examples +#' cod_recode_sex( +#' sex_value = c(rep("m", 2), rep("f", 3)), +#' sex_code = c("m", "f") +#' ) +#' +#' @rdname cod_recode_sex +#' @export +#' + +cod_recode_sex <- function(sex_value, sex_code = c(1, 2), codedit = TRUE) { + if (codedit) { + sex_value[!sex_value %in% sex_code] <- 9L + } else { + sex_value[!sex_value %in% sex_code] <- NA_integer_ + } + + sex_value[sex_value == sex_code[1]] <- 1L + sex_value[sex_value == sex_code[2]] <- 2L + + as.integer(sex_value) +} diff --git a/R/codeditr-package.R b/R/codeditr-package.R index fed7265..7a520b9 100644 --- a/R/codeditr-package.R +++ b/R/codeditr-package.R @@ -13,5 +13,9 @@ #' @docType package #' @keywords internal #' @name codeditr +#' @importFrom methods is +#' @importFrom tibble tibble +#' @importFrom dplyr mutate select case_when +#' @importFrom rlang .data #' "_PACKAGE" diff --git a/R/utils.R b/R/utils.R index 0b5633c..f7ca46f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -20,14 +20,14 @@ get_age_values <- function(age_value, age_type = c("D", "M", "Y")) { "Age value of greater than 100 for age type `year` has been provided. Please double check that these are correct inputs." ) - if (age_value > 31 & age_type == "D") + if (age_value >= 28 & age_type == "D") warning( - "Age value of greater than 31 for age type `day` has been provided. Please double check that these are correct inputs." + "Age value of 28 or greater for age type `day` has been provided. Please double check that these are correct inputs." ) - if (age_value > 12 & age_type == "M") + if (age_value >= 12 & age_type == "M") warning( - "Age value of greater than 12 for age type `month` has been provided. Please double check that these are correct inputs." + "Age value of 12 or greater for age type `month` has been provided. Please double check that these are correct inputs." ) if (age_type == "D") diff --git a/inst/WORDLIST b/inst/WORDLIST index 6a78070..adb2086 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,6 +1,7 @@ CMD CoDEdit Codecov +DD FreeId ICD IHTM @@ -10,7 +11,9 @@ ORCID Udoh Umanah WIP +YYYY codedit int +tibble www ’s diff --git a/man/cod_calculate_age.Rd b/man/cod_calculate_age.Rd new file mode 100644 index 0000000..9bc9c0c --- /dev/null +++ b/man/cod_calculate_age.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cod_calculate_age.R +\name{cod_calculate_age} +\alias{cod_calculate_age} +\title{Calculate age at death based on date of birth and date of death} +\usage{ +cod_calculate_age(dob, dod, date_format = "\%Y-\%m-\%d", codedit = TRUE) +} +\arguments{ +\item{dob}{Date of birth. This should ideally be in standard ISO extended +date format of \emph{"YYYY-MM-DD"} as specified in the default value for +\code{date_format}.} + +\item{dod}{Date of death. This should ideally be in standard ISO extended +date format of \emph{YYYY-MM_DD"} as specified in the default value for +\code{date_format}.} + +\item{date_format}{Format for date values provided. Date formatting is +handled using \code{\link[=strptime]{strptime()}} hence this needs to be specified based on what +\code{\link[=strptime]{strptime()}} requires for its \code{format} argument. By default, this is set +to the standard ISO extended date format expressed as \emph{"\%Y-\%m-\%d"} which +corresponds to \emph{"YYYY-MM-DD"}.} + +\item{codedit}{Logical. Should output be based on the CoDEdit version 2 +coding rules. Default to TRUE.} +} +\value{ +Values for age in days, months and years. IF \code{codedit} +} +\description{ +Calculate age at death based on date of birth and date of death +} +\examples{ +# example code + +} diff --git a/man/cod_check_age.Rd b/man/cod_check_age.Rd new file mode 100644 index 0000000..7147fe0 --- /dev/null +++ b/man/cod_check_age.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cod_check_age.R +\name{cod_check_age} +\alias{cod_check_age} +\title{Check age values in cause of death data based on CoDEdit rules} +\usage{ +cod_check_age(age_value, age_type, age_type_code = c("D", "M", "Y")) +} +\arguments{ +\item{age_value}{An integer value or vector of values for age based on the +CoDEdit rules.} + +\item{age_type}{A vector of values for age type based on the CoDEdit rules. +This should either be "D" for age in days, "M" for age +in months, or "Y" for age in years. If values are different from these, +then \code{age_type_code} should be specified to correspond to the day, month, +and year values of \code{age_type}.} + +\item{age_type_code}{A character or integer vector of 3 values that indicate +which values are to be considered pertaining to days (first value in the +vector), to months (second value in the vector), or years (third value +in the vector).} +} +\value{ +A tibble with number of rows equal to length of \code{age_value} and +two columns for age_check_score and age_check_note. +} +\description{ +Check age values in cause of death data based on CoDEdit rules +} +\examples{ +cod_check_age(120, "Y") +cod_check_age(28, "D") +cod_check_age(32, "D") + +} diff --git a/man/cod_recode_sex.Rd b/man/cod_recode_sex.Rd new file mode 100644 index 0000000..e317664 --- /dev/null +++ b/man/cod_recode_sex.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cod_recode_sex.R +\name{cod_recode_sex} +\alias{cod_recode_sex} +\title{Recode cause of death data on sex} +\usage{ +cod_recode_sex(sex_value, sex_code = c(1, 2), codedit = TRUE) +} +\arguments{ +\item{sex_value}{A character or integer value or vector of values signifying +the sex.} + +\item{sex_code}{A character or integer vector of 2 values that indicate +which values are to be considered pertaining to males (first value in the +vector) or to females (second value in the vector).} + +\item{codedit}{Logical. Should output be based on the CoDEdit version 2 +coding rules. Default to TRUE.} +} +\value{ +An integer value or vector of values containing either 1 for males +or 2 for females. If \code{codedit = TRUE}, values not equal to the \code{sex_code} +values are coded as 9 (integer). Otherwise, it is coded as NA_integer_. +} +\description{ +Recode cause of death data on sex +} +\examples{ +cod_recode_sex( + sex_value = c(rep("m", 2), rep("f", 3)), + sex_code = c("m", "f") +) + +}