Skip to content

Commit

Permalink
Merge pull request #22 from OxfordIHTM/dev
Browse files Browse the repository at this point in the history
check and recode functions
  • Loading branch information
ernestguevarra authored Apr 26, 2024
2 parents 6132c59 + a744fe6 commit 11ca00d
Show file tree
Hide file tree
Showing 12 changed files with 332 additions and 4 deletions.
5 changes: 5 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ Description: The World Health Organization's CoDEdit electronic tool is intended
License: GPL (>= 3)
Depends:
R (>= 2.10)
Imports:
dplyr,
methods,
rlang,
tibble
Suggests:
covr,
spelling
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Generated by roxygen2: do not edit by hand

export(cod_calculate_age)
export(cod_check_age)
export(cod_recode_sex)
export(get_age_values)
importFrom(dplyr,case_when)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(methods,is)
importFrom(rlang,.data)
importFrom(tibble,tibble)
75 changes: 75 additions & 0 deletions R/cod_calculate_age.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#'
#' Calculate age at death based on date of birth and date of death
#'
#' @param dob Date of birth. This should ideally be in standard ISO extended
#' date format of *"YYYY-MM-DD"* as specified in the default value for
#' `date_format`.
#' @param dod Date of death. This should ideally be in standard ISO extended
#' date format of *YYYY-MM_DD"* as specified in the default value for
#' `date_format`.
#' @param date_format Format for date values provided. Date formatting is
#' handled using [strptime()] hence this needs to be specified based on what
#' [strptime()] requires for its `format` argument. By default, this is set
#' to the standard ISO extended date format expressed as *"%Y-%m-%d"* which
#' corresponds to *"YYYY-MM-DD"*.
#' @param codedit Logical. Should output be based on the CoDEdit version 2
#' coding rules. Default to TRUE.
#'
#' @returns Values for age in days, months and years. IF `codedit`
#'
#' @examples
#' # example code
#'
#' @rdname cod_calculate_age
#' @export
#'

cod_calculate_age <- function(dob,
dod,
date_format = "%Y-%m-%d",
codedit = TRUE) {
## Set date format ----
if (!is(dob, "Date"))
dob <- as.Date(dob, format = date_format)

if (!is(dod, "Date"))
dob <- as.Date(dod, format = date_format)

## Calculate different age values for different age types ----
age_days <- as.numeric(dod - dob)
age_months <- as.numeric(age_days / (365.25 / 12))
age_years <- as.numeric(age_days / 365.25)

## Process output based on whether codedit ----
if (codedit) {
age_df <- tibble::tibble(
age_days = ifelse(age_days >= 28, NA, age_days),
age_months = ifelse(
age_months >= 1 & age_months < 12, age_months,
ifelse(
age_days >= 28 & age_days < 32, 1, NA
)
),
age_years = ifelse(age_years >= 1, age_years, NA)
)

age_df <- age_df |>
dplyr::mutate(
age_value = sum(
.data$age_days, .data$age_months, .data$age_years, na.rm = TRUE
) |>
as.integer(),
age_type = dplyr::case_when(
!is.na(.data$age_days) ~ "D",
!is.na(.data$age_months) ~ "M",
!is.na(.data$age_years) ~ "Y"
)
) |>
dplyr::select(.data$age_value, .data$age_type)
} else {
age_df <- tibble::tibble(age_days, age_months, age_years)
}

## Return age_df ----
age_df
}
89 changes: 89 additions & 0 deletions R/cod_check_age.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#'
#' Check age values in cause of death data based on CoDEdit rules
#'
#' @param age_value An integer value or vector of values for age based on the
#' CoDEdit rules.
#' @param age_type A vector of values for age type based on the CoDEdit rules.
#' This should either be "D" for age in days, "M" for age
#' in months, or "Y" for age in years. If values are different from these,
#' then `age_type_code` should be specified to correspond to the day, month,
#' and year values of `age_type`.
#' @param age_type_code A character or integer vector of 3 values that indicate
#' which values are to be considered pertaining to days (first value in the
#' vector), to months (second value in the vector), or years (third value
#' in the vector).
#'
#' @returns A tibble with number of rows equal to length of `age_value` and
#' two columns for age_check_score and age_check_note.
#'
#' @examples
#' cod_check_age(120, "Y")
#' cod_check_age(28, "D")
#' cod_check_age(32, "D")
#'
#' @rdname cod_check_age
#' @export
#'

cod_check_age <- function(age_value,
age_type,
age_type_code = c("D", "M", "Y")) {
## Check that age_value is of the correct class ----
if (is(age_value, "numeric"))
age_value <- as.integer(age_value)

if (!is(age_value, "integer"))
stop(
"`age_value` should be an integer. Please check and try again.",
call. = FALSE
)

## Recode age_type ----
age_type[age_type == age_type_code[1]] <- "D"
age_type[age_type == age_type_code[2]] <- "M"
age_type[age_type == age_type_code[3]] <- "Y"
age_type[!age_type %in% age_type_code] <- NA_character_

## Create age_score vector ----
age_check <- vector(mode = "integer", length = length(age_value))

## Classify errors/issues ----
if (age_type == "D") {
age_check <- ifelse(age_value >= 28 & age_value <= 31, 1, age_check)
age_check <- ifelse(age_value > 31, 2, age_check)
}

if (age_type == "M") {
age_check <- ifelse(age_value < 1, 3, age_check)
age_check <- ifelse(age_value >= 12, 4, age_check)
}

if (age_type == "Y") {
age_check <- ifelse(age_value < 1, 5, age_check)
age_check <- ifelse(age_value > 125, 6, age_check)
}

if (is.na(age_value)) age_check <- 7
if (is.na(age_type)) age_check <- 8

age_check_note <- cut(
x = age_check,
breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, Inf),
labels = c(
"No issues with age value and age type",
"Should probably be age value of 1 and age type of months (M)",
"Should probably be converted to age value of age type months (M)",
"Should probably be converted to age value of age type days (D)",
"Should probably be converted to age value of age type years (Y)",
"Should probably be converted to age value of age type months (M)",
"Age value is more than 125 years which is highly unlikely",
"Missing age value",
"Missing age type"
),
include.lowest = TRUE, right = FALSE
)


## Return age checks ----
tibble::tibble(age_check, age_check_note)
}
Empty file added R/cod_check_missing.R
Empty file.
37 changes: 37 additions & 0 deletions R/cod_recode_sex.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#'
#' Recode cause of death data on sex
#'
#' @param sex_value A character or integer value or vector of values signifying
#' the sex.
#' @param sex_code A character or integer vector of 2 values that indicate
#' which values are to be considered pertaining to males (first value in the
#' vector) or to females (second value in the vector).
#' @param codedit Logical. Should output be based on the CoDEdit version 2
#' coding rules. Default to TRUE.
#'
#' @returns An integer value or vector of values containing either 1 for males
#' or 2 for females. If `codedit = TRUE`, values not equal to the `sex_code`
#' values are coded as 9 (integer). Otherwise, it is coded as NA_integer_.
#'
#' @examples
#' cod_recode_sex(
#' sex_value = c(rep("m", 2), rep("f", 3)),
#' sex_code = c("m", "f")
#' )
#'
#' @rdname cod_recode_sex
#' @export
#'

cod_recode_sex <- function(sex_value, sex_code = c(1, 2), codedit = TRUE) {
if (codedit) {
sex_value[!sex_value %in% sex_code] <- 9L
} else {
sex_value[!sex_value %in% sex_code] <- NA_integer_
}

sex_value[sex_value == sex_code[1]] <- 1L
sex_value[sex_value == sex_code[2]] <- 2L

as.integer(sex_value)
}
4 changes: 4 additions & 0 deletions R/codeditr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,9 @@
#' @docType package
#' @keywords internal
#' @name codeditr
#' @importFrom methods is
#' @importFrom tibble tibble
#' @importFrom dplyr mutate select case_when
#' @importFrom rlang .data
#'
"_PACKAGE"
8 changes: 4 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ get_age_values <- function(age_value, age_type = c("D", "M", "Y")) {
"Age value of greater than 100 for age type `year` has been provided. Please double check that these are correct inputs."
)

if (age_value > 31 & age_type == "D")
if (age_value >= 28 & age_type == "D")
warning(
"Age value of greater than 31 for age type `day` has been provided. Please double check that these are correct inputs."
"Age value of 28 or greater for age type `day` has been provided. Please double check that these are correct inputs."
)

if (age_value > 12 & age_type == "M")
if (age_value >= 12 & age_type == "M")
warning(
"Age value of greater than 12 for age type `month` has been provided. Please double check that these are correct inputs."
"Age value of 12 or greater for age type `month` has been provided. Please double check that these are correct inputs."
)

if (age_type == "D")
Expand Down
3 changes: 3 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
CMD
CoDEdit
Codecov
DD
FreeId
ICD
IHTM
Expand All @@ -10,7 +11,9 @@ ORCID
Udoh
Umanah
WIP
YYYY
codedit
int
tibble
www
’s
36 changes: 36 additions & 0 deletions man/cod_calculate_age.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions man/cod_check_age.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/cod_recode_sex.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 11ca00d

Please sign in to comment.