Merge branch 'master' into #51

ellessenne · Apr 6, 2022 · 80674df · 80674df
2 parents 91ad241 + cd556d9
commit 80674df
Show file tree

Hide file tree

Showing 52 changed files with 419 additions and 180 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Type: Package
 Package: comorbidity
-Version: 1.0.0.9000
+Version: 1.0.2
 Title: Computing Comorbidity Scores
 Description: Computing comorbidity indices and scores such as the weighted Charlson
   score (Charlson, 1987 <doi:10.1016/0021-9681(87)90171-8>) and the Elixhauser

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,10 @@
-# comorbidity (development version)
+# comorbidity 1.0.2
+
+* The `copd` comorbidity for the Charlson index has been renamed to `cpd`, and the `ami` comorbidity for the Charlson index has been renamed to `mi` (#53, thanks @DrYan1102). Please be aware that this might break some old code if you were selecting comorbidities by name.
+
+* New dataset: ICD10-CM, 2022 version, named `icd10cm_2022`.
+
+# comorbidity 1.0.1
 
 * The startup message pointing out changes in the API now appears less often (40% probability).
 

diff --git a/R/comorbidity.R b/R/comorbidity.R
@@ -32,12 +32,12 @@
 #'
 #' For the Charlson score, the following variables are included in the dataset:
 #' * The `id` variable as defined by the user;
-#' * `ami`, for acute myocardial infarction;
+#' * `mi`, for myocardial infarction;
 #' * `chf`, for congestive heart failure;
 #' * `pvd`, for peripheral vascular disease;
 #' * `cevd`, for cerebrovascular disease;
 #' * `dementia`, for dementia;
-#' * `copd`, chronic obstructive pulmonary disease;
+#' * `cpd`, for chronic pulmonary disease;
 #' * `rheumd`, for rheumatoid disease;
 #' * `pud`, for peptic ulcer disease;
 #' * `mld`, for mild liver disease;
@@ -48,7 +48,8 @@
 #' * `canc`, for cancer (any malignancy);
 #' * `msld`, for moderate or severe liver disease;
 #' * `metacanc`, for metastatic solid tumour;
-#' * `aids`, for AIDS/HIV;
+#' * `aids`, for AIDS/HIV.
+#' Please note that we combine "chronic obstructive pulmonary disease" and "chronic other pulmonary disease" for the Swedish version of the Charlson index, for comparability (and compatibility) with other definitions/implementations.
 #'
 #' Conversely, for the Elixhauser score the dataset contains the following variables:
 #' * The `id` variable as defined by the user;
@@ -178,42 +179,8 @@ comorbidity <- function(x, id, code, map, assign0, labelled = TRUE, tidy.codes =
   ### Turn x into a DT
   data.table::setDT(x)
 
-  ### Deal with missing codes
-  mvb <- id
-  backup <- x[, ..mvb]
-  backup <- unique(backup)
-  x <- stats::na.omit(x)
-  # If there are no rows left (= user passed missing data only), then error:
-  if (nrow(x) == 0) stop("No non-missing data, please check your input data", call. = FALSE)
-
-  ### Get list of unique codes used in dataset that match comorbidities
-  ..cd <- unique(x[[code]])
-  loc <- sapply(X = regex, FUN = function(p) stringi::stri_subset_regex(str = ..cd, pattern = p))
-  loc <- utils::stack(loc)
-  data.table::setDT(loc)
-  data.table::setnames(x = loc, new = c(code, "ind"))
-
-  ### Merge list with original data.table (data.frame)
-  x <- merge(x, loc, all.x = TRUE, allow.cartesian = TRUE, by = code)
-  x[, (code) := NULL]
-  x <- unique(x)
-
-  ### Spread wide
-  mv <- c(id, "ind")
-  xin <- x[, ..mv]
-  xin[, value := 1L]
-  x <- data.table::dcast.data.table(xin, stats::as.formula(paste(id, "~ ind")), fill = 0)
-  if (!is.null(x[["NA"]])) x[, `NA` := NULL]
-
-  ### Restore IDs
-  x <- merge(x, backup, by = id, all.y = TRUE, )
-  data.table::setnafill(x = x, type = "const", fill = 0L)
-
-  ### Add missing columns
-  for (col in names(regex)) {
-    if (is.null(x[[col]])) x[, (col) := 0L]
-  }
-  data.table::setcolorder(x, c(id, names(regex)))
+  ### Match comorbidity mapping
+  x <- .matchit(x = x, id = id, code = code, regex = regex)
 
   ### Assign zero-values to avoid double-counting comorbidities, if requested
   if (assign0) {

diff --git a/R/data.R b/R/data.R
@@ -48,6 +48,19 @@
 #' @source CMS.gov Website: https://www.cms.gov/Medicare/Coding/ICD9ProviderDiagnosticCodes/codes.html
 "icd9_2015"
 
+#' @title ICD-10-CM Diagnostic Codes, 2022 Version
+#'
+#' @description A dataset containing the 2022 version of the ICD10-CM coding system.
+#'
+#' @format A data frame with 72,750 rows and 2 variables:
+#' \describe{
+#'   \item{Code}{ICD-10-CM diagnostic code}
+#'   \item{Description}{Description of each code}
+#' }
+#'
+#' @note The R code used to download and process the dataset from the CDC website is available [here](https://raw.githubusercontent.com/ellessenne/comorbidity/master/data-raw/make-data.R).
+"icd10cm_2022"
+
 #' @title ICD-10-CM Diagnostic Codes, 2018 Version
 #'
 #' @description A dataset containing the 2018 version of the ICD10-CM coding system.

diff --git a/R/labelled.R b/R/labelled.R
@@ -1,7 +1,7 @@
 #' @keywords internal
 .labelled <- function(x, map) {
   attr(x, "variable.labels") <- if (grepl("^charlson_", map)) {
-    c("ID", "Myocardial infarction", "Congestive heart failure", "Peripheral vascular disease", "Cerebrovascular disease", "Dementia", "Chronic obstructive pulmonary disease", "Rheumatoid disease", "Peptic ulcer disease", "Mild liver disease", "Diabetes without chronic complications", "Diabetes with chronic complications", "Hemiplegia or paraplegia", "Renal disease", "Cancer (any malignancy)", "Moderate or severe liver disease", "Metastatic solid tumour", "AIDS/HIV")
+    c("ID", "Myocardial infarction", "Congestive heart failure", "Peripheral vascular disease", "Cerebrovascular disease", "Dementia", "Chronic pulmonary disease", "Rheumatoid disease", "Peptic ulcer disease", "Mild liver disease", "Diabetes without chronic complications", "Diabetes with chronic complications", "Hemiplegia or paraplegia", "Renal disease", "Cancer (any malignancy)", "Moderate or severe liver disease", "Metastatic solid tumour", "AIDS/HIV")
   } else {
     c("ID", "Congestive heart failure", "Cardiac arrhythmias", "Valvular disease", "Pulmonary circulation disorders", "Peripheral vascular disorders", "Hypertension, uncomplicated", "Hypertension, complicated", "Paralysis", "Other neurological disorders", "Chronic pulmonary disease", "Diabetes, uncomplicated", "Diabetes, complicated", "Hypothyroidism", "Renal failure", "Liver disease", "Peptic ulcer disease excluding bleeding", "AIDS/HIV", "Lymphoma", "Metastatic cancer", "Solid tumour without metastasis", "Rheumatoid artritis/collaged vascular disease", "Coagulopathy", "Obesity", "Weight loss", "Fluid and electrolyte disorders", "Blood loss anaemia", "Deficiency anaemia", "Alcohol abuse", "Drug abuse", "Psychoses", "Depression")
   }

diff --git a/R/matchit.R b/R/matchit.R
@@ -0,0 +1,42 @@
+#' @keywords internal
+.matchit <- function(x, id, code, regex) {
+  ### Deal with missing codes
+  mvb <- id
+  backup <- x[, ..mvb]
+  backup <- unique(backup)
+  x <- stats::na.omit(x)
+  # If there are no rows left (= user passed missing data only), then error:
+  if (nrow(x) == 0) stop("No non-missing data, please check your input data", call. = FALSE)
+
+  ### Get list of unique codes used in dataset that match comorbidities
+  ..cd <- unique(x[[code]])
+  loc <- sapply(X = regex, FUN = function(p) stringi::stri_subset_regex(str = ..cd, pattern = p))
+  loc <- utils::stack(loc)
+  data.table::setDT(loc)
+  data.table::setnames(x = loc, new = c(code, "ind"))
+
+  ### Merge list with original data.table (data.frame)
+  x <- merge(x, loc, all.x = TRUE, allow.cartesian = TRUE, by = code)
+  x[, (code) := NULL]
+  x <- unique(x)
+
+  ### Spread wide
+  mv <- c(id, "ind")
+  xin <- x[, ..mv]
+  xin[, value := 1L]
+  x <- data.table::dcast.data.table(xin, stats::as.formula(paste(id, "~ ind")), fill = 0)
+  if (!is.null(x[["NA"]])) x[, `NA` := NULL]
+
+  ### Restore IDs
+  x <- merge(x, backup, by = id, all.y = TRUE, )
+  data.table::setnafill(x = x, type = "const", fill = 0L)
+
+  ### Add missing columns
+  for (col in names(regex)) {
+    if (is.null(x[[col]])) x[, (col) := 0L]
+  }
+  data.table::setcolorder(x, c(id, names(regex)))
+
+  ### Return
+  return(x)
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/zzz.R b/R/zzz.R
@@ -1,7 +1,7 @@
 .onAttach <- function(libname, pkgname) {
   tick <- stats::rbinom(n = 1, size = 1, prob = 0.4)
   if (tick == 1) {
-    packageStartupMessage("This is {comorbidity} version 1.0.0.9000.")
+    packageStartupMessage("This is {comorbidity} version 1.0.2.")
     packageStartupMessage("A lot has changed since the last release on CRAN, please check-out breaking changes here:")
     packageStartupMessage("-> https://ellessenne.github.io/comorbidity/articles/C-changes.html")
   }

diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 
 # The {comorbidity} Package: Computing Comorbidity Scores <img src="man/figures/hex.png" width = "150" align="right" />
 
-Last updated: 2022-01-17
+Last updated: 2022-04-06
 
 <!-- badges: start -->
 
@@ -164,10 +164,10 @@ We could compute the Charlson comorbidity domains:
 ``` r
 charlson <- comorbidity(x = x, id = "id", code = "code", map = "charlson_icd10_quan", assign0 = FALSE)
 charlson
-##   id ami chf pvd cevd dementia copd rheumd pud mld diab diabwc hp rend canc msld metacanc aids
-## 1  1   0   0   0    0        0    0      0   0   0    0      0  0    0    1    0        0    1
-## 2  2   0   0   0    0        0    0      0   0   0    0      0  0    0    1    0        0    0
-## 3  3   0   0   0    0        0    0      0   0   0    0      0  0    0    0    0        0    0
+##   id mi chf pvd cevd dementia cpd rheumd pud mld diab diabwc hp rend canc msld metacanc aids
+## 1  1  0   0   0    0        0   0      0   0   0    0      0  0    0    1    0        0    1
+## 2  2  0   0   0    0        0   0      0   0   0    0      0  0    0    1    0        0    0
+## 3  3  0   0   0    0        0   0      0   0   0    0      0  0    0    0    0        0    0
 ```
 
 We set the `assign0` argument to `FALSE` to not apply a hierarchy of
@@ -230,12 +230,12 @@ once again:
 ``` r
 charlson9 <- comorbidity(x = x, id = "id", code = "code", map = "charlson_icd9_quan", assign0 = FALSE)
 charlson9
-##   id ami chf pvd cevd dementia copd rheumd pud mld diab diabwc hp rend canc msld metacanc aids
-## 1  1   0   0   1    0        0    0      0   0   0    0      0  0    0    1    0        0    0
-## 2  2   0   0   0    1        0    0      0   0   0    0      0  0    0    0    0        0    0
-## 3  3   0   0   0    0        0    0      0   1   0    0      0  0    0    0    0        0    0
-## 4  4   0   0   1    1        0    0      0   0   0    0      0  0    0    1    0        0    0
-## 5  5   0   0   0    0        0    0      0   0   0    0      0  0    0    1    0        0    0
+##   id mi chf pvd cevd dementia cpd rheumd pud mld diab diabwc hp rend canc msld metacanc aids
+## 1  1  0   0   1    0        0   0      0   0   0    0      0  0    0    1    0        0    0
+## 2  2  0   0   0    1        0   0      0   0   0    0      0  0    0    0    0        0    0
+## 3  3  0   0   0    0        0   0      0   1   0    0      0  0    0    0    0        0    0
+## 4  4  0   0   1    1        0   0      0   0   0    0      0  0    0    1    0        0    0
+## 5  5  0   0   0    0        0   0      0   0   0    0      0  0    0    1    0        0    0
 ```
 
 ``` r

diff --git a/data-raw/make-data.R b/data-raw/make-data.R
@@ -5,6 +5,8 @@ library(stringr)
 library(devtools)
 library(usethis)
 library(haven)
+library(stringi)
+library(tidyverse)
 
 ########################################################################################################################
 ### Dataset #1: ICD-10 codes, 2009 version
@@ -40,7 +42,7 @@ usethis::use_data(icd10_2009, overwrite = TRUE)
 ########################################################################################################################
 ### Dataset #2: ICD-10 codes, 2011 version
 # Download dataset
-download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10/allvalid2011 (detailed%20titles%20headings).xls", destfile = "data-raw/allvalid2011.xls")
+download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10/allvalid2011%20%28detailed%20titles%20headings%29.xls", destfile = "data-raw/allvalid2011.xls")
 
 # Read data in Excel format
 icd10_2011 <- readxl::read_excel(
@@ -144,7 +146,31 @@ australia10 <- haven::zap_labels(australia10)
 # Save data in R format
 usethis::use_data(australia10, overwrite = TRUE)
 
+########################################################################################################################
+### Dataset #7 ICD-10-CM codes, 2022 version
+download.file(url = "https://www.cms.gov/files/zip/2022-code-descriptions-tabular-order-updated-02012022.zip", destfile = "data-raw/tmp.zip")
+unzip(zipfile = "data-raw/tmp.zip", exdir = "data-raw")
+
+# Read files
+icd10cm_2022 <- readLines(con = "data-raw/Code Descriptions/icd10cm_codes_2022.txt")
+where_to_split <- stri_locate_first(str = icd10cm_2022, regex = " ")
+icd10cm_2022 <- data.frame(
+  Code = stri_sub(icd10cm_2022, from = 1L, to = where_to_split[2, ]),
+  Description = stri_sub(icd10cm_2022, from = where_to_split[2, ])
+)
+icd10cm_2022 <- mutate(
+  icd10cm_2022,
+  Code = stri_trim_both(Code),
+  Description = stri_trim_both(Description)
+)
+
+# Save data in R format
+usethis::use_data(icd10cm_2022, overwrite = TRUE)
+
 ########################################################################################################################
 ### Remove unnecessary files
+lf <- list.files(path = "data-raw/Code Descriptions", full.names = TRUE, pattern = ".xls|.txt|.zip|.pdf")
+invisible(file.remove(lf))
+invisible(file.remove("data-raw/Code Descriptions/"))
 lf <- list.files(path = "data-raw", full.names = TRUE, pattern = ".xls|.txt|.zip|.pdf")
 invisible(file.remove(lf))
diff --git a/data-raw/make-mapping.R b/data-raw/make-mapping.R
@@ -4,12 +4,12 @@
 # Charlson score, ICD9
 .tmpn <- "charlson_icd9_quan"
 .maps[[.tmpn]] <- list()
-.maps[[.tmpn]][["ami"]] <- c("410", "412")
+.maps[[.tmpn]][["mi"]] <- c("410", "412")
 .maps[[.tmpn]][["chf"]] <- c("39891", "40201", "40211", "40291", "40401", "40403", "40411", "40413", "40491", "40493", "4254", "4255", "4256", "4257", "4258", "4259", "428")
 .maps[[.tmpn]][["pvd"]] <- c("0930", "4373", "440", "441", "4431", "4432", "4433", "4434", "4435", "4436", "4437", "4438", "4439", "4471", "5571", "5579", "V434")
 .maps[[.tmpn]][["cevd"]] <- c("36234", "430", "431", "432", "433", "434", "435", "436", "437", "438")
 .maps[[.tmpn]][["dementia"]] <- c("290", "2941", "3312")
-.maps[[.tmpn]][["copd"]] <- c("4168", "4169", "490", "491", "492", "493", "494", "495", "496", "497", "498", "499", "500", "501", "502", "503", "504", "505", "5064", "5081", "5088")
+.maps[[.tmpn]][["cpd"]] <- c("4168", "4169", "490", "491", "492", "493", "494", "495", "496", "497", "498", "499", "500", "501", "502", "503", "504", "505", "5064", "5081", "5088")
 .maps[[.tmpn]][["rheumd"]] <- c("4465", "7100", "7101", "7102", "7103", "7104", "7140", "7141", "7142", "7148", "725")
 .maps[[.tmpn]][["pud"]] <- c("531", "532", "533", "534")
 .maps[[.tmpn]][["mld"]] <- c("07022", "07023", "07032", "07033", "07044", "07054", "0706", "0709", "570", "571", "5733", "5734", "5738", "5739", "V427")
@@ -25,12 +25,12 @@
 # Charlson score, ICD10
 .tmpn <- "charlson_icd10_quan"
 .maps[[.tmpn]] <- list()
-.maps[[.tmpn]][["ami"]] <- c("I21", "I22", "I252")
+.maps[[.tmpn]][["mi"]] <- c("I21", "I22", "I252")
 .maps[[.tmpn]][["chf"]] <- c("I099", "I110", "I130", "I132", "I255", "I420", "I425", "I426", "I427", "I428", "I429", "I43", "I50", "P290")
 .maps[[.tmpn]][["pvd"]] <- c("I70", "I71", "I731", "I738", "I739", "I771", "I790", "I792", "K551", "K558", "K559", "Z958", "Z959")
 .maps[[.tmpn]][["cevd"]] <- c("G45", "G46", "H340", "I60", "I61", "I62", "I63", "I64", "I65", "I66", "I67", "I68", "I69")
 .maps[[.tmpn]][["dementia"]] <- c("F00", "F01", "F02", "F03", "F051", "G30", "G311")
-.maps[[.tmpn]][["copd"]] <- c("I278", "I279", "J40", "J41", "J42", "J43", "J44", "J45", "J46", "J47", "J60", "J61", "J62", "J63", "J64", "J65", "J66", "J67", "J684", "J701", "J703")
+.maps[[.tmpn]][["cpd"]] <- c("I278", "I279", "J40", "J41", "J42", "J43", "J44", "J45", "J46", "J47", "J60", "J61", "J62", "J63", "J64", "J65", "J66", "J67", "J684", "J701", "J703")
 .maps[[.tmpn]][["rheumd"]] <- c("M05", "M06", "M315", "M32", "M33", "M34", "M351", "M353", "M360")
 .maps[[.tmpn]][["pud"]] <- c("K25", "K26", "K27", "K28")
 .maps[[.tmpn]][["mld"]] <- c("B18", "K700", "K701", "K702", "K703", "K709", "K713", "K714", "K715", "K717", "K73", "K74", "K760", "K762", "K763", "K764", "K768", "K769", "Z944")
@@ -46,12 +46,12 @@
 # Charlson score, ICD10, Swedish version
 .tmpn <- "charlson_icd10_se"
 .maps[[.tmpn]] <- list()
-.maps[[.tmpn]][["ami"]] <- c("I21", "I22", "I252")
+.maps[[.tmpn]][["mi"]] <- c("I21", "I22", "I252")
 .maps[[.tmpn]][["chf"]] <- c("I110", "I130", "I132", "I255", "I420", "I426", "I427", "I428", "I429", "I43", "I50")
 .maps[[.tmpn]][["pvd"]] <- c("I70", "I71", "I731", "I738", "I771", "I790", "I792", "K55")
 .maps[[.tmpn]][["cevd"]] <- c("G45", "I60", "I61", "I62", "I63", "I64", "I67", "I69")
 .maps[[.tmpn]][["dementia"]] <- c("F00", "F01", "F02", "F03", "F051", "G30", "G311", "G319")
-.maps[[.tmpn]][["copd"]] <- c("J43", "J44", "J41", "J42", "J45", "J46", "J47", "J60", "J61", "J62", "J63", "J64", "J65", "J66", "J67", "J68", "J69", "J70")
+.maps[[.tmpn]][["cpd"]] <- c("J43", "J44", "J41", "J42", "J45", "J46", "J47", "J60", "J61", "J62", "J63", "J64", "J65", "J66", "J67", "J68", "J69", "J70")
 .maps[[.tmpn]][["rheumd"]] <- c("M05", "M06", "M123", "M070", "M071", "M072", "M073", "M08", "M13", "M30", "M313", "M314", "M315", "M316", "M32", "M33", "M34", "M350", "M351", "M353", "M45", "M46")
 .maps[[.tmpn]][["pud"]] <- c("K25", "K26", "K27", "K28")
 .maps[[.tmpn]][["mld"]] <- c("B15", "B16", "B17", "B18", "B19", "K703", "K73", "K746", "K703", "K754")
@@ -67,12 +67,12 @@
 # Charlson score, ICD10, Australian version
 .tmpn <- "charlson_icd10_am"
 .maps[[.tmpn]] <- list()
-.maps[[.tmpn]][["ami"]] <- c("I21", "I22", "I252")
+.maps[[.tmpn]][["mi"]] <- c("I21", "I22", "I252")
 .maps[[.tmpn]][["chf"]] <- c("I50")
 .maps[[.tmpn]][["pvd"]] <- c("I71", "I790", "I739", "R02", "Z958", "Z959")
 .maps[[.tmpn]][["cevd"]] <- c("I60", "I61", "I62", "I63", "I65", "I66", "G450", "G451", "G452", "G458", "G459", "G46", "I64", "G454", "I670", "I671", "I672", "I674", "I675", "I676", "I677", "I678", "I679", "I681", "I682", "I688", "I69")
 .maps[[.tmpn]][["dementia"]] <- c("F00", "F01", "F02", "F051")
-.maps[[.tmpn]][["copd"]] <- c("J40", "J41", "J42", "J44", "J43", "J45", "J46", "J47", "J67", "J44", "J60", "J61", "J62", "J63", "J66", "J64", "J65")
+.maps[[.tmpn]][["cpd"]] <- c("J40", "J41", "J42", "J44", "J43", "J45", "J46", "J47", "J67", "J44", "J60", "J61", "J62", "J63", "J66", "J64", "J65")
 .maps[[.tmpn]][["rheumd"]] <- c("M32", "M34", "M332", "M053", "M058", "M059", "M060", "M063", "M069", "M050", "M052", "M051", "M353")
 .maps[[.tmpn]][["pud"]] <- c("K25", "K26", "K27", "K28")
 .maps[[.tmpn]][["mld"]] <- c("K702", "K703", "K73", "K717", "K740", "K742", "K746", "K743", "K744", "K745")

diff --git a/data-raw/make-weights.R b/data-raw/make-weights.R
@@ -11,7 +11,7 @@ for (w in names(.maps)) {
       pvd = 1,
       cevd = 1,
       dementia = 1,
-      copd = 1,
+      cpd = 1,
       rheumd = 1,
       pud = 1,
       mld = 1,
@@ -31,7 +31,7 @@ for (w in names(.maps)) {
       pvd = 0,
       cevd = 0,
       dementia = 2,
-      copd = 1,
+      cpd = 1,
       rheumd = 1,
       pud = 0,
       mld = 2,

diff --git a/data/australia10.rda b/data/australia10.rda
diff --git a/data/icd10_2009.rda b/data/icd10_2009.rda
diff --git a/data/icd10_2011.rda b/data/icd10_2011.rda
diff --git a/data/icd10cm_2017.rda b/data/icd10cm_2017.rda
diff --git a/data/icd10cm_2018.rda b/data/icd10cm_2018.rda
diff --git a/data/icd10cm_2022.rda b/data/icd10cm_2022.rda
diff --git a/data/icd9_2015.rda b/data/icd9_2015.rda
diff --git a/data/nhds2010.rda b/data/nhds2010.rda
diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/CONDUCT.html b/docs/CONDUCT.html