# ------------------------------------------------------------------------------
# NOTE: this content is a git patch whose line breaks were lost. The R sources
# it adds are restored below as plain R; patch metadata is kept as comments.
#
# Binary files added by the patch (contents not shown in the diff):
#   AHRQ-Elixhauser/sas-formats/icd10cm_2020_1/icd10cm_2020_1_lofmsdrg.Rds  (blob b27d663)
#   AHRQ-Elixhauser/sas-formats/icd10cm_2020_1/icd10cm_2020_1_lofregex.Rds  (blob c1abd37)
#   AHRQ-Elixhauser/sas-formats/icd10cm_2021_1/Elixhauser2021Formats.Rds    (blob b3cc2f0)
#   AHRQ-Elixhauser/sas-formats/icd10cm_2022_1/Elixhauser2022Formats.rds    (blob 97932c2)
# ------------------------------------------------------------------------------

# ==== AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/get_lofmsdrg.R (new file, blob 9e110f7) ====
library(stringr)
library(dplyr)

# Make lofmsdrg from comformat_icd10cm_2020_1.txt
# The local path is defined once so that the download, read and cleanup steps
# always refer to the same file.
sas_path <- "AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/comformat_icd10cm_2020_1.txt"
download.file(
  url = "https://www.hcup-us.AHRQ.gov/toolssoftware/comorbidityicd10/comformat_icd10cm_2020_1.txt",
  destfile = sas_path
)

# Read in data
sas_AHRQ_raw <- readLines(sas_path)


# Helper functions to format msdrgs

# Expand one MS-DRG token into numbers: "215-238" -> 215:238, "242" -> 242.
convert_interval <- function(interval) {
  bounds <- str_split(interval, "-")[[1]]
  if (length(bounds) > 1) {
    as.numeric(bounds[1]):as.numeric(bounds[2])
  } else {
    as.numeric(bounds[1])
  }
}

# Turn a character vector of comma-separated MS-DRG tokens/intervals into one
# plain, unnamed numeric vector. Returns NULL when there are no tokens.
format_msdrg <- function(x) {
  tokens <- str_trim(unlist(str_split(x, ",")))  # separate intervals, trim
  tokens <- tokens[tokens != ""]                 # remove blanks
  # lapply (not sapply) keeps the intermediate type stable for any input.
  as.vector(unname(unlist(lapply(tokens, convert_interval))))
}

# Parse the MS-DRG section of the SAS format program.
#
# sas_AHRQ_raw: character vector, one element per line of the SAS file.
# Returns a named list: one element per SAS `VALUE <name>` group, holding the
# numeric MS-DRG codes listed for that group.
make_lofmsdrg <- function(sas_AHRQ_raw) {
  marker <- grep("ICD-10 MS-DRG V37 Formats", sas_AHRQ_raw)
  if (length(marker) == 0) {
    # Previously this surfaced as a cryptic error from `1:integer(0)`.
    stop("MS-DRG section marker 'ICD-10 MS-DRG V37 Formats' not found",
         call. = FALSE)
  }
  raw_msdrg <- sas_AHRQ_raw[-seq_len(marker[1])]  # skip to MS-DRG section
  raw_msdrg <- raw_msdrg[raw_msdrg != "" &
                           raw_msdrg != "Run;" &
                           raw_msdrg != " "]      # drop empty and run lines
  raw_msdrg <- str_trim(raw_msdrg)

  msdrg_labels <- list()
  pending_codes <- c()   # raw code text collected for the current group
  current_label <- NULL  # name of the VALUE group being read
  for (line in raw_msdrg) {
    # A VALUE line opens a new group; its name is the first word after VALUE.
    if (grepl("VALUE", line)) {
      after_value <- str_trim(str_split(line, "VALUE")[[1]][[2]])
      current_label <- str_split(after_value, " ")[[1]][1]
      msdrg_labels[[current_label]] <- list()
    }
    # Any line containing a digit contributes the text before " = ".
    if (grepl("\\d", line)) {
      pending_codes <- append(pending_codes, str_split(line, " = ")[[1]][1])
    }
    # A semicolon closes the group: store the parsed codes under its label.
    if (grepl(";", line)) {
      if (is.null(current_label)) {
        stop("Found ';' before any VALUE statement in the MS-DRG section",
             call. = FALSE)
      }
      msdrg_labels[[current_label]] <- format_msdrg(pending_codes)
      pending_codes <- c()  # clear for the next group
    }
  }
  msdrg_labels
}
lofmsdrg <- make_lofmsdrg(sas_AHRQ_raw)

# Save lofmsdrg as .Rds
saveRDS(lofmsdrg, "AHRQ-Elixhauser/sas-formats/icd10cm_2020_1/icd10cm_2020_1_lofmsdrg.Rds")

# Remove comformat_icd10cm_2020_1.txt
file.remove(sas_path)

# ==== AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/get_lofregex.R (new file, blob 05d3951) ====
library(stringr)
library(dplyr)

# Local copy of the AHRQ SAS format program; defined once and reused for the
# download, read and cleanup steps.
sas_path <- "AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/comformat_icd10cm_2020_1.txt"

download.file(
  url = "https://www.hcup-us.AHRQ.gov/toolssoftware/comorbidityicd10/comformat_icd10cm_2020_1.txt",
  destfile = sas_path
)


# Example pattern to extract:
# "D500",
# "O9081",
# "O99011",
# "O99012",
# "O99013",
# "O99019",
# "O9902",
# "O9903"="BLDLOSS" /*Blood loss anemia*/

# Read in data
sas_AHRQ_raw <- readLines(sas_path)

# Build one anchored regular expression per comorbidity label.
#
# Assigns ICD-10 codes to comorbidity labels from the SAS file located here:
# https://www.hcup-us.AHRQ.gov/toolssoftware/comorbidityicd10/comformat_icd10cm_2020_1.txt
# Omits /**** ICD-10 MS-DRG V37 Formats ****/
#
# sas_AHRQ_raw: character vector, one element per line of the SAS file.
# Returns a named list; each element is a single pattern of the form
# "^CODE1|^CODE2|...". Parsing stops after the "R636=WGHTLOSS" line.
make_sas_list <- function(sas_AHRQ_raw) {
  # Clean up readLines output
  non_empty <- sas_AHRQ_raw[sas_AHRQ_raw != ""]  # remove empty lines
  body_lines <- non_empty[-seq_len(18)]          # first 18 elements are extraneous
  # Drop SAS comments: keep only the text before the first "/*"
  no_comments <- vapply(
    str_split(body_lines, "\\/\\*"),
    function(parts) parts[1],
    character(1)
  )
  sas_AHRQ_prep <- no_comments %>%
    str_trim() %>%                      # trim white space
    str_replace_all('\\"', "") %>%      # remove quotes
    str_replace_all(",", "")            # remove commas
  # A line that is empty after cleanup (whitespace-only or comment-only) must
  # not reach the code list: it would add a bare "^" alternative to the
  # pattern, and "^" matches every code.
  sas_AHRQ_prep <- sas_AHRQ_prep[sas_AHRQ_prep != ""]

  AHRQ_list <- list()  # label -> regex
  code_buffer <- c()   # codes collected since the previous label
  for (l in sas_AHRQ_prep) {
    if (grepl("=", l, fixed = TRUE)) {
      # "CODE=LABEL" closes the current group
      split_l <- str_split(l, "=")[[1]]
      code_buffer <- append(code_buffer, split_l[1])
      # Must have ^ so that regex doesn't search for within-code substrings
      AHRQ_list[[split_l[2]]] <- paste0("^", str_c(code_buffer, collapse = "|^"))
      code_buffer <- c()
    } else {
      code_buffer <- append(code_buffer, l)
    }
    # Omit everything after wghtloss
    if (l == "R636=WGHTLOSS") {
      break
    }
  }
  AHRQ_list
}
icd10cm_2020_1_lofregex <- make_sas_list(sas_AHRQ_raw)

# Save AHRQ_list object as RDS
saveRDS(icd10cm_2020_1_lofregex,
        "AHRQ-Elixhauser/sas-formats/icd10cm_2020_1/icd10cm_2020_1_lofregex.Rds")

# Remove comformat_icd10cm_2020_1.txt
file.remove(sas_path)

# ==== AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/get_mappings.R (new file, blob 2388d4f) ====

# Download file
download.file(
  url = "https://www.hcup-us.ahrq.gov/toolssoftware/comorbidityicd10/ElixhauserComorbidity_v2021-1.zip",
  destfile = "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1.zip"
)
# Unzip
unzip("AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1.zip",
      exdir = "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1")

# Get raw SAS code line-by-line
raw_format <- readLines(
  "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1/Comorb_ICD10CM_Format_v2021-1.sas"
)

# Remove quotes, commas, and whitespace
trim_format <- trimws(gsub(",", "", gsub('"', "", raw_format)))
# Remove 'proc' lines
trim_format <- trim_format[!grepl("Proc", trim_format)]
# Remove 'run' lines (any line containing a semicolon)
trim_format <- trim_format[!grepl(";", trim_format)]
# Remove 'other' lines
trim_format <- trim_format[!grepl("other", trim_format)]

# Separate vector by blank line: every blank line starts a new group
format_list <- split(trim_format[trim_format != ""],
                     cumsum(trim_format == "")[trim_format != ""])

# Remove extraneous
format_list <- format_list[3:length(format_list)]  # Header stuff

# Split into value groups: TRUE where a group contains a 'Value $...' header
new_values <- unlist(
  lapply(format_list,
         function(x) {
           any(grepl("Value \\$", x))
         })
)

# sapply is kept on purpose: the code below relies on the shape it returns.
ElixhauserAHRQ2021Map <- sapply(1:max(cumsum(new_values)),
                                function(x) {
                                  format_list[cumsum(new_values) == x]
                                })

# Name the value groups after the text following '$' in their first line
names(ElixhauserAHRQ2021Map) <- unlist(
  lapply(ElixhauserAHRQ2021Map,
         function(x) {
           strsplit(x[[1]][1], "\\$")[[1]][2]
         })
)

# Drop 'value'
# elements in comfmt
ElixhauserAHRQ2021Map$comfmt <- sapply(ElixhauserAHRQ2021Map$comfmt,
                                       function(x) x[!grepl("Value", x)])

# Get icd group names for comfmt
names(ElixhauserAHRQ2021Map$comfmt) <- sapply(ElixhauserAHRQ2021Map$comfmt,
                                              function(x) {
                                                strsplit(x[grepl(" = ", x)], " = ")[[1]][2]
                                              })

# Drop ' = XXXX' from icd groups
ElixhauserAHRQ2021Map$comfmt <- sapply(ElixhauserAHRQ2021Map$comfmt,
                                       function(x) {
                                         x <- sapply(x, function(x) {
                                           strsplit(x, " = ")[[1]][1]
                                         })
                                         names(x) <- NULL
                                         x
                                       })

# Drop " = 1" from poaxmpt and 'value' elements
poaxmpt_names <- names(ElixhauserAHRQ2021Map)[grepl("poa", names(ElixhauserAHRQ2021Map))]
for (i in poaxmpt_names) {
  # Drop " = 1"
  ElixhauserAHRQ2021Map[[i]] <- unlist(
    lapply(
      strsplit(ElixhauserAHRQ2021Map[[i]][[1]], " = "),
      function(x) {
        x[[1]][1]
      }
    )
  )

  # Drop 'Value' elements
  ElixhauserAHRQ2021Map[[i]] <- ElixhauserAHRQ2021Map[[i]][!grepl(
    "Value", ElixhauserAHRQ2021Map[[i]])]
}

# Define and save final comorbidities in AHRQ format:
ElixhauserAHRQ2021Abbr <- c(
  "AIDS", "ALCOHOL", "ANEMDEF", "ARTH", "BLDLOSS",
  "CANCER_LYMPH", "CANCER_LEUK", "CANCER_METS", "CANCER_NSITU", "CANCER_SOLID",
  "CBVD", "CHF", "COAG", "DEMENTIA", "DEPRESS",
  "DIAB_UNCX", "DIAB_CX", "DRUG_ABUSE", "HTN_CX", "HTN_UNCX",
  "LIVER_MLD", "LIVER_SEV", "LUNG_CHRONIC",
  "NEURO_MOVT", "NEURO_OTH", "NEURO_SEIZ",
  "OBESE", "PARALYSIS", "PERIVASC", "PSYCHOSES", "PULMCIRC",
  "RENLFL_MOD", "RENLFL_SEV", "THYROID_HYPO", "THYROID_OTH",
  "ULCER_PEPTIC", "VALVE", "WGHTLOSS"
)

# Define and save value labels (see Comorb_ICD10CM_Format_v2021-1.sas)
ElixhauserAHRQ2021Labels <- c(
  "AIDS" = "Acquired immune deficiency syndrome",
  "ALCOHOL" = "Alcohol abuse",
  "ANEMDEF" = "Deficiency anemias",
  "ARTH" = "Arthropathies",
  "BLDLOSS" = "Chronic blood loss anemia",
  "CANCER_LEUK" = "Leukemia",
  "CANCER_LYMPH" = "Lymphoma",
  "CANCER_METS" = "Metastatic cancer",
  "CANCER_NSITU" = "Solid tumor without metastasis, in situ",
  "CANCER_SOLID" = "Solid tumor without metastasis, malignant",
  "CBVD" = "Cerebrovascular disease",
  "CBVD_NPOA" = "Cerebrovascular disease, not on admission",
  "CBVD_POA" = "Cerebrovascular disease, on admission",
  "CBVD_SQLA" = "Cerebrovascular disease, sequela",
  "CHF" = "Congestive heart failure",
  "COAG" = "Coagulopathy",  # spelling fixed (was 'Coagulopthy')
  "DEMENTIA" = "Dementia",
  "DEPRESS" = "Depression",
  "DIAB_CX" = "Diabetes with chronic complications",
  "DIAB_UNCX" = "Diabetes without chronic complications",
  "DRUG_ABUSE" = "Drug abuse",
  "HTN_CX" = "Hypertension, complicated",
  "HTN_UNCX" = "Hypertension, uncomplicated",
  "LIVER_MLD" = "Liver disease, mild",
  "LIVER_SEV" = "Liver disease, moderate to severe",
  "LUNG_CHRONIC" = "Chronic pulmonary disease",
  "NEURO_MOVT" = "Neurological disorders affecting movement",
  "NEURO_OTH" = "Other neurological disorders",
  "NEURO_SEIZ" = "Seizures and epilepsy",
  "OBESE" = "Obesity",
  "PARALYSIS" = "Paralysis",
  "PERIVASC" = "Peripheral vascular disease",
  "PSYCHOSES" = "Psychoses",
  "PULMCIRC" = "Pulmonary circulation disease",
  "RENLFL_MOD" = "Renal failure, moderate",
  "RENLFL_SEV" = "Renal failure, severe",
  "THYROID_HYPO" = "Hypothyroidism",
  "THYROID_OTH" = "Other thyroid disorders",
  "ULCER_PEPTIC" = "Peptic ulcer disease x bleeding",
  "VALVE" = "Valvular disease",
  "WGHTLOSS" = "Weight loss"
)

ElixhauserAHRQ2021PreExclusion <- c(
  "AIDS", "ALCOHOL", "ANEMDEF", "ARTH", "BLDLOSS",
  "CANCER_LYMPH", "CANCER_LEUK", "CANCER_METS", "CANCER_NSITU", "CANCER_SOLID",
  "CBVD_SQLA", "CBVD_POA", "CBVD_NPOA", "CBVD",
  "CHF", "COAG", "DEMENTIA", "DEPRESS",
  "DIAB_UNCX", "DIAB_CX", "DRUG_ABUSE", "HTN_CX", "HTN_UNCX",
  "LIVER_MLD", "LIVER_SEV", "LUNG_CHRONIC",
  "NEURO_MOVT", "NEURO_OTH", "NEURO_SEIZ",
  "OBESE", "PARALYSIS", "PERIVASC", "PSYCHOSES", "PULMCIRC",
  "RENLFL_MOD", "RENLFL_SEV", "THYROID_HYPO", "THYROID_OTH",
  "ULCER_PEPTIC", "VALVE", "WGHTLOSS"
)

# Save list of format objects
Elixhauser2021Formats <- list(
  ElixhauserAHRQ2021Map = ElixhauserAHRQ2021Map,
  ElixhauserAHRQ2021Abbr = ElixhauserAHRQ2021Abbr,
  ElixhauserAHRQ2021Labels = ElixhauserAHRQ2021Labels,
  ElixhauserAHRQ2021PreExclusion = ElixhauserAHRQ2021PreExclusion
)

saveRDS(Elixhauser2021Formats,
        "AHRQ-Elixhauser/sas-formats/icd10cm_2021_1/Elixhauser2021Formats.Rds")

# Remove .zip file
file.remove(
  "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1.zip"
)

# Remove unzipped folder
unlink(
  "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1",
  recursive = TRUE
)

# ==== AHRQ-Elixhauser/sas-parse/icd10cm_2022_1/get_mappings.R (new file, blob f06a459) ====
### Downloads and parses the 2022 formatting sas program

# The archive is staged next to this script. It previously pointed at the
# icd10cm_2021_1 directory, a copy-paste slip from the 2021 script.
destfile_path <- "AHRQ-Elixhauser/sas-parse/icd10cm_2022_1/CMR_v2022-1.zip"

# Download file
download.file(
  url = "https://www.hcup-us.ahrq.gov/toolssoftware/comorbidityicd10/CMR_v2022-1.zip",
  destfile = destfile_path
)
# Unzip
unzip(destfile_path,
      exdir = "AHRQ-Elixhauser/sas-parse/icd10cm_2022_1/ElixhauserComorbidity_v2022-1")

# Get raw SAS code line-by-line
raw_format <- readLines(
  "AHRQ-Elixhauser/sas-parse/icd10cm_2022_1/ElixhauserComorbidity_v2022-1/CMR_Format_Program_v2022-1.sas"
)

# Remove quotes, commas, and whitespace
trim_format <- trimws(gsub(",", "", gsub('"', "", raw_format)))
# Remove 'proc' lines
trim_format <- trim_format[!grepl("Proc", trim_format)]
# Remove 'run' lines (any line containing a semicolon)
trim_format <- trim_format[!grepl(";", trim_format)]
# Remove 'other' lines
trim_format <- trim_format[!grepl("other", trim_format)]

# Separate vector by blank line
format_list <- split(
  trim_format[trim_format != ""],
  cumsum(trim_format == "")[trim_format != ""]
)

# Remove extraneous
format_list <- format_list[3:length(format_list)]  # Header stuff

# Get comfmt vs. poaxmpt entries: poaxmpt groups end with a "... = 1" line
poaxmpt_filter <- unlist(
  lapply(format_list, function(x) {
    grepl(" = 1", tail(x, 1))
  })
)

# Get names for comfmt: the label after " = " on each group's last line
comfmt_names <- as.vector(
  unlist(
    lapply(
      format_list[!poaxmpt_filter],
      function(x) {
        strsplit(tail(x, 1), " = ")[[1]][2]
      }
    )
  )
)
# Get values for each group in comfmt
comfmt_values <- list()
# First element is unique as it contains comfmt header
comfmt_values <- append(
  comfmt_values,
  list(
    sapply(
      strsplit(format_list[!poaxmpt_filter][[1]][-1], " = "),
      function(x) {
        x[[1]]
      }
    )
  )
)

# Subsequent elements follow the same pattern
comfmt_values <- append(
  comfmt_values,
  lapply(
    format_list[!poaxmpt_filter][-1],
    function(x) {
      # Splits the last element that contains " = NAME"
      sapply(
        # Takes the first element of the previous split
        strsplit(x, " = "),
        function(y) {
          y[[1]]
        }
      )
    }
  )
)

# Add names to values
names(comfmt_values) <- comfmt_names

# Get poaxmpt names
poaxmpt_names <- as.vector(
  sapply(
    format_list[poaxmpt_filter],
    function(x) {
      strsplit(x[1], "\\$")[[1]][2]
    }
  )
)

# Get poaxmpt values
poaxmpt_values <- lapply(
  format_list[poaxmpt_filter],
  function(x) {
    gsub(" = 1", "", x[-1])
  }
)

# Add names to values
names(poaxmpt_values) <- tolower(poaxmpt_names)

# Create complete ElixhauserAHRQ2022Map
ElixhauserAHRQ2022Map <- list(
  comfmt = comfmt_values
)
ElixhauserAHRQ2022Map <- append(
  ElixhauserAHRQ2022Map,
  poaxmpt_values
)

ElixhauserAHRQ2022PreExclusion <- c(
  "AIDS", "ALCOHOL", "ANEMDEF", "AUTOIMMUNE", "BLDLOSS",
  "CANCER_LYMPH", "CANCER_LEUK", "CANCER_METS", "CANCER_NSITU", "CANCER_SOLID",
  "CBVD_SQLA", "CBVD_POA", "CBVD_NPOA", "CBVD",
  "HF", "COAG", "DEMENTIA", "DEPRESS",
  "DIAB_UNCX", "DIAB_CX", "DRUG_ABUSE", "HTN_CX", "HTN_UNCX",
  "LIVER_MLD", "LIVER_SEV", "LUNG_CHRONIC",
  "NEURO_MOVT", "NEURO_OTH", "NEURO_SEIZ",
  "OBESE", "PARALYSIS", "PERIVASC", "PSYCHOSES", "PULMCIRC",
  "RENLFL_MOD", "RENLFL_SEV", "THYROID_HYPO", "THYROID_OTH",
  "ULCER_PEPTIC", "VALVE", "WGHTLOSS"
)

# Define and save final comorbidities in AHRQ format:
ElixhauserAHRQ2022Abbr <- c(
  "AIDS", "ALCOHOL", "ANEMDEF", "AUTOIMMUNE", "BLDLOSS",
  "CANCER_LEUK", "CANCER_LYMPH", "CANCER_METS", "CANCER_NSITU", "CANCER_SOLID",
  "CBVD", "COAG", "DEMENTIA", "DEPRESS",
  "DIAB_CX", "DIAB_UNCX", "DRUG_ABUSE", "HF", "HTN_CX", "HTN_UNCX",
  "LIVER_MLD", "LIVER_SEV", "LUNG_CHRONIC",
  "NEURO_MOVT", "NEURO_OTH", "NEURO_SEIZ",
  "OBESE", "PARALYSIS", "PERIVASC", "PSYCHOSES", "PULMCIRC",
  "RENLFL_MOD", "RENLFL_SEV", "THYROID_HYPO", "THYROID_OTH",
  "ULCER_PEPTIC", "VALVE", "WGHTLOSS"
)

# Define and save value labels
# (see CMR_Format_Program_v2022-1.sas, the file parsed above)
ElixhauserAHRQ2022Labels <- c(
  "AIDS" = "Acquired immune deficiency syndrome",
  "ALCOHOL" = "Alcohol abuse",
  "ANEMDEF" = "Deficiency anemias",
  "AUTOIMMUNE" = "Autoimmune conditions",
  "BLDLOSS" = "Chronic blood loss anemia",
  "CANCER_LEUK" = "Leukemia",
  "CANCER_LYMPH" = "Lymphoma",
  "CANCER_METS" = "Metastatic cancer",
  "CANCER_NSITU" = "Solid tumor without metastasis, in situ",
  "CANCER_SOLID" = "Solid tumor without metastasis, malignant",
  "CBVD" = "Cerebrovascular disease",
  "COAG" = "Coagulopathy",  # spelling fixed (was 'Coagulopthy')
  "DEMENTIA" = "Dementia",
  "DEPRESS" = "Depression",
  "DIAB_CX" = "Diabetes with chronic complications",
  "DIAB_UNCX" = "Diabetes without chronic complications",
  "DRUG_ABUSE" = "Drug abuse",
  "HF" = "Heart failure",
  "HTN_CX" = "Hypertension, complicated",
  "HTN_UNCX" = "Hypertension, uncomplicated",
  "LIVER_MLD" = "Liver disease, mild",
  "LIVER_SEV" = "Liver disease, moderate to severe",
  "LUNG_CHRONIC" = "Chronic pulmonary disease",
  "NEURO_MOVT" = "Neurological disorders affecting movement",
  "NEURO_OTH" = "Other neurological disorders",
  "NEURO_SEIZ" = "Seizures and epilepsy",
  "OBESE" = "Obesity",
  "PARALYSIS" = "Paralysis",
  "PERIVASC" = "Peripheral vascular disease",
  "PSYCHOSES" = "Psychoses",
  "PULMCIRC" = "Pulmonary circulation disease",
  "RENLFL_MOD" = "Renal failure, moderate",
  "RENLFL_SEV" = "Renal failure, severe",
  "THYROID_HYPO" = "Hypothyroidism",
  "THYROID_OTH" = "Other thyroid disorders",
  "ULCER_PEPTIC" = "Peptic ulcer disease x bleeding",
  "VALVE" = "Valvular disease",
  "WGHTLOSS" = "Weight loss"
)

# Save list of format objects
Elixhauser2022Formats <- list(
  ElixhauserAHRQ2022Map = ElixhauserAHRQ2022Map,
  ElixhauserAHRQ2022Abbr = ElixhauserAHRQ2022Abbr,
  ElixhauserAHRQ2022Labels = ElixhauserAHRQ2022Labels,
  ElixhauserAHRQ2022PreExclusion = ElixhauserAHRQ2022PreExclusion
)

# The artefact committed with this change is 'Elixhauser2022Formats.rds'
# (lower-case extension); write to that exact name so that re-running the
# script refreshes the tracked file on case-sensitive file systems too.
saveRDS(Elixhauser2022Formats,
        "AHRQ-Elixhauser/sas-formats/icd10cm_2022_1/Elixhauser2022Formats.rds")

# Remove .zip file
file.remove(
  destfile_path
)

# Remove unzipped folder
unlink(
  "AHRQ-Elixhauser/sas-parse/icd10cm_2022_1/ElixhauserComorbidity_v2022-1",
  recursive = TRUE
)

# ==== DESCRIPTION (modified, 76120ce..9c1afac) ====
#   -RoxygenNote: 7.1.0
#   +RoxygenNote: 7.1.1
#
# ==== NAMESPACE (modified, 8401fd6..ba6f29b) ====
#    # Generated by roxygen2: do not edit by hand
#
#    export(comorbidity)
#   +export(get_ahrq_2020)
#   +export(get_ahrq_2021)
#    export(sample_diag)
#   +export(sample_drg)
#   +export(sample_year_quarter)

# ==== R/ahrq_sas.R (new file, blob eca96c6) ====
# All AHRQ SAS conversion functions stored here
################################################################################
################################################################################
################################################################################
# 2020

#' AHRQ Elixhauser comorbidity flags and scores (2020 ICD-10-CM SAS logic)
#'
#' Flags comorbidities by regex-matching diagnosis codes, applies the MS-DRG
#' exclusion rules transcribed from the AHRQ SAS program (quoted in the
#' comments below), and adds unweighted and weighted scores.
#'
#' `lofregex` and `lofmsdrg` are not defined in this function; they are looked
#' up from the enclosing environment.
#'
#' @param x Data frame or data.table of diagnosis records.
#' @param id,code,drg,icd_rank Column names (character) in `x`: record
#'   identifier, ICD-10 code, MS-DRG and diagnosis rank. Codes with rank 1
#'   are overwritten with "D473" before matching.
#' @param assign0 Logical. When `TRUE`, the milder member of a condition pair
#'   (uncomplicated hypertension/diabetes, solid tumour) contributes 0 to the
#'   scores if the more severe one is present.
#' @return A data.frame with `id`, the comorbidity flags, `HTN_C`, `score`,
#'   `index`, `wscore_ahrq`, `wscore_vw`, `windex_ahrq` and `windex_vw`.
#' @export
#' @keywords internal
get_ahrq_2020 <- function(x, id, code, assign0, drg, icd_rank) {
  ### Extract regex for internal use
  regex <- lofregex[["elixhauser_ahrq_2020"]][["icd10"]]

  ### Extract SAS DRGS and make sure id is not factor
  # Make sure there are no factors (it will break when combining drgs).
  # is.factor() also covers ordered factors and does not choke on columns
  # with several classes; converting in place keeps the column names intact.
  factor_cols <- vapply(x, is.factor, logical(1))
  if (any(factor_cols)) {
    x <- as.data.frame(x, stringsAsFactors = FALSE)
    x[factor_cols] <- lapply(x[factor_cols], as.character)
  }

  # Remove first-rank ICD (D473 codes to "NONE").
  # which() skips rows whose rank is NA instead of aborting the assignment.
  x[which(x[[icd_rank]] == 1), code] <- "D473"

  # Get number-group key-value pairs
  reverse_lofmsdrg <- unstack(stack(lofmsdrg), form = ind ~ values)

  # Drop DRG leading zeros, convert to character
  all_drgs <- as.numeric(x[[drg]])
  all_drgs <- as.character(all_drgs)

  # Get list of SAS drg flags: one entry per id (first occurrence wins),
  # keeping only ids whose DRG belongs to at least one group
  drg_flags <- reverse_lofmsdrg[all_drgs]
  names(drg_flags) <- x[[id]]
  drg_flags <- drg_flags[unique(names(drg_flags))]
  drg_mask <- !unlist(lapply(drg_flags, is.null))
  drg_flags <- drg_flags[drg_mask]

  ### Subset only 'id' and 'code' columns
  if (data.table::is.data.table(x)) {
    x <- x[, c(id, code), with = FALSE]
  } else {
    x <- x[, c(id, code)]
  }

  ## Turn x into a DT
  data.table::setDT(x)

  ### Get list of unique codes used in dataset that match comorbidities
  loc <- sapply(regex, grep, unique(x[[code]]), value = TRUE)
  loc <- utils::stack(loc)
  names(loc)[1] <- code

  ### Merge list with original data.table (data.frame)
  x <- merge(x, loc, all.x = TRUE, allow.cartesian = TRUE)
  x[[code]] <- NULL
  x <- unique(x)

  ### Spread wide
  xin <- x[, c(id, "ind"), with = FALSE]
  xin[, value := 1L]
  x <- data.table::dcast.data.table(
    xin, stats::as.formula(paste(id, "~ ind")), fill = 0)
  x[["NA"]] <- NULL

  ### Add missing columns
  for (col in names(regex)) {
    if (is.null(x[[col]])) x[[col]] <- 0
  }
  data.table::setcolorder(x, c(id, names(regex)))

  # This section replicates the AHRQ Elixhauser Comorbidity Software v3.7
  # https://www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp
  # /*******************************************/
  # /* Initialize Hypertension, CHF, and Renal */
  # /* Comorbidity flags to 1 using the detail */
  # /* hypertension flags.                     */
  # /*******************************************/
  # IF HTNPREG_ THEN HTNCX = 1;
  x[HTNPREG==1, HTNCX := 1]
  # IF HTNWOCHF_ THEN HTNCX = 1;
  x[HTNWOCHF==1, HTNCX := 1]
  # IF HTNWCHF_ THEN DO; HTNCX = 1; CHF = 1; END;
  x[HTNWCHF==1, c('HTNCX', 'CHF') := 1]
  # IF HRENWORF_ THEN HTNCX = 1;
  x[HRENWORF==1, HTNCX := 1]
  # IF HRENWRF_ THEN DO; HTNCX = 1; RENLFAIL = 1; END;
  x[HRENWRF==1, c('HTNCX', 'RENLFAIL') := 1]
  # IF HHRWOHRF_ THEN HTNCX = 1;
  x[HHRWOHRF==1, HTNCX := 1]
  # IF HHRWCHF_ THEN DO; HTNCX = 1; CHF = 1; END;
  x[HHRWCHF==1, c('HTNCX', 'CHF') := 1]
  # IF HHRWRF_ THEN DO; HTNCX = 1; RENLFAIL = 1; END;
  x[HHRWRF==1, c('HTNCX', 'RENLFAIL') := 1]
  # IF HHRWHRF_ THEN DO; HTNCX = 1; CHF = 1; RENLFAIL = 1; END;
  x[HHRWHRF==1, c('HTNCX', 'CHF', 'RENLFAIL') := 1]
  # IF OHTNPREG_ THEN HTNCX = 1;
  x[OHTNPREG==1, HTNCX := 1]

  # /*********************************************************/
  # /* Set up code to only count the more severe comorbidity */
  # /*********************************************************/
  # IF HTNCX = 1 THEN HTN = 0 ;
  x[HTNCX==1, HTN := 0]
  # IF METS = 1 THEN TUMOR = 0 ;
  x[METS==1, TUMOR := 0]
  # IF DMCX = 1 THEN DM = 0 ;
  x[DMCX==1, DM := 0]

  # /******************************************************/
  # /* Examine DRG and set flags to identify a particular */
  # /* DRG group                                          */
  # /******************************************************/
  # SAS: IF PUT(DRG,<FORMAT>.) = 'YES' THEN <FLAG> = 1; for these pairs:
  #   CARDDRG/CARDFLG   PERIDRG/PERIFLG   CEREDRG/CEREFLG   NERVDRG/NERVFLG
  #   PULMDRG/PULMFLG   DIABDRG/DIABFLG   HYPODRG/HYPOFLG   RENALDRG/RENALFLG
  #   RENFDRG/RENFFLG   LIVERDRG/LIVERFLG ULCEDRG/ULCEFLG   HIVDRG/HIVFLG
  #   LEUKDRG/LEUKFLG   CANCDRG/CANCFLG   ARTHDRG/ARTHFLG   NUTRDRG/NUTRFLG
  #   ANEMDRG/ANEMFLG   ALCDRG/ALCFLG     HTNCXDRG/HTNCXFLG HTNDRG/HTNFLG
  #   COAGDRG/COAGFLG   PSYDRG/PSYFLG     OBESEDRG/OBESEFLG DEPRSDRG/DEPRSFLG
  # Here each flag is a 0/1 column named after the format (e.g. CARDDRG).

  # drg_flags have ids as names and indicated drgs as values
  # NOTE(review): when no record's DRG falls in any group, drg_flags is empty
  # and matrix() below receives NULL -- confirm how that case should behave.
  drg_df <- lapply(drg_flags, function(groups) (names(lofmsdrg) %in% groups) * 1)
  drg_df <- matrix(unlist(drg_df), nrow = length(drg_df), byrow = TRUE)
  drg_df <- data.table::data.table(drg_df, drg_id = names(drg_flags))
  colnames(drg_df) <- c(names(lofmsdrg), 'drg_id')
  # Coerce drg_df to class of x to ensure merge
  class(drg_df$drg_id) <- class(x[[id]])
  # Merge with x by ID
  x <- merge(x, drg_df, by.x = id, by.y = 'drg_id', sort = FALSE, all.x = TRUE)

  # Make NAs 0
  x[is.na(x)] <- 0

  # /************************************************************/
  # /* Redefining comorbidities by eliminating the DRG directly */
  # /* related to comorbidity, thus limiting the screens to     */
  # /* principal diagnoses not directly related to comorbidity  */
  # /* in question                                              */
  # /************************************************************/
  # IF CHF AND CARDFLG THEN CHF = 0;
  x[CHF==1 & CARDDRG==1, CHF := 0]
  # IF VALVE AND CARDFLG THEN VALVE = 0;
  x[VALVE==1 & CARDDRG==1, VALVE := 0]
  # IF PULMCIRC AND (CARDFLG OR PULMFLG ) THEN PULMCIRC = 0;
  x[PULMCIRC==1 & (CARDDRG==1 | PULMDRG==1), PULMCIRC := 0]
  # IF PERIVASC AND PERIFLG THEN PERIVASC = 0;
  x[PERIVASC==1 & PERIDRG==1, PERIVASC := 0]
  # IF HTN AND HTNFLG THEN HTN = 0;
  x[HTN==1 & HTNDRG==1, HTN := 0]

  # /**********************************************************/
  # /* Apply DRG Exclusions to Hypertension Complicated, Con- */
  # /* gestive Heart Failure, and Renal Failure comorbidities */
  # /* using the detailed hypertension flags created above.   */
  # /**********************************************************/
  # IF HTNCX AND HTNCXFLG THEN HTNCX = 0 ;
  x[HTNCX==1 & HTNCXDRG==1, HTNCX := 0]
  # IF HTNPREG_ AND HTNCXFLG THEN HTNCX = 0;
  x[HTNPREG==1 & HTNCXDRG==1, HTNCX := 0]
  # IF HTNWOCHF_ AND (HTNCXFLG OR CARDFLG) THEN HTNCX = 0;
  x[HTNWOCHF==1 & (HTNCXDRG==1 | CARDDRG==1), HTNCX := 0]

  # IF HTNWCHF_ THEN DO;
  #   IF HTNCXFLG THEN HTNCX = 0;
  x[HTNWCHF==1 & HTNCXDRG==1, HTNCX := 0]
  #   IF CARDFLG THEN DO; HTNCX = 0; CHF = 0; END;
  x[HTNWCHF==1 & CARDDRG==1, HTNCX := 0]
  x[HTNWCHF==1 & CARDDRG==1, CHF := 0]
  # END;

  # IF HRENWORF_ AND (HTNCXFLG OR RENALFLG) THEN HTNCX = 0;
  x[HRENWORF==1 & (HTNCXDRG==1 | RENALDRG==1), HTNCX := 0]

  # IF HRENWRF_ THEN DO;
  #   IF HTNCXFLG THEN HTNCX = 0;
  x[HRENWRF==1 & HTNCXDRG==1, HTNCX := 0]
  #   IF RENALFLG THEN DO; HTNCX = 0; RENLFAIL = 0; END;
  x[HRENWRF==1 & RENALDRG==1, c('HTNCX', 'RENLFAIL') := 0]
  # END;

  # IF HHRWOHRF_ AND (HTNCXFLG OR CARDFLG OR RENALFLG) THEN HTNCX = 0;
  x[HHRWOHRF==1 & (HTNCXDRG==1 | CARDDRG==1 | RENALDRG==1), HTNCX := 0]

  # IF HHRWCHF_ THEN DO;
  #   IF HTNCXFLG THEN HTNCX = 0;
  x[HHRWCHF==1 & HTNCXDRG==1, HTNCX := 0]
  #   IF CARDFLG THEN DO; HTNCX = 0; CHF = 0; END;
  x[HHRWCHF==1 & CARDDRG==1, c('HTNCX', 'CHF') := 0]
  #   IF RENALFLG THEN HTNCX = 0;
  x[HHRWCHF==1 & RENALDRG==1, HTNCX := 0]
  # END;

  # IF HHRWRF_ THEN DO;
  #   IF HTNCXFLG OR CARDFLG THEN HTNCX = 0;
  x[HHRWRF==1 & (HTNCXDRG==1 | CARDDRG==1), HTNCX := 0]
  #   IF RENALFLG THEN DO; HTNCX = 0; RENLFAIL = 0; END;
  x[HHRWRF==1 & RENALDRG==1, c('HTNCX', 'RENLFAIL') := 0]
  # END;

  # IF HHRWHRF_ THEN DO;
  #   IF HTNCXFLG THEN HTNCX = 0;
  x[HHRWHRF==1 & HTNCXDRG==1, HTNCX := 0]
  #   IF CARDFLG THEN DO; HTNCX = 0; CHF = 0; END;
  x[HHRWHRF==1 & CARDDRG==1, c('HTNCX', 'CHF') := 0]
  #   IF RENALFLG THEN DO; HTNCX = 0; RENLFAIL = 0; END;
  x[HHRWHRF==1 & RENALDRG==1, c('HTNCX', 'RENLFAIL') := 0]
  # END;

  # IF OHTNPREG_ AND (HTNCXFLG OR CARDFLG OR RENALFLG) THEN HTNCX = 0;
  x[OHTNPREG==1 & (HTNCXDRG==1 | CARDDRG==1 | RENALDRG==1), HTNCX := 0]

  # IF NEURO AND NERVFLG THEN NEURO = 0;
  x[NEURO==1 & NERVDRG==1, NEURO := 0]
  # IF CHRNLUNG AND PULMFLG THEN CHRNLUNG = 0;
  x[CHRNLUNG==1 & PULMDRG==1, CHRNLUNG := 0]
  # IF DM AND DIABFLG THEN DM = 0;
  x[DM==1 & DIABDRG==1, DM := 0]
  # IF DMCX AND DIABFLG THEN DMCX = 0 ;
  x[DMCX==1 & DIABDRG==1, DMCX := 0]
  # IF HYPOTHY AND HYPOFLG THEN HYPOTHY = 0;
  x[HYPOTHY==1 & HYPODRG==1, HYPOTHY := 0]
  # IF RENLFAIL AND RENFFLG THEN RENLFAIL = 0;
  x[RENLFAIL==1 & RENFDRG==1, RENLFAIL := 0]
  # IF LIVER AND LIVERFLG THEN LIVER = 0;
  x[LIVER==1 & LIVERDRG==1, LIVER := 0]
  # IF ULCER AND ULCEFLG THEN ULCER = 0;
  x[ULCER==1 & ULCEDRG==1, ULCER := 0]
  # IF AIDS AND HIVFLG THEN AIDS = 0;
  x[AIDS==1 & HIVDRG==1, AIDS := 0]
  # IF LYMPH AND LEUKFLG THEN LYMPH = 0;
  x[LYMPH==1 & LEUKDRG==1, LYMPH := 0]
  # IF METS AND CANCFLG THEN METS = 0;
  x[METS==1 & CANCDRG==1, METS := 0]
  # IF TUMOR AND CANCFLG THEN TUMOR = 0;
  x[TUMOR==1 & CANCDRG==1, TUMOR := 0]
  # IF ARTH AND ARTHFLG THEN ARTH = 0;
  x[ARTH==1 & ARTHDRG==1, ARTH := 0]
  # IF COAG AND COAGFLG THEN COAG = 0;
  x[COAG==1 & COAGDRG==1, COAG := 0]
  # IF OBESE AND (NUTRFLG OR OBESEFLG) THEN OBESE = 0;
  x[OBESE==1 & (NUTRDRG==1 | OBESEDRG==1), OBESE := 0]
  # IF WGHTLOSS AND NUTRFLG THEN WGHTLOSS = 0;
  x[WGHTLOSS==1 & NUTRDRG==1, WGHTLOSS := 0]
  # IF LYTES AND NUTRFLG THEN LYTES = 0;
  x[LYTES==1 & NUTRDRG==1, LYTES := 0]
  # IF BLDLOSS AND ANEMFLG THEN BLDLOSS = 0;
  x[BLDLOSS==1 & ANEMDRG==1, BLDLOSS := 0]
  # IF ANEMDEF AND ANEMFLG THEN ANEMDEF = 0;
  x[ANEMDEF==1 & ANEMDRG==1, ANEMDEF := 0]
  # IF ALCOHOL AND ALCFLG THEN ALCOHOL = 0;
  x[ALCOHOL==1 & ALCDRG==1, ALCOHOL := 0]
  # IF DRUG AND ALCFLG THEN DRUG = 0;
  x[DRUG==1 & ALCDRG==1, DRUG := 0]
  # IF PSYCH AND PSYFLG THEN PSYCH = 0;
  x[PSYCH==1 & PSYDRG==1, PSYCH := 0]
  # IF DEPRESS AND DEPRSFLG THEN DEPRESS = 0;
  x[DEPRESS==1 & DEPRSDRG==1, DEPRESS := 0]
  # IF PARA AND CEREFLG THEN PARA = 0;
  x[PARA==1 & CEREDRG==1, PARA := 0]

  # /*************************************/
  # /* Combine HTN and HTNCX into HTN_C  */
  # /*************************************/
  # ATTRIB HTN_C LENGTH=3 LABEL='Hypertension';
  # IF HTN=1 OR HTNCX=1 THEN HTN_C=1; ELSE HTN_C=0;
  x$HTN_C <- ifelse(x$HTN==1 | x$HTNCX==1, 1, 0)

  # Rename columns to comorbidity package conventions for calculations
  old_names <- c("CHF", "VALVE", "PULMCIRC", "PERIVASC", "HTN", "HTNCX", "PARA",
                 "NEURO", "CHRNLUNG", "DM", "DMCX", "HYPOTHY", "RENLFAIL", "LIVER",
                 "ULCER", "AIDS", "LYMPH", "METS", "TUMOR", "ARTH", "COAG", "OBESE",
                 "WGHTLOSS", "LYTES", "BLDLOSS", "ANEMDEF", "ALCOHOL", "DRUG",
                 "PSYCH", "DEPRESS")
  new_names <- c("chf", "valv", "pcd", "pvd", "hypunc", "hypc", "para", "ond", "cpd",
                 "diabunc", "diabc", "hypothy", "rf", "ld", "pud", "aids", "lymph",
                 "metacanc", "solidtum", "rheumd", "coag", "obes", "wloss", "fed",
                 "blane", "dane", "alcohol", "drug", "psycho", "depre")
  x <- data.table::setnames(x, old = old_names, new = new_names)

  # Same computations as "elixhauser" (except carit removed)
  x$score <- with(x, chf + valv + pcd + pvd +
    hypunc * ifelse(hypc == 1 & assign0, 0, 1) + hypc + para + ond + cpd +
    diabunc * ifelse(diabc == 1 & assign0, 0, 1) + diabc + hypothy + rf + ld +
    pud + aids + lymph + metacanc +
    solidtum * ifelse(metacanc == 1 & assign0, 0, 1) + rheumd + coag + obes +
    wloss + fed + blane + dane + alcohol + drug + psycho + depre)
  x$index <- with(x, cut(score, breaks = c(-Inf, 0, 1, 4.5, Inf),
                         labels = c("<0", "0", "1-4", ">=5"), right = FALSE))
  x$wscore_ahrq <- with(x, chf * 9 + valv * 0 + pcd * 6 + pvd * 3 +
    ifelse(hypunc == 1 | hypc == 1, 1, 0) * (-1) + para * 5 + ond * 5 +
    cpd * 3 + diabunc * ifelse(diabc == 1 & assign0, 0, 0) + diabc * (-3) +
    hypothy * 0 + rf * 6 + ld * 4 + pud * 0 + aids * 0 + lymph * 6 +
    metacanc * 14 + solidtum * ifelse(metacanc == 1 & assign0, 0, 7) +
    rheumd * 0 + coag * 11 + obes * (-5) + wloss * 9 + fed * 11 +
    blane * (-3) + dane * (-2) + alcohol * (-1) + drug * (-7) +
    psycho * (-5) + depre * (-5))
  x$wscore_vw <- with(x, chf * 7 + valv * (-1) + pcd * 4 + pvd * 2 +
    ifelse(hypunc == 1 | hypc == 1, 1, 0) * 0 + para * 7 + ond * 6 +
    cpd * 3 + diabunc * ifelse(diabc == 1 & assign0, 0, 0) + diabc * 0 +
    hypothy * 0 + rf * 5 + ld * 11 + pud * 0 + aids * 0 + lymph * 9 +
    metacanc * 12 + solidtum * ifelse(metacanc == 1 & assign0, 0, 4) +
    rheumd * 0 + coag * 3 + obes * (-4) + wloss * 6 + fed * 5 +
    blane * (-2) + dane * (-2) + alcohol * 0 + drug * (-7) +
    psycho * 0 + depre * (-3))
  x$windex_ahrq <- with(x, cut(wscore_ahrq, breaks = c(-Inf, 0, 1, 4.5, Inf),
                               labels = c("<0", "0", "1-4", ">=5"), right = FALSE))
  x$windex_vw <- with(x, cut(wscore_vw, breaks = c(-Inf, 0, 1, 4.5, Inf),
                             labels = c("<0", "0", "1-4", ">=5"), right = FALSE))

  # Return AHRQ vars in SAS format:
  x <- data.table::setnames(x, old = new_names, new = old_names)

  ### Turn internal DT into a DF
  data.table::setDF(x)

  # Keep only relevant vars
  x <- x[c(id, "CHF", "VALVE", "PULMCIRC", "PERIVASC", "PARA",
           "NEURO", "CHRNLUNG", "DM", "DMCX", "HYPOTHY", "RENLFAIL", "LIVER",
           "ULCER", "AIDS", "LYMPH", "METS", "TUMOR", "ARTH", "COAG", "OBESE",
           "WGHTLOSS", "LYTES", "BLDLOSS", "ANEMDEF", "ALCOHOL", "DRUG",
           "PSYCH", "DEPRESS", "HTN_C",
           'score', 'index', 'wscore_ahrq', 'wscore_vw', 'windex_ahrq',
           'windex_vw')]

  # Return the dataframe
  x
}

################################################################################
################################################################################
################################################################################
# 2021

#' @export
#' @keywords internal

get_ahrq_2021 = function(
  df,
  patient_id = NULL,
  icd_code = NULL,
  icd_seq = NULL,
poa_code = NULL, + year = NULL, + quarter = NULL, + icd10cm_vers = NULL, # If NULL, vers derived from year/quarter columns + return_n_unique = T # For N comorbidity vs. N ICD-Codes per comorbdiity +) { + + # Set poa value based on whether poa_code is supplied + if (is.null(poa_code)) { + poa = F + } else { + poa = T + } + + # Make df into dt + dt = data.table::data.table(df) + + # rename cols + new_col_names = c() + new_col_names[patient_id] = 'id' + new_col_names[icd_code] = 'code' + new_col_names[icd_seq] = 'icd_seq' + new_col_names[poa_code] = 'poa_code' + new_col_names[year] = 'year' + new_col_names[quarter] = 'quarter' + + data.table::setnames(dt, names(new_col_names), new_col_names, + skip_absent = T) + + # Start AHRQ, SAS code in comments as reference + # get_ahrq_2021-specific comments begin with "# !R": + # + # !R: Code is implemented differently in R, not all SAS lines will have a 1:1 + # corresponding R counterpart. SAS code included only as reference + + # /******************************************************************/ + # /* Title: CREATION OF COMORBIDITY VARIABLES */ + # /* ICD-10-CM COMORBIDITY SOFTWARE, */ + # /* VERSION 2021.1 */ + # /* */ + # /* PROGRAM: Comorb_ICD10CM_Analy_v2021-1.sas */ + # /* */ + # /* Description: Creates comorbidity variables based on the */ + # /* secondary diagnoses. Identification of some */ + # /* comorbidities is dependent on the diagnosis being */ + # /* present on admission. Valid through FY2021 */ + # /* (09/30/21). */ + # /* */ + # /* Note: Please specify below if diagnosis present on */ + # /* admission (POA) indicators are available in your */ + # /* data. If POA is not available, comorbidity flags */ + # /* that require POA will be set to missing. 
*/ + # /******************************************************************/ + # + + # !R: SAS Macro definitions lines omitted + + # /*****************************************/ + # /* Establish the ICD-10-CM Version */ + # /* This will default to the last version */ + # /* for discharges outside of coding */ + # /* updates. */ + # /*****************************************/ + + # !R: ICD-10-CM Version derived from Year/Quarter column unless specified + # manually in `icd10cm_vers` param + + # attrib ICDVER length=3 label='ICD-10-CM VERSION'; + # + # ICDVER = 0; + # + # if (YEAR in (2015) and DQTR in (4)) then ICDVER = 33; + # else if (YEAR in (2016) and DQTR in (1,2,3)) then ICDVER = 33; + # else if (YEAR in (2016) and DQTR in (4)) then ICDVER = 34; + # else if (YEAR in (2017) and DQTR in (1,2,3)) then ICDVER = 34; + # else if (YEAR in (2017) and DQTR in (4)) then ICDVER = 35; + # else if (YEAR in (2018) and DQTR in (1,2,3)) then ICDVER = 35; + # else if (YEAR in (2018) and DQTR in (4)) then ICDVER = 36; + # else if (YEAR in (2019) and DQTR in (1,2,3)) then ICDVER = 36; + # else if (YEAR in (2019) and DQTR in (4)) then ICDVER = 37; + # else if (YEAR in (2020) and DQTR in (1,2,3)) then ICDVER = 37; + # else if (YEAR in (2020) and DQTR in (4)) then ICDVER = 38; + # else if (YEAR in (2021) and DQTR in (1,2,3)) then ICDVER = 38; + # else ICDVER = 38; + + if (is.null(icd10cm_vers)) { + dt[, ICDVER := 38] # !R: Default value + dt[year == 2015 & quarter == 4, + ICDVER := 33] + dt[year == 2016 & quarter < 4, + ICDVER := 33] + dt[year == 2016 & quarter == 4, + ICDVER := 34] + dt[year == 2017 & quarter < 4, + ICDVER := 34] + dt[year == 2017 & quarter == 4, + ICDVER := 35] + dt[year == 2018 & quarter < 4, + ICDVER := 35] + dt[year == 2018 & quarter == 4, + ICDVER := 36] + dt[year == 2019 & quarter < 4, + ICDVER := 36] + dt[year == 2019 & quarter == 4, + ICDVER := 37] + dt[year == 2020 & quarter < 4, + ICDVER := 37] + } else { + dt[, ICDVER := icd10cm_vers] + } + + # + # 
/********************************************/ + # /* Establish lengths for all comorbidity */ + # /* flags. */ + # /********************************************/ + + # !R: Unncessary SAS code omitted + + # + # /********************************************/ + # /* Create diagnosis and comorbidity arrays */ + # /* for all comorbidity flags. */ + # /********************************************/ + + # !R: Unncessary SAS code omitted + + # ARRAY VALANYPOA (20) $13 A1-A20 + # ("AIDS" "ALCOHOL" "ARTH" "LUNG_CHRONIC" "DEMENTIA" "DEPRESS" "DIAB_UNCX" "DIAB_CX" + # "DRUG_ABUSE" "HTN_UNCX" "HTN_CX" "THYROID_HYPO" "THYROID_OTH" "CANCER_LYMPH" "CANCER_LEUK" + # "CANCER_METS" "OBESE" "PERIVASC" "CANCER_SOLID" "CANCER_NSITU"); + + VALANYPOA = c( + 'AIDS', + 'ALCOHOL', + 'ARTH', + 'LUNG_CHRONIC', + 'DEMENTIA', + 'DEPRESS', + 'DIAB_UNCX', + 'DIAB_CX', + 'DRUG_ABUSE', + 'HTN_UNCX', + 'HTN_CX', + 'THYROID_HYPO', + 'THYROID_OTH', + 'CANCER_LYMPH', + 'CANCER_LEUK', + 'CANCER_METS', + 'OBESE', + 'PERIVASC', + 'CANCER_SOLID', + 'CANCER_NSITU' + ) + + # + # /****************************************************/ + # /* If POA flags are available, create POA, exempt, */ + # /* and value arrays. */ + # /****************************************************/ + # %if &POA. 
= 1 %then %do; + # ARRAY EXEMPTPOA (&NUMDX) EXEMPTPOA1 - EXEMPTPOA&NUMDX; + # + # ARRAY DXPOA (&NUMDX) $ DXPOA1 - DXPOA&NUMDX; + # + # ARRAY VALPOA (19) $13 B1-B19 + # ("ANEMDEF" "BLDLOSS" "CHF" "COAG" "LIVER_MLD" "LIVER_SEV" + # "NEURO_MOVT" "NEURO_SEIZ" "NEURO_OTH" "PARALYSIS" "PSYCHOSES" "PULMCIRC" "RENLFL_MOD" + # "RENLFL_SEV" "ULCER_PEPTIC" "WGHTLOSS" "CBVD_POA" "CBVD_SQLA" "VALVE"); + # %end; + + # !R: These variables require POA status AND require "Y" or "W" POA (present) + # !R: Note CBVD_POA requires POA status but requires "N" or "U" (not present) + VALPOA = c( + 'ANEMDEF', + 'BLDLOSS', + 'CHF', + 'COAG', + 'LIVER_MLD', + 'LIVER_SEV', + 'NEURO_MOVT', + 'NEURO_SEIZ', + 'NEURO_OTH', + 'PARALYSIS', + 'PSYCHOSES', + 'PULMCIRC', + 'RENLFL_MOD', + 'RENLFL_SEV', + 'ULCER_PEPTIC', + 'WGHTLOSS', + 'CBVD_POA', + 'CBVD_SQLA', + 'VALVE' + ) + + + # /****************************************************/ + # /* Initialize POA independent comorbidity flags to */ + # /* zero. */ + # /****************************************************/ + # DO I = 1 TO 20; + # COMANYPOA(I) = 0; + # END; + # + # /****************************************************/ + # /* IF POA flags are available, initialize POA */ + # /* dependent comorbidiy flags to zero. If POA flags */ + # /* are not available, these fields will be default */ + # /* to missing. */ + # /****************************************************/ + + if (poa) { + # %if &POA. = 1 %then %do; + # DO I = 1 TO 19; + # COMPOA(I) = 0; + # END; + # CBVD_NPOA = 0; + # CBVD = 0; + # EXEMPTPOA1 = 0; + dt[, c('CBVD_NPOA', 'CBVD', 'EXEMPTPOA') := 0] + # %end; + } else { + # %else %do; + # CBVD_NPOA = .; + # CBVD = .; + dt[, c('CBVD_NPOA', 'CBVD') := NA] + # %end; + } + + # + # /****************************************************/ + # /* Examine each secondary diagnosis on a record and */ + # /* assign comorbidity flags. */ + # /* 1) Assign comorbidities which are neutral to POA */ + # /* reporting. 
*/ + # /* 2) IF POA flags are available, assign */ + # /* comorbidities that require a diagnosis be */ + # /* present on admission and are not exempt from */ + # /* POA reporting. */ + # /* 3) IF POA flags are available, assign one */ + # /* comorbidity that requires that the diagnosis */ + # /* NOT be present admission. */ + # /****************************************************/ + # DO I = 2 TO MIN(I10_NDX, &NUMDX); + # IF DX(I) NE " " THEN DO; + # + # DXVALUE = PUT(DX(I),COMFMT.); + + + # + # /****************************************************/ + # /* Assign Comorbidities that are neutral to POA */ + # /****************************************************/ + # DO J = 1 TO 20; + # IF DXVALUE = VALANYPOA(J) THEN COMANYPOA(J) = 1; + # END; + + dt[icd_seq==1, code := ''] # !R: Omit 1st diagnosis + dxvalues = Elixhauser2021Formats$ElixhauserAHRQ2021Map$comfmt + # !R: !VALPOA + for (i in names(dxvalues)[!names(dxvalues) %in% VALPOA]) { + dt[, paste0(i) := as.numeric(code %in% dxvalues[[i]])] + } + + # IF DXVALUE = "DRUG_ABUSEPSYCHOSES" THEN DRUG_ABUSE= 1; + dt[DRUG_ABUSEPSYCHOSES==1, DRUG_ABUSE := 1] + # IF DXVALUE = "CHFHTN_CX" THEN HTN_CX = 1; + dt[CHFHTN_CX==1, HTN_CX := 1] + # IF DXVALUE = "HTN_CXRENLFL_SEV" THEN HTN_CX = 1; + dt[HTN_CXRENLFL_SEV==1, HTN_CX := 1] + # IF DXVALUE = "CHFHTN_CXRENLFL_SEV" THEN HTN_CX = 1; + dt[CHFHTN_CXRENLFL_SEV==1, HTN_CX := 1] + # IF DXVALUE = "ALCOHOLLIVER_MLD" THEN ALCOHOL = 1; + dt[ALCOHOLLIVER_MLD==1, ALCOHOL := 1] + + + if (poa) { + # %if &POA. = 1 %then %do; + # /****************************************************/ + # /* IF POA flags are available, assign comorbidities */ + # /* requiring POA that are also not exempt from POA */ + # /* reporting. 
*/ + # /****************************************************/ + # EXEMPTPOA(I) = 0; + # IF (ICDVER = 38 AND PUT(DX(I),$poaxmpt_v38fmt.)='1') OR + # (ICDVER = 37 AND PUT(DX(I),$poaxmpt_v37fmt.)='1') OR + # (ICDVER = 36 AND PUT(DX(I),$poaxmpt_v36fmt.)='1') OR + # (ICDVER = 35 AND PUT(DX(I),$poaxmpt_v35fmt.)='1') OR + # (ICDVER = 34 AND PUT(DX(I),$poaxmpt_v34fmt.)='1') OR + # (ICDVER = 33 AND PUT(DX(I),$poaxmpt_v33fmt.)='1') THEN EXEMPTPOA(I) = 1; + + # !R: if icd10cm_vers = NULL, formats extracted from year/quarter (see above) + dt[ICDVER == 33, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2021Formats$ElixhauserAHRQ2021Map$poaxmpt_v33fmt)] + dt[ICDVER == 34, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2021Formats$ElixhauserAHRQ2021Map$poaxmpt_v34fmt)] + dt[ICDVER == 35, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2021Formats$ElixhauserAHRQ2021Map$poaxmpt_v35fmt)] + dt[ICDVER == 36, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2021Formats$ElixhauserAHRQ2021Map$poaxmpt_v36fmt)] + dt[ICDVER == 37, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2021Formats$ElixhauserAHRQ2021Map$poaxmpt_v37fmt)] + dt[ICDVER == 38, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2021Formats$ElixhauserAHRQ2021Map$poaxmpt_v38fmt)] + + # /**** Flag record if diagnosis is POA exempt or requires POA and POA ****/ + # /**** indicates present on admission (Y or W) ****/ + # IF (EXEMPTPOA(I) = 1) or (EXEMPTPOA(I) = 0 AND DXPOA(I) IN ("Y","W")) THEN DO; + # DO K = 1 TO 19; + # IF DXVALUE = VALPOA(K) THEN COMPOA(K) = 1; + # END; + # !R: VALPOA w/conditions + for (i in VALPOA) { + dt[EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W')), + paste0(i) := as.numeric(code %in% dxvalues[[i]])] + } + + # IF DXVALUE = "DRUG_ABUSEPSYCHOSES" THEN PSYCHOSES = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + DRUG_ABUSEPSYCHOSES==1, + PSYCHOSES := 1] + + # IF DXVALUE = "CHFHTN_CX" THEN CHF = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 
'W'))) & + CHFHTN_CX==1, + CHF := 1] + + # IF DXVALUE = "HTN_CXRENLFL_SEV" THEN RENLFL_SEV = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + HTN_CXRENLFL_SEV==1, + RENLFL_SEV := 1] + + # IF DXVALUE = "CHFHTN_CXRENLFL_SEV" THEN DO; + # CHF = 1; + # RENLFL_SEV = 1; + # END; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + CHFHTN_CXRENLFL_SEV==1, + c('CHF', 'RENLFL_SEV') := 1] + + # IF DXVALUE = "CBVD_SQLAPARALYSIS" THEN DO; + # PARALYSIS = 1; + # CBVD_SQLA = 1; + # END; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + CBVD_SQLAPARALYSIS==1, + c('PARALYSIS', 'CBVD_SQLA') := 1] + + # IF DXVALUE = "ALCOHOLLIVER_MLD" THEN LIVER_MLD = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + ALCOHOLLIVER_MLD==1, + LIVER_MLD := 1] + + # END; + + # /****************************************************/ + # /* IF POA flags are available, assign comorbidities */ + # /* requiring that the diagnosis is not POA */ + # /****************************************************/ + # IF (EXEMPTPOA(I) = 0 AND DXPOA(I) IN ("N","U")) THEN DO; + # IF DXVALUE = "CBVD_POA" THEN CBVD_NPOA = 1; + + dt[EXEMPTPOA==0 & poa_code %in% c('N', 'U') & + code %in% dxvalues$CBVD_POA, + c('CBVD_NPOA') := 1] + } + # END; + # %end; + # END; + # END; + # + + #! R pivot_wider to get pre-exclusion assignments + IDs + #! R make any NA -> 0 + for (i in names(dt)) { + dt[is.na(get(i)), (i):=0] + } + + to_pivot = c('id', + Elixhauser2021Formats$ElixhauserAHRQ2021PreExclusion) + # Remove columns in to_pivot that do not exist if poa=F + to_pivot = to_pivot[to_pivot %in% colnames(dt)] + dt = dt[, ..to_pivot] + + dt = dt[, lapply(.SD, sum), by=id, + .SDcols = to_pivot[-1]] + + dt[, names(dt)[-1] := lapply(.SD, function(x) as.integer(x!=0)), + .SDcols = names(dt)[-1]] + + # /****************************************************/ + # /* Implement exclusions for comorbidities that are */ + # /* neutral to POA. 
*/ + # /****************************************************/ + # IF DIAB_CX = 1 then DIAB_UNCX = 0; + dt[DIAB_CX==1, DIAB_UNCX := 0] + + # IF HTN_CX = 1 then HTN_UNCX = 0; + dt[HTN_CX==1, HTN_UNCX := 0] + + # IF CANCER_METS = 1 THEN DO; + # CANCER_SOLID = 0; + # CANCER_NSITU = 0; + dt[CANCER_METS==1, c('CANCER_SOLID', 'CANCER_NSITU') := 0] + + # END; + # IF CANCER_SOLID = 1 then CANCER_NSITU = 0; + dt[CANCER_SOLID==1, CANCER_NSITU := 0] + # + + if (poa) { + # /****************************************************/ + # /* IF POA flags are available, implement exclusions */ + # /* for comorbidities requiring POA. */ + # /****************************************************/ + # %if &POA. = 1 %then %do; + # IF LIVER_SEV = 1 then LIVER_MLD = 0; + dt[LIVER_SEV==1, LIVER_MLD := 0] + + # IF RENLFL_SEV = 1 then RENLFL_MOD = 0; + dt[RENLFL_SEV==1, RENLFL_MOD := 0] + + # IF (CBVD_POA=1) or (CBVD_POA=0 and CBVD_NPOA=0 and CBVD_SQLA=1) then CBVD = 1; + dt[(CBVD_POA==1) | (CBVD_POA==0 & CBVD_NPOA==0 & CBVD_SQLA==1), + CBVD := 1] + } + # %end; + # + # !R: Remainder of SAS code not relevant + + # !R: Get final comorbidities + keep_vars = c('id', + Elixhauser2021Formats$ElixhauserAHRQ2021Abbr) + # Drop vars that don't exist if poa=T + keep_vars = keep_vars[keep_vars %in% colnames(dt)] + dt = dt[, ..keep_vars] + + # Compute total score + dt[, score := rowSums(.SD), + .SDcols = keep_vars[-1]] + + # Rename id back to user-specified + data.table::setnames(dt, new_col_names, names(new_col_names), + skip_absent = T) + + # Return as data.frame + as.data.frame(dt) +} + +get_ahrq_2022 = function( + df, + patient_id = NULL, + icd_code = NULL, + icd_seq = NULL, + poa_code = NULL, + year = NULL, + quarter = NULL, + icd10cm_vers = NULL, # If NULL, vers derived from year/quarter columns + return_n_unique = T # For N comorbidity vs. 
N ICD-Codes per comorbdiity +) { + + # Set poa value based on whether poa_code is supplied + if (is.null(poa_code)) { + poa = F + } else { + poa = T + } + + # Make df into dt + dt = data.table::data.table(df) + + # rename cols + new_col_names = c() + new_col_names[patient_id] = 'id' + new_col_names[icd_code] = 'code' + new_col_names[icd_seq] = 'icd_seq' + new_col_names[poa_code] = 'poa_code' + new_col_names[year] = 'year' + new_col_names[quarter] = 'quarter' + + data.table::setnames(dt, names(new_col_names), new_col_names, + skip_absent = T) + + # Start AHRQ, SAS code in comments as reference + # get_ahrq_2021-specific comments begin with "# !R": + # + # !R: Code is implemented differently in R, not all SAS lines will have a 1:1 + # corresponding R counterpart. SAS code included only as reference + + # /******************************************************************/ + # /* Title: ELIXHAUSER COMORBIDITY SOFTWARE REFINED */ + # /* FOR ICD-10-CM MAPPING PROGRAM */ + # /* */ + # /* Program: Mapping_Program_v2022-1.SAS */ + # /* */ + # /* Diagnoses: v2022-1 is compatible with ICD-10-CM diagnosis */ + # /* codes from October 2015 through September 2022. */ + # /* ICD-10-CM codes should not include embedded */ + # /* decimals (example: S0100XA, not S01.00XA). */ + # /* */ + # /* Description: This SAS mapping program assigns the Elixhauser */ + # /* comorbidity measures from ICD-10-CM secondary */ + # /* diagnoses. Some comorbidities require additional */ + # /* information on whether the diagnosis was present */ + # /* on admission (POA). Please specify below if POA */ + # /* indicators are available in your data. If POA */ + # /* information is not available, comorbidities that */ + # /* require POA will be set to missing. */ + # /* */ + # /* Note: The SAS program Format_Program_v2022-1 must */ + # /* be run prior to running this mapping program. */ + # /* */ + # /* Output: This program appends the comorbidity measures */ + # /* to the input SAS file. 
The data elements start */ + # /* with the 4-character prefix */ + # /* */ + # /******************************************************************/ + + # !R: SAS Macro definitions lines omitted + + ############################################################################################### + # The following block of SAS code is a copy of "Mapping_Program_v2022-1.sas" lines 100-124. + ############################################################################################### + # /*****************************************/ + # /* Establish the ICD-10-CM Version */ + # /* This will default to the last version */ + # /* for discharges outside of coding */ + # /* updates. */ + # /*****************************************/ + # ATTRIB ICDVER LENGTH=3 LABEL='ICD-10-CM VERSION'; + # + # ICDVER = 0; + # + # IF (YEAR IN (2015) AND DQTR IN (4)) THEN ICDVER = 33; + # ELSE IF (YEAR IN (2016) AND DQTR IN (1,2,3)) THEN ICDVER = 33; + # ELSE IF (YEAR IN (2016) AND DQTR IN (4)) THEN ICDVER = 34; + # ELSE IF (YEAR IN (2017) AND DQTR IN (1,2,3)) THEN ICDVER = 34; + # ELSE IF (YEAR IN (2017) AND DQTR IN (4)) THEN ICDVER = 35; + # ELSE IF (YEAR IN (2018) AND DQTR IN (1,2,3)) THEN ICDVER = 35; + # ELSE IF (YEAR IN (2018) AND DQTR IN (4)) THEN ICDVER = 36; + # ELSE IF (YEAR IN (2019) AND DQTR IN (1,2,3)) THEN ICDVER = 36; + # ELSE IF (YEAR IN (2019) AND DQTR IN (4)) THEN ICDVER = 37; + # ELSE IF (YEAR IN (2020) AND DQTR IN (1,2,3)) THEN ICDVER = 37; + # ELSE IF (YEAR IN (2020) AND DQTR IN (4)) THEN ICDVER = 38; + # ELSE IF (YEAR IN (2021) AND DQTR IN (1,2,3)) THEN ICDVER = 38; + # ELSE IF (YEAR IN (2021) AND DQTR IN (4)) THEN ICDVER = 39; + # ELSE IF (YEAR IN (2022) AND DQTR IN (1,2,3)) THEN ICDVER = 39; + # ELSE ICDVER = 39; + + if (is.null(icd10cm_vers)) { + dt[, ICDVER := 39] # !R: Default value + dt[year == 2015 & quarter == 4, + ICDVER := 33] + dt[year == 2016 & quarter < 4, + ICDVER := 33] + dt[year == 2016 & quarter == 4, + ICDVER := 34] + dt[year == 2017 & quarter < 4, + 
ICDVER := 34] + dt[year == 2017 & quarter == 4, + ICDVER := 35] + dt[year == 2018 & quarter < 4, + ICDVER := 35] + dt[year == 2018 & quarter == 4, + ICDVER := 36] + dt[year == 2019 & quarter < 4, + ICDVER := 36] + dt[year == 2019 & quarter == 4, + ICDVER := 37] + dt[year == 2020 & quarter < 4, + ICDVER := 37] + dt[year == 2020 & quarter == 4, + ICDVER := 38] + dt[year == 2021 & quarter < 4, + ICDVER := 38] + } else { + dt[, ICDVER := icd10cm_vers] + } + + # + # /********************************************/ + # /* Establish lengths for all comorbidity */ + # /* flags. */ + # /********************************************/ + + # !R: Unncessary SAS code omitted + + # + # /********************************************/ + # /* Create diagnosis and comorbidity arrays */ + # /* for all comorbidity flags. */ + # /********************************************/ + + # !R: Unncessary SAS code omitted + + ############################################################################################### + # The following block of SAS code is a copy of "Mapping_Program_v2022-1.sas" lines 153-156. 
+ ############################################################################################### + # ARRAY VALANYPOA (20) $13 A1-A20 + # ("AIDS" "ALCOHOL" "AUTOIMMUNE" "LUNG_CHRONIC" "DEMENTIA" "DEPRESS" "DIAB_UNCX" "DIAB_CX" + # "DRUG_ABUSE" "HTN_UNCX" "HTN_CX" "THYROID_HYPO" "THYROID_OTH" "CANCER_LYMPH" "CANCER_LEUK" + # "CANCER_METS" "OBESE" "PERIVASC" "CANCER_SOLID" "CANCER_NSITU" ); + + VALANYPOA = c( + 'AIDS', + 'ALCOHOL', + 'AUTOIMMUNE', + 'LUNG_CHRONIC', + 'DEMENTIA', + 'DEPRESS', + 'DIAB_UNCX', + 'DIAB_CX', + 'DRUG_ABUSE', + 'HTN_UNCX', + 'HTN_CX', + 'THYROID_HYPO', + 'THYROID_OTH', + 'CANCER_LYMPH', + 'CANCER_LEUK', + 'CANCER_METS', + 'OBESE', + 'PERIVASC', + 'CANCER_SOLID', + 'CANCER_NSITU' + ) + + ############################################################################################### + # The following block of SAS code is a copy of "Mapping_Program_v2022-1.sas" lines 158-171. + ############################################################################################### + # /****************************************************/ + # /* If POA flags are available, create POA, exempt, */ + # /* and value arrays. */ + # /****************************************************/ + # %if &POA. 
= 1 %then %do; + # ARRAY EXEMPTPOA (&NUMDX) EXEMPTPOA1 - EXEMPTPOA&NUMDX; + # + # ARRAY DXPOA (&NUMDX) $ &POAPREFIX.1 - &POAPREFIX.&NUMDX; + # + # ARRAY VALPOA (19) $13 B1-B19 + # ("ANEMDEF" "BLDLOSS" "HF" "COAG" "LIVER_MLD" "LIVER_SEV" + # "NEURO_MOVT" "NEURO_SEIZ" "NEURO_OTH" "PARALYSIS" "PSYCHOSES" "PULMCIRC" "RENLFL_MOD" + # "RENLFL_SEV" "ULCER_PEPTIC" "WGHTLOSS" "CBVD_POA" "CBVD_SQLA" "VALVE"); + # %end; + + + # !R: These variables require POA status AND require "Y" or "W" POA (present) + # !R: Note CBVD_POA requires POA status but requires "N" or "U" (not present) + VALPOA = c( + 'ANEMDEF', + 'BLDLOSS', + 'HF', + 'COAG', + 'LIVER_MLD', + 'LIVER_SEV', + 'NEURO_MOVT', + 'NEURO_SEIZ', + 'NEURO_OTH', + 'PARALYSIS', + 'PSYCHOSES', + 'PULMCIRC', + 'RENLFL_MOD', + 'RENLFL_SEV', + 'ULCER_PEPTIC', + 'WGHTLOSS', + 'CBVD_POA', + 'CBVD_SQLA', + 'VALVE' + ) + + ############################################################################################### + # The following block of SAS code is a copy of "Mapping_Program_v2022-1.sas" lines 173-198. + ############################################################################################### + # /****************************************************/ + # /* Initialize POA independent comorbidity flags to */ + # /* zero. */ + # /****************************************************/ + # DO I = 1 TO 20; + # COMANYPOA(I) = 0; + # END; + # + # /****************************************************/ + # /* IF POA flags are available, initialize POA */ + # /* dependent comorbidiy flags to zero. If POA flags */ + # /* are not available, these fields will be default */ + # /* to missing. */ + # /****************************************************/ + + if (poa) { + # %if &POA. 
= 1 %then %do; + # DO I = 1 TO 19; + # COMPOA(I) = 0; + # END; + # CBVD_NPOA = 0; + # CBVD = 0; + # EXEMPTPOA1 = 0; + dt[, c('CBVD_NPOA', 'CBVD', 'EXEMPTPOA1') := 0] ###### "EXEMPTPOA" changed to "EXEMPTPOA1" + # %end; + } else { + # %else %do; + # CBVD_NPOA = .; + # CBVD = .; + dt[, c('CBVD_NPOA', 'CBVD') := NA] + # %end; + } + + + ############################################################################################### + # The following block of SAS code is a copy of "Mapping_Program_v2022-1.sas" lines 200-226. + ############################################################################################### + # /****************************************************/ + # /* Examine each secondary diagnosis on a record and */ + # /* assign comorbidity flags. */ + # /* 1) Assign comorbidities which are neutral to POA */ + # /* reporting. */ + # /* 2) IF POA flags are available, assign */ + # /* comorbidities that require a diagnosis be */ + # /* present on admission and are not exempt from */ + # /* POA reporting. */ + # /* 3) IF POA flags are available, assign one */ + # /* comorbidity that requires that the diagnosis */ + # /* NOT be present admission. 
*/ + # /****************************************************/ + # DO I = 2 TO MIN(I10_NDX, &NUMDX); + # IF DX(I) NE " " THEN DO; + # + # DXVALUE = PUT(DX(I),COMFMT.); + + + # + # /****************************************************/ + # /* Assign Comorbidities that are neutral to POA */ + # /****************************************************/ + # DO J = 1 TO 20; + # IF DXVALUE = VALANYPOA(J) THEN COMANYPOA(J) = 1; + # END; + + dt[icd_seq==1, code := ''] # !R: Omit 1st diagnosis + dxvalues = Elixhauser2022Formats$ElixhauserAHRQ2022Map$comfmt + # !R: !VALPOA + for (i in names(dxvalues)[!names(dxvalues) %in% VALPOA]) { + dt[, paste0(i) := as.numeric(code %in% dxvalues[[i]])] + } + + ############################################################################################### + # The following set of logics are based on "Mapping_Program_v2022-1.sas" lines 221-232 + ############################################################################################### + # IF DXVALUE = "DRUG_ABUSEPSYCHOSES" THEN DRUG_ABUSE= 1; + dt[DRUG_ABUSEPSYCHOSES==1, DRUG_ABUSE := 1] + # IF DXVALUE = "HFHTN_CX" THEN HTN_CX = 1; + dt[HFHTN_CX==1, HTN_CX := 1] + # IF DXVALUE = "HTN_CXRENLFL_SEV" THEN HTN_CX = 1; + dt[HTN_CXRENLFL_SEV==1, HTN_CX := 1] + # IF DXVALUE = "HFHTN_CXRENLFL_SEV" THEN HTN_CX = 1; + dt[HFHTN_CXRENLFL_SEV==1, HTN_CX := 1] + # IF DXVALUE = "ALCOHOLLIVER_MLD" THEN ALCOHOL = 1; + dt[ALCOHOLLIVER_MLD==1, ALCOHOL := 1] + # IF DXVALUE = "VALVE_AUTOIMMUNE" THEN AUTOIMMUNE= 1; + dt[VALVE_AUTOIMMUNE==1, AUTOIMMUNE := 1] + + if (poa) { + ############################################################################################### + # The following set of logics are based on "Mapping_Program_v2022-1.sas" lines 234-247 + ############################################################################################### + # %if &POA. 
= 1 %then %do; + # /****************************************************/ + # /* IF POA flags are available, assign comorbidities */ + # /* requiring POA that are also not exempt from POA */ + # /* reporting. */ + # /****************************************************/ + # EXEMPTPOA(I) = 0; + # IF (ICDVER = 39 AND PUT(DX(I),$poaxmpt_v39fmt.)='1') OR + # (ICDVER = 38 AND PUT(DX(I),$poaxmpt_v38fmt.)='1') OR + # (ICDVER = 37 AND PUT(DX(I),$poaxmpt_v37fmt.)='1') OR + # (ICDVER = 36 AND PUT(DX(I),$poaxmpt_v36fmt.)='1') OR + # (ICDVER = 35 AND PUT(DX(I),$poaxmpt_v35fmt.)='1') OR + # (ICDVER = 34 AND PUT(DX(I),$poaxmpt_v34fmt.)='1') OR + # (ICDVER = 33 AND PUT(DX(I),$poaxmpt_v33fmt.)='1') THEN EXEMPTPOA(I) = 1; + + # !R: if icd10cm_vers = NULL, formats extracted from year/quarter (see above) + dt[ICDVER == 33, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v33fmt)] + dt[ICDVER == 34, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v34fmt)] + dt[ICDVER == 35, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v35fmt)] + dt[ICDVER == 36, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v36fmt)] + dt[ICDVER == 37, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v37fmt)] + dt[ICDVER == 38, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v38fmt)] + dt[ICDVER == 39, + EXEMPTPOA := as.numeric( + code %in% + Elixhauser2022Formats$ElixhauserAHRQ2022Map$poaxmpt_v39fmt)] + + ############################################################################################### + # The following set of logics are based on "Mapping_Program_v2022-1.sas" lines 249-267 + ############################################################################################### + # /**** Flag record if diagnosis is POA exempt or requires POA and 
POA ****/ + # /**** indicates present on admission (Y or W) ****/ + # IF (EXEMPTPOA(I) = 1) or (EXEMPTPOA(I) = 0 AND DXPOA(I) IN ("Y","W")) THEN DO; + # DO K = 1 TO 19; + # IF DXVALUE = VALPOA(K) THEN COMPOA(K) = 1; + # END; + # !R: VALPOA w/conditions + for (i in VALPOA) { + dt[EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W')), + paste0(i) := as.numeric(code %in% dxvalues[[i]])] + } + + # IF DXVALUE = "DRUG_ABUSEPSYCHOSES" THEN PSYCHOSES = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + DRUG_ABUSEPSYCHOSES==1, + PSYCHOSES := 1] + + # IF DXVALUE = "HFHTN_CX" THEN CHF = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + HFHTN_CX==1, + HF := 1] + + # IF DXVALUE = "HTN_CXRENLFL_SEV" THEN RENLFL_SEV = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + HTN_CXRENLFL_SEV==1, + RENLFL_SEV := 1] + + # IF DXVALUE = "HFHTN_CXRENLFL_SEV" THEN DO; + # HF = 1; + # RENLFL_SEV = 1; + # END; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + HFHTN_CXRENLFL_SEV==1, + c('HF', 'RENLFL_SEV') := 1] + + # IF DXVALUE = "CBVD_SQLAPARALYSIS" THEN DO; + # PARALYSIS = 1; + # CBVD_SQLA = 1; + # END; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + CBVD_SQLAPARALYSIS==1, + c('PARALYSIS', 'CBVD_SQLA') := 1] + + # IF DXVALUE = "ALCOHOLLIVER_MLD" THEN LIVER_MLD = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + ALCOHOLLIVER_MLD==1, + LIVER_MLD := 1] + # IF DXVALUE = "VALVE_AUTOIMMUNE" THEN VALVE = 1; + dt[(EXEMPTPOA==1 | (EXEMPTPOA==0 & poa_code %in% c('Y', 'W'))) & + VALVE_AUTOIMMUNE==1, + VALVE := 1] + + # END; + + ############################################################################################### + # The following set of logics are based on "Mapping_Program_v2022-1.sas" lines 270-278 + ############################################################################################### + # /****************************************************/ + # /* IF 
POA flags are available, assign comorbidities */ + # /* requiring that the diagnosis is not POA */ + # /****************************************************/ + # IF (EXEMPTPOA(I) = 0 AND DXPOA(I) IN ("N","U")) THEN DO; + # IF DXVALUE = "CBVD_POA" THEN CBVD_NPOA = 1; + + dt[EXEMPTPOA==0 & poa_code %in% c('N', 'U') & + code %in% dxvalues$CBVD_POA, + c('CBVD_NPOA') := 1] + } + # END; + # %end; + # END; + # END; + # + + #! R pivot_wider to get pre-exclusion assignments + IDs + #! R make any NA -> 0 + for (i in names(dt)) { + dt[is.na(get(i)), (i):=0] + } + + to_pivot = c('id', + Elixhauser2022Formats$ElixhauserAHRQ2022PreExclusion) + + # Remove columns in to_pivot that do not exist if poa=F + to_pivot = to_pivot[to_pivot %in% colnames(dt)] + dt = dt[, ..to_pivot] + + dt = dt[, lapply(.SD, sum), by=id, + .SDcols = to_pivot[-1]] + + dt[, names(dt)[-1] := lapply(.SD, function(x) as.integer(x!=0)), + .SDcols = names(dt)[-1]] + + + ############################################################################################### + # The following set of logics are based on "Mapping_Program_v2022-1.sas" lines 280-290 + ############################################################################################### + # /****************************************************/ + # /* Implement exclusions for comorbidities that are */ + # /* neutral to POA. 
*/ + # /****************************************************/ + # IF DIAB_CX = 1 then DIAB_UNCX = 0; + dt[DIAB_CX==1, DIAB_UNCX := 0] + + # IF HTN_CX = 1 then HTN_UNCX = 0; + dt[HTN_CX==1, HTN_UNCX := 0] + + # IF CANCER_METS = 1 THEN DO; + # CANCER_SOLID = 0; + # CANCER_NSITU = 0; + dt[CANCER_METS==1, c('CANCER_SOLID', 'CANCER_NSITU') := 0] + + # END; + # IF CANCER_SOLID = 1 then CANCER_NSITU = 0; + dt[CANCER_SOLID==1, CANCER_NSITU := 0] + # + + + ############################################################################################### + # The following set of logics are based on "Mapping_Program_v2022-1.sas" lines 292-299 + ############################################################################################### + if (poa) { + # /****************************************************/ + # /* IF POA flags are available, implement exclusions */ + # /* for comorbidities requiring POA. */ + # /****************************************************/ + # %if &POA. = 1 %then %do; + # IF LIVER_SEV = 1 THEN LIVER_MLD = 0; + dt[LIVER_SEV==1, LIVER_MLD := 0] + + # IF RENLFL_SEV = 1 THEN RENLFL_MOD = 0; + dt[RENLFL_SEV==1, RENLFL_MOD := 0] + + # IF (CBVD_POA=1) OR (CBVD_POA=0 AND CBVD_NPOA=0 AND CBVD_SQLA=1) THEN CBVD = 1; + dt[(CBVD_POA==1) | (CBVD_POA==0 & CBVD_NPOA==0 & CBVD_SQLA==1), + CBVD := 1] + } + # %end; + # + # !R: Remainder of SAS code not relevant + + # !R: Get final comorbidities + keep_vars = c('id', + Elixhauser2022Formats$ElixhauserAHRQ2022Abbr) + # Drop vars that don't exist if poa=T + keep_vars = keep_vars[keep_vars %in% colnames(dt)] + dt = dt[, ..keep_vars] + + # Compute total score + dt[, score := rowSums(.SD), + .SDcols = keep_vars[-1]] + + # Rename id back to user-specified + data.table::setnames(dt, new_col_names, names(new_col_names), + skip_absent = T) + + # Return as data.frame + as.data.frame(dt) +} \ No newline at end of file diff --git a/R/check_output.R b/R/check_output.R index ac8fe89..8f72d6e 100644 --- a/R/check_output.R +++ 
b/R/check_output.R @@ -13,7 +13,14 @@ x[["wscore_vw"]] <- NULL x[["windex_ahrq"]] <- NULL x[["windex_vw"]] <- NULL - } + } else if (grepl("elixhauser_ahrq_", score)) { + x[["score"]] <- NULL + x[["index"]] <- NULL + x[["wscore_ahrq"]] <- NULL + x[["wscore_vw"]] <- NULL + x[["windex_ahrq"]] <- NULL + x[["windex_vw"]] <- NULL + } # Check that all identified comorbidites are either 0 or 1 if (!all(x == 0 | x == 1)) stop("'comorbidity' ended up in an unexpected state.\nPlease report a bug with a reproducible example at https://github.com/ellessenne/comorbidity/issues\n", call. = FALSE) } diff --git a/R/comorbidity.R b/R/comorbidity.R index 02f9b0d..16005ff 100644 --- a/R/comorbidity.R +++ b/R/comorbidity.R @@ -7,7 +7,7 @@ #' Column names must be syntactically valid names, otherwise they are forced to be so by calling the [make.names()] function. #' @param id Column of `x` containing the individual ID. #' @param code Column of `x` containing diagnostic codes. Codes must be in upper case with no punctuation in order to be properly recognised. -#' @param score The comorbidity score to compute. Possible choices are the weighted Charlson score (`charlson`) and the weighted Elixhauser score (`elixhauser`). Values are case-insensitive. +#' @param score The comorbidity score to compute. Possible choices are the weighted Charlson score (`charlson`), the weighted (pre 2019 AHRQ) Elixhauser score (`elixhauser`), and the 2019 AHRQ weighted Elixhauser score (`elixhauser_ahrq_2020`). Values are case-insensitive. #' @param assign0 Apply a hierarchy of comorbidities. If `TRUE`, should a comorbidity be present in a patient with different degrees of severity, then the milder form will be assigned to 0 and therefore not counted. By doing this, a type of comorbidity is not counted more than once in each patient. 
In particular, the comorbidities that are affected by this argument are: #' * "Mild liver disease" (`mld`) and "Moderate/severe liver disease" (`msld`) for the Charlson score; #' * "Diabetes" (`diab`) and "Diabetes with complications" (`diabwc`) for the Charlson score; @@ -15,10 +15,19 @@ #' * "Hypertension, uncomplicated" (`hypunc`) and "Hypertension, complicated" (`hypc`) for the Elixhauser score; #' * "Diabetes, uncomplicated" (`diabunc`) and "Diabetes, complicated" (`diabc`) for the Elixhauser score; #' * "Solid tumour" (`solidtum`) and "Metastatic cancer" (`metacanc`) for the Elixhauser score. +#' +#' Note: This argument has no effect on Elixhauser AHRQ as these choices are incorporated into AHRQ calculations. If using 'elixhauser_ahrq_2020' it is recommended to specify assign0 = FALSE to avoid confusion. #' @param icd The version of ICD coding to use. Possible choices are ICD-9-CM (`icd9`) or ICD-10 (`icd10`). Defaults to `icd10`, and values are case-insensitive. +#' Note: if 'elixhauser_ahrq_2020' is selected, icd must equal 'icd10'. #' @param factorise Return comorbidities as factors rather than numeric, where (1 = presence of comorbidity, 0 = otherwise). Defaults to `FALSE`. #' @param labelled Attach labels to each comorbidity, compatible with the RStudio viewer via the [utils::View()] function. Defaults to `TRUE`. #' @param tidy.codes Tidy diagnostic codes? If `TRUE`, all codes are converted to upper case and all non-alphanumeric characters are removed using the regular expression \code{[^[:alnum:]]}. Defaults to `TRUE`. +#' @param drg Column of `x` that contains DRG codes associated with the encounter. Defaults to `NULL` but must be specified if score = 'elixhauser_ahrq_2020'. +#' @param icd_rank Column of `x` that contains the rank or position of each ICD diagnosis code. Defaults to `NULL` but must be specified if score is 'elixhauser_ahrq_2020', 'elixhauser_ahrq_2021' or 'elixhauser_ahrq_2022'. +#' @param poa Column of `x` that contains the present on admission status codes (e.g. 'Y', 'W', 'N', 'U'). 
Defaults to `NULL`; only used when score = 'elixhauser_ahrq_2021' or 'elixhauser_ahrq_2022'. +#' @param year Column of `x` that contains the calendar year of the admission. Defaults to `NULL`, but either `year` and `quarter` OR `icd10cm_vers` must be specified if score = 'elixhauser_ahrq_2021' or 'elixhauser_ahrq_2022'. +#' @param quarter Column of `x` that contains the calendar quarter of the admission. Defaults to `NULL`, but either `year` and `quarter` OR `icd10cm_vers` must be specified if score = 'elixhauser_ahrq_2021' or 'elixhauser_ahrq_2022'. +#' @param icd10cm_vers Column of `x` that contains the ICD10CM version. Must be specified if `year` and `quarter` are not. Must be `NULL` if `year` and `quarter` are not `NULL`. Defaults to `NULL`, but either `year` and `quarter` OR `icd10cm_vers` must be specified if score = 'elixhauser_ahrq_2021' or 'elixhauser_ahrq_2022'. #' @return A data frame with `id`, columns relative to each comorbidity domain, comorbidity score, weighted comorbidity score, and categorisations of such scores, with one row per individual. #' #' For the Charlson score, the following variables are included in the dataset: @@ -84,6 +93,51 @@ #' * `wscore_vw`, for the weighted version of the Elixhauser score using the algorithm in van Walraven _et al_. (2009); #' * `windex_ahrq`, for the weighted version of the grouped Elixhauser index using the AHRQ algorithm (Moore _et al_., 2017); #' * `windex_vw`, for the weighted version of the grouped Elixhauser index using the algorithm in van Walraven _et al_. (2009). +#' +#' For AHRQ Elixhauser (elixhauser_ahrq_2020), the dataset contains the same variables as 'Elixhauser' with the following exceptions: +#' * Comorbidity columns follow AHRQ's abbreviation formatting. 
+#' * In place of `hypunc` and `hypc`, those measures are combined to form `HTN_C`. +#' +#' For the refined AHRQ Elixhauser scores (elixhauser_ahrq_2021, elixhauser_ahrq_2022), the dataset contains the following: +#' * The `id` variable as defined by the user; +#' * `AIDS`, Acquired immune deficiency syndrome; +#' * `ALCOHOL`, Alcohol abuse; +#' * `ANEMDF`, (only if poa is supplied) Deficiency anemias; +#' * `ARTH`, Arthropathies; +#' * `BLDLOSS`, (only if poa is supplied) Chronic blood loss anemia; +#' * `CANCER_LEUK`, Leukemia; +#' * `CANCER_LYMPH`, Lymphoma; +#' * `CANCER_METS`, Metastatic cancer; +#' * `CANCER_NSITU`, Solid tumor without metastasis, in situ; +#' * `CANCER_SOLID`, Solid tumor without metastasis, malignant; +#' * `CBVD`, (only if poa is supplied) Cerebrovascular disease; +#' * `CHF`, (only if poa is supplied) Congestive heart failure; +#' * `COAG`, (only if poa is supplied) Coagulopathy; +#' * `DEMENTIA`, Dementia; +#' * `DEPRESS`, Depression; +#' * `DIAB_CX`, Diabetes with chronic complications; +#' * `DIAB_UNCX`, Diabetes without chronic complications; +#' * `DRUG_ABUSE`, Drug abuse; +#' * `HTN_CX`, Hypertension, complicated; +#' * `HTN_UNCX`, Hypertension, uncomplicated; +#' * `LIVER_MLD`, (only if poa is supplied) Liver disease, mild; +#' * `LIVER_SEV`, (only if poa is supplied) Liver disease, moderate to severe; +#' * `LUNG_CHRONIC`, Chronic pulmonary disease; +#' * `NEURO_MOVT`, (only if poa is supplied) Neurological disorders affecting movement; +#' * `NEURO_OTH`, (only if poa is supplied) Other neurological disorders; +#' * `NEURO_SEIZ`, (only if poa is supplied) Seizures and epilepsy; +#' * `OBESE`, Obesity; +#' * `PARALYSIS`, (only if poa is supplied) Paralysis; +#' * `PERIVASC`, Peripheral vascular disease; +#' * `PSYCHOSES`, (only if poa is supplied) Psychoses; +#' * `PULMCIRC`, (only if poa is supplied) Pulmonary circulation disease; +#' * `RENLFL_MOD`, (only if poa is supplied) Renal failure, moderate; +#' * `RENLFL_SEV`, (only if poa is supplied) Renal failure, severe; 
+#' * `THYROID_HYPO`, Hypothyroidism; +#' * `THYROID_OTH`, Other thyroid disorders; +#' * `ULCER_PEPTIC`, (only if poa is supplied) Peptic ulcer with bleeding; +#' * `VALVE`, (only if poa is supplied) Valvular disease; +#' * `WGHTLOSS`, (only if poa is supplied) Weight loss; #' #' Labels are presented to the user when using the RStudio viewer (e.g. via the [utils::View()] function) for convenience. #' @@ -96,6 +150,8 @@ #' @references Moore BJ, White S, Washington R, Coenen N, and Elixhauser A. _Identifying increased risk of readmission and in-hospital mortality using hospital administrative data: the AHRQ Elixhauser comorbidity index_. Medical Care 2017; 55(7):698-705. #' @references van Walraven C, Austin PC, Jennings A, Quan H and Forster AJ. _A modification of the Elixhauser comorbidity measures into a point system for hospital death using administrative data_. Medical Care 2009; 47(6):626-633. #' @references Menendez ME, Neuhaus V, van Dijk CN, Ring D. _The Elixhauser comorbidity method outperforms the Charlson index in predicting inpatient death after orthopaedic surgery_. Clinical Orthopaedics and Related Research 2014; 472(9):2878-2886. +#' @references _Healthcare Cost and Utilization Project. Elixhauser Comorbidity Software Version 3.7_ Available at https://www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp +#' @references _Healthcare Cost and Utilization Project. 
Elixhauser Comorbidity Software Refined for ICD-10-CM v2021.1_ Available at https://www.hcup-us.ahrq.gov/toolssoftware/comorbidityicd10/comorbidity_icd10.jsp #' #' @examples #' set.seed(1) @@ -112,19 +168,25 @@ #' comorbidity(x = x, id = "id", code = "code", score = "elixhauser", assign0 = FALSE) #' @export -comorbidity <- function(x, id, code, score, assign0, icd = "icd10", factorise = FALSE, labelled = TRUE, tidy.codes = TRUE) { +comorbidity <- function(x, id, code, score, assign0, icd = "icd10", factorise = FALSE, labelled = TRUE, tidy.codes = TRUE, drg = NULL, icd_rank = NULL, poa = NULL, year = NULL, quarter = NULL, icd10cm_vers = NULL) { ### Check arguments arg_checks <- checkmate::makeAssertCollection() # x must be a data.frame (or a data.table) - checkmate::assert_true(all(class(x) %in% c("data.frame", "data.table", "tbl", "tbl_df")), add = arg_checks) + checkmate::assert_true(all(class(x[]) %in% c("data.frame", "data.table", "tbl", "tbl_df")), + add = arg_checks) # id, code, score, icd must be a single string value checkmate::assert_string(id, add = arg_checks) checkmate::assert_string(code, add = arg_checks) checkmate::assert_string(score, add = arg_checks) checkmate::assert_string(icd, add = arg_checks) - # score must be charlson, elixhauser; case insensitive + # score must be charlson, elixhauser, elixhauser_ahrq_2020; case insensitive score <- tolower(score) - checkmate::assert_choice(score, choices = c("charlson", "elixhauser"), add = arg_checks) + checkmate::assert_choice(score, choices = c("charlson", + "elixhauser", + "elixhauser_ahrq_2020", + "elixhauser_ahrq_2021", + "elixhauser_ahrq_2022"), + add = arg_checks) # icd must be icd9, icd10; case insensitive icd <- tolower(icd) checkmate::assert_choice(icd, choices = c("icd9", "icd10"), add = arg_checks) @@ -133,6 +195,44 @@ comorbidity <- function(x, id, code, score, assign0, icd = "icd10", factorise = checkmate::assert_logical(factorise, len = 1, add = arg_checks) 
checkmate::assert_logical(labelled, len = 1, add = arg_checks) checkmate::assert_logical(tidy.codes, len = 1, add = arg_checks) + # drg and icd_rank must be supplied when score='elixhauser_ahrq_2020' + checkmate::assert_true( + (score=='elixhauser_ahrq_2020' & !is.null(drg) & !is.null(icd_rank)) | + score!='elixhauser_ahrq_2020', + add = arg_checks + ) + # icd_rank and either (year & quarter) or icd10cm_vers must not be null + # when score = 'elixhauser_ahrq_2021' or 'elixhauser_ahrq_2022' + checkmate::assert_true( + ( + (score=='elixhauser_ahrq_2021' | score=='elixhauser_ahrq_2022') & + !is.null(icd_rank) & + !is.null(icd10cm_vers)) | + ((score=='elixhauser_ahrq_2021' | score=='elixhauser_ahrq_2022') & + !is.null(icd_rank) & + !is.null(year) & + !is.null(quarter)) | + (score!='elixhauser_ahrq_2021' & score!='elixhauser_ahrq_2022'), + add = arg_checks + ) + # if icd10cm_vers is specified for elixhauser_ahrq_2021, vers must be integer + # in (33,34,35,36,37,38) + checkmate::assert_true( + score!='elixhauser_ahrq_2021' | + (score=='elixhauser_ahrq_2021' & is.null(icd10cm_vers)) | + (score=='elixhauser_ahrq_2021' & ifelse(is.null(icd10cm_vers), + F, + icd10cm_vers %in% 33:38)) + ) + # if icd10cm_vers is specified for elixhauser_ahrq_2022, vers must be integer + # in (33,34,35,36,37,38,39) + checkmate::assert_true( + score!='elixhauser_ahrq_2022' | + (score=='elixhauser_ahrq_2022' & is.null(icd10cm_vers)) | + (score=='elixhauser_ahrq_2022' & ifelse(is.null(icd10cm_vers), + F, + icd10cm_vers %in% 33:39)) + ) # force names to be syntactically valid: if (any(names(x) != make.names(names(x)))) { names(x) <- make.names(names(x)) @@ -152,60 +252,92 @@ comorbidity <- function(x, id, code, score, assign0, icd = "icd10", factorise = # Report if there are any errors if (!arg_checks$isEmpty()) checkmate::reportAssertions(arg_checks) - ### Tidy codes if required - if (tidy.codes) x <- .tidy(x = x, code = code) - - ### Extract regex for internal use - regex <- 
lofregex[[score]][[icd]] - - ### Subset only 'id' and 'code' columns - if (data.table::is.data.table(x)) { - x <- x[, c(id, code), with = FALSE] + ############################################################################## + ## Isolate scores computed within comorbidity() vs. within other functions + if (score %in% c("charlson", "elixhauser")) { + ### Tidy codes if required + if (tidy.codes) x <- .tidy(x = x, code = code) + + ### Extract regex for internal use + regex <- lofregex[[score]][[icd]] + + ### Subset only 'id' and 'code' columns + if (data.table::is.data.table(x)) { + x <- x[, c(id, code), with = FALSE] + } else { + x <- x[, c(id, code)] + } + + ## Turn x into a DT + data.table::setDT(x) + + ### Get list of unique codes used in dataset that match comorbidities + loc <- sapply(regex, grep, unique(x[[code]]), value = TRUE) + loc <- utils::stack(loc) + names(loc)[1] <- code + + ### Merge list with original data.table (data.frame) + x <- merge(x, loc, all.x = TRUE, allow.cartesian = TRUE) + x[[code]] <- NULL + x <- unique(x) + + ### Spread wide + xin <- x[, c(id, "ind"), with = FALSE] + xin[, value := 1L] + x <- data.table::dcast.data.table( + xin, stats::as.formula(paste(id, "~ ind")), fill = 0) + x[["NA"]] <- NULL + + ### Add missing columns + for (col in names(regex)) { + if (is.null(x[[col]])) x[[col]] <- 0 + } + data.table::setcolorder(x, c(id, names(regex))) + + ### Turn internal DT into a DF + data.table::setDF(x) + + ### Compute Charlson score and Charlson index + if (score == "charlson") { + x$score <- with(x, ami + chf + pvd + cevd + dementia + copd + rheumd + pud + mld * ifelse(msld == 1 & assign0, 0, 1) + diab * ifelse(diabwc == 1 & assign0, 0, 1) + diabwc + hp + rend + canc * ifelse(metacanc == 1 & assign0, 0, 1) + msld + metacanc + aids) + x$index <- with(x, cut(score, breaks = c(0, 1, 2.5, 4.5, Inf), labels = c("0", "1-2", "3-4", ">=5"), right = FALSE)) + x$wscore <- with(x, ami + chf + pvd + cevd + dementia + copd + rheumd + pud + mld * 
ifelse(msld == 1 & assign0, 0, 1) + diab * ifelse(diabwc == 1 & assign0, 0, 1) + diabwc * 2 + hp * 2 + rend * 2 + canc * ifelse(metacanc == 1 & assign0, 0, 2) + msld * 3 + metacanc * 6 + aids * 6) + x$windex <- with(x, cut(wscore, breaks = c(0, 1, 2.5, 4.5, Inf), labels = c("0", "1-2", "3-4", ">=5"), right = FALSE)) + } else { + ### Compute pre-2017 Elixhauser scores + x$score <- with(x, chf + carit + valv + pcd + pvd + hypunc * ifelse(hypc == 1 & assign0, 0, 1) + hypc + para + ond + cpd + diabunc * ifelse(diabc == 1 & assign0, 0, 1) + diabc + hypothy + rf + ld + pud + aids + lymph + metacanc + solidtum * ifelse(metacanc == 1 & assign0, 0, 1) + rheumd + coag + obes + wloss + fed + blane + dane + alcohol + drug + psycho + depre) + x$index <- with(x, cut(score, breaks = c(-Inf, 0, 1, 4.5, Inf), labels = c("<0", "0", "1-4", ">=5"), right = FALSE)) + x$wscore_ahrq <- with(x, chf * 9 + carit * 0 + valv * 0 + pcd * 6 + pvd * 3 + ifelse(hypunc == 1 | hypc == 1, 1, 0) * (-1) + para * 5 + ond * 5 + cpd * 3 + diabunc * ifelse(diabc == 1 & assign0, 0, 0) + diabc * (-3) + hypothy * 0 + rf * 6 + ld * 4 + pud * 0 + aids * 0 + lymph * 6 + metacanc * 14 + solidtum * ifelse(metacanc == 1 & assign0, 0, 7) + rheumd * 0 + coag * 11 + obes * (-5) + wloss * 9 + fed * 11 + blane * (-3) + dane * (-2) + alcohol * (-1) + drug * (-7) + psycho * (-5) + depre * (-5)) + x$wscore_vw <- with(x, chf * 7 + carit * 5 + valv * (-1) + pcd * 4 + pvd * 2 + ifelse(hypunc == 1 | hypc == 1, 1, 0) * 0 + para * 7 + ond * 6 + cpd * 3 + diabunc * ifelse(diabc == 1 & assign0, 0, 0) + diabc * 0 + hypothy * 0 + rf * 5 + ld * 11 + pud * 0 + aids * 0 + lymph * 9 + metacanc * 12 + solidtum * ifelse(metacanc == 1 & assign0, 0, 4) + rheumd * 0 + coag * 3 + obes * (-4) + wloss * 6 + fed * 5 + blane * (-2) + dane * (-2) + alcohol * 0 + drug * (-7) + psycho * 0 + depre * (-3)) + x$windex_ahrq <- with(x, cut(wscore_ahrq, breaks = c(-Inf, 0, 1, 4.5, Inf), labels = c("<0", "0", "1-4", ">=5"), right = FALSE)) + x$windex_vw 
<- with(x, cut(wscore_vw, breaks = c(-Inf, 0, 1, 4.5, Inf), labels = c("<0", "0", "1-4", ">=5"), right = FALSE)) + } } else { - x <- x[, c(id, code)] - } - - ### Turn x into a DT - data.table::setDT(x) - - ### Get list of unique codes used in dataset that match comorbidities - loc <- sapply(regex, grep, unique(x[[code]]), value = TRUE) - loc <- utils::stack(loc) - names(loc)[1] <- code - - ### Merge list with original data.table (data.frame) - x <- merge(x, loc, all.x = TRUE, allow.cartesian = TRUE) - x[[code]] <- NULL - x <- unique(x) - - ### Spread wide - xin <- x[, c(id, "ind"), with = FALSE] - xin[, value := 1L] - x <- data.table::dcast.data.table(xin, stats::as.formula(paste(id, "~ ind")), fill = 0) - x[["NA"]] <- NULL - - ### Add missing columns - for (col in names(regex)) { - if (is.null(x[[col]])) x[[col]] <- 0 - } - data.table::setcolorder(x, c(id, names(regex))) - - ### Turn internal DT into a DF - data.table::setDF(x) - - ### Compute Charlson score and Charlson index - if (score == "charlson") { - x$score <- with(x, ami + chf + pvd + cevd + dementia + copd + rheumd + pud + mld * ifelse(msld == 1 & assign0, 0, 1) + diab * ifelse(diabwc == 1 & assign0, 0, 1) + diabwc + hp + rend + canc * ifelse(metacanc == 1 & assign0, 0, 1) + msld + metacanc + aids) - x$index <- with(x, cut(score, breaks = c(0, 1, 2.5, 4.5, Inf), labels = c("0", "1-2", "3-4", ">=5"), right = FALSE)) - x$wscore <- with(x, ami + chf + pvd + cevd + dementia + copd + rheumd + pud + mld * ifelse(msld == 1 & assign0, 0, 1) + diab * ifelse(diabwc == 1 & assign0, 0, 1) + diabwc * 2 + hp * 2 + rend * 2 + canc * ifelse(metacanc == 1 & assign0, 0, 2) + msld * 3 + metacanc * 6 + aids * 6) - x$windex <- with(x, cut(wscore, breaks = c(0, 1, 2.5, 4.5, Inf), labels = c("0", "1-2", "3-4", ">=5"), right = FALSE)) - } else { - x$score <- with(x, chf + carit + valv + pcd + pvd + hypunc * ifelse(hypc == 1 & assign0, 0, 1) + hypc + para + ond + cpd + diabunc * ifelse(diabc == 1 & assign0, 0, 1) + diabc + 
hypothy + rf + ld + pud + aids + lymph + metacanc + solidtum * ifelse(metacanc == 1 & assign0, 0, 1) + rheumd + coag + obes + wloss + fed + blane + dane + alcohol + drug + psycho + depre) - x$index <- with(x, cut(score, breaks = c(-Inf, 0, 1, 4.5, Inf), labels = c("<0", "0", "1-4", ">=5"), right = FALSE)) - x$wscore_ahrq <- with(x, chf * 9 + carit * 0 + valv * 0 + pcd * 6 + pvd * 3 + ifelse(hypunc == 1 | hypc == 1, 1, 0) * (-1) + para * 5 + ond * 5 + cpd * 3 + diabunc * ifelse(diabc == 1 & assign0, 0, 0) + diabc * (-3) + hypothy * 0 + rf * 6 + ld * 4 + pud * 0 + aids * 0 + lymph * 6 + metacanc * 14 + solidtum * ifelse(metacanc == 1 & assign0, 0, 7) + rheumd * 0 + coag * 11 + obes * (-5) + wloss * 9 + fed * 11 + blane * (-3) + dane * (-2) + alcohol * (-1) + drug * (-7) + psycho * (-5) + depre * (-5)) - x$wscore_vw <- with(x, chf * 7 + carit * 5 + valv * (-1) + pcd * 4 + pvd * 2 + ifelse(hypunc == 1 | hypc == 1, 1, 0) * 0 + para * 7 + ond * 6 + cpd * 3 + diabunc * ifelse(diabc == 1 & assign0, 0, 0) + diabc * 0 + hypothy * 0 + rf * 5 + ld * 11 + pud * 0 + aids * 0 + lymph * 9 + metacanc * 12 + solidtum * ifelse(metacanc == 1 & assign0, 0, 4) + rheumd * 0 + coag * 3 + obes * (-4) + wloss * 6 + fed * 5 + blane * (-2) + dane * (-2) + alcohol * 0 + drug * (-7) + psycho * 0 + depre * (-3)) - x$windex_ahrq <- with(x, cut(wscore_ahrq, breaks = c(-Inf, 0, 1, 4.5, Inf), labels = c("<0", "0", "1-4", ">=5"), right = FALSE)) - x$windex_vw <- with(x, cut(wscore_vw, breaks = c(-Inf, 0, 1, 4.5, Inf), labels = c("<0", "0", "1-4", ">=5"), right = FALSE)) + if (score == 'elixhauser_ahrq_2020') { + x <- get_ahrq_2020(x, id, code, assign0, drg, icd_rank) + } else if (score == 'elixhauser_ahrq_2021') { + x <- get_ahrq_2021( + df = x, + patient_id = id, + icd_code = code, + icd_seq = icd_rank, + poa_code = poa, + year = year, + quarter = quarter, + icd10cm_vers = icd10cm_vers # If NULL, vers derived from year/quarter columns + ) + } else { + x <- get_ahrq_2022( + df = x, + patient_id = id, 
+ icd_code = code, + icd_seq = icd_rank, + poa_code = poa, + year = year, + quarter = quarter, + icd10cm_vers = icd10cm_vers # If NULL, vers derived from year/quarter columns + ) + } } ### Check output for possible unknown-state errors @@ -220,3 +352,4 @@ comorbidity <- function(x, id, code, score, assign0, icd = "icd10", factorise = ### Return a tidy data.frame return(x) } + diff --git a/R/sample_drg.R b/R/sample_drg.R new file mode 100644 index 0000000..33e021c --- /dev/null +++ b/R/sample_drg.R @@ -0,0 +1,34 @@ +#' @title Simulate DRG codes +#' +#' @description A simple function to simulate DRG codes at random. +#' +#' @param n Number of DRG codes to simulate. +#' +#' @return A vector of `n` DRG diagnostic codes. +#' @examples +#' # Simulate 10 DRG codes +#' sample_drg(10) +#' +#' # Simulate a tidy dataset with 15 individuals and 200 rows +#' set.seed(1) +#' x <- data.frame( +#' id = sample(1:15, size = 200, replace = TRUE), +#' stringsAsFactors = FALSE +#' ) +#' x$drg = sample_drg(15)[x$id] +#' head(x) +#' @export + +sample_drg <- function(n = 1) { + ### Check arguments + arg_checks <- checkmate::makeAssertCollection() + # n must be a single numeric value + checkmate::assert_number(n, add = arg_checks) + # Report if there are any errors + if (!arg_checks$isEmpty()) { + checkmate::reportAssertions(arg_checks) + } + + ### Sample DRG codes + sample(unlist(lofmsdrg), n, replace=T) +} diff --git a/R/sample_year_quarter.R b/R/sample_year_quarter.R new file mode 100644 index 0000000..8f6bba8 --- /dev/null +++ b/R/sample_year_quarter.R @@ -0,0 +1,39 @@ +#' @title Simulate Year and Quarter +#' +#' @description A simple function to simulate Year and Quarter data at random. +#' +#' @param n Number of Year/Quarter pairs to generate. +#' +#' @return A data.frame with `n` rows and two columns, `year` and `quarter`. 
+#' @examples +#' # Simulate 10 Year/Quarter pairs +#' sample_year_quarter(10) +#' +#' # Simulate a tidy dataset with 15 individuals and 200 rows +#' set.seed(1) +#' x <- data.frame( +#' id = sample(1:15, size = 200, replace = TRUE), +#' stringsAsFactors = FALSE +#' ) +#' x[c('Year', 'Quarter')] = sample_year_quarter(15)[x$id,] +#' head(x) +#' @export + +sample_year_quarter <- function(n = 1) { + ### Check arguments + arg_checks <- checkmate::makeAssertCollection() + # n must be a single numeric value + checkmate::assert_number(n, add = arg_checks) + # Report if there are any errors + if (!arg_checks$isEmpty()) { + checkmate::reportAssertions(arg_checks) + } + + ### Sample years/quarters + as.data.frame( + list( + year = sample(2015:2021, n, replace=T), + quarter = sample(1:4, n, replace=T) + ) + ) +} diff --git a/R/sysdata.rda b/R/sysdata.rda index cd66743..31ae15d 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/README.Rmd b/README.Rmd index 1b5defd..db0ab3b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -99,6 +99,20 @@ x2 <- data.frame( all.equal(x1, x2) ``` +Simulating ICD-10 codes for AHRQ Elixhauser requires simulating DRG codes and ICD-10 ranks. The `comorbidity` package includes a function named `sample_drg()` that allows simulating DRG codes in a straightforward way. For example: + +```{r simulate-data-ahrq} +x = data.frame( + id = sample(1:5, size = 50, replace = TRUE), + code = sample_diag(n = 50), + icd_rank = 1 +) +x <- x[order(x$id, x$code), ] +x$icd_rank = ave(x$icd_rank, x$id, FUN=cumsum) # Simulate ICD Rank +x$drg = sample_drg(5)[x$id] # Simulate DRG +print(head(x, n = 15), row.names = FALSE) +``` + ## Simulating ICD-9 codes ICD-9 codes can be easily simulated too: @@ -179,6 +193,59 @@ elixhauser9 The weighted Elixhauser score is computed using both the AHRQ and the van Walraven algorithm (`wscore_ahrq` and `wscore_vw`). 
+Alternatively, Elixhauser scores based on AHRQ's SAS program (version 3.7) can be calculated provided that data includes DRG codes and ICD-10 code ranks: + +```{r elixhauser-ahrq-2020} +x = data.frame( + id = sample(1:5, size = 50, replace = TRUE), + code = sample_diag(n = 50), + icd_rank = 1 +) +x <- x[order(x$id, x$code), ] +x$icd_rank = ave(x$icd_rank, x$id, FUN=cumsum) # Simulate ICD Rank +x$drg = sample_drg(5)[x$id] # Simulate DRG + +elixhauser_ahrq_2020 = comorbidity(x = x, + id = 'id', + code = 'code', + score = 'elixhauser_ahrq_2020', + assign0 = FALSE, + drg = 'drg', + icd_rank = 'icd_rank') +elixhauser_ahrq_2020 +``` + +Elixhauser scores Refined for ICD-10-CM based on AHRQ's SAS program (v2021.1) can also be calculated provided that data includes ICD-10 code ranks, present on admission (POA) status codes, and either the admission calendar year and quarter or the ICD-10-CM version: + +```{r elixhauser-ahrq-2021} +x = data.frame( + id = sample(1:5, size = 50, replace = TRUE), + code = sample_diag(n = 50), + poa = sample(c('Y', 'W', 'N', 'U'), 50, replace=TRUE), + icd_rank = 1 +) +x <- x[order(x$id, x$code), ] +x$icd_rank = ave(x$icd_rank, x$id, FUN=cumsum) # Simulate ICD Rank +x[c('year', 'quarter')] = sample_year_quarter(5)[x$id,] # Get year/quarter + +elixhauser_2021 <- comorbidity( + x = x, + id = 'id', + code = 'code', + score = 'elixhauser_ahrq_2021', + assign0 = T, + icd_rank = 'icd_rank', + poa = 'poa', + year = 'year', + quarter = 'quarter' + ) +elixhauser_2021 +``` + +```{r} + +``` + ## Citation If you find `comorbidity` useful, please cite it in your publications: @@ -192,6 +259,7 @@ citation("comorbidity") This package is based on the ICD-10-based formulations of the Charlson score and Elixhauser score proposed by Quan _et al_. in 2005. The ICD-9 formulation of the Charlson score is also from Quan _et al_. The ICD-9-based Elixhauser score is according to the AHRQ formulation (Moore _et al_., 2017). 
+The AHRQ Elixhauser comorbidities are calculated using code from AHRQ's SAS program version 3.7 (Healthcare Cost and Utilization Project (HCUP)). Weights for the Charlson score are based on the original formulation by Charlson _et al_. in 1987, while weights for the Elixhauser score are based on work by van Walraven _et al_. Finally, the categorisation of scores and weighted scores is based on work by Menendez _et al_. Further details on each algorithm are included in the package vignette, which you can access by typing the following in the R console: @@ -206,6 +274,7 @@ vignette("comorbidityscores", package = "comorbidity") * Elixhauser A, Steiner C, Harris DR and Coffey RM. _Comorbidity measures for use with administrative data_. Medical Care 1998; 36(1):8-27. DOI: [10.1097/00005650-199801000-00004 ](https://doi.org/10.1097/00005650-199801000-00004 ) * van Walraven C, Austin PC, Jennings A, Quan H and Forster AJ. _A modification of the Elixhauser comorbidity measures into a point system for hospital death using administrative data_. Medical Care 2009; 47(6):626-633. DOI: [10.1097/mlr.0b013e31819432e5](https://doi.org/10.1097/mlr.0b013e31819432e5) * Menendez ME, Neuhaus V, van Dijk CN, Ring D. _The Elixhauser comorbidity method outperforms the Charlson index in predicting inpatient death after orthopaedic surgery_. Clinical Orthopaedics and Related Research 2014; 472(9):2878-2886. DOI: [10.1007/s11999-014-3686-7](https://doi.org/10.1007/s11999-014-3686-7) +* Healthcare Cost and Utilization Project (HCUP). (2017). _HCUP Elixhauser Comorbidity Software (3.7)_. Agency for Healthcare Research and Quality, Rockville, MD. 
[www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp](https://www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp) ## Copyright diff --git a/README.md b/README.md index 57d9983..73cf852 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # comorbidity -2019-12-26 +2020-09-29 [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/ellessenne/comorbidity?branch=master&svg=true)](https://ci.appveyor.com/project/ellessenne/comorbidity) @@ -119,6 +119,39 @@ all.equal(x1, x2) ## [1] "Component \"code\": 30 string mismatches" ``` +Simulating ICD-10 codes for AHRQ Elixhauser requires simulating DRG +codes and ICD-10 ranks. The `comorbidity` package includes a function +named `sample_drg()` that allows simulating DRG codes in a +straightforward way. For example: + +``` r +x = data.frame( + id = sample(1:5, size = 50, replace = TRUE), + code = sample_diag(n = 50), + icd_rank = 1 +) +x <- x[order(x$id, x$code), ] +x$icd_rank = ave(x$icd_rank, x$id, FUN=cumsum) # Simulate ICD Rank +x$drg = sample_drg(5)[x$id] # Simulate DRG +print(head(x, n = 15), row.names = FALSE) +## id code icd_rank drg +## 1 A422 1 976 +## 1 B447 2 976 +## 1 C512 3 976 +## 1 N481 4 976 +## 1 S564 5 976 +## 1 S907 6 976 +## 1 V549 7 976 +## 1 V779 8 976 +## 1 Y606 9 976 +## 2 A921 1 78 +## 2 B812 2 78 +## 2 C694 3 78 +## 2 E568 4 78 +## 2 E643 5 78 +## 2 H118 6 78 +``` + ## Simulating ICD-9 codes ICD-9 codes can be easily simulated too: @@ -272,6 +305,48 @@ elixhauser9 The weighted Elixhauser score is computed using both the AHRQ and the van Walraven algorithm (`wscore_ahrq` and `wscore_vw`). 
+Alternatively, Elixhauser scores based on AHRQ’s SAS program (version +3.7) can be calculated provided that data includes DRG codes and ICD-10 +code ranks: + +``` r +x = data.frame( + id = sample(1:5, size = 50, replace = TRUE), + code = sample_diag(n = 50), + icd_rank = 1 +) +x <- x[order(x$id, x$code), ] +x$icd_rank = ave(x$icd_rank, x$id, FUN=cumsum) # Simulate ICD Rank +x$drg = sample_drg(5)[x$id] # Simulate DRG + +elixhauser_ahrq = comorbidity(x = x, + id = 'id', + code = 'code', + score = 'elixhauser_ahrq_2020', + assign0 = FALSE, + drg = 'drg', + icd_rank = 'icd_rank') +elixhauser_ahrq +## id CHF VALVE PULMCIRC PERIVASC PARA NEURO CHRNLUNG DM DMCX HYPOTHY RENLFAIL LIVER ULCER AIDS +## 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## LYMPH METS TUMOR ARTH COAG OBESE WGHTLOSS LYTES BLDLOSS ANEMDEF ALCOHOL DRUG PSYCH DEPRESS HTN_C +## 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## score index wscore_ahrq wscore_vw windex_ahrq windex_vw +## 1 0 0 0 0 0 0 +## 2 0 0 0 0 0 0 +## 3 0 0 0 0 0 0 +## 4 0 0 0 0 0 0 +## 5 0 0 0 0 0 0 +``` + ## Citation If you find `comorbidity` useful, please cite it in your publications: @@ -305,10 +380,12 @@ This package is based on the ICD-10-based formulations of the Charlson score and Elixhauser score proposed by Quan *et al*. in 2005. The ICD-9 formulation of the Charlson score is also from Quan *et al*. The ICD-9-based Elixhauser score is according to the AHRQ formulation (Moore -*et al*., 2017). Weights for the Charlson score are based on the -original formulation by Charlson *et al*. in 1987, while weights for the -Elixhauser score are based on work by van Walraven *et al*. Finally, the -categorisation of scores and weighted scores is based on work by +*et al*., 2017). 
The AHRQ Elixhauser comorbidities are calculated using +code from AHRQ’s SAS program version 3.7 (Healthcare Cost and +Utilization Project (HCUP)). Weights for the Charlson score are based on +the original formulation by Charlson *et al*. in 1987, while weights for +the Elixhauser score are based on work by van Walraven *et al*. Finally, +the categorisation of scores and weighted scores is based on work by Menendez *et al*. Further details on each algorithm are included in the package vignette, which you can access by typing the following in the R console: @@ -345,6 +422,10 @@ vignette("comorbidityscores", package = "comorbidity") inpatient death after orthopaedic surgery*. Clinical Orthopaedics and Related Research 2014; 472(9):2878-2886. DOI: [10.1007/s11999-014-3686-7](https://doi.org/10.1007/s11999-014-3686-7) + - Healthcare Cost and Utilization Project (HCUP). (2017). *HCUP + Elixhauser Comorbidity Software (3.7)*. Agency for Healthcare + Research and Quality, Rockville, MD. + [www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp](www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp) ## Copyright diff --git a/data-raw/make-data.R b/data-raw/make-data.R index 62a9c15..46cc58b 100644 --- a/data-raw/make-data.R +++ b/data-raw/make-data.R @@ -1,266 +1,298 @@ -### Clean datasets to include in the package -# Required packages: -library(readxl) -library(stringr) -library(devtools) -library(usethis) -library(haven) - -######################################################################################################################## -### Dataset #1: ICD-10 codes, 2009 version -# Download dataset -download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10/allvalid2009(detailed%20titles%20headings).xls", destfile = "data-raw/allvalid2009.xls") - -# Read data in Excel format -icd10_2009 <- readxl::read_excel( - "data-raw/allvalid2009.xls", - skip = 7, - col_names = c("Status", "Code", "ICD.title") -) - -# Remove lines 
where code contains the character "-", i.e. headers: -icd10_2009[grepl("-", icd10_2009[["Code"]]), ] -icd10_2009 <- icd10_2009[!grepl("-", icd10_2009[["Code"]]), ] - -# Produce a "Code.clean" variable with no punctuation -icd10_2009[["Code.clean"]] <- stringr::str_replace_all(string = icd10_2009[["Code"]], pattern = "[^[:alnum:]]", replacement = "") - -# Re-order the columns -icd10_2009 <- icd10_2009[, c(2, 4, 3, 1)] - -# Convert all character columns to ASCII format -icd10_2009[["Code"]] <- iconv(icd10_2009[["Code"]], from = "UTF-8", to = "ASCII") -icd10_2009[["Code.clean"]] <- iconv(icd10_2009[["Code.clean"]], from = "UTF-8", to = "ASCII") -icd10_2009[["ICD.title"]] <- iconv(icd10_2009[["ICD.title"]], from = "UTF-8", to = "ASCII") -icd10_2009[["Status"]] <- iconv(icd10_2009[["Status"]], from = "UTF-8", to = "ASCII") - -# Save data in R format -usethis::use_data(icd10_2009, overwrite = TRUE) - -######################################################################################################################## -### Dataset #2: ICD-10 codes, 2011 version -# Download dataset -download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10/allvalid2011 (detailed%20titles%20headings).xls", destfile = "data-raw/allvalid2011.xls") - -# Read data in Excel format -icd10_2011 <- readxl::read_excel( - "data-raw/allvalid2011.xls", - skip = 7, - col_names = c("Status", "Code", "ICD.title") -) - -# Remove lines where code contains the character "-", i.e. 
headers: -icd10_2011[grepl("-", icd10_2011[["Code"]]), ] -icd10_2011 <- icd10_2011[!grepl("-", icd10_2011[["Code"]]), ] - -# Produce a "Code.clean" variable with no punctuation -icd10_2011[["Code.clean"]] <- stringr::str_replace_all(string = icd10_2011[["Code"]], pattern = "[^[:alnum:]]", replacement = "") - -# Re-order the columns -icd10_2011 <- icd10_2011[, c(2, 4, 3, 1)] - -# Convert all character columns to ASCII format -icd10_2011[["Code"]] <- iconv(icd10_2011[["Code"]], from = "UTF-8", to = "ASCII") -icd10_2011[["Code.clean"]] <- iconv(icd10_2011[["Code.clean"]], from = "UTF-8", to = "ASCII") -icd10_2011[["ICD.title"]] <- iconv(icd10_2011[["ICD.title"]], from = "UTF-8", to = "ASCII") -icd10_2011[["Status"]] <- iconv(icd10_2011[["Status"]], from = "UTF-8", to = "ASCII") - -# Save data in R format -usethis::use_data(icd10_2011, overwrite = TRUE) - -######################################################################################################################## -### Dataset #3: ICD-9 codes, 2015 version -# Download dataset -download.file(url = "https://www.cms.gov/Medicare/Coding/ICD9ProviderDiagnosticCodes/Downloads/ICD-9-CM-v32-master-descriptions.zip", destfile = "data-raw/ICD-9-CM-v32-master-descriptions.zip") - -# Unzip files -unzip("data-raw/ICD-9-CM-v32-master-descriptions.zip", exdir = "data-raw") - -# Read ICD-9 diagnostic codes -icd9_2015 <- readxl::read_excel("data-raw/CMS32_DESC_LONG_SHORT_DX.xlsx", skip = 1, col_names = c("Code", "Long_description", "Short_description")) - -# Convert all character columns to ASCII format -icd9_2015[["Code"]] <- iconv(icd9_2015[["Code"]], from = "UTF-8", to = "ASCII") -icd9_2015[["Long_description"]] <- iconv(icd9_2015[["Long_description"]], from = "UTF-8", to = "ASCII") -icd9_2015[["Short_description"]] <- iconv(icd9_2015[["Short_description"]], from = "UTF-8", to = "ASCII") - -# Save data in R format -usethis::use_data(icd9_2015, overwrite = TRUE) - 
-######################################################################################################################## -### Dataset #4 ICD-10-CM codes, 2018 version -download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2018/2018-ICD-10-CM-Codes-File.zip", destfile = "data-raw/2018-ICD-10-CM-Codes-File.zip") - -# Unzip files -unzip("data-raw/2018-ICD-10-CM-Codes-File.zip", exdir = "data-raw") - -# Read files -icd10cm_2018 <- readr::read_tsv(file = "data-raw/icd10cm_codes_2018.txt", col_names = FALSE) -icd10cm_2018[["Code"]] <- substr(icd10cm_2018[[1]], 1, 7) -icd10cm_2018[["Description"]] <- substr(icd10cm_2018[[1]], 9, 400) -icd10cm_2018[[1]] <- NULL - -# Convert all character columns to ASCII format -icd10cm_2018[["Code"]] <- iconv(icd10cm_2018[["Code"]], from = "UTF-8", to = "ASCII") -icd10cm_2018[["Description"]] <- iconv(icd10cm_2018[["Description"]], from = "UTF-8", to = "ASCII") - -# Save data in R format -usethis::use_data(icd10cm_2018, overwrite = TRUE) - -######################################################################################################################## -### Dataset #5 ICD-10-CM codes, 2017 version -download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2017/icd10cm_codes_2017.txt", destfile = "data-raw/icd10cm_codes_2017.txt") - -# Read files -icd10cm_2017 <- readr::read_tsv(file = "data-raw/icd10cm_codes_2017.txt", col_names = FALSE) -icd10cm_2017[["Code"]] <- substr(icd10cm_2017[[1]], 1, 7) -icd10cm_2017[["Description"]] <- substr(icd10cm_2017[[1]], 9, 400) -icd10cm_2017[[1]] <- NULL - -# Convert all character columns to ASCII format -icd10cm_2017[["Code"]] <- iconv(icd10cm_2017[["Code"]], from = "UTF-8", to = "ASCII") -icd10cm_2017[["Description"]] <- iconv(icd10cm_2017[["Description"]], from = "UTF-8", to = "ASCII") - -# Save data in R format -usethis::use_data(icd10cm_2017, overwrite = TRUE) - 
-######################################################################################################################## -### Dataset #6: Adult same-day discharges, 2010 (from Stata) -nhds2010 <- haven::read_dta("https://www.stata-press.com/data/r15/nhds2010.dta") -attr(nhds2010, "spec") <- NULL -nhds2010 <- labelled::remove_labels(nhds2010, user_na_to_na = TRUE) - -# Save data in R format -usethis::use_data(nhds2010, overwrite = TRUE) - -######################################################################################################################## -### Dataset #7: Australian mortality data, 2010 (from Stata) -australia10 <- haven::read_dta("https://www.stata-press.com/data/r15/australia10.dta") -attr(australia10, "spec") <- NULL -australia10 <- labelled::remove_labels(australia10, user_na_to_na = TRUE) - -# Save data in R format -usethis::use_data(australia10, overwrite = TRUE) - -######################################################################################################################## -### Remove unnecessary files -lf <- list.files(path = "data-raw", full.names = TRUE, pattern = ".xls|.txt|.zip|.pdf") -invisible(file.remove(lf)) - -######################################################################################################################## -### Internal Dataset #1: List of regex patterns -# Empty list -lofregex <- list() - -# Charlson score, ICD9 -lofregex[["charlson"]] <- list() -lofregex[["charlson"]][["icd9"]] <- list() -lofregex[["charlson"]][["icd9"]][["ami"]] <- "^410|^412" -lofregex[["charlson"]][["icd9"]][["chf"]] <- "^39891|^40201|^40211|^40291|^40401|^40403|^40411|^40413|^40491|^40493|^4254|^4255|^4256|^4257|^4258|^4259|^428" -lofregex[["charlson"]][["icd9"]][["pvd"]] <- "^0930|^4373|^440|^441|^4431|^4432|^4433|^4434|^4435|^4436|^4437|^4438|^4439|^4471|^5571|^5579|^V434" -lofregex[["charlson"]][["icd9"]][["cevd"]] <- "^36234|^430|^431|^432|^433|^434|^435|^436|^437|^438" -lofregex[["charlson"]][["icd9"]][["dementia"]] 
<- "^290|^2941|^3312" -lofregex[["charlson"]][["icd9"]][["copd"]] <- "^4168|^4169|^490|^491|^492|^493|^494|^495|^496|^497|^498|^499|^500|^501|^502|^503|^504|^505|^5064|^5081|^5088" -lofregex[["charlson"]][["icd9"]][["rheumd"]] <- "^4465|^7100|^7101|^7102|^7103|^7104|^7140|^7141|^7142|^7148|^725" -lofregex[["charlson"]][["icd9"]][["pud"]] <- "^531|^532|^533|^534" -lofregex[["charlson"]][["icd9"]][["mld"]] <- "^07022|^07023|^07032|^07033|^07044|^07054|^0706|^0709|^570|^571|^5733|^5734|^5738|^5739|^V427" -lofregex[["charlson"]][["icd9"]][["diab"]] <- "^2500|^2501|^2502|^2503|^2508|^2509" -lofregex[["charlson"]][["icd9"]][["diabwc"]] <- "^2504|^2505|^2506|^2507" -lofregex[["charlson"]][["icd9"]][["hp"]] <- "^3341|^342|^343|^3440|^3441|^3442|^3443|^3444|^3445|^3446|^3449" -lofregex[["charlson"]][["icd9"]][["rend"]] <- "^40301|^40311|^40391|^40402|^40403|^40412|^40413|^40492|^40493|^582|^5830|^5831|^5832|^5833|^5834|^5835|^5836|^5837|^585|^586|^5880|^V420|^V451|^V56" -lofregex[["charlson"]][["icd9"]][["canc"]] <- "^140|^141|^142|^143|^144|^145|^146|^147|^148|^149|^150|^151|^152|^153|^154|^155|^156|^157|^158|^159|^160|^161|^162|^163|^164|^165|^166|^167|^168|^169|^170|^171|^172|^174|^175|^176|^177|^178|^179|^180|^181|^182|^183|^184|^185|^186|^187|^188|^189|^190|^191|^192|^193|^194|^195|^200|^201|^202|^203|^204|^205|^206|^207|^208|^2386" -lofregex[["charlson"]][["icd9"]][["msld"]] <- "^4560|^4561|^4562|^5722|^5723|^5724|^5725|^5726|^5727|^5728" -lofregex[["charlson"]][["icd9"]][["metacanc"]] <- "^196|^197|^198|^199" -lofregex[["charlson"]][["icd9"]][["aids"]] <- "^042|^043|^044" - -# Charlson score, ICD10 -lofregex[["charlson"]][["icd10"]] <- list() -lofregex[["charlson"]][["icd10"]][["ami"]] <- "^I21|^I22|^I252" -lofregex[["charlson"]][["icd10"]][["chf"]] <- "^I099|^I110|^I130|^I132|^I255|^I420|^I425|^I426|^I427|^I428|^I429|^I43|^I50|^P290" -lofregex[["charlson"]][["icd10"]][["pvd"]] <- "^I70|^I71|^I731|^I738|^I739|^I771|^I790|^I792|^K551|^K558|^K559|^Z958|^Z959" 
-lofregex[["charlson"]][["icd10"]][["cevd"]] <- "^G45|^G46|^H340|^I60|^I61|^I62|^I63|^I64|^I65|^I66|^I67|^I68|^I69" -lofregex[["charlson"]][["icd10"]][["dementia"]] <- "^F00|^F01|^F02|^F03|^F051|^G30|^G311" -lofregex[["charlson"]][["icd10"]][["copd"]] <- "^I278|^I279|^J40|^J41|^J42|^J43|^J44|^J45|^J46|^J47|^J60|^J61|^J62|^J63|^J64|^J65|^J66|^J67|^J684|^J701|^J703" -lofregex[["charlson"]][["icd10"]][["rheumd"]] <- "^M05|^M06|^M315|^M32|^M33|^M34|^M351|^M353|^M360" -lofregex[["charlson"]][["icd10"]][["pud"]] <- "^K25|^K26|^K27|^K28" -lofregex[["charlson"]][["icd10"]][["mld"]] <- "^B18|^K700|^K701|^K702|^K703|^K709|^K713|^K714|^K715|^K717|^K73|^K74|^K760|^K762|^K763|^K764|^K768|^K769|^Z944" -lofregex[["charlson"]][["icd10"]][["diab"]] <- "^E100|^E101|^E106|^E108|^E109|^E110|^E111|^E116|^E118|^E119|^E120|^E121|^E126|^E128|^E129|^E130|^E131|^E136|^E138|^E139|^E140|^E141|^E146|^E148|^E149" -lofregex[["charlson"]][["icd10"]][["diabwc"]] <- "^E102|^E103|^E104|^E105|^E107|^E112|^E113|^E114|^E115|^E117|^E122|^E123|^E124|^E125|^E127|^E132|^E133|^E134|^E135|^E137|^E142|^E143|^E144|^E145|^E147" -lofregex[["charlson"]][["icd10"]][["hp"]] <- "^G041|^G114|^G801|^G802|^G81|^G82|^G830|^G831|^G832|^G833|^G834|^G839" -lofregex[["charlson"]][["icd10"]][["rend"]] <- "^I120|^I131|^N032|^N033|^N034|^N035|^N036|^N037|^N052|^N053|^N054|^N055|^N056|^N057|^N18|^N19|^N250|^Z490|^Z491|^Z492|^Z940|^Z992" -lofregex[["charlson"]][["icd10"]][["canc"]] <- "^C00|^C01|^C02|^C03|^C04|^C05|^C06|^C07|^C08|^C09|^C10|^C11|^C12|^C13|^C14|^C15|^C16|^C17|^C18|^C19|^C20|^C21|^C22|^C23|^C24|^C25|^C26|^C30|^C31|^C32|^C33|^C34|^C37|^C38|^C39|^C40|^C41|^C43|^C45|^C46|^C47|^C48|^C49|^C50|^C51|^C52|^C53|^C54|^C55|^C56|^C57|^C58|^C60|^C61|^C62|^C63|^C64|^C65|^C66|^C67|^C68|^C69|^C70|^C71|^C72|^C73|^C74|^C75|^C76|^C81|^C82|^C83|^C84|^C85|^C88|^C90|^C91|^C92|^C93|^C94|^C95|^C96|^C97" -lofregex[["charlson"]][["icd10"]][["msld"]] <- "^I850|^I859|^I864|^I982|^K704|^K711|^K721|^K729|^K765|^K766|^K767" 
-lofregex[["charlson"]][["icd10"]][["metacanc"]] <- "^C77|^C78|^C79|^C80" -lofregex[["charlson"]][["icd10"]][["aids"]] <- "^B20|^B21|^B22|^B24" - -# Elixhauser score, ICD9 -lofregex[["elixhauser"]] <- list() -lofregex[["elixhauser"]][["icd9"]] <- list() -lofregex[["elixhauser"]][["icd9"]][["chf"]] <- "^39891|^40201|^40211|^40291|^40401|^40403|^40411|^40413|^40491|^40493|^4254|^4255|^4256|^4257|^4258|^4259|^428" -lofregex[["elixhauser"]][["icd9"]][["carit"]] <- "^4260|^42613|^4267|^4269|^42610|^42612|^4270|^4271|^4272|^4273|^4274|^4276|^4277|^4278|^4279|^7850|^99601|^99604|^V450|^V533" -lofregex[["elixhauser"]][["icd9"]][["valv"]] <- "^0932|^394|^395|^396|^397|^424|^7463|^7464|^7465|^7466|^V422|^V433" -lofregex[["elixhauser"]][["icd9"]][["pcd"]] <- "^4150|^4151|^416|^4170|^4178|^4179" -lofregex[["elixhauser"]][["icd9"]][["pvd"]] <- "^0930|^4373|^440|^441|^4431|^4432|^4433|^4434|^4435|^4436|^4437|^4438|^4439|^4471|^5571|^5579|^V434" -lofregex[["elixhauser"]][["icd9"]][["hypunc"]] <- "^401" -lofregex[["elixhauser"]][["icd9"]][["hypc"]] <- "^402|^403|^404|^405" -lofregex[["elixhauser"]][["icd9"]][["para"]] <- "^3341|^342|^343|^3440|^3441|^3442|^3443|^3444|^3445|^3446|^3449" -lofregex[["elixhauser"]][["icd9"]][["ond"]] <- "^3319|^3320|^3321|^3334|^3335|^33392|^334|^335|^3362|^340|^341|^345|^3481|^3483|^7803|^7843" -lofregex[["elixhauser"]][["icd9"]][["cpd"]] <- "^4168|^4169|^490|^491|^492|^493|^494|^495|^496|^497|^498|^499|^500|^501|^502|^503|^504|^505|^5064|^5081|^5088" -lofregex[["elixhauser"]][["icd9"]][["diabunc"]] <- "^2500|^2501|^2502|^2503" -lofregex[["elixhauser"]][["icd9"]][["diabc"]] <- "^2504|^2505|^2506|^2507|^2508|^2509" -lofregex[["elixhauser"]][["icd9"]][["hypothy"]] <- "^2409|^243|^244|^2461|^2468" -lofregex[["elixhauser"]][["icd9"]][["rf"]] <- "^40301|^40311|^40391|^40402|^40403|^40412|^40413|^40492|^40493|^585|^586|^5880|^V420|^V451|^V56" -lofregex[["elixhauser"]][["icd9"]][["ld"]] <- 
"^07022|^07023|^07032|^07033|^07044|^07054|^0706|^0709|^4560|^4561|^4562|^570|^571|^5722|^5723|^5724|^5725|^5726|^5727|^5728|^5733|^5734|^5738|^5739|^V427" -lofregex[["elixhauser"]][["icd9"]][["pud"]] <- "^5317|^5319|^5327|^5329|^5337|^5339|^5347|^5349" -lofregex[["elixhauser"]][["icd9"]][["aids"]] <- "^042|^043|^044" -lofregex[["elixhauser"]][["icd9"]][["lymph"]] <- "^200|^201|^202|^2030|^2386" -lofregex[["elixhauser"]][["icd9"]][["metacanc"]] <- "^196|^197|^198|^199" -lofregex[["elixhauser"]][["icd9"]][["solidtum"]] <- "^140|^141|^142|^143|^144|^145|^146|^147|^148|^149|^150|^151|^152|^153|^154|^155|^156|^157|^158|^159|^160|^161|^162|^163|^164|^165|^166|^167|^168|^169|^170|^171|^172|^174|^175|^176|^177|^178|^179|^180|^181|^182|^183|^184|^185|^186|^187|^188|^189|^190|^191|^192|^193|^194|^195" -lofregex[["elixhauser"]][["icd9"]][["rheumd"]] <- "^446|^7010|^7100|^7101|^7102|^7103|^7104|^7108|^7109|^7112|^714|^7193|^720|^725|^7285|^72889|^72930" -lofregex[["elixhauser"]][["icd9"]][["coag"]] <- "^286|^2871|^2873|^2874|^2875" -lofregex[["elixhauser"]][["icd9"]][["obes"]] <- "^2780" -lofregex[["elixhauser"]][["icd9"]][["wloss"]] <- "^260|^261|^262|^263|^7832|^7994" -lofregex[["elixhauser"]][["icd9"]][["fed"]] <- "^2536|^276" -lofregex[["elixhauser"]][["icd9"]][["blane"]] <- "^2800" -lofregex[["elixhauser"]][["icd9"]][["dane"]] <- "^2801|^2802|^2803|^2804|^2805|^2806|^2807|^2808|^2809|^281" -lofregex[["elixhauser"]][["icd9"]][["alcohol"]] <- "^2652|^2911|^2912|^2913|^2915|^2916|^2917|^2918|^2919|^3030|^3039|^3050|^3575|^4255|^5353|^5710|^5711|^5712|^5713|^980|^V113" -lofregex[["elixhauser"]][["icd9"]][["drug"]] <- "^292|^304|^3052|^3053|^3054|^3055|^3056|^3057|^3058|^3059|^V6542" -lofregex[["elixhauser"]][["icd9"]][["psycho"]] <- "^2938|^295|^29604|^29614|^29644|^29654|^297|^298" -lofregex[["elixhauser"]][["icd9"]][["depre"]] <- "^2962|^2963|^2965|^3004|^309|^311" - -# Elixhauser score, ICD10 -lofregex[["elixhauser"]][["icd10"]] <- list() 
-lofregex[["elixhauser"]][["icd10"]][["chf"]] <- "^I099|^I110|^I130|^I132|^I255|^I420|^I425|^I426|^I427|^I428|^I429|^I43|^I50|^P290" -lofregex[["elixhauser"]][["icd10"]][["carit"]] <- "^I441|^I442|^I443|^I456|^I459|^I47|^I48|^I49|^R000|^R001|^R008|^T821|^Z450|^Z950" -lofregex[["elixhauser"]][["icd10"]][["valv"]] <- "^A520|^I05|^I06|^I07|^I08|^I091|^I098|^I34|^I35|^I36|^I37|^I38|^I39|^Q230|^Q231|^Q232|^Q233|^Z952|^Z953|^Z954" -lofregex[["elixhauser"]][["icd10"]][["pcd"]] <- "^I26|^I27|^I280|^I288|^I289" -lofregex[["elixhauser"]][["icd10"]][["pvd"]] <- "^I70|^I71|^I731|^I738|^I739|^I771|^I790|^I792|^K551|^K558|^K559|^Z958|^Z959" -lofregex[["elixhauser"]][["icd10"]][["hypunc"]] <- "^I10" -lofregex[["elixhauser"]][["icd10"]][["hypc"]] <- "^I11|^I12|^I13|^I15" -lofregex[["elixhauser"]][["icd10"]][["para"]] <- "^G041|^G114|^G801|^G802|^G81|^G82|^G830|^G831|^G832|^G833|^G834|^G839" -lofregex[["elixhauser"]][["icd10"]][["ond"]] <- "^G10|^G11|^G12|^G13|^G20|^G21|^G22|^G254|^G255|^G312|^G318|^G319|^G32|^G35|^G36|^G37|^G40|^G41|^G931|^G934|^R470|^R56" -lofregex[["elixhauser"]][["icd10"]][["cpd"]] <- "^I278|^I279|^J40|^J41|^J42|^J43|^J44|^J45|^J46|^J47|^J60|^J61|^J62|^J63|^J64|^J65|^J66|^J67|^J684|^J701|^J703" -lofregex[["elixhauser"]][["icd10"]][["diabunc"]] <- "^E100|^E101|^E109|^E110|^E111|^E119|^E120|^E121|^E129|^E130|^E131|^E139|^E140|^E141|^E149" -lofregex[["elixhauser"]][["icd10"]][["diabc"]] <- "^E102|^E103|^E104|^E105|^E106|^E107|^E108|^E112|^E113|^E114|^E115|^E116|^E117|^E118|^E122|^E123|^E124|^E125|^E126|^E127|^E128|^E132|^E133|^E134|^E135|^E136|^E137|^E138|^E142|^E143|^E144|^E145|^E146|^E147|^E148" -lofregex[["elixhauser"]][["icd10"]][["hypothy"]] <- "^E00|^E01|^E02|^E03|^E890" -lofregex[["elixhauser"]][["icd10"]][["rf"]] <- "^I120|^I131|^N18|^N19|^N250|^Z490|^Z491|^Z492|^Z940|^Z992" -lofregex[["elixhauser"]][["icd10"]][["ld"]] <- "^B18|^I85|^I864|^I982|^K70|^K711|^K713|^K714|^K715|^K717|^K72|^K73|^K74|^K760|^K762|^K763|^K764|^K765|^K766|^K767|^K768|^K769|^Z944" 
-lofregex[["elixhauser"]][["icd10"]][["pud"]] <- "^K257|^K259|^K267|^K269|^K277|^K279|^K287|^K289" -lofregex[["elixhauser"]][["icd10"]][["aids"]] <- "^B20|^B21|^B22|^B24" -lofregex[["elixhauser"]][["icd10"]][["lymph"]] <- "^C81|^C82|^C83|^C84|^C85|^C88|^C96|^C900|^C902" -lofregex[["elixhauser"]][["icd10"]][["metacanc"]] <- "^C77|^C78|^C79|^C80" -lofregex[["elixhauser"]][["icd10"]][["solidtum"]] <- "^C00|^C01|^C02|^C03|^C04|^C05|^C06|^C07|^C08|^C09|^C10|^C11|^C12|^C13|^C14|^C15|^C16|^C17|^C18|^C19|^C20|^C21|^C22|^C23|^C24|^C25|^C26|^C30|^C31|^C32|^C33|^C34|^C37|^C38|^C39|^C40|^C41|^C43|^C45|^C46|^C47|^C48|^C49|^C50|^C51|^C52|^C53|^C54|^C55|^C56|^C57|^C58|^C60|^C61|^C62|^C63|^C64|^C65|^C66|^C67|^C68|^C69|^C70|^C71|^C72|^C73|^C74|^C75|^C76|^C97" -lofregex[["elixhauser"]][["icd10"]][["rheumd"]] <- "^L940|^L941|^L943|^M05|^M06|^M08|^M120|^M123|^M30|^M310|^M311|^M312|^M313|^M32|^M33|^M34|^M35|^M45|^M461|^M468|^M469" -lofregex[["elixhauser"]][["icd10"]][["coag"]] <- "^D65|^D66|^D67|^D68|^D691|^D693|^D694|^D695|^D696" -lofregex[["elixhauser"]][["icd10"]][["obes"]] <- "^E66" -lofregex[["elixhauser"]][["icd10"]][["wloss"]] <- "^E40|^E41|^E42|^E43|^E44|^E45|^E46|^R634|^R64" -lofregex[["elixhauser"]][["icd10"]][["fed"]] <- "^E222|^E86|^E87" -lofregex[["elixhauser"]][["icd10"]][["blane"]] <- "^D500" -lofregex[["elixhauser"]][["icd10"]][["dane"]] <- "^D508|^D509|^D51|^D52|^D53" -lofregex[["elixhauser"]][["icd10"]][["alcohol"]] <- "^F10|^E52|^G621|^I426|^K292|^K700|^K703|^K709|^T51|^Z502|^Z714|^Z721" -lofregex[["elixhauser"]][["icd10"]][["drug"]] <- "^F11|^F12|^F13|^F14|^F15|^F16|^F18|^F19|^Z715|^Z722" -lofregex[["elixhauser"]][["icd10"]][["psycho"]] <- "^F20|^F22|^F23|^F24|^F25|^F28|^F29|^F302|^F312|^F315" -lofregex[["elixhauser"]][["icd10"]][["depre"]] <- "^F204|^F313|^F314|^F315|^F32|^F33|^F341|^F412|^F432" - -# Export data as internal -usethis::use_data(lofregex, internal = TRUE, overwrite = TRUE) +### Clean datasets to include in the package +# Required packages: 
+library(readxl) +library(stringr) +library(devtools) +library(usethis) +library(haven) + +######################################################################################################################## +### Dataset #1: ICD-10 codes, 2009 version +# Download dataset +download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10/allvalid2009(detailed%20titles%20headings).xls", destfile = "data-raw/allvalid2009.xls") + +# Read data in Excel format +icd10_2009 <- readxl::read_excel( + "data-raw/allvalid2009.xls", + skip = 7, + col_names = c("Status", "Code", "ICD.title") +) + +# Remove lines where code contains the character "-", i.e. headers: +icd10_2009[grepl("-", icd10_2009[["Code"]]), ] +icd10_2009 <- icd10_2009[!grepl("-", icd10_2009[["Code"]]), ] + +# Produce a "Code.clean" variable with no punctuation +icd10_2009[["Code.clean"]] <- stringr::str_replace_all(string = icd10_2009[["Code"]], pattern = "[^[:alnum:]]", replacement = "") + +# Re-order the columns +icd10_2009 <- icd10_2009[, c(2, 4, 3, 1)] + +# Convert all character columns to ASCII format +icd10_2009[["Code"]] <- iconv(icd10_2009[["Code"]], from = "UTF-8", to = "ASCII") +icd10_2009[["Code.clean"]] <- iconv(icd10_2009[["Code.clean"]], from = "UTF-8", to = "ASCII") +icd10_2009[["ICD.title"]] <- iconv(icd10_2009[["ICD.title"]], from = "UTF-8", to = "ASCII") +icd10_2009[["Status"]] <- iconv(icd10_2009[["Status"]], from = "UTF-8", to = "ASCII") + +# Save data in R format +usethis::use_data(icd10_2009, overwrite = TRUE) + +######################################################################################################################## +### Dataset #2: ICD-10 codes, 2011 version +# Download dataset +download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10/allvalid2011 (detailed%20titles%20headings).xls", destfile = "data-raw/allvalid2011.xls") + +# Read data in Excel format +icd10_2011 <- readxl::read_excel( + "data-raw/allvalid2011.xls", + 
skip = 7, + col_names = c("Status", "Code", "ICD.title") +) + +# Remove lines where code contains the character "-", i.e. headers: +icd10_2011[grepl("-", icd10_2011[["Code"]]), ] +icd10_2011 <- icd10_2011[!grepl("-", icd10_2011[["Code"]]), ] + +# Produce a "Code.clean" variable with no punctuation +icd10_2011[["Code.clean"]] <- stringr::str_replace_all(string = icd10_2011[["Code"]], pattern = "[^[:alnum:]]", replacement = "") + +# Re-order the columns +icd10_2011 <- icd10_2011[, c(2, 4, 3, 1)] + +# Convert all character columns to ASCII format +icd10_2011[["Code"]] <- iconv(icd10_2011[["Code"]], from = "UTF-8", to = "ASCII") +icd10_2011[["Code.clean"]] <- iconv(icd10_2011[["Code.clean"]], from = "UTF-8", to = "ASCII") +icd10_2011[["ICD.title"]] <- iconv(icd10_2011[["ICD.title"]], from = "UTF-8", to = "ASCII") +icd10_2011[["Status"]] <- iconv(icd10_2011[["Status"]], from = "UTF-8", to = "ASCII") + +# Save data in R format +usethis::use_data(icd10_2011, overwrite = TRUE) + +######################################################################################################################## +### Dataset #3: ICD-9 codes, 2015 version +# Download dataset +download.file(url = "https://www.cms.gov/Medicare/Coding/ICD9ProviderDiagnosticCodes/Downloads/ICD-9-CM-v32-master-descriptions.zip", destfile = "data-raw/ICD-9-CM-v32-master-descriptions.zip") + +# Unzip files +unzip("data-raw/ICD-9-CM-v32-master-descriptions.zip", exdir = "data-raw") + +# Read ICD-9 diagnostic codes +icd9_2015 <- readxl::read_excel("data-raw/CMS32_DESC_LONG_SHORT_DX.xlsx", skip = 1, col_names = c("Code", "Long_description", "Short_description")) + +# Convert all character columns to ASCII format +icd9_2015[["Code"]] <- iconv(icd9_2015[["Code"]], from = "UTF-8", to = "ASCII") +icd9_2015[["Long_description"]] <- iconv(icd9_2015[["Long_description"]], from = "UTF-8", to = "ASCII") +icd9_2015[["Short_description"]] <- iconv(icd9_2015[["Short_description"]], from = "UTF-8", to = "ASCII") + +# Save data 
in R format +usethis::use_data(icd9_2015, overwrite = TRUE) + +######################################################################################################################## +### Dataset #4 ICD-10-CM codes, 2018 version +download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2018/2018-ICD-10-CM-Codes-File.zip", destfile = "data-raw/2018-ICD-10-CM-Codes-File.zip") + +# Unzip files +unzip("data-raw/2018-ICD-10-CM-Codes-File.zip", exdir = "data-raw") + +# Read files +icd10cm_2018 <- readr::read_tsv(file = "data-raw/icd10cm_codes_2018.txt", col_names = FALSE) +icd10cm_2018[["Code"]] <- substr(icd10cm_2018[[1]], 1, 7) +icd10cm_2018[["Description"]] <- substr(icd10cm_2018[[1]], 9, 400) +icd10cm_2018[[1]] <- NULL + +# Convert all character columns to ASCII format +icd10cm_2018[["Code"]] <- iconv(icd10cm_2018[["Code"]], from = "UTF-8", to = "ASCII") +icd10cm_2018[["Description"]] <- iconv(icd10cm_2018[["Description"]], from = "UTF-8", to = "ASCII") + +# Save data in R format +usethis::use_data(icd10cm_2018, overwrite = TRUE) + +######################################################################################################################## +### Dataset #5 ICD-10-CM codes, 2017 version +download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2017/icd10cm_codes_2017.txt", destfile = "data-raw/icd10cm_codes_2017.txt") + +# Read files +icd10cm_2017 <- readr::read_tsv(file = "data-raw/icd10cm_codes_2017.txt", col_names = FALSE) +icd10cm_2017[["Code"]] <- substr(icd10cm_2017[[1]], 1, 7) +icd10cm_2017[["Description"]] <- substr(icd10cm_2017[[1]], 9, 400) +icd10cm_2017[[1]] <- NULL + +# Convert all character columns to ASCII format +icd10cm_2017[["Code"]] <- iconv(icd10cm_2017[["Code"]], from = "UTF-8", to = "ASCII") +icd10cm_2017[["Description"]] <- iconv(icd10cm_2017[["Description"]], from = "UTF-8", to = "ASCII") + +# Save data in R format +usethis::use_data(icd10cm_2017, overwrite = TRUE) + 
+######################################################################################################################## +### Dataset #6: Adult same-day discharges, 2010 (from Stata) +nhds2010 <- haven::read_dta("https://www.stata-press.com/data/r15/nhds2010.dta") +attr(nhds2010, "spec") <- NULL +nhds2010 <- labelled::remove_labels(nhds2010, user_na_to_na = TRUE) + +# Save data in R format +usethis::use_data(nhds2010, overwrite = TRUE) + +######################################################################################################################## +### Dataset #7: Australian mortality data, 2010 (from Stata) +australia10 <- haven::read_dta("https://www.stata-press.com/data/r15/australia10.dta") +attr(australia10, "spec") <- NULL +australia10 <- labelled::remove_labels(australia10, user_na_to_na = TRUE) + +# Save data in R format +usethis::use_data(australia10, overwrite = TRUE) + +######################################################################################################################## +### Remove unnecessary files +lf <- list.files(path = "data-raw", full.names = TRUE, pattern = ".xls|.txt|.zip|.pdf") +invisible(file.remove(lf)) + +######################################################################################################################## +### Internal Dataset #1: List of regex patterns +# Empty list +lofregex <- list() + +# Charlson score, ICD9 +lofregex[["charlson"]] <- list() +lofregex[["charlson"]][["icd9"]] <- list() +lofregex[["charlson"]][["icd9"]][["ami"]] <- "^410|^412" +lofregex[["charlson"]][["icd9"]][["chf"]] <- "^39891|^40201|^40211|^40291|^40401|^40403|^40411|^40413|^40491|^40493|^4254|^4255|^4256|^4257|^4258|^4259|^428" +lofregex[["charlson"]][["icd9"]][["pvd"]] <- "^0930|^4373|^440|^441|^4431|^4432|^4433|^4434|^4435|^4436|^4437|^4438|^4439|^4471|^5571|^5579|^V434" +lofregex[["charlson"]][["icd9"]][["cevd"]] <- "^36234|^430|^431|^432|^433|^434|^435|^436|^437|^438" +lofregex[["charlson"]][["icd9"]][["dementia"]] 
<- "^290|^2941|^3312" +lofregex[["charlson"]][["icd9"]][["copd"]] <- "^4168|^4169|^490|^491|^492|^493|^494|^495|^496|^497|^498|^499|^500|^501|^502|^503|^504|^505|^5064|^5081|^5088" +lofregex[["charlson"]][["icd9"]][["rheumd"]] <- "^4465|^7100|^7101|^7102|^7103|^7104|^7140|^7141|^7142|^7148|^725" +lofregex[["charlson"]][["icd9"]][["pud"]] <- "^531|^532|^533|^534" +lofregex[["charlson"]][["icd9"]][["mld"]] <- "^07022|^07023|^07032|^07033|^07044|^07054|^0706|^0709|^570|^571|^5733|^5734|^5738|^5739|^V427" +lofregex[["charlson"]][["icd9"]][["diab"]] <- "^2500|^2501|^2502|^2503|^2508|^2509" +lofregex[["charlson"]][["icd9"]][["diabwc"]] <- "^2504|^2505|^2506|^2507" +lofregex[["charlson"]][["icd9"]][["hp"]] <- "^3341|^342|^343|^3440|^3441|^3442|^3443|^3444|^3445|^3446|^3449" +lofregex[["charlson"]][["icd9"]][["rend"]] <- "^40301|^40311|^40391|^40402|^40403|^40412|^40413|^40492|^40493|^582|^5830|^5831|^5832|^5833|^5834|^5835|^5836|^5837|^585|^586|^5880|^V420|^V451|^V56" +lofregex[["charlson"]][["icd9"]][["canc"]] <- "^140|^141|^142|^143|^144|^145|^146|^147|^148|^149|^150|^151|^152|^153|^154|^155|^156|^157|^158|^159|^160|^161|^162|^163|^164|^165|^166|^167|^168|^169|^170|^171|^172|^174|^175|^176|^177|^178|^179|^180|^181|^182|^183|^184|^185|^186|^187|^188|^189|^190|^191|^192|^193|^194|^195|^200|^201|^202|^203|^204|^205|^206|^207|^208|^2386" +lofregex[["charlson"]][["icd9"]][["msld"]] <- "^4560|^4561|^4562|^5722|^5723|^5724|^5725|^5726|^5727|^5728" +lofregex[["charlson"]][["icd9"]][["metacanc"]] <- "^196|^197|^198|^199" +lofregex[["charlson"]][["icd9"]][["aids"]] <- "^042|^043|^044" + +# Charlson score, ICD10 +lofregex[["charlson"]][["icd10"]] <- list() +lofregex[["charlson"]][["icd10"]][["ami"]] <- "^I21|^I22|^I252" +lofregex[["charlson"]][["icd10"]][["chf"]] <- "^I099|^I110|^I130|^I132|^I255|^I420|^I425|^I426|^I427|^I428|^I429|^I43|^I50|^P290" +lofregex[["charlson"]][["icd10"]][["pvd"]] <- "^I70|^I71|^I731|^I738|^I739|^I771|^I790|^I792|^K551|^K558|^K559|^Z958|^Z959" 
+lofregex[["charlson"]][["icd10"]][["cevd"]] <- "^G45|^G46|^H340|^I60|^I61|^I62|^I63|^I64|^I65|^I66|^I67|^I68|^I69" +lofregex[["charlson"]][["icd10"]][["dementia"]] <- "^F00|^F01|^F02|^F03|^F051|^G30|^G311" +lofregex[["charlson"]][["icd10"]][["copd"]] <- "^I278|^I279|^J40|^J41|^J42|^J43|^J44|^J45|^J46|^J47|^J60|^J61|^J62|^J63|^J64|^J65|^J66|^J67|^J684|^J701|^J703" +lofregex[["charlson"]][["icd10"]][["rheumd"]] <- "^M05|^M06|^M315|^M32|^M33|^M34|^M351|^M353|^M360" +lofregex[["charlson"]][["icd10"]][["pud"]] <- "^K25|^K26|^K27|^K28" +lofregex[["charlson"]][["icd10"]][["mld"]] <- "^B18|^K700|^K701|^K702|^K703|^K709|^K713|^K714|^K715|^K717|^K73|^K74|^K760|^K762|^K763|^K764|^K768|^K769|^Z944" +lofregex[["charlson"]][["icd10"]][["diab"]] <- "^E100|^E101|^E106|^E108|^E109|^E110|^E111|^E116|^E118|^E119|^E120|^E121|^E126|^E128|^E129|^E130|^E131|^E136|^E138|^E139|^E140|^E141|^E146|^E148|^E149" +lofregex[["charlson"]][["icd10"]][["diabwc"]] <- "^E102|^E103|^E104|^E105|^E107|^E112|^E113|^E114|^E115|^E117|^E122|^E123|^E124|^E125|^E127|^E132|^E133|^E134|^E135|^E137|^E142|^E143|^E144|^E145|^E147" +lofregex[["charlson"]][["icd10"]][["hp"]] <- "^G041|^G114|^G801|^G802|^G81|^G82|^G830|^G831|^G832|^G833|^G834|^G839" +lofregex[["charlson"]][["icd10"]][["rend"]] <- "^I120|^I131|^N032|^N033|^N034|^N035|^N036|^N037|^N052|^N053|^N054|^N055|^N056|^N057|^N18|^N19|^N250|^Z490|^Z491|^Z492|^Z940|^Z992" +lofregex[["charlson"]][["icd10"]][["canc"]] <- "^C00|^C01|^C02|^C03|^C04|^C05|^C06|^C07|^C08|^C09|^C10|^C11|^C12|^C13|^C14|^C15|^C16|^C17|^C18|^C19|^C20|^C21|^C22|^C23|^C24|^C25|^C26|^C30|^C31|^C32|^C33|^C34|^C37|^C38|^C39|^C40|^C41|^C43|^C45|^C46|^C47|^C48|^C49|^C50|^C51|^C52|^C53|^C54|^C55|^C56|^C57|^C58|^C60|^C61|^C62|^C63|^C64|^C65|^C66|^C67|^C68|^C69|^C70|^C71|^C72|^C73|^C74|^C75|^C76|^C81|^C82|^C83|^C84|^C85|^C88|^C90|^C91|^C92|^C93|^C94|^C95|^C96|^C97" +lofregex[["charlson"]][["icd10"]][["msld"]] <- "^I850|^I859|^I864|^I982|^K704|^K711|^K721|^K729|^K765|^K766|^K767" 
+lofregex[["charlson"]][["icd10"]][["metacanc"]] <- "^C77|^C78|^C79|^C80" +lofregex[["charlson"]][["icd10"]][["aids"]] <- "^B20|^B21|^B22|^B24" + +# Elixhauser score, ICD9 +lofregex[["elixhauser"]] <- list() +lofregex[["elixhauser"]][["icd9"]] <- list() +lofregex[["elixhauser"]][["icd9"]][["chf"]] <- "^39891|^40201|^40211|^40291|^40401|^40403|^40411|^40413|^40491|^40493|^4254|^4255|^4256|^4257|^4258|^4259|^428" +lofregex[["elixhauser"]][["icd9"]][["carit"]] <- "^4260|^42613|^4267|^4269|^42610|^42612|^4270|^4271|^4272|^4273|^4274|^4276|^4277|^4278|^4279|^7850|^99601|^99604|^V450|^V533" +lofregex[["elixhauser"]][["icd9"]][["valv"]] <- "^0932|^394|^395|^396|^397|^424|^7463|^7464|^7465|^7466|^V422|^V433" +lofregex[["elixhauser"]][["icd9"]][["pcd"]] <- "^4150|^4151|^416|^4170|^4178|^4179" +lofregex[["elixhauser"]][["icd9"]][["pvd"]] <- "^0930|^4373|^440|^441|^4431|^4432|^4433|^4434|^4435|^4436|^4437|^4438|^4439|^4471|^5571|^5579|^V434" +lofregex[["elixhauser"]][["icd9"]][["hypunc"]] <- "^401" +lofregex[["elixhauser"]][["icd9"]][["hypc"]] <- "^402|^403|^404|^405" +lofregex[["elixhauser"]][["icd9"]][["para"]] <- "^3341|^342|^343|^3440|^3441|^3442|^3443|^3444|^3445|^3446|^3449" +lofregex[["elixhauser"]][["icd9"]][["ond"]] <- "^3319|^3320|^3321|^3334|^3335|^33392|^334|^335|^3362|^340|^341|^345|^3481|^3483|^7803|^7843" +lofregex[["elixhauser"]][["icd9"]][["cpd"]] <- "^4168|^4169|^490|^491|^492|^493|^494|^495|^496|^497|^498|^499|^500|^501|^502|^503|^504|^505|^5064|^5081|^5088" +lofregex[["elixhauser"]][["icd9"]][["diabunc"]] <- "^2500|^2501|^2502|^2503" +lofregex[["elixhauser"]][["icd9"]][["diabc"]] <- "^2504|^2505|^2506|^2507|^2508|^2509" +lofregex[["elixhauser"]][["icd9"]][["hypothy"]] <- "^2409|^243|^244|^2461|^2468" +lofregex[["elixhauser"]][["icd9"]][["rf"]] <- "^40301|^40311|^40391|^40402|^40403|^40412|^40413|^40492|^40493|^585|^586|^5880|^V420|^V451|^V56" +lofregex[["elixhauser"]][["icd9"]][["ld"]] <- 
"^07022|^07023|^07032|^07033|^07044|^07054|^0706|^0709|^4560|^4561|^4562|^570|^571|^5722|^5723|^5724|^5725|^5726|^5727|^5728|^5733|^5734|^5738|^5739|^V427" +lofregex[["elixhauser"]][["icd9"]][["pud"]] <- "^5317|^5319|^5327|^5329|^5337|^5339|^5347|^5349" +lofregex[["elixhauser"]][["icd9"]][["aids"]] <- "^042|^043|^044" +lofregex[["elixhauser"]][["icd9"]][["lymph"]] <- "^200|^201|^202|^2030|^2386" +lofregex[["elixhauser"]][["icd9"]][["metacanc"]] <- "^196|^197|^198|^199" +lofregex[["elixhauser"]][["icd9"]][["solidtum"]] <- "^140|^141|^142|^143|^144|^145|^146|^147|^148|^149|^150|^151|^152|^153|^154|^155|^156|^157|^158|^159|^160|^161|^162|^163|^164|^165|^166|^167|^168|^169|^170|^171|^172|^174|^175|^176|^177|^178|^179|^180|^181|^182|^183|^184|^185|^186|^187|^188|^189|^190|^191|^192|^193|^194|^195" +lofregex[["elixhauser"]][["icd9"]][["rheumd"]] <- "^446|^7010|^7100|^7101|^7102|^7103|^7104|^7108|^7109|^7112|^714|^7193|^720|^725|^7285|^72889|^72930" +lofregex[["elixhauser"]][["icd9"]][["coag"]] <- "^286|^2871|^2873|^2874|^2875" +lofregex[["elixhauser"]][["icd9"]][["obes"]] <- "^2780" +lofregex[["elixhauser"]][["icd9"]][["wloss"]] <- "^260|^261|^262|^263|^7832|^7994" +lofregex[["elixhauser"]][["icd9"]][["fed"]] <- "^2536|^276" +lofregex[["elixhauser"]][["icd9"]][["blane"]] <- "^2800" +lofregex[["elixhauser"]][["icd9"]][["dane"]] <- "^2801|^2802|^2803|^2804|^2805|^2806|^2807|^2808|^2809|^281" +lofregex[["elixhauser"]][["icd9"]][["alcohol"]] <- "^2652|^2911|^2912|^2913|^2915|^2916|^2917|^2918|^2919|^3030|^3039|^3050|^3575|^4255|^5353|^5710|^5711|^5712|^5713|^980|^V113" +lofregex[["elixhauser"]][["icd9"]][["drug"]] <- "^292|^304|^3052|^3053|^3054|^3055|^3056|^3057|^3058|^3059|^V6542" +lofregex[["elixhauser"]][["icd9"]][["psycho"]] <- "^2938|^295|^29604|^29614|^29644|^29654|^297|^298" +lofregex[["elixhauser"]][["icd9"]][["depre"]] <- "^2962|^2963|^2965|^3004|^309|^311" + +# Elixhauser score, ICD10 +lofregex[["elixhauser"]][["icd10"]] <- list() 
+lofregex[["elixhauser"]][["icd10"]][["chf"]] <- "^I099|^I110|^I130|^I132|^I255|^I420|^I425|^I426|^I427|^I428|^I429|^I43|^I50|^P290" +lofregex[["elixhauser"]][["icd10"]][["carit"]] <- "^I441|^I442|^I443|^I456|^I459|^I47|^I48|^I49|^R000|^R001|^R008|^T821|^Z450|^Z950" +lofregex[["elixhauser"]][["icd10"]][["valv"]] <- "^A520|^I05|^I06|^I07|^I08|^I091|^I098|^I34|^I35|^I36|^I37|^I38|^I39|^Q230|^Q231|^Q232|^Q233|^Z952|^Z953|^Z954" +lofregex[["elixhauser"]][["icd10"]][["pcd"]] <- "^I26|^I27|^I280|^I288|^I289" +lofregex[["elixhauser"]][["icd10"]][["pvd"]] <- "^I70|^I71|^I731|^I738|^I739|^I771|^I790|^I792|^K551|^K558|^K559|^Z958|^Z959" +lofregex[["elixhauser"]][["icd10"]][["hypunc"]] <- "^I10" +lofregex[["elixhauser"]][["icd10"]][["hypc"]] <- "^I11|^I12|^I13|^I15" +lofregex[["elixhauser"]][["icd10"]][["para"]] <- "^G041|^G114|^G801|^G802|^G81|^G82|^G830|^G831|^G832|^G833|^G834|^G839" +lofregex[["elixhauser"]][["icd10"]][["ond"]] <- "^G10|^G11|^G12|^G13|^G20|^G21|^G22|^G254|^G255|^G312|^G318|^G319|^G32|^G35|^G36|^G37|^G40|^G41|^G931|^G934|^R470|^R56" +lofregex[["elixhauser"]][["icd10"]][["cpd"]] <- "^I278|^I279|^J40|^J41|^J42|^J43|^J44|^J45|^J46|^J47|^J60|^J61|^J62|^J63|^J64|^J65|^J66|^J67|^J684|^J701|^J703" +lofregex[["elixhauser"]][["icd10"]][["diabunc"]] <- "^E100|^E101|^E109|^E110|^E111|^E119|^E120|^E121|^E129|^E130|^E131|^E139|^E140|^E141|^E149" +lofregex[["elixhauser"]][["icd10"]][["diabc"]] <- "^E102|^E103|^E104|^E105|^E106|^E107|^E108|^E112|^E113|^E114|^E115|^E116|^E117|^E118|^E122|^E123|^E124|^E125|^E126|^E127|^E128|^E132|^E133|^E134|^E135|^E136|^E137|^E138|^E142|^E143|^E144|^E145|^E146|^E147|^E148" +lofregex[["elixhauser"]][["icd10"]][["hypothy"]] <- "^E00|^E01|^E02|^E03|^E890" +lofregex[["elixhauser"]][["icd10"]][["rf"]] <- "^I120|^I131|^N18|^N19|^N250|^Z490|^Z491|^Z492|^Z940|^Z992" +lofregex[["elixhauser"]][["icd10"]][["ld"]] <- "^B18|^I85|^I864|^I982|^K70|^K711|^K713|^K714|^K715|^K717|^K72|^K73|^K74|^K760|^K762|^K763|^K764|^K765|^K766|^K767|^K768|^K769|^Z944" 
+lofregex[["elixhauser"]][["icd10"]][["pud"]] <- "^K257|^K259|^K267|^K269|^K277|^K279|^K287|^K289" +lofregex[["elixhauser"]][["icd10"]][["aids"]] <- "^B20|^B21|^B22|^B24" +lofregex[["elixhauser"]][["icd10"]][["lymph"]] <- "^C81|^C82|^C83|^C84|^C85|^C88|^C96|^C900|^C902" +lofregex[["elixhauser"]][["icd10"]][["metacanc"]] <- "^C77|^C78|^C79|^C80" +lofregex[["elixhauser"]][["icd10"]][["solidtum"]] <- "^C00|^C01|^C02|^C03|^C04|^C05|^C06|^C07|^C08|^C09|^C10|^C11|^C12|^C13|^C14|^C15|^C16|^C17|^C18|^C19|^C20|^C21|^C22|^C23|^C24|^C25|^C26|^C30|^C31|^C32|^C33|^C34|^C37|^C38|^C39|^C40|^C41|^C43|^C45|^C46|^C47|^C48|^C49|^C50|^C51|^C52|^C53|^C54|^C55|^C56|^C57|^C58|^C60|^C61|^C62|^C63|^C64|^C65|^C66|^C67|^C68|^C69|^C70|^C71|^C72|^C73|^C74|^C75|^C76|^C97" +lofregex[["elixhauser"]][["icd10"]][["rheumd"]] <- "^L940|^L941|^L943|^M05|^M06|^M08|^M120|^M123|^M30|^M310|^M311|^M312|^M313|^M32|^M33|^M34|^M35|^M45|^M461|^M468|^M469" +lofregex[["elixhauser"]][["icd10"]][["coag"]] <- "^D65|^D66|^D67|^D68|^D691|^D693|^D694|^D695|^D696" +lofregex[["elixhauser"]][["icd10"]][["obes"]] <- "^E66" +lofregex[["elixhauser"]][["icd10"]][["wloss"]] <- "^E40|^E41|^E42|^E43|^E44|^E45|^E46|^R634|^R64" +lofregex[["elixhauser"]][["icd10"]][["fed"]] <- "^E222|^E86|^E87" +lofregex[["elixhauser"]][["icd10"]][["blane"]] <- "^D500" +lofregex[["elixhauser"]][["icd10"]][["dane"]] <- "^D508|^D509|^D51|^D52|^D53" +lofregex[["elixhauser"]][["icd10"]][["alcohol"]] <- "^F10|^E52|^G621|^I426|^K292|^K700|^K703|^K709|^T51|^Z502|^Z714|^Z721" +lofregex[["elixhauser"]][["icd10"]][["drug"]] <- "^F11|^F12|^F13|^F14|^F15|^F16|^F18|^F19|^Z715|^Z722" +lofregex[["elixhauser"]][["icd10"]][["psycho"]] <- "^F20|^F22|^F23|^F24|^F25|^F28|^F29|^F302|^F312|^F315" +lofregex[["elixhauser"]][["icd10"]][["depre"]] <- "^F204|^F313|^F314|^F315|^F32|^F33|^F341|^F412|^F432" + +# Get Elixhauser icd10cm_2020_1 and add to lofregex (see sourced file below for details) +source('AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/get_lofregex.R') + 
+lofregex[["elixhauser_ahrq_2020"]] <- list() +lofregex[["elixhauser_ahrq_2020"]][["icd10"]] <- icd10cm_2020_1_lofregex + +######################################################################################################################## +### Internal Dataset #2: List of msdrg mappings + +# Mappings retrieved by parsing SAS code, see file sourced below +source('AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/get_lofmsdrg.R') + +######################################################################################################################## +### Internal Dataset #3: Get icd10cm_2021_1 icd mappings +# Creates a list Elixhauser2021Formats with the following objects: +# ElixhauserAHRQ2021Map, ElixhauserAHRQ2021Abbr, ElixhauserAHRQ2021Labels +source('AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/get_mappings.R') + +######################################################################################################################## +### Internal Dataset #4: Get icd10cm_2022_1 icd mappings +# Creates a list Elixhauser2022Formats with the following objects: +# ElixhauserAHRQ2022Map, ElixhauserAHRQ2022Abbr, ElixhauserAHRQ2022Labels +source('AHRQ-Elixhauser/sas-parse/icd10cm_2022_1/get_mappings.R') + +######################################################################################################################## +# Export data as internal +usethis::use_data(lofregex, + lofmsdrg, + Elixhauser2021Formats, + Elixhauser2022Formats, + internal = TRUE, overwrite = TRUE) + +# Clean up space +rm(list=ls()) \ No newline at end of file diff --git a/data/australia10.rda b/data/australia10.rda index 4cef936..dd1d793 100644 Binary files a/data/australia10.rda and b/data/australia10.rda differ diff --git a/data/icd10_2009.rda b/data/icd10_2009.rda index 68e9dc7..ae0a984 100644 Binary files a/data/icd10_2009.rda and b/data/icd10_2009.rda differ diff --git a/data/icd10_2011.rda b/data/icd10_2011.rda index 36b1c96..03816a6 100644 Binary files a/data/icd10_2011.rda and 
b/data/icd10_2011.rda differ diff --git a/data/icd10cm_2017.rda b/data/icd10cm_2017.rda index 0184057..6c9725b 100644 Binary files a/data/icd10cm_2017.rda and b/data/icd10cm_2017.rda differ diff --git a/data/icd10cm_2018.rda b/data/icd10cm_2018.rda index bb2ec4e..0ff9a17 100644 Binary files a/data/icd10cm_2018.rda and b/data/icd10cm_2018.rda differ diff --git a/data/icd9_2015.rda b/data/icd9_2015.rda index abe2d5b..84d798e 100644 Binary files a/data/icd9_2015.rda and b/data/icd9_2015.rda differ diff --git a/data/nhds2010.rda b/data/nhds2010.rda index c2cedbd..7c34483 100644 Binary files a/data/nhds2010.rda and b/data/nhds2010.rda differ diff --git a/man/comorbidity.Rd b/man/comorbidity.Rd index 724f7dc..43ff1ba 100644 --- a/man/comorbidity.Rd +++ b/man/comorbidity.Rd @@ -13,7 +13,13 @@ comorbidity( icd = "icd10", factorise = FALSE, labelled = TRUE, - tidy.codes = TRUE + tidy.codes = TRUE, + drg = NULL, + icd_rank = NULL, + poa = NULL, + year = NULL, + quarter = NULL, + icd10cm_vers = NULL ) } \arguments{ @@ -25,7 +31,7 @@ Column names must be syntactically valid names, otherwise they are forced to be \item{code}{Column of \code{x} containing diagnostic codes. Codes must be in upper case with no punctuation in order to be properly recognised.} -\item{score}{The comorbidity score to compute. Possible choices are the weighted Charlson score (\code{charlson}) and the weighted Elixhauser score (\code{elixhauser}). Values are case-insensitive.} +\item{score}{The comorbidity score to compute. Possible choices are the weighted Charlson score (\code{charlson}), the weighted (pre 2019 AHRQ) Elixhauser score (\code{elixhauser}), and the 2019 AHRQ weighted Elixhauser score (\code{elixhauser_ahrq_2020}). Values are case-insensitive.} \item{assign0}{Apply a hierarchy of comorbidities. If \code{TRUE}, should a comorbidity be present in a patient with different degrees of severity, then the milder form will be assigned to 0 and therefore not counted. 
By doing this, a type of comorbidity is not counted more than once in each patient. In particular, the comorbidities that are affected by this argument are: \itemize{ @@ -35,15 +41,30 @@ Column names must be syntactically valid names, otherwise they are forced to be \item "Hypertension, uncomplicated" (\code{hypunc}) and "Hypertension, complicated" (\code{hypc}) for the Elixhauser score; \item "Diabetes, uncomplicated" (\code{diabunc}) and "Diabetes, complicated" (\code{diabc}) for the Elixhauser score; \item "Solid tumour" (\code{solidtum}) and "Metastatic cancer" (\code{metacanc}) for the Elixhauser score. -}} +} + +Note: This argument has no effect on Elixhauser AHRQ as these choices are incorporated into AHRQ calculations. If using 'elixhauser_ahrq_2020' it is recommended to specify assign0 = FALSE to avoid confusion.} -\item{icd}{The version of ICD coding to use. Possible choices are ICD-9-CM (\code{icd9}) or ICD-10 (\code{icd10}). Defaults to \code{icd10}, and values are case-insensitive.} +\item{icd}{The version of ICD coding to use. Possible choices are ICD-9-CM (\code{icd9}) or ICD-10 (\code{icd10}). Defaults to \code{icd10}, and values are case-insensitive. +Note: if 'elixhauser_ahrq_2020' is selected, icd must equal 'icd10'.} \item{factorise}{Return comorbidities as factors rather than numeric, where (1 = presence of comorbidity, 0 = otherwise). Defaults to \code{FALSE}.} \item{labelled}{Attach labels to each comorbidity, compatible with the RStudio viewer via the \code{\link[utils:View]{utils::View()}} function. Defaults to \code{TRUE}.} \item{tidy.codes}{Tidy diagnostic codes? If \code{TRUE}, all codes are converted to upper case and all non-alphanumeric characters are removed using the regular expression \code{[^[:alnum:]]}. Defaults to \code{TRUE}.} + +\item{drg}{Column of \code{x} that contains DRG codes associated with the encounter. 
Defaults to \code{NULL} but must be specified if score = 'elixhauser_ahrq_2020'.} + +\item{icd_rank}{Column of \code{x} that contains the rank or position of DRG codes. Defaults to \code{NULL} but must be specified if score = 'elixhauser_ahrq_2020'.} + +\item{poa}{Column of \code{x} that contains the present on admission status codes (e.g. 'Y', 'W', 'N', 'U'). Defaults to \code{NULL} but must be specified if score = 'elixhauser_ahrq_2020'.} + +\item{year}{Column of \code{x} that contains the calendar year of the admission. Defaults to \code{NULL}, but either \code{year} and \code{quarter} OR \code{icd10cm_vers} must be specified if score = 'elixhauser_ahrq_2020'.} + +\item{quarter}{Column of \code{x} that contains the calendar quarter of the admission. Defaults to \code{NULL}, but either \code{year} and \code{quarter} OR \code{icd10cm_vers} must be specified if score = 'elixhauser_ahrq_2020'.} + +\item{icd10cm_vers}{Column of \code{x} that contains the ICD10CM version. Must be specified if \code{year} and \code{quarter} are not. Must be \code{NULL} if \code{year} and \code{quarter} are not \code{NULL}. Defaults to \code{NULL}, but either \code{year} and \code{quarter} OR \code{icd10cm_vers} must be specified if score = 'elixhauser_ahrq_2020'.} } \value{ A data frame with \code{id}, columns relative to each comorbidity domain, comorbidity score, weighted comorbidity score, and categorisations of such scores, with one row per individual. @@ -116,6 +137,55 @@ Conversely, for the Elixhauser score the dataset contains the following variable \item \code{windex_vw}, for the weighted version of the grouped Elixhauser index using the algorithm in van Walraven \emph{et al}. (2009). } +For AHRQ Elixhauser (elixhauser_ahrq_2020), the dataset contains the same variables as 'Elixhauser' with the following exceptions: +\itemize{ +\item Comorbidity columns follow AHRQ's abbreviation formatting. 
+\item In place of \code{hypunc} and \code{hypc}, those measures are combined to form \code{HTN_C} +} + +#' For AHRQ Elixhauser (elixhauser_ahrq_2020), the dataset contains the following: +\itemize{ +\item The \code{id} variable as defined by the user; +\item \code{AIDS}, Acquired immune deficiency syndrome; +\item \code{ALCOHOL}, Alcohol abuse; +\item \code{ANEMDF}, (only if poa is supplied) Deficiency anemias; +\item \code{ARTH}, Arthropathies; +\item \code{BLDLOSS}, (only if poa is supplied) Chronic blood loss anemia; +\item \code{CANCER_LEUK}, Leukemia; +\item \code{CANCER_LYMPH}, Lymphoma; +\item \code{CANCER_METS}, Metastatic cancer; +\item \code{CANCER_NSITU}, Solid tumor without metastasis, in situ; +\item \code{CANCER_SOLID}, Solid tumor without metastasis, malignant; +\item \code{CBVD}, (only if poa is supplied) Cerebrovascular disease; +\item \code{CHF}, (only if poa is supplied) Congestive heart failure; +\item \code{COAG}, (only if poa is supplied) Coagulopathy; +\item \code{DEMENTIA}, Dementia; +\item \code{DEPRESS}, Depression; +\item \code{DIAB_CX}, Diabetes with chronic complications; +\item \code{DIAB_UNCX}, Diabetes without chronic complications; +\item \code{DRUG_ABUSE}, Drug abuse; +\item \code{HTN_CX}, Hypertension, complicated; +\item \code{HTN_UNCX}, Hypertension, uncomplicated; +\item \code{LIVER_MLD}, (only if poa is supplied) Liver disease, mild; +\item \code{LIVER_SEV}, (only if poa is supplied) Liver disease, moderate to severe; +\item \code{LUNG_CHRONIC}, Chronic pulmonary disease; +\item \code{NEURO_MOVT}, (only if poa is supplied) Neurological disorders affecting movement; +\item \code{NEURO_OTH}, (only if poa is supplied) Other neurological disorders; +\item \code{NEURO_SEIZ}, (only if poa is supplied) Seizures and epilepsy; +\item \code{OBESE}, Obesity; +\item \code{PARALYSIS}, (only if poa is supplied) Paralysis; +\item \code{PERIVASC}, Peripheral vascular disease; +\item \code{PSYCHOSES}, (only if poa is supplied) Psychoses; 
+\item \code{PULMCIRC}, (only if poa is supplied) Pulmonary circulation disease; +\item \code{RENLFL_MOD}, (only if poa is supplied) Renal failure, moderate; +\item \code{RENLFL_SEV}, (only if poa is supplied) Renal failure, severe; +\item \code{THYROID_HYPO}, Hypothyroidism; +\item \code{THYROID_OTH}, Other thyroid disorders; +\item \code{ULCER_PEPTIC}, (only if poa is supplied) Peptic ulcer with bleeding; +\item \code{VALVE}, (only if poa is supplied) Valvular disease; +\item \code{WGHTLOSS}, (only if poa is supplied) Weight loss; +} + Labels are presented to the user when using the RStudio viewer (e.g. via the \code{\link[utils:View]{utils::View()}} function) for convenience. } \description{ @@ -149,4 +219,8 @@ Moore BJ, White S, Washington R, Coenen N, and Elixhauser A. \emph{Identifying i van Walraven C, Austin PC, Jennings A, Quan H and Forster AJ. \emph{A modification of the Elixhauser comorbidity measures into a point system for hospital death using administrative data}. Medical Care 2009; 47(6):626-633. Menendez ME, Neuhaus V, van Dijk CN, Ring D. \emph{The Elixhauser comorbidity method outperforms the Charlson index in predicting inpatient death after orthopaedic surgery}. Clinical Orthopaedics and Related Research 2014; 472(9):2878-2886. + +\emph{Healthcare Cost and Utilization Project. Elixhauser Comorbidity Software Version 3.7} Available at https://www.hcup-us.ahrq.gov/toolssoftware/comorbidity/comorbidity.jsp + +\emph{Healthcare Cost and Utilization Project. Elixhauser Comorbidity Software Refined for ICD-10-CM v2021.1} Available at https://www.hcup-us.ahrq.gov/toolssoftware/comorbidityicd10/comorbidity_icd10.jsp } diff --git a/man/sample_diag.Rd b/man/sample_diag.Rd index b86284f..803c7bb 100644 --- a/man/sample_diag.Rd +++ b/man/sample_diag.Rd @@ -9,7 +9,7 @@ sample_diag(n = 1, version = "ICD10_2011") \arguments{ \item{n}{Number of ICD codes to simulate.} -\item{version}{The version of the ICD coding scheme to use. 
Possible choices are \code{ICD10_2009}, \code{ICD10_2011}, and \code{ICD9_2015}; defaults to \code{ICD10_2011}. See \link[comorbidity:icd10_2009]{comorbidity::icd10_2009}, \link[comorbidity:icd10_2011]{comorbidity::icd10_2011}, and \link[comorbidity:icd9_2015]{comorbidity::icd9_2015} for further information on the different schemes.} +\item{version}{The version of the ICD coding scheme to use. Possible choices are \code{ICD10_2009}, \code{ICD10_2011}, and \code{ICD9_2015}; defaults to \code{ICD10_2011}. See \link{icd10_2009}, \link{icd10_2011}, and \link{icd9_2015} for further information on the different schemes.} } \value{ A vector of \code{n} ICD diagnostic codes. diff --git a/man/sample_drg.Rd b/man/sample_drg.Rd new file mode 100644 index 0000000..8a0c420 --- /dev/null +++ b/man/sample_drg.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sample_drg.R +\name{sample_drg} +\alias{sample_drg} +\title{Simulate DRG codes} +\usage{ +sample_drg(n = 1) +} +\arguments{ +\item{n}{Number of DRG codes to simulate.} +} +\value{ +A vector of \code{n} DRG diagnostic codes. +} +\description{ +A simple function to simulate DRG codes at random. 
+} +\examples{ +# Simulate 10 DRG codes +sample_drg(10) + +# Simulate a tidy dataset with 15 individuals and 200 rows +set.seed(1) +x <- data.frame( + id = sample(1:15, size = 200, replace = TRUE), + stringsAsFactors = FALSE +) +x$drg = sample_drg(15)[x$id] +head(x) +} diff --git a/man/sample_year_quarter.Rd b/man/sample_year_quarter.Rd new file mode 100644 index 0000000..b04794b --- /dev/null +++ b/man/sample_year_quarter.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sample_year_quarter.R +\name{sample_year_quarter} +\alias{sample_year_quarter} +\title{Simulate Year and Quarter} +\usage{ +sample_year_quarter(n = 1) +} +\arguments{ +\item{n}{Number of Year/Quarter pairs to generate.} +} +\value{ +A data.frame of with \code{n} years and quarters. +} +\description{ +A simple function to simulate Year and Quarter data at random. +} +\examples{ +# Simulate 10 Year/Quarter pairs +sample_year_quarter(10) + +# Simulate a tidy dataset with 15 individuals and 200 rows +set.seed(1) +x <- data.frame( + id = sample(1:15, size = 200, replace = TRUE), + stringsAsFactors = FALSE +) +x[c('Year', 'Quarter')] = sample_year_quarter(15)[x$id,] +head(x) +} diff --git a/testing.R b/testing.R index c71c3bc..5ece226 100644 --- a/testing.R +++ b/testing.R @@ -7,4 +7,4 @@ dt <- data.frame( `Enc ID` = 1234, DxCode = "N390" ) -comorbidity(dt, id = "Enc ID", code = "DxCode", icd = "icd10", score = "charlson", assign0 = F) +comorbidity(dt, id = "Enc ID", code = "DxCode", icd = "icd10", score = "charlson", assign0 = F) \ No newline at end of file diff --git a/vignettes/comorbidityscores.html b/vignettes/comorbidityscores.html new file mode 100644 index 0000000..db27190 --- /dev/null +++ b/vignettes/comorbidityscores.html @@ -0,0 +1,804 @@ + + + + + + + + + + + + + + + + +Comorbidity scores + + + + + + + + + + + + + + + + + + + + + +

Comorbidity scores

+

Alessandro Gasparini

+

2020-04-21

+ + + +

comorbidity is an R package for computing comorbidity scores based on ICD code data. As of version 0.1.0, comorbidity can calculate the Charlson comorbidity score and the Elixhauser comorbidity score, using either the ICD-9 or ICD-10 coding system. Details on each score supported by this package are presented in this vignette, along with examples on how to compute the different scores with this software.

+
+

Charlson comorbidity score

+

The Charlson comorbidity index was first developed by Charlson et al. in 1987 to predict one-year mortality for patients who may have a range of comorbid conditions. Each condition is assigned a score depending on the associated risk of dying, and the scores are then summed to provide a total score to predict mortality.

+

The Charlson comorbidity index includes the following comorbid conditions: acute myocardial infarction, congestive heart failure, peripheral vascular disease, cerebrovascular disease, dementia, chronic obstructive pulmonary disease [COPD], rheumatoid disease, peptic ulcer disease, mild and moderate/severe liver disease, diabetes mellitus with and without complications, hemiplegia/paraplegia, renal disease, cancer (any malignancy) and metastatic solid tumour, AIDS/HIV.

+

Many variations of the Charlson comorbidity index have been presented, as outlined by Sharabiani et al. in their systematic review. comorbidity computes the Quan et al. version of the Charlson score for both ICD-9-CM and ICD-10 coding systems, as outlined in their paper from 2005; in the next subsections, we present the different ICD codes utilised by comorbidity. Categorisation of scores and weighted scores are based on work by Menendez et al.

+
+

ICD-9-CM codes

+

The ICD-9-CM codes used by comorbidity to compute the Charlson comorbidity index are:

+ +

There is a difference between codes reported above for peripheral vascular disease and the paper by Quan et al.; the code 47.1 reported in the paper is replaced by 447.1, as it is likely a typo. See here and here for more details.

+
+
+

ICD-10 codes

+

The ICD-10 codes used by comorbidity to compute the Charlson comorbidity index are:

+ +
+
+

Weights

+

Each condition from the Charlson score is assigned a score when computing the weighted Charlson index, irrespective of the coding system utilised. In particular, diabetes with complications, hemiplegia/paraplegia, renal disease, and malignancies are assigned a score of 2; moderate/severe liver disease is assigned a score of 3; metastatic solid tumour and AIDS/HIV are assigned a score of 6; the remaining comorbidities are assigned a score of 1. comorbidity allows the option of applying a hierarchy of comorbidities should a more severe version be present: by choosing to do so (and that is the default behaviour of comorbidity), a type of comorbidity is never counted more than once for a given patient.

+
+
+
+

Elixhauser comorbidity score

+

The Elixhauser comorbidity index, like the Charlson comorbidity index, is a method for measuring patient comorbidity based on ICD-9-CM and ICD-10 diagnosis codes found in administrative data; it was developed by Elixhauser et al. in 1998. Over time, there have been changes to the index based on different research. For instance:

+ +

comorbidity is using the coding definition of Quan et al. (2005) for both ICD-9-CM and ICD-10 coding systems; the actual codes and weights utilised by comorbidity are introduced in the next subsections. However, there is no consensus regarding the weighting algorithm, with several competing definitions. comorbidity implements (and returns) both the AHRQ version of the Elixhauser index (Moore et al., 2017) and the van Walraven et al. (2009) version. The AHRQ Elixhauser comorbidity score only includes 29 comorbidities; the missing comorbidities are therefore assigned a weight of zero. Finally, the categorisation of scores and weighted scores is based on work by Menendez et al.

+
+

ICD-9-CM codes

+

The ICD-9-CM codes used by comorbidity to compute the Elixhauser comorbidity index are:

+ +
+
+

ICD-10 codes

+

The ICD-10 codes used by comorbidity to compute the Elixhauser comorbidity index are:

+ +
+
+

Weights

+

The weights for the Elixhauser comorbidity index are included in the following table, depending on the algorithm used for the weighting process:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Comorbidity Domain | AHRQ Algorithm | van Walraven Algorithm |
|---|---|---|
| Congestive heart failure | 9 | 7 |
| Cardiac arrhythmias | (0) | 5 |
| Valvular disease | 0 | -1 |
| Pulmonary circulation disorders | 6 | 4 |
| Peripheral vascular disorders | 3 | 2 |
| Hypertension (combined uncomplicated and complicated) | -1 | 0 |
| Paralysis | 5 | 7 |
| Other neurological disorders | 5 | 6 |
| Chronic pulmonary disease | 3 | 3 |
| Diabetes, uncomplicated | 0 | 0 |
| Diabetes, complicated | -3 | 0 |
| Hypothyroidism | 0 | 0 |
| Renal failure | 6 | 5 |
| Liver disease | 4 | 11 |
| Peptic ulcer disease, excluding bleeding | 0 | 0 |
| AIDS/HIV | 0 | 0 |
| Lymphoma | 6 | 9 |
| Metastatic cancer | 14 | 12 |
| Solid tumour without metastasis | 7 | 4 |
| Rheumatoid arthritis/collagen vascular diseases | 0 | 0 |
| Coagulopathy | 11 | 3 |
| Obesity | -5 | -4 |
| Weight loss | 9 | 6 |
| Fluid and electrolyte disorders | 11 | 5 |
| Blood loss anaemia | -3 | -2 |
| Deficiency anaemia | -2 | -2 |
| Alcohol abuse | -1 | 0 |
| Drug abuse | -7 | -7 |
| Psychoses | -5 | 0 |
| Depression | -5 | -3 |
+

The AHRQ algorithm does not include cardiac arrhythmias, hence a weight of 0 is assigned.

+
+
+
+

Examples

+

The first step consists of loading the comorbidity package:

+
library(comorbidity)
+

We can utilise the built-in sample_diag() function to simulate ICD diagnostic codes. Both ICD-9 and ICD-10 codes are supported:

+
data9 <- data.frame(
+  id = sample(1:10, size = 250, replace = TRUE),
+  code = sample_diag(n = 250, version = "ICD9_2015"),
+  stringsAsFactors = FALSE
+)
+data9 <- data9[order(data9$id), ]
+data10 <- data.frame(
+  id = sample(1:10, size = 250, replace = TRUE),
+  code = sample_diag(n = 250, version = "ICD10_2011"),
+  stringsAsFactors = FALSE
+)
+data10 <- data10[order(data10$id), ]
+

Then, we can go ahead and compute various comorbidity scores and indices supported by comorbidity. The Charlson score based on ICD-9-CM data is computed as:

+
charlson9 <- comorbidity(x = data9, id = "id", code = "code", score = "charlson", icd = "icd9", assign0 = FALSE)
+str(charlson9)
+#> 'data.frame':    10 obs. of  22 variables:
+#>  $ id      : int  1 2 3 4 5 6 7 8 9 10
+#>  $ ami     : int  0 0 0 0 0 1 0 0 0 0
+#>  $ chf     : int  0 0 0 0 0 0 0 0 0 1
+#>  $ pvd     : num  0 0 0 0 0 0 0 0 0 0
+#>  $ cevd    : int  0 0 0 0 0 0 0 0 0 1
+#>  $ dementia: num  0 0 0 0 0 0 0 0 0 0
+#>  $ copd    : int  0 0 0 1 0 0 0 0 0 0
+#>  $ rheumd  : num  0 0 0 0 0 0 0 0 0 0
+#>  $ pud     : int  1 0 1 1 1 0 0 0 0 1
+#>  $ mld     : num  0 0 0 0 0 0 0 0 0 0
+#>  $ diab    : int  0 1 0 0 0 0 0 0 0 0
+#>  $ diabwc  : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hp      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ rend    : int  0 0 0 0 1 0 0 0 0 0
+#>  $ canc    : int  1 1 1 1 1 1 1 1 1 0
+#>  $ msld    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ metacanc: num  0 0 0 0 0 0 0 0 0 0
+#>  $ aids    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ score   : num  2 2 2 3 3 2 1 1 1 3
+#>  $ index   : Factor w/ 4 levels "0","1-2","3-4",..: 2 2 2 3 3 2 2 2 2 3
+#>  $ wscore  : num  3 3 3 4 5 3 2 2 2 3
+#>  $ windex  : Factor w/ 4 levels "0","1-2","3-4",..: 3 3 3 3 4 3 2 2 2 3
+#>  - attr(*, "variable.labels")= chr  "ID" "Myocardial infarction" "Congestive heart failure" "Peripheral vascular disease" ...
+

The Charlson score based on ICD-10 data:

+
charlson10 <- comorbidity(x = data10, id = "id", code = "code", score = "charlson", icd = "icd10", assign0 = FALSE)
+str(charlson10)
+#> 'data.frame':    10 obs. of  22 variables:
+#>  $ id      : int  1 2 3 4 5 6 7 8 9 10
+#>  $ ami     : int  0 0 0 0 0 1 0 0 0 0
+#>  $ chf     : int  1 0 0 0 0 0 0 0 0 0
+#>  $ pvd     : int  0 0 0 0 0 0 1 0 0 0
+#>  $ cevd    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ dementia: num  0 0 0 0 0 0 0 0 0 0
+#>  $ copd    : int  0 1 0 0 0 0 0 0 0 0
+#>  $ rheumd  : int  0 0 0 0 0 0 0 0 1 0
+#>  $ pud     : num  0 0 0 0 0 0 0 0 0 0
+#>  $ mld     : num  0 0 0 0 0 0 0 0 0 0
+#>  $ diab    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ diabwc  : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hp      : int  0 0 0 0 0 1 0 0 0 0
+#>  $ rend    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ canc    : int  1 1 0 0 0 1 1 0 1 1
+#>  $ msld    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ metacanc: int  0 0 0 0 0 0 1 0 0 0
+#>  $ aids    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ score   : num  2 2 0 0 0 3 3 0 2 1
+#>  $ index   : Factor w/ 4 levels "0","1-2","3-4",..: 2 2 1 1 1 3 3 1 2 2
+#>  $ wscore  : num  3 3 0 0 0 5 9 0 3 2
+#>  $ windex  : Factor w/ 4 levels "0","1-2","3-4",..: 3 3 1 1 1 4 4 1 3 2
+#>  - attr(*, "variable.labels")= chr  "ID" "Myocardial infarction" "Congestive heart failure" "Peripheral vascular disease" ...
+

The Elixhauser score based on ICD-9-CM data:

+
elixhauser9 <- comorbidity(x = data9, id = "id", code = "code", score = "elixhauser", icd = "icd9", assign0 = FALSE)
+str(elixhauser9)
+#> 'data.frame':    10 obs. of  38 variables:
+#>  $ id         : int  1 2 3 4 5 6 7 8 9 10
+#>  $ chf        : int  0 0 0 0 0 0 0 0 0 1
+#>  $ carit      : int  0 0 0 0 0 0 0 0 1 0
+#>  $ valv       : int  0 0 1 0 0 0 0 0 0 0
+#>  $ pcd        : num  0 0 0 0 0 0 0 0 0 0
+#>  $ pvd        : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hypunc     : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hypc       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ para       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ ond        : int  0 0 0 0 0 0 0 1 0 0
+#>  $ cpd        : int  0 0 0 1 0 0 0 0 0 0
+#>  $ diabunc    : int  0 1 0 0 0 0 0 0 0 0
+#>  $ diabc      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hypothy    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ rf         : int  0 0 0 0 1 0 0 0 0 0
+#>  $ ld         : num  0 0 0 0 0 0 0 0 0 0
+#>  $ pud        : int  0 0 0 1 0 0 0 0 0 0
+#>  $ aids       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ lymph      : int  1 1 0 0 0 0 1 0 1 0
+#>  $ metacanc   : num  0 0 0 0 0 0 0 0 0 0
+#>  $ solidtum   : int  0 0 1 1 1 1 0 1 1 0
+#>  $ rheumd     : int  0 0 1 0 0 0 0 0 0 0
+#>  $ coag       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ obes       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ wloss      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ fed        : int  1 0 0 0 0 0 0 0 0 0
+#>  $ blane      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ dane       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ alcohol    : int  0 0 1 0 0 0 0 0 0 0
+#>  $ drug       : int  0 0 0 0 0 0 1 0 0 0
+#>  $ psycho     : int  0 0 0 1 1 0 0 0 0 0
+#>  $ depre      : int  0 0 0 0 1 0 0 0 0 0
+#>  $ score      : num  2 2 4 4 4 1 2 2 3 1
+#>  $ index      : Factor w/ 4 levels "<0","0","1-4",..: 3 3 3 3 3 3 3 3 3 3
+#>  $ wscore_ahrq: num  17 6 6 5 3 7 -1 12 13 9
+#>  $ wscore_vw  : num  14 9 3 7 6 4 2 10 18 7
+#>  $ windex_ahrq: Factor w/ 4 levels "<0","0","1-4",..: 4 4 4 4 3 4 1 4 4 4
+#>  $ windex_vw  : Factor w/ 4 levels "<0","0","1-4",..: 4 4 3 4 4 3 3 4 4 4
+#>  - attr(*, "variable.labels")= chr  "ID" "Congestive heart failure" "Cardiac arrhythmias" "Valvular disease" ...
+

Finally, we compute the Elixhauser score based on ICD-10 data:

+
elixhauser10 <- comorbidity(x = data10, id = "id", code = "code", score = "elixhauser", icd = "icd10", assign0 = FALSE)
+str(elixhauser10)
+#> 'data.frame':    10 obs. of  38 variables:
+#>  $ id         : int  1 2 3 4 5 6 7 8 9 10
+#>  $ chf        : int  1 0 0 0 0 0 0 0 0 0
+#>  $ carit      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ valv       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ pcd        : num  0 0 0 0 0 0 0 0 0 0
+#>  $ pvd        : int  0 0 0 0 0 0 1 0 0 0
+#>  $ hypunc     : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hypc       : int  1 0 0 0 1 0 0 0 0 0
+#>  $ para       : int  0 0 0 0 0 1 0 0 0 0
+#>  $ ond        : int  0 0 1 1 0 0 1 0 0 0
+#>  $ cpd        : int  0 1 0 0 0 0 0 0 0 0
+#>  $ diabunc    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ diabc      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ hypothy    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ rf         : num  0 0 0 0 0 0 0 0 0 0
+#>  $ ld         : num  0 0 0 0 0 0 0 0 0 0
+#>  $ pud        : num  0 0 0 0 0 0 0 0 0 0
+#>  $ aids       : num  0 0 0 0 0 0 0 0 0 0
+#>  $ lymph      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ metacanc   : int  0 0 0 0 0 0 1 0 0 0
+#>  $ solidtum   : int  1 1 0 0 0 1 1 0 1 1
+#>  $ rheumd     : int  0 0 0 1 0 0 0 0 1 1
+#>  $ coag       : int  0 0 0 0 0 0 0 0 0 1
+#>  $ obes       : int  0 0 0 0 0 0 0 0 0 1
+#>  $ wloss      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ fed        : int  0 0 0 1 0 0 0 1 0 0
+#>  $ blane      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ dane       : int  0 0 1 0 0 0 0 0 0 0
+#>  $ alcohol    : num  0 0 0 0 0 0 0 0 0 0
+#>  $ drug       : int  0 0 0 0 1 0 1 0 0 0
+#>  $ psycho     : int  0 0 0 0 0 1 0 0 0 0
+#>  $ depre      : num  0 0 0 0 0 0 0 0 0 0
+#>  $ score      : num  3 2 2 3 2 3 5 1 2 4
+#>  $ index      : Factor w/ 4 levels "<0","0","1-4",..: 3 3 3 3 3 3 4 3 3 3
+#>  $ wscore_ahrq: num  15 10 3 16 -8 7 22 11 7 13
+#>  $ wscore_vw  : num  11 7 4 11 -7 11 17 5 4 3
+#>  $ windex_ahrq: Factor w/ 4 levels "<0","0","1-4",..: 4 4 3 4 1 4 4 4 4 4
+#>  $ windex_vw  : Factor w/ 4 levels "<0","0","1-4",..: 4 4 3 4 1 4 4 4 3 3
+#>  - attr(*, "variable.labels")= chr  "ID" "Congestive heart failure" "Cardiac arrhythmias" "Valvular disease" ...
+
+
+

References

+ +
+ + + + + + + +