Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for latest AHRQ Elixhauser comorbidity calculation #29

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
22ef2cd
initial ahrq update test'
fiksdala Apr 21, 2020
bf3706f
initial ahrq update test'
fiksdala Apr 21, 2020
ee8be3f
remove score assertion for testing
fiksdala Apr 21, 2020
bf71007
initial ahrq test
fiksdala Apr 21, 2020
385fe22
manually add ahrq groups
fiksdala Apr 21, 2020
9790364
add elixhauser_ahrq, update check_output, cleanup files
fiksdala Apr 22, 2020
47f8548
clean up updates
fiksdala Apr 22, 2020
b9d9b98
duplicate SAS logic, add drg and assertions, clean up files
fiksdala Apr 24, 2020
4afe31c
delete temp testing folder
fiksdala Apr 24, 2020
c279dfa
clean up comments
fiksdala Apr 24, 2020
45359e5
fix drg_flags error
fiksdala Apr 24, 2020
c061579
fix sas logic in comorbidity()
fiksdala Apr 27, 2020
5742669
fix drg groups, speed improvements
fiksdala Apr 30, 2020
8688574
fix drg merge
fiksdala Apr 30, 2020
4ffb667
Clean up
fiksdala Apr 30, 2020
32c0d76
Add icd_rank and drop 1st, fix lofregex and make-data.R
fiksdala May 1, 2020
5a5ea9e
Merge remote-tracking branch 'upstream/master'
fiksdala Sep 16, 2020
99b508c
update documentation
fiksdala Sep 16, 2020
4a1ef8d
apply documentation updates
fiksdala Sep 16, 2020
4156503
fix documentation typos
fiksdala Sep 16, 2020
45b9826
add fork particulars to readme
fiksdala Sep 16, 2020
4fdd0b5
add data simulation support for elixhauser_ahrq
fiksdala Sep 29, 2020
4338293
update readme
fiksdala Sep 29, 2020
391801f
convert ahrq logic to data.table conventions
fiksdala Sep 29, 2020
20db8df
fix data.table/data.frame conflicts
fiksdala Sep 29, 2020
502fb0e
fix id class issue in drg merge
fiksdala Sep 29, 2020
741ae2e
update readme
fiksdala Sep 29, 2020
84521ea
Add support for 2021 version of AHRQ Elixhauser
fiksdala Dec 2, 2020
d171b53
test
Nov 2, 2021
97a6165
kr_package_2022_update
Nov 24, 2021
d7841a5
Merge pull request #1 from fiksdala/kr_branch
fiksdala Nov 24, 2021
20669da
Bug fixes for elixhauser_ahrq_2022
fiksdala Jan 6, 2022
def36ad
Merge pull request #5 from fiksdala/bug_fixes
fiksdala Jan 6, 2022
5350388
bug fixes, confirms 2022 matches AHRQ SAS program
fiksdala Jan 11, 2022
abe83c1
Merge pull request #6 from fiksdala/bug_fixes
fiksdala Jan 11, 2022
0798871
Fixes Elixhauser .rds mapping files
fiksdala Jan 18, 2022
db329e2
Merge pull request #7 from fiksdala/minor_updates
fiksdala Jan 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
73 changes: 73 additions & 0 deletions AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/get_lofmsdrg.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
library(stringr)
library(dplyr)

# Make lofmsdrg from comformat_icd10cm_2020_1.txt

# Local copy of the AHRQ SAS format file. Defined once so the download target
# and the file read below always refer to the same path.
sas_path <- "AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/comformat_icd10cm_2020_1.txt"

# Fetch the format file from HCUP
download.file(
  url = "https://www.hcup-us.AHRQ.gov/toolssoftware/comorbidityicd10/comformat_icd10cm_2020_1.txt",
  destfile = sas_path
)

# Read in data (one element per line of the file)
sas_AHRQ_raw <- readLines(sas_path)


# Helper functions to format msdrgs

# Expand one MS-DRG token into numbers.
#   interval: a single string, either one number ("5") or a range written as
#             "start-end" ("1-3").
# Returns start:end when the token contains a "-", otherwise the single number.
# Only the first two "-"-separated pieces are looked at.
convert_interval <- function(interval) {
  bounds <- str_split(interval, "-")[[1]]
  if (length(bounds) <= 1) {
    return(as.numeric(bounds[1]))
  }
  c(as.numeric(bounds[1]):as.numeric(bounds[2]))
}

# Turn raw MS-DRG text into a plain numeric vector.
#   x: character vector of comma-separated tokens, each token a number or a
#      "start-end" range (see convert_interval()).
# Returns the expanded numbers as an unnamed vector.
format_msdrg <- function(x) {
  # Separate intervals, flatten, and trim whitespace
  tokens <- str_trim(unlist(str_split(x, ",")))
  # Remove blanks
  tokens <- tokens[tokens != ""]
  # Convert each token to its numeric interval
  expanded <- sapply(tokens, convert_interval)
  # Flatten, drop names, and keep the result a plain vector
  as.vector(unname(unlist(expanded)))
}

# Parse the MS-DRG section of the AHRQ SAS format file into a named list.
#
#   sas_AHRQ_raw: character vector holding the format file, one line per
#                 element (as returned by readLines()).
#
# Returns a list keyed by the word that follows `VALUE` on each statement found
# after the "ICD-10 MS-DRG V37 Formats" header line. Each entry holds the
# result of format_msdrg() applied to the text before " = " on the statement's
# digit-containing lines, assigned when a line containing ";" closes the
# statement.
#
# Stops with an error when the section header is not present.
make_lofmsdrg <- function(sas_AHRQ_raw) {
  header_idx <- grep("ICD-10 MS-DRG V37 Formats", sas_AHRQ_raw)
  if (length(header_idx) == 0) {
    stop(
      "Section header 'ICD-10 MS-DRG V37 Formats' not found in input.",
      call. = FALSE
    )
  }

  # Skip to MS-DRG: drop everything up to and including the first header line
  raw_msdrg <- sas_AHRQ_raw[-seq_len(header_idx[1])]

  # Trim first so that indented blank lines and indented "Run;" lines are
  # dropped as well (filtering before trimming let them through).
  raw_msdrg <- str_trim(raw_msdrg)
  raw_msdrg <- raw_msdrg[raw_msdrg != "" & raw_msdrg != "Run;"] # Drop empty and run

  msdrg_labels <- list()
  current_label <- NULL # label of the VALUE statement being read, if any
  msdrg_num_unformated <- c() # raw number/range text collected for that label

  for (line in raw_msdrg) {
    # Get value labels
    if (grepl("VALUE", line)) {
      label_tokens <- str_split(line, "VALUE")[[1]][[2]] %>%
        str_trim() %>%
        str_split(" ")
      current_label <- label_tokens[[1]][1]
      msdrg_labels[[current_label]] <- list()
    }

    # Extract numbers, but only while inside a VALUE statement
    if (!is.null(current_label) && grepl("\\d", line)) {
      msdrg_num_unformated <- append(
        msdrg_num_unformated,
        str_split(line, " = ")[[1]][1]
      )
    }

    # A ";" ends the statement: assign formatted MS-DRGs to the label
    if (grepl(";", line)) {
      if (!is.null(current_label)) {
        msdrg_labels[[current_label]] <- format_msdrg(msdrg_num_unformated)
      }
      # Clear state so a later stray ";" line (e.g. "run;") cannot overwrite
      # or remove the label that was just stored.
      msdrg_num_unformated <- c()
      current_label <- NULL
    }
  }

  msdrg_labels
}
# Build the MS-DRG lookup from the lines read above
lofmsdrg <- make_lofmsdrg(sas_AHRQ_raw)

# Save lofmsdrg as .Rds
saveRDS(
  object = lofmsdrg,
  file = 'AHRQ-Elixhauser/sas-formats/icd10cm_2020_1/icd10cm_2020_1_lofmsdrg.Rds'
)

# Remove comformat_icd10cm_2020_1.txt
file.remove(sas_path)
66 changes: 66 additions & 0 deletions AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/get_lofregex.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
library(stringr)
library(dplyr)

# Local copy of the AHRQ SAS format file. Defined once so the download target
# and the file read below always refer to the same path.
sas_path <- "AHRQ-Elixhauser/sas-parse/icd10cm_2020_1/comformat_icd10cm_2020_1.txt"

# Fetch the format file from HCUP
download.file(
  url = "https://www.hcup-us.AHRQ.gov/toolssoftware/comorbidityicd10/comformat_icd10cm_2020_1.txt",
  destfile = sas_path
)


# Example pattern to extract:
# "D500",
# "O9081",
# "O99011",
# "O99012",
# "O99013",
# "O99019",
# "O9902",
# "O9903"="BLDLOSS" /*Blood loss anemia*/

# Read in data (one element per line of the file)
sas_AHRQ_raw <- readLines(sas_path)

# Make list of lists for AHRQ codes to compare
#
# Assigns ICD-10 codes to comorbidity labels from the sas file located here:
# https://www.hcup-us.AHRQ.gov/toolssoftware/comorbidityicd10/comformat_icd10cm_2020_1.txt
# Omits /**** ICD-10 MS-DRG V37 Formats ****/
#
#   sas_AHRQ_raw: character vector of the file's lines (from readLines()).
#
# Returns a named list with one element per label. Each element is a single
# regex string of the form "^CODE1|^CODE2|...", built from the codes listed up
# to and including the "CODE=LABEL" line for that label. Parsing stops after
# the line "R636=WGHTLOSS".
make_sas_list <- function(sas_AHRQ_raw) {
  # Clean up readlines
  sas_AHRQ_prep <- sas_AHRQ_raw[sas_AHRQ_raw != ""] %>% # Remove empty lines
    .[-(1:18)] %>% # First 18 elements are extraneous
    lapply(function(x) str_split(x, "\\/\\*")[[1]][1]) %>% # Drop sas comments
    unlist() %>%
    str_trim() %>% # Trim white space
    str_replace_all('\\"', "") %>% # Remove extraneous characters
    str_replace_all(',', "") %>% # Remove extraneous characters
    str_trim() # Whitespace that sat before a removed comma/quote

  AHRQ_list <- list() # create empty list
  temp_list <- c() # placeholder for codes
  for (l in sas_AHRQ_prep) {
    # A line that is empty after cleaning (comment-only or whitespace-only)
    # must not become a code: it would add a bare "^" alternative to the
    # regex, and "^" on its own matches every ICD-10 code.
    if (identical(l, "")) {
      next
    }

    if (grepl("=", l, fixed = TRUE)) {
      split_l <- str_split(l, "=")[[1]]
      temp_list <- append(temp_list, split_l[1])
      # Must have ^ so that regex doesn't search for within-code substrings
      AHRQ_list[[split_l[2]]] <- paste0("^", str_c(temp_list, collapse = "|^"))
      temp_list <- c()
    } else {
      temp_list <- append(temp_list, l)
    }

    # Omit everything after wghtloss
    if (l == "R636=WGHTLOSS") {
      break
    }
  }

  AHRQ_list # return the list
}
# Build the label -> regex lookup from the lines read above
icd10cm_2020_1_lofregex <- make_sas_list(sas_AHRQ_raw)

# Save AHRQ_list object as RDS
saveRDS(
  object = icd10cm_2020_1_lofregex,
  file = 'AHRQ-Elixhauser/sas-formats/icd10cm_2020_1/icd10cm_2020_1_lofregex.Rds'
)

# Remove comformat_icd10cm_2020_1.txt
file.remove(sas_path)
243 changes: 243 additions & 0 deletions AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/get_mappings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@

# Download the v2021-1 Elixhauser comorbidity archive from HCUP
download.file(
  url = "https://www.hcup-us.ahrq.gov/toolssoftware/comorbidityicd10/ElixhauserComorbidity_v2021-1.zip",
  destfile = "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1.zip"
)

# Unzip next to the archive
unzip(
  zipfile = "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1.zip",
  exdir = 'AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1'
)

# Get raw SAS code line-by-line (one element per line of the format program)
raw_format <- readLines(
  con = "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1/Comorb_ICD10CM_Format_v2021-1.sas"
)

# Remove quotes, commas, and surrounding whitespace from every line
trim_format <- trimws(gsub(',', '', gsub('"', "", raw_format)))
# Remove 'proc' lines
trim_format <- trim_format[!grepl('Proc', trim_format)]
# Remove every line containing ';' (this is what drops the 'run;' lines)
trim_format <- trim_format[!grepl(';', trim_format)]
# Remove 'other' lines
trim_format <- trim_format[!grepl('other', trim_format)]

# Separate vector by blank line: each run of non-blank lines becomes one
# element ("paragraph") of format_list
format_list <- split(
  trim_format[trim_format != ''],
  cumsum(trim_format == "")[trim_format != '']
)

# Remove extraneous: the first two paragraphs are header stuff.
# Fail loudly if the file is shorter than expected; 3:length(x) would count
# backwards and silently return the wrong elements.
if (length(format_list) < 3) {
  stop(
    "Expected at least 3 blank-line-separated blocks in the SAS format file, found ",
    length(format_list),
    call. = FALSE
  )
}
format_list <- format_list[3:length(format_list)] # Header stuff

# Split into value groups
# new_values is TRUE for each paragraph that contains a 'Value $' line, i.e.
# the paragraph that opens a new group.
new_values <- vapply(
  format_list,
  function(x) any(grepl('Value \\$', x)),
  logical(1)
)

# One list element per group: the opening paragraph plus every paragraph up to
# the next opener. lapply() always returns a list of lists; sapply() would
# flatten the structure whenever every group happened to hold one paragraph.
# seq_len(sum()) also copes with zero groups, unlike 1:max(cumsum()).
ElixhauserAHRQ2021Map <- lapply(
  seq_len(sum(new_values)),
  function(x) {
    format_list[cumsum(new_values) == x]
  }
)

# Name the value groups with the text after '$' on each group's first line
names(ElixhauserAHRQ2021Map) <- vapply(
  ElixhauserAHRQ2021Map,
  function(x) {
    strsplit(x[[1]][1], '\\$')[[1]][2]
  },
  character(1)
)

# Drop 'value' elements in comfmt
# lapply() keeps comfmt a list of character vectors even when all groups have
# the same length; sapply() would collapse that case into a matrix or vector.
ElixhauserAHRQ2021Map$comfmt <- lapply(
  ElixhauserAHRQ2021Map$comfmt,
  function(x) x[!grepl('Value', x)]
)

# Get icd group names for comfmt: the text after ' = ' on the first line of
# each group that contains ' = '
names(ElixhauserAHRQ2021Map$comfmt) <- vapply(
  ElixhauserAHRQ2021Map$comfmt,
  function(x) {
    strsplit(x[grepl(' = ', x)], ' = ')[[1]][2]
  },
  character(1),
  USE.NAMES = FALSE
)

# Drop ' = XXXX' from icd groups, keeping only the text before ' = ' on each
# line as an unnamed character vector
ElixhauserAHRQ2021Map$comfmt <- lapply(
  ElixhauserAHRQ2021Map$comfmt,
  function(x) {
    vapply(
      x,
      function(code_line) strsplit(code_line, ' = ')[[1]][1],
      character(1),
      USE.NAMES = FALSE
    )
  }
)

# Drop " = 1" from poaxmpt and 'value' elements
# Every group whose name contains 'poa' is reduced to a character vector of
# the text before ' = ' on each line.
poaxmpt_names <- names(ElixhauserAHRQ2021Map)[
  grepl('poa', names(ElixhauserAHRQ2021Map))
]
for (i in poaxmpt_names) {
  # NOTE(review): only the first paragraph ([[1]]) of the group is read; any
  # later paragraphs of the same group are discarded -- confirm the SAS file
  # never puts a blank line inside these formats.
  poa_codes <- unlist(
    lapply(
      strsplit(ElixhauserAHRQ2021Map[[i]][[1]], ' = '),
      function(parts) {
        # Drop " = 1": keep the text before the first ' = '
        parts[[1]][1]
      }
    )
  )
  ElixhauserAHRQ2021Map[[i]] <- poa_codes

  # Drop 'Value' elements
  ElixhauserAHRQ2021Map[[i]] <- ElixhauserAHRQ2021Map[[i]][
    !grepl('Value', ElixhauserAHRQ2021Map[[i]])
  ]
}

# Define and save final comorbidities in AHRQ format:
# 38 abbreviations, in the order they are stored in the saved formats object.
ElixhauserAHRQ2021Abbr <- c(
  "AIDS", "ALCOHOL", "ANEMDEF", "ARTH", "BLDLOSS",
  # Cancer groups
  "CANCER_LYMPH", "CANCER_LEUK", "CANCER_METS", "CANCER_NSITU", "CANCER_SOLID",
  "CBVD", "CHF", "COAG", "DEMENTIA", "DEPRESS",
  # Diabetes
  "DIAB_UNCX", "DIAB_CX",
  "DRUG_ABUSE",
  # Hypertension
  "HTN_CX", "HTN_UNCX",
  # Liver
  "LIVER_MLD", "LIVER_SEV",
  "LUNG_CHRONIC",
  # Neurological
  "NEURO_MOVT", "NEURO_OTH", "NEURO_SEIZ",
  "OBESE", "PARALYSIS", "PERIVASC", "PSYCHOSES", "PULMCIRC",
  # Renal failure
  "RENLFL_MOD", "RENLFL_SEV",
  # Thyroid
  "THYROID_HYPO", "THYROID_OTH",
  "ULCER_PEPTIC", "VALVE", "WGHTLOSS"
)

# Define and save value labels (see Comorb_ICD10CM_Format_v2021-1.sas)
# Named character vector: names are the comorbidity abbreviations, values are
# the human-readable descriptions.
ElixhauserAHRQ2021Labels <- c(
  'AIDS' = 'Acquired immune deficiency syndrome',
  'ALCOHOL' = 'Alcohol abuse',
  'ANEMDEF' = 'Deficiency anemias',
  'ARTH' = 'Arthropathies',
  'BLDLOSS' = 'Chronic blood loss anemia',
  'CANCER_LEUK' = 'Leukemia',
  'CANCER_LYMPH' = 'Lymphoma',
  'CANCER_METS' = 'Metastatic cancer',
  'CANCER_NSITU' = 'Solid tumor without metastasis, in situ',
  'CANCER_SOLID' = 'Solid tumor without metastasis, malignant',
  'CBVD' = 'Cerebrovascular disease',
  'CBVD_NPOA' = 'Cerebrovascular disease, not on admission',
  'CBVD_POA' = 'Cerebrovascular disease, on admission',
  'CBVD_SQLA' = 'Cerebrovascular disease, sequela',
  'CHF' = 'Congestive heart failure',
  'COAG' = 'Coagulopathy', # was misspelled 'Coagulopthy'
  'DEMENTIA' = 'Dementia',
  'DEPRESS' = 'Depression',
  'DIAB_CX' = 'Diabetes with chronic complications',
  'DIAB_UNCX' = 'Diabetes without chronic complications',
  'DRUG_ABUSE' = 'Drug abuse',
  'HTN_CX' = 'Hypertension, complicated',
  'HTN_UNCX' = 'Hypertension, uncomplicated',
  'LIVER_MLD' = 'Liver disease, mild',
  'LIVER_SEV' = 'Liver disease, moderate to severe',
  'LUNG_CHRONIC' = 'Chronic pulmonary disease',
  'NEURO_MOVT' = 'Neurological disorders affecting movement',
  'NEURO_OTH' = 'Other neurological disorders',
  'NEURO_SEIZ' = 'Seizures and epilepsy',
  'OBESE' = 'Obesity',
  'PARALYSIS' = 'Paralysis',
  'PERIVASC' = 'Peripheral vascular disease',
  'PSYCHOSES' = 'Psychoses',
  'PULMCIRC' = 'Pulmonary circulation disease',
  'RENLFL_MOD' = 'Renal failure, moderate',
  'RENLFL_SEV' = 'Renal failure, severe',
  'THYROID_HYPO' = 'Hypothyroidism',
  'THYROID_OTH' = 'Other thyroid disorders',
  'ULCER_PEPTIC' = 'Peptic ulcer disease x bleeding',
  'VALVE' = 'Valvular disease',
  'WGHTLOSS' = 'Weight loss'
)

# Comorbidity flags in their pre-exclusion form: the 38 final abbreviations
# plus the three intermediate cerebrovascular flags (CBVD_SQLA, CBVD_POA,
# CBVD_NPOA), which sit immediately before CBVD.
ElixhauserAHRQ2021PreExclusion <- c(
  "AIDS", "ALCOHOL", "ANEMDEF", "ARTH", "BLDLOSS",
  # Cancer groups
  "CANCER_LYMPH", "CANCER_LEUK", "CANCER_METS", "CANCER_NSITU", "CANCER_SOLID",
  # Cerebrovascular disease: intermediate flags first, then the final flag
  "CBVD_SQLA", "CBVD_POA", "CBVD_NPOA", "CBVD",
  "CHF", "COAG", "DEMENTIA", "DEPRESS",
  # Diabetes
  "DIAB_UNCX", "DIAB_CX",
  "DRUG_ABUSE",
  # Hypertension
  "HTN_CX", "HTN_UNCX",
  # Liver
  "LIVER_MLD", "LIVER_SEV",
  "LUNG_CHRONIC",
  # Neurological
  "NEURO_MOVT", "NEURO_OTH", "NEURO_SEIZ",
  "OBESE", "PARALYSIS", "PERIVASC", "PSYCHOSES", "PULMCIRC",
  # Renal failure
  "RENLFL_MOD", "RENLFL_SEV",
  # Thyroid
  "THYROID_HYPO", "THYROID_OTH",
  "ULCER_PEPTIC", "VALVE", "WGHTLOSS"
)

# Save list of format objects
# Bundles the parsed SAS mappings with the abbreviation, label, and
# pre-exclusion vectors so they can be loaded back from a single .Rds file.
Elixhauser2021Formats <- list(
  ElixhauserAHRQ2021Map = ElixhauserAHRQ2021Map,
  ElixhauserAHRQ2021Abbr = ElixhauserAHRQ2021Abbr,
  ElixhauserAHRQ2021Labels = ElixhauserAHRQ2021Labels,
  ElixhauserAHRQ2021PreExclusion = ElixhauserAHRQ2021PreExclusion
)

saveRDS(
  Elixhauser2021Formats,
  'AHRQ-Elixhauser/sas-formats/icd10cm_2021_1/Elixhauser2021Formats.Rds'
)

# Remove .zip file
file.remove(
  "AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1.zip"
)

# Remove unzipped folder
# TRUE rather than T: T is an ordinary variable and can be reassigned.
unlink(
  'AHRQ-Elixhauser/sas-parse/icd10cm_2021_1/ElixhauserComorbidity_v2021-1',
  recursive = TRUE
)
Loading