Skip to content

Commit

Permalink
need to address race and immunocat
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanrussellmd committed Oct 9, 2023
1 parent 5384e48 commit 6f52379
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 63 deletions.
4 changes: 2 additions & 2 deletions R/col_definitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ na_strings <- c("","NULL","NA","N/A","-99","Unknown","Unknown/Not Reported","Not
## ---- GENERIC COLUMNS TO BE PROCESSED ----
yes_no_cols <- c(
# ACS_NSQIP_PUF
"electsurg","smoke","ventilat","hxcopd","ascites","hxchf","hypermed","renafail","dialysis","discancr","wndinf","steroid","wtloss","bleeddis","transfus","emergncy","sssipatos","dssipatos","ossipatos","pnapatos","ventpatos","utipatos","sepsispatos","sepshockpatos","returnor","stillinhosp","reoperation1","retorrelated","reoperation2","retor2related","reoperation3","readmission1","unplannedreadmission1","readmrelated1","readmission2","unplannedreadmission2","readmrelated2","readmission3","unplannedreadmission3","readmrelated3", "readmission4","unplannedreadmission4","readmrelated4","readmission5","unplannedreadmission5","readmrelated5", "etoh", "dnr", "cpneumon", "esovar","hxmi","prvpci","prvpcs","hxangina","hxpvd","restpain","impsens", "coma", "hemi", "hxtia","cva","cvano","tumorcns","para","quad","chemo","radio","pregnancy","proper30", "readmission","unplanreadmission","reoperation", "eol_wdcare","oxygen_support",
"electsurg","smoke","ventilat","hxcopd","ascites","hxchf","hypermed","renafail","dialysis","discancr","wndinf","steroid","wtloss","bleeddis","transfus","emergncy","sssipatos","dssipatos","ossipatos","pnapatos","ventpatos","utipatos","sepsispatos","sepshockpatos","returnor","stillinhosp","reoperation1","retorrelated","reoperation2","retor2related","reoperation3","readmission1","unplannedreadmission1","readmrelated1","readmission2","unplannedreadmission2","readmrelated2","readmission3","unplannedreadmission3","readmrelated3", "readmission4","unplannedreadmission4","readmrelated4","readmission5","unplannedreadmission5","readmrelated5", "etoh", "dnr", "cpneumon", "esovar","hxmi","prvpci","prvpcs","hxangina","hxpvd","restpain","impsens", "coma", "hemi", "hxtia","cva","cvano","tumorcns","para","quad","chemo","radio","pregnancy","proper30", "readmission","unplanreadmission","reoperation", "eol_wdcare","oxygen_support","hxfall","hxdementia",
# PUF_TAR_AAA
"aaa_paas", "aaa_cp_renrevasc","aaa_cp_viscrevasc","aaa_cp_ler","aaa_cp_are","aaa_colitis","aaa_lei","aaa_roa",
# PUF_TAR_AIE
Expand Down Expand Up @@ -56,7 +56,7 @@ numeric_cols <- c(

factor_cols <- c(
# ACS_NSQIP_PUF
"sex","fnstatus1","fnstatus2","typeintoc","airtra","opnote","attend","wound_closure","transt","readmsuspreason1","readmunrelsusp1","readmsuspreason2","readmunrelsusp2","readmsuspreason3","readmunrelsusp3","readmsuspreason4","readmunrelsusp4","readmsuspreason5","readmunrelsusp5","dischdest","anesthes", "surgspec","immuno_cat",
"sex","fnstatus1","fnstatus2","typeintoc","airtra","opnote","attend","wound_closure","transt","readmsuspreason1","readmunrelsusp1","readmsuspreason2","readmunrelsusp2","readmsuspreason3","readmunrelsusp3","readmsuspreason4","readmunrelsusp4","readmsuspreason5","readmunrelsusp5","dischdest","anesthes", "surgspec","immuno_cat","casetype","disfxnstat",
# PUF_TAR_PAN
"pan_drainsys_type","pan_oincis_type","pan_intra_antibiotics","pan_benign_tumorsize","pan_benign_histologic","pan_mstage","pan_tstage","pan_nstage","pan_malig_histologic","pan_resection","pan_drains_type","pan_gastduo","pan_reconstruction","pan_glandtext","pan_ductsize","pan_approach","pan_biliarystent","pan_lapthor","pan_percdrainage1", "pan_percdrainage2","pan_percdrainage3","pan_percdrainage4",
# PUF_TAR_COL
Expand Down
79 changes: 20 additions & 59 deletions R/conv_puf.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ conv_puf_cols <- function(df, filename) {
conv_hispanic(df)
conv_(df, "race", conv_race)
conv_(df, "age", conv_age)
conv_(df, "inout", conv_inout)
conv_(df, "inout", conv_logical, "Inpatient")
conv_(df, "diabetes", insulin, newcol = "insulin")
conv_(df, "diabetes", conv_notno)
conv_(df, "dyspnea", when_dyspnea, newcol = "when_dyspnea")
Expand All @@ -25,6 +25,9 @@ conv_puf_cols <- function(df, filename) {
conv_(df, "preop_covid", conv_notno)
conv_(df, "postop_covid", type_covid, newcol= "type_postop_covid")
conv_(df, "postop_covid", conv_notno)
conv_(df, "homesup", conv_logical, "Lives at home with other individuals")
conv_(df, "delirium", conv_delirium)
conv_(df, "dishomesvc", conv_logical, "Discharged to home with services")
check_comaneurograft(df)
}

Expand Down Expand Up @@ -126,12 +129,13 @@ surgspec <- list(`Cardiac surgery` = "Cardiac Surgery",
`Oral surgery` = "Oral Surgery",
`Obstetrics` = "Obstetrics",
`Other` = "Other")
# Named list for the immuno_cat column; every name is spelled exactly like
# the value it maps to.
# NOTE(review): presumably used as the level lookup for the "immuno_cat"
# factor column -- confirm against the caller.
immuno_cat <- list(
  "Corticosteroids" = "Corticosteroids",
  "Anti-rejection/transplant immunosuppressants" =
    "Anti-rejection/transplant immunosuppressants",
  "Synthetic DMARDs/DMDs" = "Synthetic DMARDs/DMDs",
  "Biologic DMARDs/DMDs" = "Biologic DMARDs/DMDs",
  "Other" = "Other"
)
# Named list for the casetype column; each name equals its value.
# NOTE(review): presumably the level lookup for the "casetype" factor
# column -- confirm against the caller.
casetype <- list(
  Elective = "Elective",
  Urgent = "Urgent",
  Emergent = "Emergent"
)
# Named list for the disfxnstat column. Names are sentence case while the
# values they map to are title case ("Partially dependent" versus
# "Partially Dependent").
# NOTE(review): presumably the level lookup for the "disfxnstat" factor
# column -- confirm against the caller.
disfxnstat <- list(
  "Independent" = "Independent",
  "Partially dependent" = "Partially Dependent",
  "Totally dependent" = "Totally Dependent",
  "Expired" = "Expired"
)

#### ---- LONG COLUMNS ---- ####
readmission <- paste("readmission", 1:5, sep = "")
Expand Down Expand Up @@ -462,22 +466,6 @@ conv_race <- function(vec, pacific = "asian") {
vec %^% c(common, levels)
}

#' Convert the inout column to a logical vector
#'
#' @param vec a character vector of inpatient/outpatient values
#'
#' @details An element is \code{TRUE} when it contains the text "Inpatient",
#' compared as a fixed string without regard to case, and \code{FALSE}
#' otherwise. \code{NA} elements stay \code{NA}.
#'
#' @return a logical vector
#' @keywords internal
#'
#' @examples
#' nsqipr:::conv_inout(c("Inpatient", "Outpatient", NA))
#'
conv_inout <- function(vec) {
  ignore_case <- list(case_insensitive = TRUE)
  stringi::stri_detect_fixed(
    str = vec,
    pattern = "Inpatient",
    opts_fixed = ignore_case
  )
}

#' Convert age to integer
#'
#' @param vec a character vector of values to convert
Expand Down Expand Up @@ -570,47 +558,20 @@ type_covid <- function(vec) {
vec %^% list(`Lab-confirmed` = "Yes, lab-confirmed diagnosis (or ICD-10 code U07.1)", `Suspected` = "Yes, suspected diagnosis (or ICD-10 code U07.2)")
}

#' Add or update Hispanic ethnicity column
#' Parse a column for delirium screening
#'
#' @param df a data.table to add to or update with an \code{ethnicity_hispanic} column
#' Note that this first converts all instances of "Not screened for delirium" to NA.
#'
#' @details \code{ethnicity_hispanic} was not added until the 2008 NSQIP PUF when \code{race} was revised to
#' \code{race_new}. Data regarding hispanic ethnicity was hard coded directly into the old \code{race} variable
#' (such as "Hispanic, White"). In order to marry early and later datasets, this information must be extracted
#' from \code{race} and a new \code{ethnicity_hispanic} column created.
#'
#' If the data provided already has a \code{ethnicity_hispanic} column present, this column is simply converted
#' into a logical vector.
#' @param vec a character vector of values to convert
#'
#' @return a data table
#' @return a logical vector
#' @keywords internal
#'
#' @examples
#' x <- data.table::data.table(
#' race = c("Hispanic, White", "White, Not of Hispanic Origin","Hispanic, Black",
#' "Black, Not of Hispanic Origin", "Hispanic, Color Unknown", "White", "Black or African American",
#' "American Indian or Alaska Native", "Asian", "Native Hawaiian or Pacific Islander",
#' "Asian or Pacific Islander", NA),
#' ethnicity_hispanic = c(NA, NA, NA, NA, NA, "Yes", "No", "Yes", "No", NA, NA, "Yes")
#' )
#'
#' nsqipr:::conv_hispanic(x)
#' x
#' nsqipr:::conv_delirium(c("Not screened for delirium", "Delirium present on screening", "No delirium present on screening", NA,
#' "not screened for delirium", "delirium present on screening", "no delirium present on screening", NA))
#'
conv_casetype <- function(df) {
if("casetype" %chin% names(df)) {
vec <- ifelse(!is.na(df[["casetype"]]), # if casetype is NOT NA
conv_yesno(df[["ethnicity_hispanic"]]), # do this
conv_hispanic_helper(df)) # else do this
} else {
vec <- conv_hispanic_helper(df)
}
data.table::set(df, j = "ethnicity_hispanic", value = vec)
}

#' @describeIn conv_hispanic A helper function for updating the \code{ethnicity_hispanic} column
conv_hispanic_helper <- function(df) {
ifelse(stringi::stri_detect_regex(df[["race"]], "hispanic", opts_regex = list(case_insensitive = TRUE)),
stringi::stri_detect_regex(df[["race"]], "^hispanic,", opts_regex = list(case_insensitive = TRUE)),
NA)
conv_delirium <- function(vec) {
  # Step 1: replace "Not screened for delirium" (any letter case) with NA so
  # unscreened patients are reported as missing rather than FALSE.
  screened <- stringi::stri_replace_all_fixed(
    vec,
    "Not screened for delirium",
    NA,
    opts_fixed = list(case_insensitive = TRUE)
  )
  # Step 2: flag values that begin with "Delirium present on screening".
  # The "^" anchor keeps "No delirium present on screening" from matching.
  stringi::stri_detect_regex(
    screened,
    "^Delirium present on screening",
    opts_regex = list(case_insensitive = TRUE)
  )
}
4 changes: 2 additions & 2 deletions R/conv_standard.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ nsqip_dir <- function(dir, csv, rds) {
#'
conv_to_standard <- function(file, cols, csv, rds) {
progbar <- pb(csv, rds) # Creates a progress bar
filename <- fs::path_file(file) # Extracts the filename portion of the file path
filename <- fs::path_file(file) # Extracts the file name portion of the file path
tick(progbar, "reading", filename, 0)

# Call all cleaning functions
df <- data.table::fread(file, sep = "\t", colClasses = "character", showProgress = FALSE, na.strings = na_strings)
df <- data.table::fread(file, sep = "\t", colClasses = "character", showProgress = FALSE, na.strings = na_strings) # This is where the NA strings are converted.
setup(df, filename, progbar, cols)
conv_type_cols(df, filename, progbar)
conv_special_cols(df, filename, progbar)
Expand Down
16 changes: 16 additions & 0 deletions R/utils-conv.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,22 @@ conv_date <- function(vec) {
NA))
}

#' Convert a character vector to a logical vector
#'
#' Flags the elements of \code{vec} that contain \code{truth}.
#'
#' @param vec a character vector to convert to logical
#' @param truth the text that marks an element as \code{TRUE}. It is
#' searched for as a fixed (non-regex) string, ignoring case.
#'
#' @return a logical vector: \code{TRUE} where \code{truth} is found,
#' \code{FALSE} where it is not, and \code{NA} where \code{vec} is \code{NA}.
#'
#' @keywords internal
#' @examples
#'
#' x <- c("sky is blue", "sky is red", "grass is blue", NA)
#' truth <- c("sky is blue")
#' nsqipr:::conv_logical(x, truth)
#'
conv_logical <- function(vec, truth) {
  ignore_case <- list(case_insensitive = TRUE)
  stringi::stri_detect_fixed(
    str = vec,
    pattern = truth,
    opts_fixed = ignore_case
  )
}

#' Add a PUF year column
#'
#' This column notes the file from which the record came.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,9 @@ testthat::test_that("conv_race works", {
testthat::expect_equal(sort(levels((result_hawaiian))), sort(levels(expected_output_hawaiian)))

})

testthat::test_that("conv_delirium works", {
  # Every screening answer appears once capitalised and once in lower case
  # to exercise the case-insensitive matching; NA inputs must stay NA.
  screening <- c(
    "Not screened for delirium",
    "Delirium present on screening",
    "No delirium present on screening",
    NA,
    "not screened for delirium",
    "delirium present on screening",
    "no delirium present on screening",
    NA
  )
  expected <- c(NA, TRUE, FALSE, NA, NA, TRUE, FALSE, NA)
  testthat::expect_equal(nsqipr:::conv_delirium(screening), expected)
})
6 changes: 6 additions & 0 deletions tests/testthat/test-utils-conv.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ testthat::test_that("conv_date works", {
testthat::expect_equal(conv_date("2000"), as.Date("2000-1-1", "%Y-%m-%d"))
})

testthat::test_that("conv_logical works", {
  # Covers a match, two non-matches, and NA pass-through.
  input <- c("sky is blue", "sky is red", "grass is blue", NA)
  expected <- c(TRUE, FALSE, FALSE, NA)
  testthat::expect_equal(conv_logical(input, c("sky is blue")), expected)
})

testthat::test_that("get_pufyear works", {
x <- data.table::data.table(x = rep("name", 10))
get_pufyear(x, "acs_nsqip_puf12.txt")
Expand Down

0 comments on commit 6f52379

Please sign in to comment.