need to address race and immunocat

dylanrussellmd · Oct 9, 2023 · 6f52379 · 6f52379
1 parent 5384e48
commit 6f52379
Show file tree

Hide file tree

Showing 6 changed files with 52 additions and 63 deletions.
diff --git a/R/col_definitions.R b/R/col_definitions.R
@@ -5,7 +5,7 @@ na_strings <- c("","NULL","NA","N/A","-99","Unknown","Unknown/Not Reported","Not
 ## ---- GENERIC COLUMNS TO BE PROCESSED ----
 yes_no_cols <- c(
   # ACS_NSQIP_PUF
-  "electsurg","smoke","ventilat","hxcopd","ascites","hxchf","hypermed","renafail","dialysis","discancr","wndinf","steroid","wtloss","bleeddis","transfus","emergncy","sssipatos","dssipatos","ossipatos","pnapatos","ventpatos","utipatos","sepsispatos","sepshockpatos","returnor","stillinhosp","reoperation1","retorrelated","reoperation2","retor2related","reoperation3","readmission1","unplannedreadmission1","readmrelated1","readmission2","unplannedreadmission2","readmrelated2","readmission3","unplannedreadmission3","readmrelated3", "readmission4","unplannedreadmission4","readmrelated4","readmission5","unplannedreadmission5","readmrelated5", "etoh", "dnr", "cpneumon", "esovar","hxmi","prvpci","prvpcs","hxangina","hxpvd","restpain","impsens", "coma", "hemi", "hxtia","cva","cvano","tumorcns","para","quad","chemo","radio","pregnancy","proper30", "readmission","unplanreadmission","reoperation", "eol_wdcare","oxygen_support",
+  "electsurg","smoke","ventilat","hxcopd","ascites","hxchf","hypermed","renafail","dialysis","discancr","wndinf","steroid","wtloss","bleeddis","transfus","emergncy","sssipatos","dssipatos","ossipatos","pnapatos","ventpatos","utipatos","sepsispatos","sepshockpatos","returnor","stillinhosp","reoperation1","retorrelated","reoperation2","retor2related","reoperation3","readmission1","unplannedreadmission1","readmrelated1","readmission2","unplannedreadmission2","readmrelated2","readmission3","unplannedreadmission3","readmrelated3", "readmission4","unplannedreadmission4","readmrelated4","readmission5","unplannedreadmission5","readmrelated5", "etoh", "dnr", "cpneumon", "esovar","hxmi","prvpci","prvpcs","hxangina","hxpvd","restpain","impsens", "coma", "hemi", "hxtia","cva","cvano","tumorcns","para","quad","chemo","radio","pregnancy","proper30", "readmission","unplanreadmission","reoperation", "eol_wdcare","oxygen_support","hxfall","hxdementia",
   # PUF_TAR_AAA
   "aaa_paas", "aaa_cp_renrevasc","aaa_cp_viscrevasc","aaa_cp_ler","aaa_cp_are","aaa_colitis","aaa_lei","aaa_roa",
   # PUF_TAR_AIE
@@ -56,7 +56,7 @@ numeric_cols <- c(
 
 factor_cols <- c(
   # ACS_NSQIP_PUF
-  "sex","fnstatus1","fnstatus2","typeintoc","airtra","opnote","attend","wound_closure","transt","readmsuspreason1","readmunrelsusp1","readmsuspreason2","readmunrelsusp2","readmsuspreason3","readmunrelsusp3","readmsuspreason4","readmunrelsusp4","readmsuspreason5","readmunrelsusp5","dischdest","anesthes", "surgspec","immuno_cat",
+  "sex","fnstatus1","fnstatus2","typeintoc","airtra","opnote","attend","wound_closure","transt","readmsuspreason1","readmunrelsusp1","readmsuspreason2","readmunrelsusp2","readmsuspreason3","readmunrelsusp3","readmsuspreason4","readmunrelsusp4","readmsuspreason5","readmunrelsusp5","dischdest","anesthes", "surgspec","immuno_cat","casetype","disfxnstat",
   # PUF_TAR_PAN
   "pan_drainsys_type","pan_oincis_type","pan_intra_antibiotics","pan_benign_tumorsize","pan_benign_histologic","pan_mstage","pan_tstage","pan_nstage","pan_malig_histologic","pan_resection","pan_drains_type","pan_gastduo","pan_reconstruction","pan_glandtext","pan_ductsize","pan_approach","pan_biliarystent","pan_lapthor","pan_percdrainage1", "pan_percdrainage2","pan_percdrainage3","pan_percdrainage4",
   # PUF_TAR_COL

diff --git a/R/conv_puf.R b/R/conv_puf.R
@@ -14,7 +14,7 @@ conv_puf_cols <- function(df, filename) {
   conv_hispanic(df)
   conv_(df, "race", conv_race)
   conv_(df, "age", conv_age)
-  conv_(df, "inout", conv_inout)
+  conv_(df, "inout", conv_logical, "Inpatient")
   conv_(df, "diabetes", insulin, newcol = "insulin")
   conv_(df, "diabetes", conv_notno)
   conv_(df, "dyspnea", when_dyspnea, newcol = "when_dyspnea")
@@ -25,6 +25,9 @@ conv_puf_cols <- function(df, filename) {
   conv_(df, "preop_covid", conv_notno)
   conv_(df, "postop_covid", type_covid, newcol= "type_postop_covid")
   conv_(df, "postop_covid", conv_notno)
+  conv_(df, "homesup", conv_logical, "Lives at home with other individuals")
+  conv_(df, "delirium", conv_delirium)
+  conv_(df, "dishomesvc", conv_logical, "Discharged to home with services")
   check_comaneurograft(df)
 }
 
@@ -126,12 +129,13 @@ surgspec <- list(`Cardiac surgery` = "Cardiac Surgery",
                  `Oral surgery` = "Oral Surgery",
                  `Obstetrics` = "Obstetrics",
                  `Other` = "Other")
-immuno_cat <- list(`Corticosteroids` = "Corticosteroids",
-                   `Anti-rejection/transplant immunosuppressants` = "Anti-rejection/transplant immunosuppressants",
-                   `Synthetic DMARDs/DMDs` = "Synthetic DMARDs/DMDs",
-                   `Biologic DMARDs/DMDs` = "Biologic DMARDs/DMDs",
-                   `Other` = "Other"
-                   )
+casetype <- list(`Elective` = "Elective",
+                 `Urgent` = "Urgent",
+                 `Emergent` = "Emergent")
+disfxnstat <- list(`Independent` = "Independent",
+                   `Partially dependent` = "Partially Dependent",
+                   `Totally dependent` = "Totally Dependent",
+                   `Expired` = "Expired")
 
 #### ---- LONG COLUMNS ---- ####
 readmission <- paste("readmission", 1:5, sep = "")
@@ -462,22 +466,6 @@ conv_race <- function(vec, pacific = "asian") {
   vec %^% c(common, levels)
 }
 
-#' Convert inout to logical
-#'
-#' @param vec a character vector of values to convert
-#'
-#' @details If "Inpatient", will result in true. If given NA, will return NA.
-#'
-#' @return an integer vector
-#' @keywords internal
-#'
-#' @examples
-#'  nsqipr:::conv_inout(c("Inpatient", "Outpatient", NA))
-#'
-conv_inout <- function(vec) {
-  stringi::stri_detect_fixed(vec, "Inpatient", opts_fixed = list(case_insensitive = TRUE))
-}
-
 #' Convert age to integer
 #'
 #' @param vec a character vector of values to convert
@@ -570,47 +558,20 @@ type_covid <- function(vec) {
   vec %^% list(`Lab-confirmed` = "Yes, lab-confirmed diagnosis (or ICD-10 code U07.1)", `Suspected` = "Yes, suspected diagnosis (or ICD-10 code U07.2)")
 }
 
-#' Add or update Hispanic ethnicity column
+#' Parse a column for delirium screening
 #'
-#' @param df a data.table to add to or update with an \code{ethnicity_hispanic} column
+#' Note that this first converts all instances of "Not screened for delirium" to NA.
 #'
-#' @details \code{ethnicity_hispanic} was not added until the 2008 NSQIP PUF when \code{race} was revised to
-#' \code{race_new}. Data regarding hispanic ethnicity was hard coded directly into the old \code{race} variable
-#' (such as "Hispanic, White"). In order to marry early and later datasets, this information must be extracted
-#' from \code{race} and a new \code{ethnicity_hispanic} column created.
-#'
-#' If the data provided already has a \code{ethnicity_hispanic} column present, this column is simply converted
-#' into a logical vector.
+#' @param vec a character vector of values to convert
 #'
-#' @return a data table
+#' @return a logical vector
 #' @keywords internal
 #'
 #' @examples
-#' x <- data.table::data.table(
-#' race = c("Hispanic, White", "White, Not of Hispanic Origin","Hispanic, Black",
-#' "Black, Not of Hispanic Origin", "Hispanic, Color Unknown", "White", "Black or African American",
-#' "American Indian or Alaska Native", "Asian", "Native Hawaiian or Pacific Islander",
-#' "Asian or Pacific Islander", NA),
-#' ethnicity_hispanic = c(NA, NA, NA, NA, NA, "Yes", "No", "Yes", "No", NA, NA, "Yes")
-#' )
-#'
-#' nsqipr:::conv_hispanic(x)
-#' x
+#' nsqipr:::conv_delirium(c("Not screened for delirium", "Delirium present on screening", "No delirium present on screening", NA,
+#'                         "not screened for delirium", "delirium present on screening", "no delirium present on screening", NA))
 #'
-conv_casetype <- function(df) {
-  if("casetype" %chin% names(df)) {
-    vec <- ifelse(!is.na(df[["casetype"]]), # if casetype is NOT NA
-                  conv_yesno(df[["ethnicity_hispanic"]]), # do this
-                  conv_hispanic_helper(df)) # else do this
-  } else {
-    vec <- conv_hispanic_helper(df)
-  }
-  data.table::set(df, j = "ethnicity_hispanic", value = vec)
-}
-
-#' @describeIn conv_hispanic A helper function for updating the \code{ethnicity_hispanic} column
-conv_hispanic_helper <- function(df) {
-  ifelse(stringi::stri_detect_regex(df[["race"]], "hispanic", opts_regex = list(case_insensitive = TRUE)),
-         stringi::stri_detect_regex(df[["race"]], "^hispanic,", opts_regex = list(case_insensitive = TRUE)),
-         NA)
+conv_delirium <- function(vec) {
+  stringi::stri_replace_all_fixed(vec, "Not screened for delirium", NA, opts_fixed = list(case_insensitive = TRUE)) %>%
+    stringi::stri_detect_regex("^Delirium present on screening", opts_regex = list(case_insensitive = TRUE))
 }
diff --git a/R/conv_standard.R b/R/conv_standard.R
@@ -29,11 +29,11 @@ nsqip_dir <- function(dir, csv, rds) {
 #'
 conv_to_standard <- function(file, cols, csv, rds) {
   progbar <- pb(csv, rds) # Creates a progress bar
-  filename <- fs::path_file(file) # Extracts the filename portion of the file path
+  filename <- fs::path_file(file) # Extracts the file name portion of the file path
   tick(progbar, "reading", filename, 0)
 
   # Call all cleaning functions
-  df <- data.table::fread(file, sep = "\t", colClasses = "character", showProgress = FALSE, na.strings = na_strings)
+  df <- data.table::fread(file, sep = "\t", colClasses = "character", showProgress = FALSE, na.strings = na_strings) # This is where the NA strings are converted.
   setup(df, filename, progbar, cols)
   conv_type_cols(df, filename, progbar)
   conv_special_cols(df, filename, progbar)

diff --git a/R/utils-conv.R b/R/utils-conv.R
@@ -171,6 +171,22 @@ conv_date <- function(vec) {
          NA))
 }
 
+#' Convert character vector to logical vector
+#'
+#' @param vec a character vector to convert to logical
+#' @param truth the string to look for; elements of \code{vec} that contain it become TRUE. This is a fixed, case-insensitive match.
+#'
+#' @keywords internal
+#' @examples
+#'
+#' x <- c("sky is blue", "sky is red", "grass is blue", NA)
+#' truth <- c("sky is blue")
+#' nsqipr:::conv_logical(x, truth)
+#'
+conv_logical <- function(vec, truth) {
+  stringi::stri_detect_fixed(vec, truth, opts_fixed = list(case_insensitive = TRUE))
+}
+
 #' Add a PUF year column
 #'
 #' This column notes the file from which the record came.

diff --git a/tests/testthat/test-conv_acs.R → tests/testthat/test-conv_puf.R b/tests/testthat/test-conv_acs.R → tests/testthat/test-conv_puf.R
@@ -115,3 +115,9 @@ testthat::test_that("conv_race works", {
   testthat::expect_equal(sort(levels((result_hawaiian))), sort(levels(expected_output_hawaiian)))
 
 })
+
+testthat::test_that("conv_delirium works", {
+  results <- nsqipr:::conv_delirium(c("Not screened for delirium", "Delirium present on screening", "No delirium present on screening", NA,
+                                      "not screened for delirium", "delirium present on screening", "no delirium present on screening", NA))
+  testthat::expect_equal(results, c(NA, TRUE, FALSE, NA, NA, TRUE, FALSE, NA))
+})
diff --git a/tests/testthat/test-utils-conv.R b/tests/testthat/test-utils-conv.R
@@ -88,6 +88,12 @@ testthat::test_that("conv_date works", {
   testthat::expect_equal(conv_date("2000"), as.Date("2000-1-1", "%Y-%m-%d"))
 })
 
+testthat::test_that("conv_logical works", {
+  x <- c("sky is blue", "sky is red", "grass is blue", NA)
+  truth <- c("sky is blue")
+  testthat::expect_equal(conv_logical(x, truth), c(TRUE, FALSE, FALSE, NA))
+})
+
 testthat::test_that("get_pufyear works", {
   x <- data.table::data.table(x = rep("name", 10))
   get_pufyear(x, "acs_nsqip_puf12.txt")