-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrankall.R
135 lines (97 loc) · 5.1 KB
/
rankall.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
library(tidyverse)
# Load the hospital outcomes file and tidy it into the five columns we need.
# The work happens inside local() so that only `outcomes` ends up in the
# workspace.
outcomes <- local({
  # Keep only columns 2, 7, 11, 17 and 23 of the CSV
  raw <- data.table::fread("C:/Users/corma/OneDrive/Documents/R/Assignment3/hospitaldata/outcomes.csv",
                           header = TRUE, sep = ",", stringsAsFactors = FALSE,
                           na.strings = "Not Available", select = c(2,7,11,17,23))
  # Calling naniar through :: saves loading a whole package for one single call
  cleaned <- naniar::replace_with_na_all(raw, condition = ~.x %in% "Not Available")
  cleaned <- as_tibble(cleaned)
  # Columns 3 to 5 hold the three outcome rates, column 2 the state
  cleaned <- mutate(cleaned, across(3:5, as.numeric))
  cleaned <- mutate(cleaned, across(2, as.factor))
  # setnames() hands back the renamed table, which becomes `outcomes`
  data.table::setnames(cleaned, c("Hospital","State","Heart Attack","Heart Failure","Pneumonia"))
})
# Build one data frame per state, each with its hospitals in alphabetical
# order - the alphabetical order ensures that I get the right answers
outcomes.split <- lapply(
  split(outcomes, outcomes$State),
  function(state.rows) state.rows[order(state.rows$Hospital), ]
)
# Create our outcome ranks vectors...
# Rank the values of one column of a data frame.
#   df          - the data frame holding the column
#   column.name - name of the column to rank, as a string
# Returns one rank per row: the smallest value gets rank 1, tied values are
# ranked in row order, and missing values stay NA.
getRank <- function(df, column.name) {
  rank(df[[column.name]], na.last = "keep", ties.method = "first")
}
# One rank vector per state for each outcome, stored as a list named by state.
# lapply() always returns a list; sapply() would collapse the result into a
# matrix if every state happened to have the same number of hospitals, and the
# [[i]] lookups further down would then pick out single numbers instead of
# whole vectors.
PneumoniaRanks <- lapply(X = outcomes.split, FUN = getRank, column.name = "Pneumonia")
AttackRanks <- lapply(X = outcomes.split, FUN = getRank, column.name = "Heart Attack")
FailureRanks <- lapply(X = outcomes.split, FUN = getRank, column.name = "Heart Failure")
# And bind these vectors back to their respective data frames...
# The new columns are appended in the order PneumoniaRank, AttackRank,
# FailureRank. seq_along() makes the loop a no-op for an empty list, where
# 1:length() would count 1, 0. Naming the arguments to cbind() labels the
# columns directly, so no renaming step is needed.
for (i in seq_along(outcomes.split)) {
  outcomes.split[[i]] <- cbind(
    outcomes.split[[i]],
    PneumoniaRank = PneumoniaRanks[[i]],
    AttackRank = AttackRanks[[i]],
    FailureRank = FailureRanks[[i]]
  )
}
# The actual function itself:
# rankall(outcome, num): for every state, find the hospital that holds rank
# `num` for the given outcome.
#
# Arguments:
#   outcome - "Heart Attack", "Heart Failure" or "Pneumonia", or the same
#             names in all lowercase. Anything else stops with the error
#             "invalid outcome".
#   num     - "best"/"Best" (rank 1), "worst"/"Worst" (the highest
#             non-missing rank within each state), or a rank number.
#
# Returns a data frame with one row per element of outcomes.split:
#   hospital - character; NA when the state has no hospital at that rank
#   state    - factor of the state names
# The state names are also used as the row names.
#
# Reads the global list outcomes.split; each of its data frames must have the
# columns Hospital, PneumoniaRank, AttackRank and FailureRank.
rankall <- function(outcome, num) {
  # Every accepted spelling mapped to the rank column it refers to. Looking the
  # column up by name means we do not depend on its position in the data frame.
  rank.columns <- c(
    "Heart Attack" = "AttackRank", "heart attack" = "AttackRank",
    "Heart Failure" = "FailureRank", "heart failure" = "FailureRank",
    "Pneumonia" = "PneumoniaRank", "pneumonia" = "PneumoniaRank"
  )
  # Let's first get our invalid outcome stop message out of the way...
  if (!(outcome %in% names(rank.columns))) {
    stop("invalid outcome")
  }
  rank.column <- rank.columns[[outcome]]

  # Work out once which kind of request this is
  wants.best <- num %in% c("best", "Best")
  wants.worst <- num %in% c("worst", "Worst")

  # Name of the hospital at the requested rank within one state, or NA
  pick.hospital <- function(state.df) {
    ranks <- state.df[[rank.column]]
    if (wants.best) {
      target <- 1
    } else if (wants.worst) {
      # No ranked hospital at all: return NA rather than letting max() warn
      # and produce -Inf
      if (all(is.na(ranks))) {
        return(NA_character_)
      }
      target <- max(ranks, na.rm = TRUE)
    } else {
      target <- num
    }
    # which() drops the NA comparisons that come from unranked hospitals
    hit <- which(ranks == target)
    if (length(hit) == 0L) {
      NA_character_
    } else {
      as.character(state.df[["Hospital"]][hit[1L]])
    }
  }

  # Build both columns in one go instead of growing a data frame with rbind();
  # this also keeps the column types the same no matter which states have a
  # match.
  states <- names(outcomes.split)
  hospitals <- vapply(outcomes.split, pick.hospital, character(1),
                      USE.NAMES = FALSE)
  results <- data.frame(hospital = hospitals,
                        state = factor(states, levels = states),
                        stringsAsFactors = FALSE)
  # Sensible row names: the state each row belongs to
  rownames(results) <- states
  results
}
# Manual checks of rankall(): a numeric rank, "best", and both spellings of
# "worst" for heart failure, then each outcome in capitalised and lowercase
# form.
rankall("Heart Failure",1)
rankall("Heart Failure","best")
rankall("Heart Failure","worst")
rankall("Heart Failure","Worst")
# A rank of 150 is included to see what comes back for a very large rank
rankall("Heart Failure",150)
rankall("Heart Attack", 15)
rankall("Heart Failure", 15)
rankall("heart failure", 15)
rankall("heart attack", 15)
rankall("Pneumonia", 27)
rankall("pneumonia",27)
# For the quiz:
# The workspace is deliberately NOT cleared here: rm(list = ls()) would delete
# rankall and outcomes.split, and every call below would then fail.
r <- rankall("Heart Attack", 4)
as.character(subset(r, state == "HI")$hospital)
r <- rankall("pneumonia", "worst")
as.character(subset(r, state == "NJ")$hospital)
r <- rankall("heart failure", 10)
as.character(subset(r, state == "NV")$hospital)