-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_prep.R
74 lines (58 loc) · 2.23 KB
/
data_prep.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
library(readxl)
library(tidyverse)
library(ggplot2)
# reference: https://yanheupenn.shinyapps.io/healthfacts/
# type = 'All' #
type = c('HMO', 'Local PPO', 'MSA', 'PFFS', 'Regional PPO')
rating = 2
premium ='0' #'No Restriction' # c("0", "Below Mean", "Below Median")
MOOP = "Minimum" # c("No Restriction", "Below Mean", "Below Median")
otherdtdir = "F:/.cache/Auto-enrollment-MA/"
setwd(otherdtdir)
pop =read.csv("F:/.cache/Auto-enrollment-MA/county_population.csv")
fips <- read_excel("F:/.cache/Auto-enrollment-MA/state_county_fips.xlsx")
workdir = "E:/Repositories/MA-auto-enrollment/"
setwd(workdir)
ma <- read.csv("3_Output/MA_Plan_Data.csv")
states <- read.csv("1_Input/states.csv")
fips <- fips %>%
full_join(states, by=c("StateAbb" = "Abbreviation")) %>%
filter(!is.na(FIPSCode))
ma <- ma %>%
left_join(fips)
# pop is used to categorize the counties into different size groups
pop['FIPSCode'] = pop$State*1000+pop$County
# AgeGroup 0 means total population, 14+ indicates age 65+
# categorize
poptot = pop[pop$AgeGroup==0 & pop$Year==12,]
poptot = poptot %>%
select(c(FIPSCode, TotalPop)) %>%
inner_join(fips) %>%
mutate(Pop.25pct = quantile(TotalPop, 0.25),
Pop.75pct = quantile(TotalPop, 0.75))
poptot['County.Size'] = "Bottom 25%"
poptot[poptot$TotalPop>poptot$Pop.25pct & poptot$TotalPop<=poptot$Pop.75pct, 'County.Size'] = "Middle 50%"
poptot[poptot$TotalPop>poptot$Pop.75pct, 'County.Size'] = "Top 25%"
# calculate the total 65+ in each county
pop65 <- pop[pop$AgeGroup>=14 & pop$Year==12,]
pop65 <- pop65 %>%
group_by(FIPSCode) %>%
summarise(Total.65plus =sum(TotalPop))
# calculate the people entering the MA in the next 10 years
popelig <- read.csv("1_Input/county_eligible_population.csv")
popelig <- popelig %>%
filter(Year<=2031) %>%
group_by(FIPSCode) %>%
summarise(Pop.eligible =sum(CountyPopulation))
poptot <- poptot %>%
left_join(pop65) %>%
left_join(popelig)
save(ma, file='2_Code/descriptive_analysis/ma.rda')
save(poptot, file='2_Code/descriptive_analysis/poptot.rda')
# ####################
# # data for mapping
# df['HavePlan'] = 1
# df[is.na(df$Plan.Type), 'HavePlan'] = 0 #TODO: see whether use 0 or NA
# df_map <- df %>%
# group_by(State, County) %>%
# summarise(EligibleMA = sum(HavePlan))