-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate-data.r
More file actions
57 lines (49 loc) · 2.01 KB
/
create-data.r
File metadata and controls
57 lines (49 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# load spatial data ----
# Kentucky county polygons, served by the KY Open Data ArcGIS REST API.
# Dataset page:
# https://opengisdata.ky.gov/datasets/kygeonet::kentucky-county-polygons/about

# Query every field of every feature (where 1=1) and ask for GeoJSON output.
url <- parse_url(
  "https://services3.arcgis.com/ghsX9CKghMvyYjBU/arcgis/rest/services/Ky_County_Polygons_WM/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
)
request <- build_url(url)

# Read the features, reproject to the CRS with EPSG code 6473
# (presumably a Kentucky state-plane system -- confirm), and convert the
# column names to snake_case.
ky_county_poly <- janitor::clean_names(
  st_transform(
    st_read(request),
    6473
  )
)

# Keep only the columns used downstream (geometry stays attached to sf objects).
ky_county_poly_select <- select(ky_county_poly, fips_id, name2, pop10)
# load county level data ----
# Source: County Health Rankings & Roadmaps, Kentucky data and resources
# https://www.countyhealthrankings.org/health-data/kentucky/data-and-resources

# Location of the 2025 Kentucky workbook (.xlsx)
url <- "https://www.countyhealthrankings.org/sites/default/files/media/document/2025%20County%20Health%20Rankings%20Kentucky%20Data%20-%20v3.xlsx"

# Local path the workbook is saved to
destfile <- "data/ky_county_health_rankings_2025.xlsx"

# Make sure the target folder exists so the download does not fail on a
# fresh checkout; this is a no-op when the folder is already there.
dir.create(dirname(destfile), recursive = TRUE, showWarnings = FALSE)

# Download the file, overwriting any earlier copy. Stop immediately on an
# HTTP error (4xx/5xx): otherwise the error page would be saved under the
# .xlsx name and only surface later as a confusing read_excel() failure.
GET(url, write_disk(destfile, overwrite = TRUE)) |>
  stop_for_status(task = "download the County Health Rankings workbook")

# Read the "Additional Measure Data" sheet, skipping its first row so the
# second row supplies the column names, then convert names to snake_case.
ky_county_health_rankings_2025 <- readxl::read_excel(
  destfile,
  sheet = "Additional Measure Data",
  skip = 1
) |>
  janitor::clean_names()
# Keep the FIPS code and life expectancy, then derive two binned versions of
# life expectancy for mapping: quantile bins and "pretty" bins.
ky_county_health_rankings_2025_select <- ky_county_health_rankings_2025 |>
  select(fips, life_expectancy) |>
  mutate(
    # integer key, used when joining to the county polygons
    fips = as.integer(fips),
    life_expectancy = round(life_expectancy, 2),
    # create intervals using quantile breaks (0%, 20%, ..., 100%)
    life_expectancy_bin = cut(
      life_expectancy,
      breaks = quantile(
        life_expectancy,
        probs = seq(0, 1, length.out = 6),
        na.rm = TRUE
      ),
      include.lowest = TRUE
    ),
    # create intervals based on pretty breaks; the breaks come from the
    # unrounded source column and n = 5 is a target number of classes
    life_expectancy_pretty = cut(
      life_expectancy,
      breaks = classIntervals(
        ky_county_health_rankings_2025$life_expectancy,
        n = 5,
        style = "pretty"
      )$brks,
      # cut() intervals are right-closed, so without this flag a value equal
      # to the lowest break (possible after rounding) would become NA. The
      # first label is therefore written "[a,b]" rather than "(a,b]".
      # Label precision stays at cut()'s default (dig.lab = 3); cut() has no
      # `digits` argument, so passing one has no effect.
      include.lowest = TRUE
    )
  )
# combine county data to spatial data ----
# Left join: every county polygon is kept, and the health-ranking columns are
# attached where the polygon's fips_id equals the rankings table's fips.
ky_county_poly_join <- ky_county_poly_select |>
  left_join(
    ky_county_health_rankings_2025_select,
    by = join_by(fips_id == fips)
  )