-
Notifications
You must be signed in to change notification settings - Fork 2
/
census.py
126 lines (116 loc) · 4.74 KB
/
census.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import folium
from streamlit_folium import folium_static
import pydeck as pdk
from streamlit_lottie import st_lottie
# Page header: title, a short note on where the data comes from, and a
# Lottie animation.
st.title('Census Data Exploration')
st.write(
    'This analysis will use data aggregated on the zip code level, '
    'available from, '
    'https://catalog.data.gov/dataset/demographic-statistics-by-zip-code'
)

# st.echo() shows the source of its body in the app before running it, so the
# statement inside is deliberately left exactly as written.
with st.echo():
    st_lottie("https://lottie.host/d7ef67db-4f71-4911-8c41-01468afa98f9/Q4IsgwjN80.json")
# Data loading
def load_data():
    """Read ``demolatlong.csv`` and return it as a DataFrame.

    The path is relative, so the file is looked up in the current working
    directory. The descriptive text columns are parsed as categoricals and
    ``Area Code`` as a string. ``JURISDICTION NAME`` is parsed as an integer
    first and then converted to its string form.

    Returns:
        pandas.DataFrame: the parsed CSV contents.
    """
    categorical_columns = ("City", "State", "Country", "Timezone")
    dtype_by_column = {name: "category" for name in categorical_columns}
    dtype_by_column["JURISDICTION NAME"] = "int"
    dtype_by_column["Area Code"] = "str"

    frame = pd.read_csv('demolatlong.csv', dtype=dtype_by_column)

    # Integer parse followed by a string cast: the values end up as the
    # decimal text of the parsed integers.
    jurisdiction = frame['JURISDICTION NAME']
    frame['JURISDICTION NAME'] = jurisdiction.astype(str)
    return frame
df = load_data()

# Data overview section: an opt-in table of user-selected columns.
overview_default_columns = ["COUNT PARTICIPANTS", "COUNT FEMALE", "COUNT MALE"]
show_overview = st.checkbox('Show Data Overview')
if show_overview:
    selected_columns = st.multiselect(
        "Select columns to view statistics",
        df.columns,
        default=overview_default_columns,
    )
    # Nothing is rendered when the selection has been cleared.
    if selected_columns:
        st.write(df[selected_columns])

# NOTE(review): the nesting level of this divider could not be determined
# from the available source; it is treated as a top-level separator here.
# Confirm against the original file.
st.divider()
# Heatmap visualization
def create_heatmap(df):
    """Build a folium map with a heat layer over the rows' coordinates.

    Rows missing ``Latitude`` or ``Longitude`` are ignored. Each remaining
    row contributes one unweighted point (only the coordinate pair is passed
    to the heat layer). The map is centred on the mean coordinate.

    Args:
        df: DataFrame with ``Latitude`` and ``Longitude`` columns.

    Returns:
        folium.Map: the map, with a HeatMap layer added when at least one
        row has usable coordinates.
    """
    # Import the plugin explicitly: ``folium.plugins`` is a submodule, and
    # reaching it through the bare ``folium`` name only works if some other
    # import happens to have loaded it already.
    from folium.plugins import HeatMap

    coords = df.dropna(subset=["Latitude", "Longitude"])[["Latitude", "Longitude"]]

    if coords.empty:
        # The mean of an empty column is NaN, which is not a usable map
        # centre; fall back to the fixed centre used by the
        # permanent-resident map in this file.
        center = [40.730610, -73.935242]
    else:
        center = [coords["Latitude"].mean(), coords["Longitude"].mean()]

    m = folium.Map(location=center, zoom_start=10)
    if not coords.empty:
        HeatMap(coords.values.tolist()).add_to(m)
    return m
# Opt-in section: participant heatmap.
show_heatmap = st.checkbox('Census Participant Density by Zip Code')
if show_heatmap:
    st.write('This map displays Census participation Density by Zip Code.')
    heatmap = create_heatmap(df)
    folium_static(heatmap)
# Permanent resident alien visualization
def create_pr_alien_map(df):
    """Build a folium map marking rows with permanent resident aliens.

    Only rows whose ``COUNT PERMANENT RESIDENT ALIEN`` is greater than zero
    and that have both coordinates are drawn. Each such row gets a
    fixed-size blue circle with a popup (ZIP code and count) and a text
    label showing the count.

    Args:
        df: DataFrame with ``JURISDICTION NAME``,
            ``COUNT PERMANENT RESIDENT ALIEN``, ``Latitude`` and
            ``Longitude`` columns.

    Returns:
        folium.Map: map centred on a fixed coordinate at zoom level 10.
    """
    count_column = 'COUNT PERMANENT RESIDENT ALIEN'
    wanted = ['JURISDICTION NAME', count_column, 'Latitude', 'Longitude']

    pr_alien_zipcodes = df.loc[df[count_column] > 0, wanted]
    # Same guard the other map builders in this file apply: a marker cannot
    # be placed at a missing coordinate.
    pr_alien_zipcodes = pr_alien_zipcodes.dropna(subset=['Latitude', 'Longitude'])

    m = folium.Map(location=[40.730610, -73.935242], zoom_start=10)

    # Plain tuples, in the column order selected above.
    for zip_code, count, lat, lon in pr_alien_zipcodes.itertuples(index=False, name=None):
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6,
            popup=f"ZIP Code: {zip_code}<br>Count: {count}",
        ).add_to(m)
        # Second marker at the same spot whose icon is just the count text.
        folium.Marker(
            location=[lat, lon],
            icon=folium.DivIcon(
                html=f"<div style='font-size: 10pt; color: black;'>{count}</div>"
            ),
        ).add_to(m)
    return m
# Opt-in section: permanent resident alien map.
# The description states that markers are labelled with the count, because
# create_pr_alien_map draws every circle with the same fixed radius and
# shows the count as a text label and in the popup.
if st.checkbox('Show Distribution of Permanent Resident Aliens by Zip Code'):
    st.write('''
This map displays ZIP codes with non-zero counts of permanent resident aliens. The blue markers represent these ZIP codes,
and each marker is labelled with the number of permanent resident aliens in that area.
From the visualization, there's a noticeable concentration of markers in New York City, especially in the Brooklyn and Queens boroughs,
with a few markers dispersed in other regions outside of NYC.
''')
    pr_alien_map = create_pr_alien_map(df)
    folium_static(pr_alien_map)
# Gender distribution visualization
def _gender_balance_rgb(balance):
    """Map a female-minus-male balance to an RGB triple (pink/blue/grey)."""
    if balance > 0:
        return [255, 192, 203]  # pink: larger female share
    if balance < 0:
        return [0, 0, 255]  # blue: larger male share
    # Exactly balanced, or not comparable (NaN fails both tests above).
    return [128, 128, 128]


def plot_gender_distribution(df):
    """Build a pydeck scatterplot coloured by gender balance per row.

    Rows missing ``Latitude`` or ``Longitude`` are ignored. Each remaining
    row is drawn as a circle whose colour depends on the sign of
    ``PERCENT FEMALE - PERCENT MALE`` (pink when positive, blue when
    negative, grey otherwise) and whose radius is ``COUNT PARTICIPANTS * 10``
    before the layer's radius scale and pixel limits are applied.

    The input frame is not modified; derived columns (``gender_balance``,
    ``fill_color``, ``radius``) are added to a copy.

    Args:
        df: DataFrame with ``Latitude``, ``Longitude``, ``PERCENT FEMALE``,
            ``PERCENT MALE`` and ``COUNT PARTICIPANTS`` columns.

    Returns:
        pydeck.Deck: deck centred on the mean coordinate with one
        ScatterplotLayer.
    """
    # Work on an explicit copy so the column assignments below neither raise
    # SettingWithCopyWarning nor touch the caller's frame.
    data = df.dropna(subset=["Latitude", "Longitude"]).copy()

    data["gender_balance"] = data["PERCENT FEMALE"] - data["PERCENT MALE"]
    # Colour accessors need an [r, g, b] array per row, so the colour is
    # precomputed instead of binding the accessor to the scalar balance.
    data["fill_color"] = data["gender_balance"].map(_gender_balance_rgb)
    # Accessor strings are parsed as expressions; a column name containing a
    # space cannot be referenced there, so the radius is precomputed under a
    # space-free name.
    data["radius"] = data["COUNT PARTICIPANTS"] * 10

    view_state = pdk.ViewState(
        latitude=data["Latitude"].mean(),
        longitude=data["Longitude"].mean(),
        zoom=10,
        pitch=0,
    )

    gender_layer = pdk.Layer(
        "ScatterplotLayer",
        data=data,
        get_position=["Longitude", "Latitude"],
        get_fill_color="fill_color",
        get_line_color="fill_color",
        get_radius="radius",
        pickable=True,
        opacity=0.6,
        stroked=True,
        filled=True,
        radius_scale=6,
        radius_min_pixels=5,
        radius_max_pixels=100,
        line_width_min_pixels=1,
    )

    gender_map = pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state=view_state,
        layers=[gender_layer],
    )
    return gender_map
# Opt-in section: gender distribution map.
show_gender_map = st.checkbox('Show Gender Distribution by Location')
if show_gender_map:
    # The deck is built first, then the caption and chart are rendered.
    gender_map = plot_gender_distribution(df)
    st.write(
        'This map displays gender distribution by area, '
        'separated by the colors blue, pink, and grey.'
    )
    st.pydeck_chart(gender_map)