-
Notifications
You must be signed in to change notification settings - Fork 5
/
mortality_weekly_per_age_per_capita.py
126 lines (95 loc) · 4.88 KB
/
mortality_weekly_per_age_per_capita.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
import plotly.express as px
#
# https://claude.ai/chat/467c298a-027f-49ac-ae9e-9bd43ef92d8e
# https://chatgpt.com/c/66f0053e-79bc-8004-9e84-b77b055c4de1
def plot_deaths_per_100k_per_year(df, age_group, sex):
    """Chart weekly deaths per 100k for one age group and sex, one line per year.

    The chart is rendered in the Streamlit app; nothing is returned.

    Args:
        df: DataFrame with at least the columns ``age_group``, ``geslacht``,
            ``week``, ``year`` and ``deaths_per_100k``.
        age_group: Value to select in the ``age_group`` column.
        sex: Value to select in the ``geslacht`` column.
    """
    # Keep only the rows for the requested age group and sex.
    matches_age = df['age_group'] == age_group
    matches_sex = df['geslacht'] == sex
    selection = df[matches_age & matches_sex]

    chart_title = f'Deaths per 100k for {age_group} ({sex}) by Week'
    axis_labels = {'week': 'Week', 'deaths_per_100k': 'Deaths per 100k'}

    # Weeks on the x-axis, the rate on the y-axis, a separate line per year.
    fig = px.line(
        selection,
        x='week',
        y='deaths_per_100k',
        color='year',
        title=chart_title,
        labels=axis_labels,
    )
    st.plotly_chart(fig)
def _population_per_age_group(population_df):
    """Sum the population (``aantal``) per year, five-year age group and sex."""
    # Left-closed bins: [0, 5), [5, 10), ..., [85, 90), [90, 1000).
    bins = list(range(0, 95, 5)) + [1000]
    labels = [f'Y{i}-{i+4}' for i in range(0, 90, 5)] + ['Y_GE90']

    ages = population_df['leeftijd'].astype(int)
    binned = population_df.assign(
        leeftijd=ages,
        age_group=pd.cut(ages, bins=bins, labels=labels, right=False),
    )
    grouped = (
        binned.groupby(['jaar', 'age_group', 'geslacht'], observed=False)['aantal']
        .sum()
        .reset_index()
    )

    # Relabel the youngest group; presumably this matches the label used in
    # the ``age`` column of the deaths data it is merged with -- TODO confirm.
    # The new category has to be registered before it can be assigned.
    grouped['age_group'] = grouped['age_group'].cat.add_categories('Y_LT5')
    grouped.loc[grouped['age_group'] == 'Y0-4', 'age_group'] = 'Y_LT5'
    return grouped


def _deaths_per_week(deaths_df):
    """Sum the deaths (``OBS_VALUE``) per age group, sex, week and year."""
    # The year is read from the first four characters of TIME_PERIOD and the
    # week from position 6 onwards (assumes a 'YYYY-Www' layout -- TODO confirm).
    period = deaths_df['TIME_PERIOD']
    dated = deaths_df.assign(
        year=period.str[:4].astype(int),
        week=period.str[6:].astype(int),
    )
    return dated.groupby(['age', 'sex', 'week', 'year'])['OBS_VALUE'].sum().reset_index()


def _deaths_per_100k(population_df, deaths_df):
    """Join weekly deaths with yearly population and add the rate per 100,000."""
    merged = pd.merge(
        _deaths_per_week(deaths_df),
        _population_per_age_group(population_df),
        left_on=['year', 'age', 'sex'],
        right_on=['jaar', 'age_group', 'geslacht'],
    )
    merged['deaths_per_100k'] = (merged['OBS_VALUE'] / merged['aantal']) * 100000
    merged = merged.sort_values(by=['year', 'week'], ascending=[True, True])
    # Text label of the form '<year> - <week>', used as the x value in make_plot.
    merged['TIME_PERIOD'] = merged['year'].astype(str) + ' - ' + merged['week'].astype(str)
    return merged


def main():
    """Render the weekly mortality per 100k page.

    Loads the population and deaths data, computes deaths per 100,000 per
    week, age group and sex, and draws one chart per sex followed by a
    per-year chart for the 'Y_GE90' male group.
    """
    st.subheader("Weekly mortality/100k")
    st.info("reproducing https://x.com/dimgrr/status/1837603581962453167")

    population_df, deaths_df = get_data()
    merged_df = _deaths_per_100k(population_df, deaths_df)

    # Sex code -> name shown in the chart title; insertion order is plot order.
    sex_names = {'T': 'Total', 'M': 'Male', 'F': 'Female'}
    for sex, sex_ in sex_names.items():
        make_plot(merged_df, sex, sex_)

    # Example usage:
    plot_deaths_per_100k_per_year(merged_df, 'Y_GE90', 'M')
def make_plot(merged_df, sex, sex_):
    """Chart deaths per 100,000 over time for one sex, one line per age group.

    The y-axis is logarithmic. The chart is rendered in the Streamlit app;
    nothing is returned.

    Args:
        merged_df: DataFrame with at least the columns ``sex``, ``age``,
            ``TIME_PERIOD`` and ``deaths_per_100k``.
        sex: Value to select in the ``sex`` column.
        sex_: Display name for ``sex``, shown in the chart title.
    """
    # Narrow down to the requested sex once instead of on every iteration.
    rows_for_sex = merged_df[merged_df['sex'] == sex]

    fig = go.Figure()
    # One trace per age group, in order of first appearance.
    for age in rows_for_sex['age'].unique():
        age_rows = rows_for_sex[rows_for_sex['age'] == age]
        trace = go.Scatter(
            x=age_rows['TIME_PERIOD'],
            y=age_rows['deaths_per_100k'],
            mode='lines',
            name=age,
        )
        fig.add_trace(trace)

    layout = {
        'title': f'Deaths per 100,000 People by Age Group per Week ({sex_} Population)',
        'xaxis_title': 'Week (cumulative across years)',
        'yaxis_title': 'Deaths per 100,000 People (log scale)',
        'yaxis_type': "log",
        'legend_title': 'Age Group',
    }
    fig.update_layout(**layout)

    st.plotly_chart(fig)
@st.cache_data()
def get_data():
    """Download the population and deaths CSV files.

    The result is cached by Streamlit via ``st.cache_data``.

    Returns:
        A ``(population_df, deaths_df)`` tuple of DataFrames; the population
        file is semicolon-separated, the deaths file comma-separated.
    """
    population_url = 'https://raw.githubusercontent.com/rcsmit/COVIDcases/main/input/bevolking_leeftijd_NL.csv'
    deaths_url = 'https://raw.githubusercontent.com/rcsmit/COVIDcases/main/input/sterfte_eurostats_NL.csv'

    population_df = pd.read_csv(population_url, sep=';')
    deaths_df = pd.read_csv(deaths_url, sep=',')
    return population_df, deaths_df
# Run the page when this file is executed as a script.
if __name__ == "__main__":
    main()
# NOTE: no `fig` exists at module level -- the figures are local to
# plot_deaths_per_100k_per_year() and make_plot(). To save a plot as an HTML
# file, add a call like the following inside one of those functions:
# fig.write_html("deaths_per_100k_age_group.html")