forked from CodeForPhilly/pbf-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaggregate.py
209 lines (179 loc) · 10.7 KB
/
aggregate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import streamlit as st
import pandas as pd
import datetime
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from PIL import Image
from year_summary import plot_year_summary
@st.cache(allow_output_mutation=True)
def load_data():
    """Read the cleaned bail dataset and parse its ``bail_date`` column.

    The result is cached by Streamlit with output mutation allowed, so
    callers receive the same DataFrame object across reruns.

    Returns:
        The contents of ``data/cleaned/app_data.csv`` as a DataFrame whose
        ``bail_date`` column has been converted with ``pd.to_datetime``.
    """
    raw_records = pd.read_csv('data/cleaned/app_data.csv')
    # ``assign`` overwrites the existing column in place of a separate
    # item assignment; column order is unchanged.
    return raw_records.assign(bail_date=pd.to_datetime(raw_records["bail_date"]))
# st.set_page_config(layout="wide")
def _ensure_calendar_dates(df):
    """Convert ``bail_date`` to ``datetime.date`` objects, in place and once.

    ``load_data`` caches its DataFrame with output mutation allowed, so the
    frame converted on the first run is handed back on every rerun. Checking
    the dtype makes the conversion idempotent instead of relying on a
    swallowed exception when the column already holds ``date`` objects.
    """
    if pd.api.types.is_datetime64_any_dtype(df['bail_date']):
        df['bail_date'] = df['bail_date'].dt.date
    return df


def _select_date_range(df):
    """Show the date-range slider and return the rows inside the selection.

    Streamlit has no date-range slider here, so the slider picks 1-based
    positions in the sorted list of dates present in the data. The slider
    maximum is the number of such dates, so every position maps to a valid
    entry even when some calendar days have no cases.

    Returns:
        The filtered DataFrame, or ``None`` when ``df`` contains no dates.
    """
    all_dates = sorted(df['bail_date'].unique())
    if not all_dates:
        return None

    n_dates = len(all_dates)
    if n_dates > 1:
        first_pos, last_pos = st.slider(
            'Date Range', min_value=1, max_value=n_dates,
            value=(1, n_dates), step=1)
    else:
        # A slider needs distinct bounds; a single date leaves nothing to pick.
        first_pos = last_pos = 1

    first_day = all_dates[first_pos - 1]
    last_day = all_dates[last_pos - 1]
    st.write(first_day.strftime('%b %d, %Y'), '-', last_day.strftime('%b %d, %Y'))

    in_range = (df['bail_date'] >= first_day) & (df['bail_date'] <= last_day)
    return df[in_range]


def _render_summary_cards(df_selected):
    """Draw the 2x2 grid of headline numbers for the selected cases."""
    monetary = df_selected[df_selected['bail_type'] == 'Monetary']
    n_with_bail_type = int(df_selected['bail_type'].notnull().sum())
    # Guard the percentage against a selection with no recorded bail type.
    if n_with_bail_type:
        monetary_pct = len(monetary) / n_with_bail_type * 100.
    else:
        monetary_pct = 0.

    cards = [
        go.Indicator(
            mode='number',
            value=len(df_selected),
            domain={'row': 0, 'column': 0},
            title={'text': 'Total Cases'}),
        go.Indicator(
            mode='number',
            value=monetary_pct,
            number={'suffix': '%'},
            domain={'row': 0, 'column': 1},
            title={'text': 'Monetary Bail Frequency'}),
        go.Indicator(
            mode='number',
            value=monetary['bail_amount'].sum(),
            number={'prefix': '$'},
            domain={'row': 1, 'column': 0},
            title={'text': 'Total Bail Set'}),
        go.Indicator(
            mode='number',
            value=monetary['bail_paid'].sum(),
            number={'prefix': '$'},
            domain={'row': 1, 'column': 1},
            title={'text': 'Total Bail Paid'}),
    ]
    card_fig = go.Figure(data=cards)
    card_fig.update_layout(
        grid={'rows': 2, 'columns': 2, 'pattern': "independent"})
    st.plotly_chart(card_fig)


def _render_static_summaries():
    """Write the narrative sections that are backed by pre-rendered images."""
    st.subheader('Bail type')
    st.write(
        "During a defendant's arraignment (a hearing held shortly after they are arrested), one of several [types of bail](https://www.pacodeandbulletin.gov/Display/pacode?file=/secure/pacode/data/234/chapter5/s524.html) may be set:\n"
        "- **monetary**, where a bail amount is set and the defendant is held in jail until a portion (typically 10%) is paid (\"posted\"),\n"
        "- **unsecured**, where the defendant is liable for a set bail amount if they do not show up to future court proceedings,\n"
        "- **ROR** (“released on own recognizance”), where a defendant must agree to show up to all future court proceedings,\n"
        "- **nonmonetary** bail condition, or\n"
        "- the defendant may be **denied** bail.")
    st.image(Image.open('figures/aggregate_bail_type.png'), width=600)
    st.write("The most frequently set bail type in 2020 was monetary bail. Nonmonetary bail was set in under 1% of cases.")

    st.subheader('Monetary bail set')
    st.write("For cases where monetary bail was set, the median bail set was $40,000. A bail amount of less than $10,000 was set in around 19 percent of cases, and a bail amount of at least $100,000 was set in more than 25 percent of cases.")
    st.image(Image.open('figures/aggregate_bailSetBin.png'), width=400)
    st.write("While the maximum bail set was $5M, bail of at least $500k was set in only 5 percent of cases. Of the specific values of bail that were set below $500k, the most frequently set bail amount was $50,000.")
    st.image(Image.open('figures/aggregate_bailSet500k.png'), width=400)

    st.subheader('Monetary bail posted')
    st.write("In more than half (56%) of cases where monetary bail was set, bail was not posted, meaning that the defendant was not released from jail. Out of the cases where bail was at least $100,000, less than 30% of defendants posted bail. Though infrequently set, bail amounts below $1000 were also infrequently posted.")
    st.write("**<font color='red'>Question for PBF</font>**: do these observations (in particular, low payments of bail set below $1000) match your experience?", unsafe_allow_html=True)
    st.image(Image.open('figures/aggregate_bailPostedBin.png'), width=400)
    st.write("When bail was posted, the median and most frequently paid amount was $2,500 (corresponding to 10% of bail set at $25,000). ")
    st.image(Image.open('figures/aggregate_bailPosted.png'), width=400)

    st.subheader('Attorney types')
    st.write("Public defenders, representing defendants who cannot afford to hire a lawyer, were appointed in more than two thirds of cases.")
    st.image(Image.open('figures/aggregate_attorney_type.png'), width=600)

    # TODO: fix these figures such that the same colors/order are used for each bail type
    st.subheader('Charged offenses and bail type')
    st.write(
        "The frequency of bail types set was dependent on the types of charges associated with each case. "
        "For cases involving a charge of assault, monetary bail was most frequently set. "
        "For cases involving a drug-related charge, monetary bail and ROR were set at similar rates. "
        "For cases involving a charge of DUI, ROR bail was most frequently set.")
    st.write("**<font color='red'>Question for PBF</font>**: are there any specific charges you'd be interested in knowing this (or bail amounts/bail posted) for?", unsafe_allow_html=True)
    st.image(Image.open('figures/aggregate_bailType_byOffense.png'), use_column_width=True)


def _render_moving_average(df):
    """Plot the moving average of the bail metric chosen in the dropdown.

    The rolling window counts rows of the per-date series (dates that appear
    in the data), not calendar days.
    """
    st.subheader('Bail trends over the year')
    st.write("Use the dropdown menu to view trends in the mean of different bail metrics. Use the slider to change the number of days over which the moving average is calculated.")
    st.write(
        "Mean bail amount trended slightly upward over the course of the year. "
        "Monetary bail case counts dropped in March, following a decrease in total arrests as a COVID-19 mitigation measure, but returned to pre-pandemic levels by October. "
        "Monetary bail frequency held steady for much of the year, with a slight upward trend starting in September.")

    # Select the column before aggregating so non-numeric columns are never
    # averaged (pandas >= 2.0 raises on them).
    mean_amount_per_day = df.groupby('bail_date')['bail_amount'].mean()
    typed_cases_per_day = df[df['bail_type'].notnull()].groupby('bail_date').size()
    # Put the monetary counts on the same dates as the denominator, counting
    # days without monetary bail as zero, so both rolling windows cover the
    # same days.
    monetary_cases_per_day = (
        df[df['bail_type'] == 'Monetary']
        .groupby('bail_date')
        .size()
        .reindex(typed_cases_per_day.index, fill_value=0))

    metric = st.selectbox('Metric', ('Bail Amount', 'Monetary Bail Cases', 'Monetary Bail Frequency'))
    window = st.slider('Window Size (days)', 1, 60, 5, 1)

    if metric == 'Bail Amount':
        smoothed = mean_amount_per_day.rolling(window=window, min_periods=1).mean()
    elif metric == 'Monetary Bail Cases':
        smoothed = monetary_cases_per_day.rolling(window=window, min_periods=1).mean()
    else:
        # Frequency = monetary cases / cases with a bail type, per window.
        monetary_in_window = monetary_cases_per_day.rolling(window=window, min_periods=1).sum()
        typed_in_window = typed_cases_per_day.rolling(window=window, min_periods=1).sum()
        smoothed = monetary_in_window.div(typed_in_window)

    ma_fig = go.Figure()
    ma_fig.update_layout(
        title_text=f'Mean {metric} {window}-Day Moving Average',
        title_x=0.5,
        xaxis_title='Date',
        yaxis_title=metric)
    ma_fig.add_trace(go.Scatter(x=smoothed.index,
                                y=smoothed.values,
                                mode='lines+markers',
                                name='lines+markers'))
    st.plotly_chart(ma_fig)


def app():
    """Render the year-end aggregate summary page.

    In order, the page shows: the year-summary chart from
    ``plot_year_summary``, the headline totals for a user-selected date
    range, the static narrative sections with their images, and the
    moving-average trend chart.
    """
    # Year-end summary chart. A plain Figure is enough for st.plotly_chart;
    # the Jupyter FigureWidget wrapper is not needed.
    st.plotly_chart(go.Figure(plot_year_summary()))
    st.title('Year-end Summary')
    st.write('This section provides a general year-end summary of bail in Philadelphia in 2020, including trends and aggregate-level information for case counts, bail types, and monetary bail set and posted.')

    # ----------------------------------------------------
    # Summary numbers
    # ----------------------------------------------------
    st.subheader('Yearly totals')
    st.write("Use the slider to change the range of dates over which the sum is calculated.")
    df = _ensure_calendar_dates(load_data())
    df_selected = _select_date_range(df)
    if df_selected is None:
        st.warning('No bail data is available.')
    else:
        _render_summary_cards(df_selected)

    # ----------------------------------------------------
    # Summary charts
    # ----------------------------------------------------
    _render_static_summaries()

    # ----------------------------------------------------
    # Moving average plots
    # ----------------------------------------------------
    _render_moving_average(df)