-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathexport_agency_stats.py
executable file
·99 lines (87 loc) · 2.88 KB
/
export_agency_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python2
from time import sleep
import requests
import unicodecsv
from utils import get_api_key
# Root of the MuckRock v1 API; resource paths are appended to this string.
url = 'https://www.muckrock.com/api_v1/'

# Every request carries the API token obtained from the local ``utils`` helper.
token = get_api_key()
headers = {
    'Authorization': 'Token %s' % token,
    'content-type': 'application/json',
}

# URL of the next result page to download; starts at the agency listing.
next_ = url + 'agency'
# Keys read from each agency record, in the order they appear as CSV columns.
fields = (
    "id", "name", "slug", "status",
    "twitter", "twitter_handles",
    "parent", "appeal_agency",
    "url", "foia_logs", "foia_guide", "public_notes", "absolute_url",
    "average_response_time", "fee_rate", "success_rate",
    "has_portal", "has_email", "has_fax", "has_address",
    "number_requests",
    "number_requests_completed",
    "number_requests_rejected",
    "number_requests_no_docs",
    "number_requests_ack",
    "number_requests_resp",
    "number_requests_fix",
    "number_requests_appeal",
    "number_requests_pay",
    "number_requests_partial",
    "number_requests_lawsuit",
    "number_requests_withdrawn",
)
# Keys read from each jurisdiction record and appended to every agency row.
jurisdiction_fields = ('name', 'parent', 'level')
# Cache of jurisdiction lookups, keyed by jurisdiction id, so that each
# jurisdiction only has to be requested once.
jurisdictions = dict()

# Set this to True while exporting data so that errors are printed instead of
# crashing the export.
SUPRESS_ERRORS = False

# Number of the result page currently being exported (used for progress output).
page = 1
def get_jurisdiction(jurisdiction_id):
    """Return the jurisdiction record for ``jurisdiction_id``.

    Records are memoised in the module-level ``jurisdictions`` dict, so the
    API is queried at most once per id.  On a cache miss the record is
    downloaded (after a one-second pause for rate limiting) and its
    ``parent`` value, an id, is replaced by the parent's ``name``; the parent
    is resolved through this same function and is therefore cached as well.

    :param jurisdiction_id: id used both as the cache key and in the request URL.
    :returns: the decoded JSON record, with ``parent`` holding the parent
        jurisdiction's name (or left falsy when there is no parent).
    :raises requests.exceptions.HTTPError: if the API answers with a 4xx/5xx
        status.
    """
    if jurisdiction_id in jurisdictions:
        return jurisdictions[jurisdiction_id]

    sleep(1)  # rate limit
    r = requests.get(url + 'jurisdiction/' + str(jurisdiction_id), headers=headers)
    # Surface HTTP failures here instead of letting an error body be parsed
    # and blow up later as an unrelated KeyError/ValueError.
    r.raise_for_status()
    jurisdiction_json = r.json()

    if jurisdiction_json['parent']:  # USA has no parent
        parent = get_jurisdiction(jurisdiction_json['parent'])
        # replace parent id with parent name in jurisdiction json
        jurisdiction_json['parent'] = parent['name']

    jurisdictions[jurisdiction_id] = jurisdiction_json
    return jurisdiction_json
# Results per page assumed for the progress message only (the value the
# script has always hard-coded).  NOTE(review): confirm against the API.
PAGE_SIZE = 20

# Header labels for the jurisdiction columns, e.g. 'jurisdiction name'.
jurisdiction_field_names = tuple('jurisdiction {}'.format(f) for f in jurisdiction_fields)

# unicodecsv emits encoded bytes, so the file is opened in binary mode; the
# ``with`` block makes sure it is flushed and closed even if the export aborts.
with open('agency_stats.csv', 'wb') as csv_file:
    csv_writer = unicodecsv.writer(csv_file)
    csv_writer.writerow(fields + jurisdiction_field_names)

    # Follow the API's ``next`` links until the last page reports none.
    while next_ is not None:
        r = requests.get(next_, headers=headers)
        try:
            # Report HTTP failures as such rather than as a confusing
            # KeyError/ValueError from parsing an error body.
            r.raise_for_status()
            payload = r.json()
            next_ = payload['next']
            for datum in payload['results']:
                agency_values = [datum[field] for field in fields]
                jurisdiction = get_jurisdiction(datum['jurisdiction'])
                jurisdiction_values = [jurisdiction[field] for field in jurisdiction_fields]
                csv_writer.writerow(agency_values + jurisdiction_values)
            # Ceiling division: an exact multiple of PAGE_SIZE must not be
            # reported as having one extra page.
            total_pages = max(1, (payload['count'] + PAGE_SIZE - 1) // PAGE_SIZE)
            print('Page %d of %d' % (page, total_pages))
            page += 1
        except Exception as e:
            print('Error %s' % (e,))
            if not SUPRESS_ERRORS:
                raise
            # If the page itself failed to load, ``next_`` is unchanged and the
            # same URL is requested again; pause so this is not a tight loop.
            sleep(1)