-
Notifications
You must be signed in to change notification settings - Fork 0
/
minutes.py
41 lines (31 loc) · 967 Bytes
/
minutes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import glob
import json
import logging
from pathlib import Path
import pandas as pd
from mylib.canonicalize import extract_issue
LOGGER = logging.getLogger(__name__)

# Column order of the output CSV. Passing this explicitly to the DataFrame
# keeps the header row present even when no input files are found.
_CSV_COLUMNS = ('minutes_id', 'session', 'house', 'meeting', 'issue', 'date')


def _load_record(fp):
    """Read one minutes JSON file and flatten it into a CSV row dict.

    Raises:
        KeyError: if the JSON object lacks one of the expected keys.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # JSON interchange files are UTF-8; do not rely on the platform locale,
    # which may be a different codec and corrupt or reject non-ASCII text.
    with open(fp, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return {
        'minutes_id': data['issueID'],
        'session': data['session'],
        'house': data['nameOfHouse'],
        'meeting': data['nameOfMeeting'],
        # extract_issue is a project helper (mylib.canonicalize); its output
        # is stored as-is.
        'issue': extract_issue(data['issue']),
        'date': data['date'],
    }


def main(json_direc, csv_fp):
    """Collect every ``*.json`` file in a directory into a single CSV.

    Args:
        json_direc: Directory containing the minutes JSON files
            (non-recursive; only ``*.json`` entries are read).
        csv_fp: Path of the CSV file to write. Overwritten if it exists.

    The CSV always has the columns listed in ``_CSV_COLUMNS``, one row per
    input file, with rows ordered by input file path.
    """
    # glob returns entries in arbitrary order; sort so the output CSV is
    # reproducible between runs and machines.
    json_fps = sorted(glob.glob(str(Path(json_direc) / '*.json')))
    records = [_load_record(fp) for fp in json_fps]

    df = pd.DataFrame(records, columns=list(_CSV_COLUMNS))
    df.to_csv(csv_fp, index=False)
    LOGGER.info('saved %d records to %s', len(df), csv_fp)
if __name__ == '__main__':
    # Run as a script: enable INFO-level logging so the summary message from
    # main() is shown, then convert the default input directory to a CSV.
    logging.basicConfig(level=logging.INFO)
    main(json_direc='./out/minutes', csv_fp='./out/minutes.csv')