-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathupdate_rdb.py
66 lines (55 loc) · 2.32 KB
/
update_rdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Given updates.csv, creates an updates.json file
After the json is created:
1. Install mongoimport (https://www.mongodb.com/docs/database-tools/installation/installation-macos/)
2. run the following command in terminal: mongoimport --uri 'mongodb+srv://rdbtest.hcn3xyq.mongodb.net/RDB?retryWrites=true&w=majority' --username='yura' --collection='listings' --file='updates.json' --jsonArray --mode=upsert
2.1. Need to get password from julian.lee@yale.edu
"""
import csv
import json
csvFilePath = 'updates.csv'
jsonFilePath = 'updates.json'
departmentsFilePath = 'valid_departments.txt'

# Spelling variants rewritten before matching. Order matters: '&' must become
# 'and' first so that the serial-comma entries below can then apply.
_REPLACEMENTS = {
    '&': 'and',
    'ethnicity, race, and migration': 'ethnicity, race and migration',
    'obstetrics, gynecology, and reproductive sciences': 'obstetrics, gynecology and reproductive sciences',
}


def _normalize(text):
    """Return *text* lower-cased with the _REPLACEMENTS rewrites applied."""
    text = text.lower()
    for old, new in _REPLACEMENTS.items():
        text = text.replace(old, new)
    return text


def _load_departments(path):
    """Read one department name per line from *path*.

    Blank lines are skipped: an empty name is a substring of every string and
    would otherwise be attached to every listing. Duplicates are dropped while
    keeping file order, so the output is the same on every run.
    """
    with open(path, encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return list(dict.fromkeys(name for name in stripped if name))


def _split_name(name):
    """Split a full name into ``(fname, lname)``.

    Accepts "Smith, John" as well as "John Smith". A single-word name is
    returned whole as the first name with an empty last name.
    """
    name = name.strip()
    if ', ' in name:
        lname, _, fname = name.partition(', ')
    else:
        fname, _, lname = name.partition(' ')
    return fname, lname


def _convert_row(row, departments):
    """Reshape one CSV row (a dict) in place into a listing document.

    *departments* is a list of ``(raw_name, normalized_name)`` pairs. The row
    gains 'fname', 'lname' and '_id' (taken from 'list_id'), has 'departments'
    replaced by the list of matching raw names, and loses 'name',
    'custom_desc' and 'list_id'. Any other columns are kept as they are.
    """
    #Split names
    fname, lname = _split_name(row['name'])
    row['fname'] = fname
    row['lname'] = lname

    #Split departments into a list
    # NOTE: this is a plain substring test, so a short department name also
    # matches when it appears inside a longer one.
    listed = _normalize(row['departments'])
    matched = [raw for raw, needle in departments if needle in listed]
    if not matched:
        print(f"failed to extract a department from {listed}")
    row['departments'] = matched

    del row['name']
    # The description is not exported; tolerate a CSV without that column.
    row.pop('custom_desc', None)
    row['_id'] = row.pop('list_id')
    return row


def main(csv_path=csvFilePath, json_path=jsonFilePath,
         departments_path=departmentsFilePath):
    """Convert the updates CSV into a JSON array of listing documents.

    Args:
        csv_path: CSV file to read; it must have 'name', 'departments' and
            'list_id' columns.
        json_path: file the JSON array is written to.
        departments_path: text file with one valid department name per line.

    Returns:
        The list of converted rows that was written to *json_path*.
    """
    department_names = _load_departments(departments_path)
    print(department_names)
    # Normalize each valid name once, the same way the CSV text is normalized,
    # so both sides of the comparison agree on '&' and serial commas.
    departments = [(name, _normalize(name)) for name in department_names]

    # Open a csv reader called DictReader
    with open(csv_path, encoding='utf-8') as csvf:
        data = [_convert_row(row, departments) for row in csv.DictReader(csvf)]

    # Open a json writer and dump the data
    with open(json_path, 'w', encoding='utf-8') as jsonf:
        json.dump(data, jsonf, indent=4)
    return data


if __name__ == "__main__":
    main()