-
Notifications
You must be signed in to change notification settings - Fork 0
/
magazine_authors_name_parser.py
43 lines (34 loc) · 1.11 KB
/
magazine_authors_name_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Given a raw Drupal export of Magazine Authors,
parse the author name into given and family name columns.
"""
import csv
INPUT_CSV = "magazine_authors_uncleaned.csv"
OUTPUT_CSV = "magazine_authors_parsed.csv"
FIELDNAMES = [
    "given_name",
    "family_name",
    "drupal_full_name",
    "drupal_author_id",
]


def split_full_name(full_name):
    """Split a full name into a ``(given_name, family_name)`` tuple.

    The last whitespace-separated word is treated as the family name and
    everything before it as the given name. A single-word name is returned
    as the family name with an empty given name; an empty, blank, or
    ``None`` name yields two empty strings.

    >>> split_full_name("Mary Ann Smith")
    ('Mary Ann', 'Smith')
    >>> split_full_name("Jane  Doe ")
    ('Jane', 'Doe')
    """
    # split() with no separator splits on runs of any whitespace and drops
    # leading/trailing whitespace, so stray spaces in the export cannot
    # produce an empty family name or padding inside the given name.
    # The `or [""]` fallback keeps the unpacking valid for blank names.
    *given_parts, family_name = (full_name or "").split() or [""]
    return " ".join(given_parts), family_name


def parse_authors(rows):
    """Build the output rows for an iterable of author mappings.

    Each input row must provide the keys ``drupal_full_name`` and
    ``drupal_author_id``; any other keys are ignored. The returned list
    holds one dict per input row with the keys listed in ``FIELDNAMES``.
    The original ``drupal_full_name`` value is passed through unchanged.

    Raises:
        KeyError: if a row lacks one of the two required keys.
    """
    parsed = []
    for row in rows:
        full_name = row["drupal_full_name"]
        given_name, family_name = split_full_name(full_name)
        parsed.append(
            {
                "given_name": given_name,
                "family_name": family_name,
                "drupal_full_name": full_name,
                "drupal_author_id": row["drupal_author_id"],
            }
        )
    return parsed


def main():
    """Read ``INPUT_CSV``, parse every author name, and write ``OUTPUT_CSV``."""
    # NOTE(review): both files are opened with the platform default
    # encoding, as before. Confirm the encoding of the Drupal export and
    # pass encoding= explicitly if it is known.
    # newline="" lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(INPUT_CSV, "r", newline="") as authors_csv:
        parsed_authors = parse_authors(csv.DictReader(authors_csv))

    # The input is fully parsed before the output is opened, so a failure
    # while reading never leaves a partially written output file.
    with open(OUTPUT_CSV, "w", newline="") as parsed_authors_csv:
        writer = csv.DictWriter(parsed_authors_csv, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(parsed_authors)


if __name__ == "__main__":
    main()