-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path4.Extraction.py
43 lines (33 loc) · 1.39 KB
/
4.Extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Created on Sun Nov 24 2019
@author: Bilal Hayat Butt, Sufyan Faizi
@Description: Reads the JSON files created by 'JsonDump.py', extracts the authors of each record, and writes them to a CSV file for further analysis.
"""
import json,os,sys
def read_file_count(count_path):
    """Return the number of JSON dump files recorded in the count file.

    The value on the last non-blank line is used, which matches the
    original loop that kept the final value it read. Blank lines (for
    example a trailing newline at the end of the file) are skipped.

    Args:
        count_path: Path to the 'Req_count.txt' file.

    Returns:
        The file count as an int.

    Raises:
        ValueError: If the file holds no non-blank line, or a non-blank
            line is not a valid integer.
    """
    file_count = None
    with open(count_path, 'r') as req_count_file:
        for line in req_count_file:
            if line.strip():
                file_count = int(line)
    if file_count is None:
        raise ValueError('No request count found in ' + count_path)
    return file_count


def format_record(record):
    """Build the CSV line for one record.

    The line has the form 'DOI,given family,given family,...' followed by
    a newline. A record with an empty author list yields just the DOI.

    Args:
        record: One element of Data['message']['items'].

    Returns:
        The line as a string, or None when the record lacks 'author' or
        'DOI', or any author lacks 'given' or 'family'.
    """
    try:
        authors = ''.join(
            ',' + author['given'] + ' ' + author['family']
            for author in record['author']
        )
        return record['DOI'] + authors + '\n'
    except KeyError:
        # Best-effort extraction: incomplete records are skipped, not fatal.
        return None


def extract_authors(folder, output_name):
    """Write the DOI/author CSV for every JSON dump file in a folder.

    All paths are resolved against the current working directory:
    '<cwd>/<folder>/Req_count.txt' gives the number of dump files,
    '<cwd>/<folder>/Metadata<i>_<folder>.json' are the inputs, and
    '<cwd>/<folder>/<output_name>.csv' is the output.

    Args:
        folder: Sub-directory name; also used as the suffix of the JSON
            file names.
        output_name: Name of the CSV file, without the '.csv' extension.
    """
    base_dir = os.path.join(os.getcwd(), folder)
    csv_path = os.path.join(base_dir, output_name + '.csv')
    # Read the count first so a bad count file does not clobber the CSV.
    file_count = read_file_count(os.path.join(base_dir, 'Req_count.txt'))

    # 'with' guarantees the CSV is closed even if a JSON file is missing
    # or malformed part-way through the run.
    with open(csv_path, 'w', encoding='utf-8') as doi_author_file:
        for index in range(file_count):
            json_path = os.path.join(
                base_dir, 'Metadata' + str(index) + '_' + folder + '.json')
            # NOTE(review): opened with the platform default encoding, as
            # before; presumably JsonDump.py writes ASCII-safe JSON - confirm.
            with open(json_path, 'r') as json_file:
                data = json.load(json_file)
            for record in data['message']['items']:
                line = format_record(record)
                if line is not None:
                    doi_author_file.write(line)

    # Re-write the CSV in the platform default encoding, dropping any
    # character that encoding cannot represent (errors='ignore').
    with open(csv_path, 'r', encoding='utf-8') as csv_in:
        content = csv_in.read()
    with open(csv_path, 'w', errors='ignore') as csv_out:
        csv_out.write(content)


def main(argv):
    """Run the extraction using command-line style arguments.

    Args:
        argv: Argument list where argv[1] is the output CSV name (without
            extension) and argv[2] is the folder holding the JSON files.
    """
    if len(argv) < 3:
        sys.exit('Usage: python 4.Extraction.py <output_name> <folder>')
    extract_authors(argv[2], argv[1])


if __name__ == '__main__':
    main(sys.argv)