-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrab_zotero_metadata.py
303 lines (259 loc) · 13.3 KB
/
grab_zotero_metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
"""
Small script to load necessary libraries and match google scholar entries to Web of Science entries using WOS api
"""
import requests
import pandas as pd
from pyzotero import zotero
import io
import os
#import json
#import time
# Zotero collection keys, grouped by the sheet column whose labels they
# classify: "animals" (species / tissue), "pub" (publication type), "area"
# (subject area) and "cell" (cell type). Within a group, each label maps to
# the key of the Zotero collection that matching items are added to.
collections = {
    "animals": {
        "amphibian": "I5YWZIRR",
        "bird": "ZRC54PQF",
        "cell culture": "KMUTY94V",
        "organoid": "BHNIC5FW",
        "fish: any other": "V46CQ3NS",
        "fish: zebrafish": "22IG9MCX",
        "fish: other teleost": "BB7H8KCR",
        "mammals: mouse": "JWRUTIIE",
        "mammals: non-placental": "Z4IN47WJ",
        "mammals: other placental": "MMU8BX4C",
        "mammals: other rodent": "8A9R3YVS",
        "mammals: human": "5NKZQ2X4",
        "mammals: non-human primate": "HJ7UFULD",
        "other": "WDSXVYN6",
        "reptile": "W4T7HAGP",
    },
    "pub": {
        "book chapter": "IL925QCT",
        "dispatch or similar": "PQQDDZX4",
        "methods article (peer reviewed)": "2JQYH89M",
        "other": "4QEYI2KS",
        "preprint": "UAYZHHPY",
        "research article (peer reviewed)": "N3H5YAW3",
        "resource/database": "EJ2U2JHB",
        "review (peer reviewed)": "UF9V5CB6",
    },
    "area": {
        "computation": "QVN2AP8C",
        "development": "7CALX6AJ",
        "function": "ICYG3YV6",
        "injury/disease/regeneration": "NI8DPUXS",
        "molecular": "B9M3K8VL",
        "other": "ZWVM2ZIS",
        "structure": "R5ZD6ZLM",
        "tool development: biological": "WVKZJ7M5",
        "tool development: hardware": "IFSBYJPB",
        "tool development: software": "48T6FHZ2",
    },
    # NOTE: unlike the other groups, the cell-type labels are capitalised.
    "cell": {
        "Amacrine cells": "2ECXQ9WE",
        "Bipolar cells": "78A6KXVR",
        "Cortex and related": "BLSPC9W3",
        "Glia": "B9QIZAVV",
        "Horizontal cells": "M6CLZYGV",
        "Other": "UZD6C2J9",
        "Photoreceptors": "66QHAKSP",
        "Ganglion cells": "8QY5WMJH",
        "Superior colliculus/tectum and related": "J7U6BRBG",
        "Thalamus and related": "A8XHEV8F",
    },
}
# Step 1: Load the article list from the Google Sheet export into a DataFrame.
# NOTE(review): everything after "#" is a URL fragment and is not sent to the
# server, so the gid is presumably not selecting a tab here -- confirm that the
# sheet pandas reads from the exported workbook is the intended one.
sheet_url = (
    "https://docs.google.com/spreadsheets/d/"
    "15jE_Hc_otR_OvAkRy_aseGhCaO7c29M8DKyNuso6dyU/export#gid=693684877"
)
# The first row of the sheet supplies the column names.
garticles = pd.read_excel(sheet_url, header=0)
# Step 2: Connect to Zotero
# The API key is read from the first line of the local file "zotero_api".
with open("zotero_api", "r") as fid:
    # strip() drops the trailing newline (and stray whitespace) without
    # cutting off the last character of the key when the file does not end
    # with a newline, which the previous api_key[:-1] slice did.
    api_key = fid.readline().strip()
library_id = "4584648"
library_type = "group"
#api_key = "your_zotero_api_key"
zot = zotero.Zotero(library_id, library_type, api_key)
# The Zotero API serves at most 100 items per request, so a single
# top(limit=500) call leaves larger libraries truncated and items that are
# already present look missing. everything() keeps following the pagination
# links until all top-level items have been retrieved.
zotero_group_items = zot.everything(zot.top())
def _normalize_doi(value):
    """Return a canonical form of a DOI for comparison, or None if it is missing.

    Surrounding whitespace and a leading resolver prefix (e.g.
    "https://doi.org/") are removed and the result is lower-cased, because
    DOI names are case-insensitive. None, NaN and blank values give None.
    """
    if pd.isna(value):
        return None
    doi = str(value).strip().lower()
    for prefix in (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ):
        if doi.startswith(prefix):
            doi = doi[len(prefix):].strip()
            break
    return doi or None


# Extract DOIs from the zotero group
zotero_dois = [item['data'].get('DOI', None) for item in zotero_group_items]
g_doi_column = 'Identifier (DOI, ISBN, PMID, arXiv ID). If unknown, please query CrossRef: https://www.crossref.org/guestquery'
# Step 3: Find the DOIs in the Google Sheet not in the Zotero group
google_sheet_dois = garticles[g_doi_column].tolist()
# Compare normalized DOIs: the sheet may carry the "https://doi.org/" prefix,
# different letter case or stray whitespace, and a raw comparison would then
# treat an already-imported paper as missing and add it again.
_known_dois = {_normalize_doi(doi) for doi in zotero_dois} - {None}
_sheet_keys = garticles[g_doi_column].map(_normalize_doi)
# Rows without an identifier are left out; there is nothing to look up for them.
# NOTE(review): going by the column title, the sheet also accepts ISBN, PMID
# and arXiv identifiers. Those cannot match Zotero's DOI field, so such rows
# are presumably selected again on every run -- confirm how they should be handled.
_is_missing = _sheet_keys.notna() & ~_sheet_keys.isin(_known_dois)
# Unique raw identifiers of the rows to add, in sheet order.
dois_to_add = list(dict.fromkeys(garticles.loc[_is_missing, g_doi_column]))
items_to_add = garticles.loc[_is_missing]
# Step 4: Add these papers to Zotero subgroups
# Turning an identifier into item metadata needs a running Zotero translation
# server. With Docker installed it can be started from the command line:
#docker pull zotero/translation-server
#docker run -d -p 1969:1969 --rm --name translation-server zotero/translation-server
#KPVJRAE9:14:Animal model;I2DXCS7M:11:Publication type;NC2HRG88:12:Main Areas;BXSDS9PK:13:Cell types
# Identifier lookups (DOI, ISBN, PMID, arXiv ID) are served by the server's
# /search endpoint; the bare host URL has no handler for them, so the path
# must be part of the URL (as it already is in the hosted alternative below).
url = "http://127.0.0.1:1969/search"  # zotero translator server running locally
#url = "https://zotero.retina-hub.org/search"
headers = {"content-type": "text/plain", "Accept-Charset": "UTF-8"}
# r = requests.post(url=url, data=dois[0], headers=headers)
# now add entries to the zotero collection, add the type of OA to tags
# Sheet columns that hold comma-separated labels, paired with the group of
# `collections` their labels are looked up in. The order is the order in
# which an item is filed.
_LABEL_COLUMNS = (
    ("animals", "Species / tissue"),
    ("pub", "Type"),
    ("area", "Subject areas"),
    ("cell", "Cell types"),
)
# Label spellings that differ from the collection name they refer to.
_LABEL_ALIASES = {"animals": {"reptiles": "reptile"}}
_DOI_URL_PREFIX = "https://doi.org/"


def _split_labels(value):
    """Split a comma-separated sheet cell into trimmed, lower-cased labels.

    Empty cells (None/NaN) and blank fragments produce no labels, so a
    missing value no longer raises on ``.split``.
    """
    if pd.isna(value):
        return []
    return [part.strip().lower() for part in str(value).split(",") if part.strip()]


def _collection_keys(labels, group):
    """Return the Zotero collection keys for ``labels`` within ``group``.

    Labels are matched case-insensitively against the names in
    ``collections[group]``. A label that matches nothing is printed and
    mapped to the group's "other" collection. Each key is returned once,
    in first-seen order, so an item is never added to a collection twice.
    """
    by_label = {name.lower(): key for name, key in collections[group].items()}
    aliases = _LABEL_ALIASES.get(group, {})
    keys = []
    for label in labels:
        label = aliases.get(label, label)
        key = by_label.get(label)
        if key is None:
            # Report labels that had to fall back to "other".
            print(label)
            key = by_label["other"]
        if key not in keys:
            keys.append(key)
    return keys


# For every sheet row that is missing from Zotero: resolve its identifier to
# item metadata via the translation server, create the item in the group
# library, then file it into the collections named by the row's labels.
for idx, row in items_to_add.iterrows():
    entry = row[g_doi_column]
    if pd.isna(entry) or str(entry) == "nan":
        continue
    print(idx)
    entry = str(entry).strip()
    #correct for wrong entries that have "https://doi.org/ starting the DOI"
    if entry.startswith(_DOI_URL_PREFIX):
        entry = entry[len(_DOI_URL_PREFIX):]
    r = requests.post(url=url, data=entry, headers=headers, timeout=60)
    try:
        if not r.ok:
            # The server could not resolve this identifier; skip the row
            # instead of failing later while decoding a non-JSON error body.
            print(entry, "translation server returned status", r.status_code, "- skipped")
            continue
        temp = r.json()
    finally:
        r.close()
    if not temp:
        print(entry, "no metadata returned by the translation server - skipped")
        continue
    #temp[0]["tags"].append(articles["oa_status"][idx])
    result = zot.create_items(temp)
    print(result)
    created = result.get("successful", {}).get("0")
    if created is None:
        # Creation was rejected (details are in the printed result); there
        # is no item key to file under any collection.
        print(entry, "was not created in Zotero - skipped")
        continue
    entry_key = created["key"]
    for group, column in _LABEL_COLUMNS:
        for collection_key in _collection_keys(_split_labels(row[column]), group):
            # The item is fetched again before every call, as before, so that
            # each update starts from the item's current state (its list of
            # collections changes with every add).
            zot.addto_collection(collection_key, zot.item(entry_key))
#other
#with open(dataPath + "zotMeta.json", "w") as fid:
# json.dump(allMeta, fid)