# run.py
import requests, json, sys
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

import mapping.mapping as m
import conf as c

# shared session that retries transient server errors
REQ_SESSION = requests.Session()
retries = Retry(total=10, backoff_factor=1, status_forcelist=[502, 503, 504, 524])
REQ_SESSION.mount('http://', HTTPAdapter(max_retries=retries))
#REQ_SESSION.mount('https://', HTTPAdapter(max_retries=retries))

# Omeka S API credentials, sent as query parameters on every write request
params = {
    'key_identity': c.CONF["KEY_IDENTITY"],
    'key_credential': c.CONF["KEY_CREDENTIALS"]
}
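# conf.py is not shown here; based on how it is used below, it is assumed to expose
# at least the following names (a hypothetical sketch, not the actual file):
#
#   CONF = {
#       "OMEKA_API_URL": "https://example.org/api",   # no trailing slash
#       "KEY_IDENTITY": "...",                        # Omeka S API key identity
#       "KEY_CREDENTIALS": "...",                     # Omeka S API key credential
#   }
#   VOCABULARIES_INDEX = "vocabularies.json"
#   ITEM_SETS = ["..."]                               # titles of the item sets to create
#   ITEM_SETS_INDEX = "item_sets.json"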
#print("\nc.CONF",c.CONF)
print("\n")
########################
############ manual setup
########################
# 1. upload the ontologies
# 2. upload the custom controlled vocabularies, using the same names as in vocabularies.json
# 3. copy the ids of the vocabularies into vocabularies.json and substitute the IDs in the "templates" folder (except for City, District, Country)
# 4. reconcile City, District, and Country against GeoNames and save the mappings in vocabularies.json (see the sketch after this list)
# 5. upload the templates in which the vocabularies are already selected (when importing into a new instance, check that the controlled-vocabulary ids match the correct numbers)
# 6. download the Google Spreadsheet tables as TSV into the "tables" folder
# !!! double-check the API url and credentials in conf.py
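# A minimal sketch of the assumed shape of vocabularies.json after steps 2-4
# (hypothetical values, not taken from the repository): vocabulary names mapped to
# the ids Omeka assigned on upload, plus the GeoNames mappings for the place fields.
#
#   {
#       "Language": 12,
#       "City": {"Bologna": "https://sws.geonames.org/3181928/"}
#   }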
########################
############ import
########################
# 7. call the properties, classes, and resource templates APIs, store the ids in dedicated
#    json files, and return the dictionaries 'resource_classes', 'properties', and 'resource_templates'
# --- e.g. dict_ids["resource_classes"] -> a dict of all the resource classes in Omeka
print("Get 'resource_classes', 'properties', and 'resource_templates' from Omeka ...", flush=True)
dict_ids = m.get_ids(c.CONF["OMEKA_API_URL"], ['resource_classes', 'properties', 'resource_templates'])
print("-> Done\n", flush=True)
# 8. open vocabularies.json
with open(c.VOCABULARIES_INDEX) as json_file:
    vocabularies_ids = json.load(json_file)
# 9, 10, 11. create the item sets (if missing) and collect their ids
print("Add item-sets ...", flush=True)
all_item_sets = dict()
response = REQ_SESSION.get('{}/item_sets/'.format(c.CONF["OMEKA_API_URL"]), verify=False)
for an_items_set in response.json():
    all_item_sets[an_items_set["o:title"]] = an_items_set["o:id"]
my_item_sets = {}
for itemset_val in c.ITEM_SETS:
    if itemset_val not in all_item_sets:
        payload = m.prepare_item_set(itemset_val, dict_ids["properties"])
        response = REQ_SESSION.post('{}/item_sets/'.format(c.CONF["OMEKA_API_URL"]), json=payload, params=params, verify=False)
        if response.status_code == 200:
            omeka_res_data = response.json()
            print(omeka_res_data)
            itemset_id = omeka_res_data['o:id']
        else:
            # an error occurred: STOP the process
            sys.exit("Error while creating the item set!")
    else:
        itemset_id = all_item_sets[itemset_val]
    my_item_sets[itemset_val] = itemset_id
with open(c.ITEM_SETS_INDEX, "w") as itemsets_file:
    itemsets_file.write(json.dumps(my_item_sets))
print("-> Done\n", flush=True)
## -------
## CREATE
## -------
print("Add items ...", flush=True)
# 12. query the tables and create the payloads, substituting all property, class, and
#     vocabulary IDs with the ones mapped in the json files
data = m.read_tables(dict_ids["properties"], dict_ids["resource_classes"], dict_ids["resource_templates"], vocabularies_ids, "create")
# 13. iterate over the payloads and upload them
print("-> add [", len(data), "] item/s to Omeka", flush=True)
print("-> update the OmekaS server", flush=True)
count_done = 0
for payload in data:
    # debug: show the payload being sent
    print("\npayload\n")
    print(payload)
    response = REQ_SESSION.post('{}/items/'.format(c.CONF["OMEKA_API_URL"]), json=payload, params=params, verify=False)
    count_done += 1
    sys.stdout.write('\r-> %d uploaded' % count_done)
    sys.stdout.flush()
# 14. dump all the items to "created_items.json" as a backup
print("-> backup all the items", flush=True)
dataset = m.get_from_omeka(c.CONF["OMEKA_API_URL"], "items")
m.backup_items(dataset)
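# For reference, an Omeka S item payload has roughly the following shape; the values
# below are illustrative and are not taken from mapping.py (property_id 1 happens to
# be dcterms:title in a default Omeka S install):
#
#   {
#       "o:resource_class": {"o:id": 21},
#       "o:resource_template": {"o:id": 3},
#       "o:item_set": [{"o:id": 1}],
#       "dcterms:title": [
#           {"type": "literal", "property_id": 1, "@value": "An example title"}
#       ]
#   }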
## -------
## LOOKUP
## -------
print("Items lookup ...", flush=True)
# 15. look up certain tables/rows following the rules in mapping.json; if the entities do not exist, create them
print("-> read the tables", flush=True)
data = m.read_tables(dict_ids["properties"], dict_ids["resource_classes"], dict_ids["resource_templates"], vocabularies_ids, "lookup")
# 16. iterate over the payloads and upload them
print("-> add [", len(data), "] new item/s to Omeka", flush=True)
print("-> update the OmekaS server", flush=True)
count_done = 0
for payload in data:
    response = REQ_SESSION.post('{}/items/'.format(c.CONF["OMEKA_API_URL"]), json=payload, params=params, verify=False)
    count_done += 1
    sys.stdout.write('\r-> %d uploaded' % count_done)
    sys.stdout.flush()
# 17. dump all the items to "created_items.json" as a backup
print("-> backup all the items", flush=True)
dataset = m.get_from_omeka(c.CONF["OMEKA_API_URL"], "items")
m.backup_items(dataset)
print("-> Done\n", flush=True)
## -------
## UPDATE
## -------
# 18. update the data
print("Update items ...", flush=True)
updated_data = m.read_tables(dict_ids["properties"], dict_ids["resource_classes"], dict_ids["resource_templates"], vocabularies_ids, "update")
print("-> update the OmekaS server", flush=True)
print("-> update [", len(updated_data), "] item/s", flush=True)
count_done = 0
for update_payload in updated_data:
    res_id = update_payload["o:item"][0]["o:id"]
    # get the current payload of the item to be updated
    response = REQ_SESSION.get('{}/items/{}'.format(c.CONF["OMEKA_API_URL"], res_id), verify=False)
    current_data = response.json()
    # remove the temporary ("tmp") relations
    clean_data = {k: v for (k, v) in current_data.items() if "tmp" not in k}
    # add the new relations
    for k, v in update_payload.items():
        clean_data[k] = v
    # upload the merged payload (PUT replaces the whole item representation)
    resp = REQ_SESSION.put('{}/items/{}'.format(c.CONF["OMEKA_API_URL"], res_id), json=clean_data, params=params, verify=False)
    count_done += 1
    sys.stdout.write('\r-> %d uploaded' % count_done)
    sys.stdout.flush()
print("-> data updated!", flush=True)
# 19. dump all the items to "created_items.json" as a backup
print("-> backup all the items", flush=True)
dataset = m.get_from_omeka(c.CONF["OMEKA_API_URL"], "items")
m.backup_items(dataset)
print("-> Done\n", flush=True)
# TODO
# 20. remove temporary properties (or not?)