-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathusmart.py
82 lines (72 loc) · 2.79 KB
/
usmart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
try:
from processor import Processor
except:
from .processor import Processor
class ProcessorUSMART(Processor):
    """Processor that flattens a USMART catalogue feed into CSV rows.

    Each dataset in the feed yields one output row per distinct file type
    found among its distributions.
    """

    # Licence URL that is abbreviated to "OGL3" in the output.
    _OGL3_URL = (
        "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
    )

    def __init__(self):
        super().__init__(type="USMART")

    def get_datasets(self, owner, start_url, fname):
        """Fetch the catalogue at ``start_url`` and write its rows to ``fname``.

        Args:
            owner: Value copied verbatim into the owner column of every row.
            start_url: URL handed to ``self.get_json``.
            fname: Destination handed to ``self.write_csv``.

        Returns:
            None. Nothing is written when ``get_json`` returns the string
            ``"NULL"`` (presumably its "no data" sentinel - confirm against
            ``Processor.get_json``).

        Raises:
            KeyError: If a dataset or distribution lacks one of the keys read
                here (only ``"keyword"`` is treated as optional).
        """
        # Use ``self`` rather than a module-level instance so the method works
        # on whichever object it is actually invoked on.
        data = self.get_json(start_url)
        if data == "NULL":
            return

        datasets = data["dataset"]
        print("Number of datasets: ", str(len(datasets)))

        prepped = []
        for dataset in datasets:
            prepped.extend(self._rows_for_dataset(owner, dataset))

        self.write_csv(fname, prepped)

    @staticmethod
    def _collect_filetypes(dataset):
        """Map each file type to ``[access_url, title]`` for one dataset.

        The file type is the part after ``/`` in ``mediaType`` when a slash is
        present, otherwise the whole ``mediaType`` string. When several
        distributions share a file type, the last one wins.
        """
        filetypes = {}
        for dist in dataset["distribution"]:
            media_type = dist["mediaType"]
            file_type = media_type.split("/")[1] if "/" in media_type else media_type
            filetypes[file_type] = [
                dist["accessURL"].replace(" ", "%20"),
                dist["title"],
            ]
        return filetypes

    def _rows_for_dataset(self, owner, dataset):
        """Build the list of output rows (one per file type) for one dataset."""
        title = dataset["title"]
        page_url = dataset["landingPage"].replace(" ", "%20")
        filetypes = self._collect_filetypes(dataset)
        date_created = dataset["createdAt"]
        date_updated = dataset["modified"]
        # The description is wrapped in literal double quotes in the output.
        description = '"' + dataset["description"] + '"'

        licence = dataset["licence"]
        if licence == self._OGL3_URL:
            licence = "OGL3"

        original_tags = " ".join(dataset["theme"])
        # A dataset without keywords gets a single space as its tag string.
        manual_tags = " ".join(dataset["keyword"]) if "keyword" in dataset else " "

        rows = []
        for file_type, (access_url, file_name) in filetypes.items():
            print(file_name)
            rows.append(
                [
                    title,
                    owner,
                    page_url,
                    access_url,
                    file_name,  # FileName
                    date_created,
                    date_updated,
                    "",  # left blank here
                    "",  # left blank here
                    file_type,
                    "",  # left blank here
                    original_tags,
                    manual_tags,
                    licence,
                    description,
                ]
            )
        return rows
# Module-level instance, created at import time.
processor = ProcessorUSMART()

if __name__ == "__main__":
    # Start processing only when this file is executed directly as a script.
    processor.process()