Skip to content

Commit b5a1ae0

Browse files
authored
Update Science Museum ingester with API changes (#4105)
* Update ScienceMuseum ingester to use new API
* Update tests
* Update URL format
* Fix URLs in tests
* Add filesize
1 parent c9c72fd commit b5a1ae0

File tree

12 files changed

+2475
-4253
lines changed

12 files changed

+2475
-4253
lines changed

catalog/dags/providers/provider_api_scripts/science_museum.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -115,21 +115,22 @@ def get_record_data(self, record):
115115
):
116116
return None
117117

118-
title = attributes.get("summary_title")
118+
title = ScienceMuseumDataIngester._get_first_list_value("title", attributes)
119119
creator = self._get_creator_info(attributes)
120120
metadata = self._get_metadata(attributes)
121121
images = []
122122
for image_data in multimedia:
123-
if not (foreign_identifier := image_data.get("admin", {}).get("uid")):
123+
if not (foreign_identifier := image_data.get("@admin", {}).get("uid")):
124124
continue
125-
processed = image_data.get("processed")
125+
processed = image_data.get("@processed")
126126
if not isinstance(processed, dict):
127127
continue
128128
(
129129
url,
130130
height,
131131
width,
132132
filetype,
133+
filesize,
133134
) = self._get_image_info(processed)
134135
if not url:
135136
continue
@@ -144,6 +145,7 @@ def get_record_data(self, record):
144145
"height": height,
145146
"width": width,
146147
"filetype": filetype,
148+
"filesize": filesize,
147149
"license_info": license_info,
148150
"creator": creator,
149151
"title": title,
@@ -154,22 +156,18 @@ def get_record_data(self, record):
154156

155157
@staticmethod
156158
def _get_creator_info(attributes):
    """Return the title of the first listed maker, or None if unavailable.

    Reads ``attributes["creation"]["maker"][0]["summary"]["title"]``.
    Every level is type-checked, so a missing key, a JSON ``null`` or an
    unexpected shape yields ``None`` instead of raising.

    :param attributes: the record's attribute dictionary.
    :return: the maker's title, or ``None`` when it cannot be found.
    """
    creation = attributes.get("creation")
    if not isinstance(creation, dict):
        return None

    maker = creation.get("maker")
    # Only a non-empty list can be indexed safely below.
    if not isinstance(maker, list) or not maker:
        return None

    first_maker = maker[0]
    if not isinstance(first_maker, dict):
        return None

    summary = first_maker.get("summary")
    if not isinstance(summary, dict):
        return None

    return summary.get("title")
165163

166164
@staticmethod
167165
def check_url(url: str | None) -> str | None:
168166
if not url:
169167
return None
170168
if url.startswith("http"):
171169
return url
172-
return f"https://coimages.sciencemuseumgroup.org.uk/images/{url}"
170+
return f"https://coimages.sciencemuseumgroup.org.uk/{url}"
173171

174172
@staticmethod
175173
def _get_dimensions(image_data: dict) -> tuple[int | None, int | None]:
@@ -191,15 +189,25 @@ def _get_dimensions(image_data: dict) -> tuple[int | None, int | None]:
191189
@staticmethod
192190
def _get_image_info(
    processed: dict,
) -> tuple[str | None, int | None, int | None, str | None, int | None]:
    """Extract url, height, width, filetype and filesize for one image.

    The ``large`` rendition is preferred, with ``medium`` as the fallback.
    All values other than the URL are only looked up once a usable URL
    has been found.

    :param processed: the image's processed-renditions dictionary.
    :return: ``(url, height, width, filetype, filesize)``; each element is
        ``None`` when it is not available.
    """
    height, width, filetype, filesize = None, None, None, None
    image_data = processed.get("large") or processed.get("medium")
    # A key that is present but null (or otherwise not a dict) must not
    # break the attribute lookups below.
    if not isinstance(image_data, dict):
        return None, height, width, filetype, filesize

    url = ScienceMuseumDataIngester.check_url(image_data.get("location"))
    if url:
        filetype = image_data.get("format")
        height, width = ScienceMuseumDataIngester._get_dimensions(image_data)

        measurements = image_data.get("measurements")
        size_info = (
            measurements.get("filesize") if isinstance(measurements, dict) else None
        )
        raw_size = size_info.get("value") if isinstance(size_info, dict) else None
        try:
            # A size of 0 carries no information, so report it as missing.
            filesize = int(raw_size) or None
        except (TypeError, ValueError):
            # Null or non-numeric value: treat the size as unknown.
            filesize = None

    return url, height, width, filetype, filesize
203211

204212
@staticmethod
205213
def _get_first_list_value(key: str, attributes: dict) -> str | None:
@@ -214,7 +222,7 @@ def _get_metadata(attributes):
214222
for attr_key, metadata_key in [
215223
("identifier", "accession number"),
216224
("name", "name"),
217-
("categories", "category"),
225+
("category", "category"),
218226
("description", "description"),
219227
]:
220228
val = ScienceMuseumDataIngester._get_first_list_value(attr_key, attributes)
@@ -223,7 +231,7 @@ def _get_metadata(attributes):
223231

224232
creditline = attributes.get("legal")
225233
if isinstance(creditline, dict):
226-
line = creditline.get("credit_line")
234+
line = creditline.get("credit")
227235
if line is not None:
228236
metadata["creditline"] = line
229237

@@ -233,9 +241,9 @@ def _get_metadata(attributes):
233241
def _get_license_info(image_data) -> LicenseInfo | None:
234242
# some items do not return license anywhere, but in the UI
235243
# they look like CC
236-
rights = image_data.get("source", {}).get("legal", {}).get("rights")
244+
rights = image_data.get("legal", {}).get("rights")
237245
if isinstance(rights, list):
238-
license_name = rights[0].get("usage_terms")
246+
license_name = rights[0].get("licence")
239247
if not license_name:
240248
return None
241249
license_name = license_name.lower()

catalog/tests/dags/providers/provider_api_scripts/resources/sciencemuseum/large_image.json

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
{
22
"large": {
3+
"resizable": true,
4+
"@type": "image",
35
"format": "jpeg",
6+
"modified": 1472014477000,
47
"location": "3/563/large_1999_0299_0001__0002_.jpg",
58
"location_is_relative": true,
69
"measurements": {
10+
"filesize": {
11+
"units": "bytes",
12+
"value": 58772
13+
},
714
"dimensions": [
815
{
916
"dimension": "height",
@@ -16,9 +23,6 @@
1623
"value": 1536
1724
}
1825
]
19-
},
20-
"modified": 1472014477000,
21-
"resizable": true,
22-
"type": "image"
26+
}
2327
}
2428
}

catalog/tests/dags/providers/provider_api_scripts/resources/sciencemuseum/measurements.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
{
22
"measurements": {
3+
"filesize": {
4+
"units": "bytes",
5+
"value": 58772
6+
},
37
"dimensions": [
48
{
59
"dimension": "height",

catalog/tests/dags/providers/provider_api_scripts/resources/sciencemuseum/medium_image.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
{
22
"medium": {
3+
"resizable": true,
4+
"@type": "image",
35
"format": "jpeg",
6+
"modified": 1472014477000,
47
"location": "3/563/medium_1999_0299_0001__0002_.jpg",
58
"location_is_relative": true,
69
"measurements": {
@@ -16,9 +19,6 @@
1619
"value": 866
1720
}
1821
]
19-
},
20-
"modified": 1472014477000,
21-
"resizable": true,
22-
"type": "image"
22+
}
2323
}
2424
}
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"accession number": "1999-299/1",
3-
"category": "SCM - Smoking",
4-
"creditline": "McPhail, Mary",
5-
"description": "Packet of 10 'Gold Flake' cigarettes by W D & HO Wills, England, 1920-1950",
6-
"name": "cigarette packet"
2+
"accession number": "A67864",
3+
"category": "SCM - Classical & Medieval Medicine",
4+
"description": "Small bronze amulet, possibly phallic, or stomach, probably Roman, from Italy, 200BC-200AD",
5+
"name": "amulet",
6+
"creditline": "Arte Antica e Moderna"
77
}

0 commit comments

Comments
 (0)