@@ -115,21 +115,22 @@ def get_record_data(self, record):
115
115
):
116
116
return None
117
117
118
- title = attributes . get ( "summary_title" )
118
+ title = ScienceMuseumDataIngester . _get_first_list_value ( "title" , attributes )
119
119
creator = self ._get_creator_info (attributes )
120
120
metadata = self ._get_metadata (attributes )
121
121
images = []
122
122
for image_data in multimedia :
123
- if not (foreign_identifier := image_data .get ("admin" , {}).get ("uid" )):
123
+ if not (foreign_identifier := image_data .get ("@ admin" , {}).get ("uid" )):
124
124
continue
125
- processed = image_data .get ("processed" )
125
+ processed = image_data .get ("@ processed" )
126
126
if not isinstance (processed , dict ):
127
127
continue
128
128
(
129
129
url ,
130
130
height ,
131
131
width ,
132
132
filetype ,
133
+ filesize ,
133
134
) = self ._get_image_info (processed )
134
135
if not url :
135
136
continue
@@ -144,6 +145,7 @@ def get_record_data(self, record):
144
145
"height" : height ,
145
146
"width" : width ,
146
147
"filetype" : filetype ,
148
+ "filesize" : filesize ,
147
149
"license_info" : license_info ,
148
150
"creator" : creator ,
149
151
"title" : title ,
@@ -154,22 +156,18 @@ def get_record_data(self, record):
154
156
155
157
@staticmethod
156
158
def _get_creator_info (attributes ):
157
- creator_info = None
158
- if (life_cycle := attributes .get ("lifecycle" )) is not None :
159
- creation = life_cycle .get ("creation" )
160
- if isinstance (creation , list ):
161
- maker = creation [0 ].get ("maker" )
162
- if isinstance (maker , list ):
163
- creator_info = maker [0 ].get ("summary_title" )
164
- return creator_info
159
+ if not (maker := attributes .get ("creation" , {}).get ("maker" , [])):
160
+ return None
161
+
162
+ return maker [0 ].get ("summary" , {}).get ("title" , None )
165
163
166
164
@staticmethod
167
165
def check_url (url : str | None ) -> str | None :
168
166
if not url :
169
167
return None
170
168
if url .startswith ("http" ):
171
169
return url
172
- return f"https://coimages.sciencemuseumgroup.org.uk/images/ { url } "
170
+ return f"https://coimages.sciencemuseumgroup.org.uk/{ url } "
173
171
174
172
@staticmethod
175
173
def _get_dimensions (image_data : dict ) -> tuple [int | None , int | None ]:
@@ -191,15 +189,25 @@ def _get_dimensions(image_data: dict) -> tuple[int | None, int | None]:
191
189
@staticmethod
def _get_image_info(
    processed: dict,
) -> tuple[str | None, int | None, int | None, str | None, int | None]:
    """
    Extract the URL and basic file details of an image rendition.

    The ``large`` rendition is preferred; ``medium`` is used when ``large``
    is missing or empty. The URL is resolved through ``check_url``. When no
    URL can be determined, every other element of the result is ``None``.

    :param processed: mapping of rendition name to rendition data
    :return: ``(url, height, width, filetype, filesize)``; ``filesize`` is
        ``None`` when it is absent, zero or cannot be parsed as an integer
    """
    height, width, filetype, filesize = None, None, None, None

    # ``or {}`` also covers a rendition that is present but ``null``.
    image_data = processed.get("large") or processed.get("medium") or {}
    if not isinstance(image_data, dict):
        image_data = {}

    url = ScienceMuseumDataIngester.check_url(image_data.get("location"))
    if not url:
        return url, height, width, filetype, filesize

    filetype = image_data.get("format")
    height, width = ScienceMuseumDataIngester._get_dimensions(image_data)

    # Walk measurements -> filesize -> value defensively: any level may be
    # missing or ``null`` in the API response.
    measurements = image_data.get("measurements")
    size_info = measurements.get("filesize") if isinstance(measurements, dict) else None
    raw_size = size_info.get("value") if isinstance(size_info, dict) else None
    try:
        # A size of 0 carries no information, so it is reported as None.
        filesize = int(raw_size) or None
    except (TypeError, ValueError):
        # A malformed size must not prevent the image from being ingested.
        filesize = None

    return url, height, width, filetype, filesize
203
211
204
212
@staticmethod
205
213
def _get_first_list_value (key : str , attributes : dict ) -> str | None :
@@ -214,7 +222,7 @@ def _get_metadata(attributes):
214
222
for attr_key , metadata_key in [
215
223
("identifier" , "accession number" ),
216
224
("name" , "name" ),
217
- ("categories " , "category" ),
225
+ ("category " , "category" ),
218
226
("description" , "description" ),
219
227
]:
220
228
val = ScienceMuseumDataIngester ._get_first_list_value (attr_key , attributes )
@@ -223,7 +231,7 @@ def _get_metadata(attributes):
223
231
224
232
creditline = attributes .get ("legal" )
225
233
if isinstance (creditline , dict ):
226
- line = creditline .get ("credit_line " )
234
+ line = creditline .get ("credit " )
227
235
if line is not None :
228
236
metadata ["creditline" ] = line
229
237
@@ -233,9 +241,9 @@ def _get_metadata(attributes):
233
241
def _get_license_info (image_data ) -> LicenseInfo | None :
234
242
# some items do not return license anywhere, but in the UI
235
243
# they look like CC
236
- rights = image_data .get ("source" , {}). get ( " legal" , {}).get ("rights" )
244
+ rights = image_data .get ("legal" , {}).get ("rights" )
237
245
if isinstance (rights , list ):
238
- license_name = rights [0 ].get ("usage_terms " )
246
+ license_name = rights [0 ].get ("licence " )
239
247
if not license_name :
240
248
return None
241
249
license_name = license_name .lower ()
0 commit comments