From 1535da4745c72fe2891e87cc0e5fcf25333a414d Mon Sep 17 00:00:00 2001 From: Lucas Thurston Date: Mon, 4 Sep 2023 12:09:26 -0700 Subject: [PATCH] Address Chapman validation issues --- metadata_mapper/mappers/oai/chapman_mapper.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/metadata_mapper/mappers/oai/chapman_mapper.py b/metadata_mapper/mappers/oai/chapman_mapper.py index 17985fd7a..7be14c182 100644 --- a/metadata_mapper/mappers/oai/chapman_mapper.py +++ b/metadata_mapper/mappers/oai/chapman_mapper.py @@ -5,16 +5,13 @@ class ChapmanRecord(OaiRecord): - """Mapping discrepancies: - - * `type` field for images contains "Image" in Solr, but "text" in mapped data - - """ - def UCLDC_map(self): return { 'description': self.map_description, - 'identifier': self.map_identifier + 'identifier': self.map_identifier, + "source": self.source_metadata.get("collection_name"), + "spatial": self.map_spatial, + "language": self.source_metadata.get("language") } def map_is_shown_at(self) -> Union[str, None]: @@ -41,13 +38,26 @@ def map_description(self) -> Union[str, None]: return [v for v in filter(bool, aggregate)] + def map_spatial(self): + spatial = [] + for field in ["coverage", "spatial"]: + value = self.split_and_flatten(field)() + if value: + spatial.extend(value) + + return spatial + def is_image_type(self) -> bool: - if "type" not in self.source_metadata: + """ + The `type` field has the value `text` for many images, so it's a useless + indicator of whether its an image or not + """ + if "format" not in self.source_metadata: return False - type: list[str] = self.source_metadata.get("type", []) + type: list[str] = self.source_metadata.get("format", []) - return type and type[0].lower() == "image" + return type and type[0].lower().startswith("image") def map_identifier(self) -> Union[str, None]: if "identifier" not in self.source_metadata: @@ -55,6 +65,7 @@ def map_identifier(self) -> Union[str, None]: identifiers = [i for i in self.source_metadata.get('identifier') if "context" not in i] + return identifiers