Skip to content

Commit

Permalink
Address Chapman validation issues
Browse files Browse the repository at this point in the history
  • Loading branch information
lthurston committed Sep 18, 2023
1 parent be547ac commit 1535da4
Showing 1 changed file with 21 additions and 10 deletions.
31 changes: 21 additions & 10 deletions metadata_mapper/mappers/oai/chapman_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,13 @@


class ChapmanRecord(OaiRecord):
"""Mapping discrepancies:
* `type` field for images contains "Image" in Solr, but "text" in mapped data
"""

def UCLDC_map(self) -> dict:
    """Return the Chapman-specific field mapping.

    `description`, `identifier` and `spatial` map to this record's
    `map_*` methods (passed uncalled); `source` and `language` are read
    directly from `source_metadata` and are None when the source key
    (`collection_name` / `language`) is absent.
    """
    return {
        "description": self.map_description,
        # Single entry only: the earlier duplicate, which lacked a trailing
        # comma and broke the dict literal, is dropped.
        "identifier": self.map_identifier,
        "source": self.source_metadata.get("collection_name"),
        "spatial": self.map_spatial,
        "language": self.source_metadata.get("language"),
    }

def map_is_shown_at(self) -> Union[str, None]:
Expand All @@ -41,20 +38,34 @@ def map_description(self) -> Union[str, None]:

return [v for v in filter(bool, aggregate)]

def map_spatial(self):
    """Gather the `coverage` and `spatial` values into one flat list.

    Each field is fetched by calling the callable returned from
    `self.split_and_flatten(<field>)`. `coverage` values come first,
    followed by `spatial` values; a field whose result is falsy
    (e.g. None or an empty list) contributes nothing.
    """
    return [
        place
        for source_field in ("coverage", "spatial")
        # `or ()` mirrors the truthiness guard: falsy results are skipped.
        for place in (self.split_and_flatten(source_field)() or ())
    ]

def is_image_type(self) -> bool:
    """Report whether the record's first `format` value denotes an image.

    The `type` field has the value `text` for many images, so it's a
    useless indicator of whether the record is an image or not; `format`
    is consulted instead.

    Returns:
        True when the first `format` entry starts with "image"
        (case-insensitive); False when `format` is missing or empty.
    """
    formats: list[str] = self.source_metadata.get("format") or []

    # bool() is required: a bare `formats and ...` would hand back the empty
    # list itself rather than False when there are no format values.
    return bool(formats) and formats[0].lower().startswith("image")

def map_identifier(self) -> Union[list[str], None]:
    """Return the source identifiers that do not contain "context".

    Returns:
        None when `identifier` is absent from `source_metadata` or its
        value is None; otherwise a list of the identifier values, in
        source order, excluding every entry that contains the substring
        "context".
    """
    identifiers = self.source_metadata.get("identifier")
    if identifiers is None:
        # Covers both a missing key and an explicit None value, which
        # would otherwise fail when iterated below.
        return None

    return [i for i in identifiers if "context" not in i]


Expand Down

0 comments on commit 1535da4

Please sign in to comment.