Skip to content

Commit

Permalink
Fmi (#223)
Browse files Browse the repository at this point in the history
* new mapfile for sciencespo in dc

* changed identifier, language and other stuff in fmi mapfile, adjusted eudatcore reader

* changes for identifier
  • Loading branch information
seolih authored Dec 9, 2021
1 parent 8b5dbe8 commit 8569fd0
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 12 deletions.
47 changes: 42 additions & 5 deletions mdingestion/community/fmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,49 @@ class FMI(Community):
NAME = 'fmi'
IDENTIFIER = NAME
URL = 'https://fmi.b2share.csc.fi/api/oai2d'
SCHEMA = SchemaType.DublinCore
SCHEMA = SchemaType.Eudatcore
SERVICE_TYPE = ServiceType.OAI
OAI_METADATA_PREFIX = 'oai_dc'
OAI_SET = '8343aaf9-f598-4529-9359-7ce2d9bd3e63'
OAI_METADATA_PREFIX = 'eudatcore'
OAI_SET = '77f140b0-d4aa-437e-80d4-32c0abd3746f'

# Community-specific post-processing of a record: assigns discipline,
# publisher, keywords and language on `doc`, then lets `_identifier` fill the
# identifier fields.
# NOTE(review): this span comes from a rendered diff without +/- markers, so
# two versions of the discipline assignment and of the commented-out funding
# line appear side by side. Presumably the `self.discipline` call and the
# `# doc.funding_reference` comment are the pre-change lines -- confirm
# against the repository before relying on this text as the file's content.
def update(self, doc):
doc.discipline = self.discipline(doc, 'Meteorology')
doc.discipline = self._discipline(doc, 'Meteorology')
doc.publisher = 'Finnish Meteorological Institute'
# doc.funding_reference = self.find('Funder')
doc.keywords = self.keywords()
doc.language = self.language()
self._identifier(doc)
#doc.funding_reference = self.find('Funder')

def _identifier(self, doc):
for id in self.find('identifier'):
if 'doi:' in id:
doc.doi = id.split('doi:')[-1]
if 'pid:' in id:
doc.pid = id.split('pid:')[-1]
if 'url:' in id:
doc.source = id.split('url:')[-1]

def _discipline(self, doc, default=None):
disc = self.find('discipline')
if disc:
_disc = []
for d in disc:
_disc.append(d.split('→')[-1])
disc = _disc
else:
disc = [default]
return disc

def keywords(self):
    """Return the 'keyword' values, or the 'subject' values if there are none."""
    return self.find('keyword') or self.find('subject')

def language(self):
    """Return the record's languages.

    Uses ``self.find('language')`` when it yields a non-empty first value.
    Otherwise the ``language_name`` attribute of every element returned by
    ``self._reader.parser.doc.find_all('language')`` is collected instead.

    :return: list of language values; elements without a usable
        ``language_name`` are left out of the fallback result.
    """
    found = self.find('language')
    if found and found[0] != '':
        return found
    # NOTE(review): parser.doc is presumably a parsed XML tree whose elements
    # expose attributes through .get() -- confirm against the reader.
    elements = self._reader.parser.doc.find_all('language')
    names = (element.get('language_name') for element in elements)
    # .get() gives None for a missing attribute; do not pass that on.
    return [name for name in names if name]
19 changes: 19 additions & 0 deletions mdingestion/community/sciencepodc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from .base import Community
from ..service_types import SchemaType, ServiceType


class SciencesPoDublinCore(Community):
    """Community settings for the OAI endpoint at data.sciencespo.fr (oai_dc)."""

    NAME = 'sciencespodc'
    IDENTIFIER = NAME
    URL = 'https://data.sciencespo.fr/oai'
    SCHEMA = SchemaType.DublinCore
    SERVICE_TYPE = ServiceType.OAI
    OAI_METADATA_PREFIX = 'oai_dc'
    OAI_SET = None
    # PRODUCTIVE = True
    # DATE = '2021-10-20'

    def update(self, doc):
        """Set the discipline and, if absent, the publication year of ``doc``.

        The discipline comes from ``self.discipline`` with 'Social Sciences'
        as fallback; a missing publication year is taken from the
        'header.datestamp' value.
        """
        doc.discipline = self.discipline(doc, 'Social Sciences')
        if doc.publication_year:
            # A publication year is already present; leave it unchanged.
            return
        doc.publication_year = self.find('header.datestamp')
15 changes: 8 additions & 7 deletions mdingestion/reader/eudatcore.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ class EudatcoreReader(XMLReader):
def parse(self, doc):
doc.title = self.find('title')
doc.description = self.find('description')
doc.doi = self.find('identifier')
doc.pid = self.find('identifier')
doc.source = self.find('identifier')
doc.keywords = self.find('subject')
doc.discipline = self.discipline(doc)
doc.doi = self.find_doi('identifier', identifierType="DOI")
doc.pid = self.find_pid('identifier', identifierType="PID")
doc.source = self.find_source('identifier', identifierType="URL")
doc.keywords = self.find('keyword')
doc.discipline = self.find('discipline')
doc.related_identifier = self.find('relatedIdentifier')
doc.creator = self.find('creator')
doc.publisher = self.find('publisher')
Expand All @@ -25,11 +25,12 @@ def parse(self, doc):
doc.rights = self.find('rights')
doc.contact = self.find('contact')
doc.language = self.find('language')
doc.resource_type = self.find('resource_type')
doc.resource_type = self.find('resourceType')
doc.format = self.find('format')
doc.size = self.find('size')
doc.version = self.find('version')
doc.temporal_coverage = self.find('temporal_coverage')
doc.instrument = self.find('instrument')
doc.temporal_coverage = self.find('temporalCoverage')
doc.geometry = self.find_geometry()
doc.places = self.find('geoLocationPlace')

Expand Down

0 comments on commit 8569fd0

Please sign in to comment.