From fece2a2d83d0acc1e4ebb74d59ac2735c90e35eb Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Fri, 23 Feb 2024 12:51:47 -0500 Subject: [PATCH] adds copyright to tagged output modified: adsmanparse/classic_serializer.py modified: adsmanparse/translator.py modified: requirements.txt --- adsmanparse/classic_serializer.py | 2 ++ adsmanparse/translator.py | 5 +++++ requirements.txt | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/adsmanparse/classic_serializer.py b/adsmanparse/classic_serializer.py index f772733..9abc18a 100644 --- a/adsmanparse/classic_serializer.py +++ b/adsmanparse/classic_serializer.py @@ -29,6 +29,7 @@ def _clean_string(self, data): data = re.sub(r"&[rl]squo;", "\'", data) data = re.sub(r"&[rl]dquo;", "\"", data) data = re.sub(r" ", " ", data) + data = re.sub(r"‌", " ", data) return data def __init__(self, **kwargs): @@ -43,6 +44,7 @@ def __init__(self, **kwargs): ('language', {'tag': 'M'}), ('comments', {'tag': 'X', 'join': '; '}), ('source', {'tag': 'G'}), + ('copyright', {'tag': 'C'}), ('uatkeys', {'tag': 'U', 'join': ', '}), ('keywords', {'tag': 'K', 'join': ', '}), ('subjectcategory', {'tag': 'Q', 'join': '; '}), diff --git a/adsmanparse/translator.py b/adsmanparse/translator.py index 714aec5..c5a616f 100644 --- a/adsmanparse/translator.py +++ b/adsmanparse/translator.py @@ -337,6 +337,10 @@ def _get_bibcode(self, bibstem=None): except Exception as err: print('Couldnt make a bibcode: %s' % str(err)) + def _get_copyright(self): + copyright_statement=self.data.get("copyright", {}).get("statement", None) + if copyright_statement: + self.output["copyright"] = copyright_statement def _special_handling(self, bibstem=None): # Special data handling rules on a per-bibstem basis @@ -397,3 +401,4 @@ def translate(self, data=None, publisher=None, bibstem=None, parsedfile=False): self._get_properties(parsedfile) self._get_publication() self._get_bibcode(bibstem=bibstem) + self._get_copyright() diff --git a/requirements.txt b/requirements.txt index 2a29b16..175afce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ git+https://github.com/adsabs/ADSIngestParser@v0.9.13 -git+https://github.com/adsabs/ADSIngestEnrichment@v0.9.5 +git+https://github.com/adsabs/ADSIngestEnrichment@v0.9.6 adsputils==1.5.2 habanero==0.7.4 namedentities==1.9.4