# Reference codes whose psrcat entry has no title, so the generic parsing
# cannot handle them; they are mapped straight to their label.
_UNTITLED_ANTF_REFS = {
    "san16": "Sanpa-arsa_2016",
    "gg74": "Gomez-Gonzalez_1974",
}

# Everything from the first occurrence of each marker onwards is discarded
# before the year is searched for.
_ANTF_TITLE_CUTS = (
    ". Phys. Rev",  # This journal isn't removed by the earlier logic
    ". ATel",  # This journal isn't removed by the earlier logic
    "(",  # Remove the brackets
    ":",  # Remove the colons
    "1E",  # Remove odd source-name convention
    "NGC",  # NGC often appears in titles and ruins the formatting
    "PSR",  # Parts of pulsar names are mistaken for years
    "Sgr",  # Parts of soft gamma ray repeater names are mistaken for years
)


def _before_last_period(text):
    """Return ``text`` up to its last full stop, or all of it if it has none."""
    cut = text.rfind(".")
    return text if cut == -1 else text[:cut]


def _find_year(author_year):
    """Return the last year-like token (``1997`` or ``2013a``) in ``author_year``, else None."""
    year = None
    for token in author_year.split():
        if token.endswith("."):
            # Remove trailing full stop
            token = token[:-1]
        if len(token) == 4 and token.isnumeric():
            year = token
        elif len(token) == 5 and token[:4].isnumeric() and token[4].isalpha():
            # Four digits followed by a disambiguating letter, e.g. "2013a"
            year = token
    return year


def convert_antf_ref(ref_code, ref_dict=None):
    """Convert an ATNF reference code into an ``Author_year`` label.

    Parameters
    ----------
    ref_code : `str`
        The reference code to convert (a key of ``ref_dict``).
    ref_dict : `dict`, optional
        Mapping of reference codes to full reference strings. When ``None``
        it is obtained from ``get_antf_references()``.

    Returns
    -------
    label : `str`
        ``f"{author}_{year}"`` where ``author`` is the text before the first
        comma of the reference (spaces removed) and ``year`` is the last
        four-digit token, optionally followed by one letter, found in the
        author/year part of the reference.

    Raises
    ------
    KeyError
        If ``ref_code`` is still missing after the references are reloaded.
    ValueError
        If no year can be found in the reference string.
    """
    # These ones don't even have a title, so return them manually. Doing this
    # first means they never depend on the lookup below succeeding.
    if ref_code in _UNTITLED_ANTF_REFS:
        return _UNTITLED_ANTF_REFS[ref_code]

    if ref_dict is None:
        ref_dict = get_antf_references()
    try:
        ref_string = ref_dict[ref_code]
    except (KeyError, TypeError):
        # If the psrcat database file is changed this will update the ref_code.
        # The exceptions must be a tuple: "KeyError or TypeError" evaluates to
        # just KeyError, so TypeError was never caught.
        logger.debug(ref_dict)
        psrqpy.QueryATNF(version=ATNF_VER, checkupdate=True)
        ref_dict = get_antf_references()
        logger.debug(ref_dict)
        ref_string = ref_dict[ref_code]

    # Find the parts we need
    if ref_string.startswith("eds "):
        # Remove the "eds" part, which looks like a typo in the database
        ref_string = ref_string[4:]
    author = ref_string.split(",")[0].replace(" ", "")

    # Get only the author/year/title part of the string, example string:
    # Anderson, S. B., Wolszczan, A., Kulkarni, S. R. & Prince, T. A., 1997. Observations of two millisecond pulsars in the globular cluster NGC 5904. ApJ, 482, 870-873.
    for journal in ("ArXiv", "arXiv", "ApJ"):
        if journal in ref_string:
            author_year_title = ref_string.split(f". {journal}")[0]
            break
    else:
        if "-" not in ref_string:
            # Has no reference code (page range), so skip the removal
            author_year_title = ref_string
        else:
            # Cut at the last full stop, ignoring the one that ends the string
            cut = ref_string[:-1].rfind(".")
            author_year_title = ref_string if cut == -1 else ref_string[:cut]

    if "New York" in author_year_title:
        # Different format for American Institute of Physics, New York references
        author_year = author_year_title
    elif "IAU Circ. No" in author_year_title:
        # Different format for IAU Circular references
        author_year = (
            author_year_title.split("IAU Circ. No")[0]
            .replace("M.", "")
            .replace("M5.", "")
        )
    else:
        for pattern in _ANTF_TITLE_CUTS:
            author_year_title = author_year_title.split(pattern)[0]
        author_year = _before_last_period(author_year_title)

    # Loop through what is left to find the year
    year = _find_year(author_year)
    if year is None:
        raise ValueError(
            f"Could not find a publication year in ATNF reference "
            f"{ref_code!r}: {ref_string!r}"
        )

    return f"{author}_{year}"
def test_convert_antf_ref():
    """Check that every flux reference code in the ATNF query converts to ``Author_year``."""
    ref_dict = get_antf_references()

    # Work out which columns of the query hold flux densities
    query = psrqpy.QueryATNF(version=ATNF_VER).pandas
    flux_queries = [
        table_param
        for table_param in query.keys()
        if re.match(r"S\d*\d$", table_param) or re.match(r"S\d*G$", table_param)
    ]

    # Gather the reference code of every flux measurement, pulsar by pulsar
    ref_codes = []
    for row in range(len(query['PSRJ'])):
        for flux_query in flux_queries:
            candidate = query[f"{flux_query}_REF"][row]
            if pd.isna(candidate):
                continue
            ref_codes.append(candidate)

    print(ref_codes)
    for ref_code in set(ref_codes):
        print(f"{ref_code}: '{ref_dict[ref_code]}'")
        ref = convert_antf_ref(ref_code, ref_dict=ref_dict)
        print(ref)
        author, year = ref.split("_")

        # Author has no numbers
        assert not any(char.isdigit() for char in author)

        # A year may carry a trailing letter (e.g. "2013a"); drop it before checking
        if len(year) == 5 and year[-1].isalpha():
            year = year[:-1]
        # Year is 4 digits
        assert len(year) == 4
        # Year is a reasonable year
        assert 1900 < int(year) < 2100