Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Link and reference convert fix #90

Merged
merged 4 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/papers_in_catalogue.csv
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
"Zhao et al. (2017)","26","8600-8600","`ADS <https://ui.adsabs.harvard.edu/abs/2017ApJ...845..156Z/abstract>`__"
"Basu et al. (2018)","6","325-1280","`ADS <https://ui.adsabs.harvard.edu/abs/2018MNRAS.475.1469B/abstract>`__"
"Brinkman et al. (2018)","12","327-1400","`ADS <https://ui.adsabs.harvard.edu/abs/2018MNRAS.474.2012B>`__"
"Gentile et al. (2018)","28","430-2100","`ADS <https://ui.adsabs.harvard.edu/abs/2018ApJ...868..122B>`__"
"Gentile et al. (2018)","28","430-2100","`ADS <https://ui.adsabs.harvard.edu/abs/2018ApJ...862...47G/abstract>`__"
"Jankowski et al. (2018)","441","728-3100","`ADS <https://ui.adsabs.harvard.edu/abs/2018MNRAS.473.4436J/abstract>`__"
"Johnston et al. (2018)","586","1360-1360","`ADS <https://ui.adsabs.harvard.edu/abs/2018MNRAS.474.4629J/abstract>`__"
"RoZko et al. (2018)","2","325-5900","`ADS <https://ui.adsabs.harvard.edu/abs/2018MNRAS.479.2193R>`__"
Expand Down
71 changes: 60 additions & 11 deletions pulsar_spectra/catalogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
"Demorest_2013":"https://ui.adsabs.harvard.edu/abs/2013ApJ...762...94D",
"Esamdin_2004":"https://ui.adsabs.harvard.edu/abs/2004A&A...425..949E",
"Freire_2007":"https://ui.adsabs.harvard.edu/abs/2007ApJ...662.1177F",
"Gentile_2018":"https://ui.adsabs.harvard.edu/abs/2018ApJ...868..122B",
"Gentile_2018":"https://ui.adsabs.harvard.edu/abs/2018ApJ...862...47G/abstract",
"Giacani_2001":"https://ui.adsabs.harvard.edu/abs/2001AJ....121.3133G",
"Han_1999":"https://ui.adsabs.harvard.edu/abs/1999A&AS..136..571H",
"Hoensbroech_1997":"https://ui.adsabs.harvard.edu/abs/1997A%26AS..126..121V/abstract",
Expand Down Expand Up @@ -191,23 +191,72 @@ def convert_antf_ref(ref_code, ref_dict=None):
if ref_dict is None:
ref_dict = get_antf_references()
try:
ref_string_list = ref_dict[ref_code].split()
ref_string = ref_dict[ref_code]
except KeyError or TypeError:
# If the psrcat database file is changed this will update the ref_code
logger.debug(ref_dict)
psrqpy.QueryATNF(version=ATNF_VER, checkupdate=True)
ref_dict = get_antf_references()
logger.debug(ref_dict)
ref_string_list = ref_dict[ref_code].split()
ref_string = ref_dict[ref_code]

# These ones don't even have a title, so return them manually
if ref_code == "san16":
return "Sanpa-arsa_2016"
elif ref_code == "gg74":
return "Gomez-Gonzalez_1974"

# Find the parts we need
author = ref_string_list[0][:-1]
#logger.debug(ref_string_list)
for ref_part in ref_string_list:
if ref_part.endswith('.') and len(ref_part) == 5 and ref_part[:-1].isnumeric():
year = ref_part[:-1]
elif ref_part.endswith('.') and len(ref_part) == 6 and ref_part[:-2].isnumeric():
year = ref_part[:-1]
if ref_string.startswith("eds "):
# Remove the eds part which I think is a typo
ref_string = ref_string[4:]
author = ref_string.split(",")[0].replace(" ", "")

# Get only author year part of the string, example string:
# Anderson, S. B., Wolszczan, A., Kulkarni, S. R. & Prince, T. A., 1997. Observations of two millisecond pulsars in the globular cluster NGC 5904. ApJ, 482, 870-873.
if "ArXiv" in ref_string:
author_year_title = ref_string.split(". ArXiv")[0]
elif "arXiv" in ref_string:
author_year_title = ref_string.split(". arXiv")[0]
elif "ApJ" in ref_string:
author_year_title = ref_string.split(". ApJ")[0]
elif "-" not in ref_string:
# Has no reference code so skip the removal
author_year_title = ref_string
else:
author_year_title = ref_string[:ref_string[:-1].rfind('.')]

if "New York" in author_year_title:
# Different format for American Institute of Physics, New York references
author_year = author_year_title
elif "IAU Circ. No" in author_year_title:
# Different format for IAU Circular references
author_year = author_year_title.split("IAU Circ. No")[0].replace("M.", "").replace("M5.", "")
else:
removal_patterns = [
". Phys. Rev", # This journal isn't removed in previous logic so remove it here
". ATel", # This journal isn't removed in previous logic so remove it here
"(", # Remove the brackets
":", # Remove the colins
"1E", # Remove weird name convertion
"NGC", # NGC often in titles that ruin formatting
"PSR", # Parts of pulsar names are mistaken for years
"Sgr", # Parts of soft gamma ray repeaters are mistaken for years
]
for pattern in removal_patterns:
author_year_title = author_year_title.split(pattern)[0]
author_year = author_year_title[:author_year_title.rfind('.')]

# Loop through what is left to find the year
for ref_part in author_year.split():
if ref_part.endswith("."):
# Remove trailing full stop
ref_part = ref_part[:-1]
if len(ref_part) == 4 and ref_part.isnumeric():
year = ref_part
elif len(ref_part) == 5 and ref_part[:-1].isnumeric():
year = ref_part

return f"{author}_{year}"


Expand Down Expand Up @@ -247,7 +296,7 @@ def flux_from_atnf(pulsar, query=None, ref_dict=None, assumed_error=0.5):
# Find all flux queries from keys
flux_queries = []
for table_param in query.keys():
if re.match("S\d*\d$", table_param) or re.match("S\d*G$", table_param):
if re.match(r"S\d*\d$", table_param) or re.match(r"S\d*G$", table_param):
flux_queries.append(table_param)

freq_all = []
Expand Down
42 changes: 41 additions & 1 deletion tests/test_catalogue.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#! /usr/bin/env python

import os
import re
import yaml
import psrqpy
import pandas as pd

from pulsar_spectra.catalogue import collect_catalogue_fluxes, CAT_YAMLS, ADS_REF, ATNF_VER
from pulsar_spectra.catalogue import collect_catalogue_fluxes, get_antf_references, convert_antf_ref, CAT_YAMLS, ADS_REF, ATNF_VER

import logging
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -82,6 +84,44 @@ def test_catalogue_format():
assert flux_err != 0.


def test_convert_antf_ref():
    """Check that every ATNF flux reference code converts to ``Author_Year``.

    Every reference code attached to a flux density column (``S<freq>`` or
    ``S<freq>G``) of the ATNF query is passed through ``convert_antf_ref``.
    The result must split on ``_`` into an author part containing no digits
    and a four-digit year (optionally followed by a single letter suffix)
    between 1900 and 2100.
    """
    ref_dict = get_antf_references()

    # Find the flux density columns of the ATNF query
    query = psrqpy.QueryATNF(version=ATNF_VER).pandas
    flux_pattern = re.compile(r"S\d*\d$|S\d*G$")
    flux_queries = [
        table_param
        for table_param in query.keys()
        if flux_pattern.match(table_param)
    ]

    # Collect the unique reference codes one column at a time; dropna() skips
    # the pulsars that have no measurement (and so no reference) in a column
    ref_codes = set()
    for flux_query in flux_queries:
        ref_codes.update(query[f"{flux_query}_REF"].dropna())

    # Sorted so the output order is reproducible between runs
    for ref_code in sorted(ref_codes):
        # Printed so a failing reference can be identified in the pytest output
        print(f"{ref_code}: '{ref_dict[ref_code]}'")
        ref = convert_antf_ref(ref_code, ref_dict=ref_dict)
        print(ref)
        author, year = ref.split("_")

        # Author has no numbers
        assert not any(char.isdigit() for char in author)

        if len(year) == 5 and year[-1].isalpha():
            # Format ends with a letter so remove it before tests
            year = year[:-1]
        # Assert year is 4 digits
        assert len(year) == 4
        # Assert year is a reasonable year
        assert 1900 < int(year) < 2100



# TODO finish below
def todo_test_for_duplicate_data():
Expand Down
Loading