# pyfiner_util.py

import numpy as np
import sys
import argparse
import os
import warnings
from scipy.optimize import least_squares
# Ignore every warning of category UserWarning (process-wide filter).
warnings.simplefilter("ignore", UserWarning)


# Default parameter-file argument; the leading '@' is the `fromfile_prefix_chars`
# character configured in `argparser()` below.
default_parameter_file = '@pyfiner.par'


# We read in the necessary constants contained in the pyfiner_coefs.npz file,
# which is looked up in the directory of this module.
# This file contains the harmonic coefficients of the Fourier transform
# of the K light curve PCA eigenvectors, the corresponding harmonic
# coefficients of the J-band fits, as well as the upper and lower limits
# of the principal component amplitudes, in the format of U1, U2/U1, U3/U1
# and U4/U1
#
# os.path.join is used instead of string concatenation: with an empty dirname the
# concatenated path would have become '/pyfiner_coefs.npz', i.e. the filesystem root.
_coefs_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pyfiner_coefs.npz')
try:
    # The context manager closes the archive once the arrays have been extracted.
    with np.load(_coefs_path) as npzfile:
        harmonic_coef_k = npzfile['k']
        harmonic_coef_j = npzfile['j']
        maxs = npzfile['maxs']
        mins = npzfile['mins']
except Exception as err:
    # `Exception` (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
    # sys.exit with a string writes it to stderr and terminates with a non-zero status.
    sys.exit("Coefficient file not found or unreadable: {} ({})".format(_coefs_path, err))


# The file we have read in has the information up to the 12 Fourier order
harmonic_order = 12


def return_lc_phases(period, times, epoch=0.0):
    """
    Compute the phases of a periodic time series.
    :param period: float
        The periodicity of the time series.
    :param times: np.ndarray
        Array of the time values.
    :param epoch: float
        Reference time of zero phase, in the same units as `times`.
    :return: np.ndarray
        The array of the phases running along `times`, wrapped into [0, 1)
        (up to floating-point rounding).
    """
    # np.modf keeps the sign of its argument, so times earlier than `epoch` used to
    # yield negative phases; the floored modulo wraps them into [0, 1) instead.
    # Results are unchanged for times at or after `epoch`.
    return np.mod((times - epoch) / period, 1.0)


def return_harmonic_lc(order, period, coefs, intercept, positions):
    """
    Evaluate a truncated Fourier series with sine and cosine terms at a set of points.
    :param order: int
        Number of harmonics to sum; the first 2 * `order` entries of `coefs` are used.
    :param period: float
        The period of the Fourier sum.
    :param coefs: array-like
        The Fourier coefficients in the order of (sin1, cos1, sin2, cos2, ...)
    :param intercept: float
        The intercept (zero-frequency term) added to the sum.
    :param positions: numpy.ndarray
        Array of the evaluation points of the Fourier sum.
    :return: numpy.ndarray
        The value of the Fourier sum at each of the `positions`.
    """
    series = np.zeros(positions.size)
    for harmonic in range(1, order + 1):
        sin_amplitude = coefs[2 * harmonic - 2]
        cos_amplitude = coefs[2 * harmonic - 1]
        # Argument of the current harmonic, shared by its sine and cosine term.
        angle = harmonic * 2 * np.pi * positions / period
        series = series + sin_amplitude * np.sin(angle) + cos_amplitude * np.cos(angle)
    return series + intercept


def return_residuals(x, t, y):
    """
    Compute the residuals between a measured light curve and the principal-component model.
    This function is passed to the `least_squares` function of `scipy.optimize` as its first argument.
    :param x: numpy.ndarray
        Array of the model parameters: [intercept, epoch, period, U1, U2/U1, U3/U1, U4/U1]
    :param t: numpy.ndarray
        Array of the measurement times (Julian dates).
    :param y: numpy.ndarray
        Array of the measurements (magnitudes) at times `t`.
    :return: numpy.ndarray
        Array of the residuals (measurements minus model).
    """
    intercept, epoch, period = x[0], x[1], x[2]
    u1 = x[3]
    # x[4:7] are amplitude ratios relative to U1; convert them to absolute amplitudes.
    amplitudes = np.array((u1, u1 * x[4], u1 * x[5], u1 * x[6]))
    # Scale the K-band harmonic coefficients by the amplitudes and sum over the first axis
    # (assumes harmonic_coef_k holds one row per principal component -- TODO confirm).
    model_coefs = (amplitudes * harmonic_coef_k.T).T.sum(axis=0)
    phases = return_lc_phases(period, t, epoch)
    model = return_harmonic_lc(harmonic_order, period, model_coefs, intercept, phases * period)
    return y - model


def calc_int_average(lc):
    """
    Return the magnitude that corresponds to the mean intensity of a light curve.
    The phases of the light curve have to be sampled densely and evenly in the phase range of [0,1)
    :param lc: numpy.ndarray
        Array of the magnitudes.
    :return: float
        The magnitude of the intensity average
    """
    # Magnitudes -> relative intensities, average those, then convert back to a magnitude.
    intensities = 10. ** (-lc / 2.5)
    mean_intensity = np.mean(intensities)
    return -2.5 * np.log10(mean_intensity)


def fit_lightcurve(lightcurve, jd0, period, phase_grid, min_mag, max_mag, median_cut, per_ival, sigma_threshold,
                   n_restarts):
    """
    Fit the principal-component light-curve model to a K-band time series, with outlier rejection.

    The fit proceeds in three steps: (1) threshold rejection on the magnitudes, (2) `n_restarts`
    robust fits started from different epochs, the best of which is used for a sigma clipping,
    (3) a final fit on the surviving points, started from the best solution of step (2).

    :param lightcurve: numpy.ndarray
        2-D array whose row 0 holds the times and row 1 the magnitudes.
        NOTE: `jd0` is subtracted from row 0 IN PLACE, i.e. the caller's array is modified.
    :param jd0: float
        Time zero point subtracted from the times.
    :param period: float
        Initial value of the period.
    :param phase_grid: numpy.ndarray
        Phases at which the fitted model is evaluated (returned as 'fitted_shape').
    :param min_mag: float
        Points with magnitudes not above this value are rejected.
    :param max_mag: float
        Points with magnitudes not below this value are rejected.
    :param median_cut: float
        Points not strictly within `median_cut` of the median magnitude are rejected.
    :param per_ival: float
        Half-width of the allowed period interval around `period`.
    :param sigma_threshold: float
        Sigma-clipping threshold; sigma is estimated from the median absolute residual.
    :param n_restarts: int
        Number of initial fits, each started from a different epoch. Must be at least 1.
    :return: dict
        Keys: 'period', 'times', 'magnitudes', 'int_mean', 'n_data', 'n_data_initial', 'regression',
        'rmse', 'U', 'fitted_shape', 'phases_obs', 'cost', 'cost/N', 'intercept'.
    :raises ValueError: if `n_restarts` is smaller than 1.
    """
    if n_restarts < 1:
        raise ValueError('n_restarts must be at least 1 (got {})'.format(n_restarts))

    output = {}

    # In-place shift of the caller's time array; kept because callers may rely on it.
    lightcurve[0] -= jd0

    n_k_initial = lightcurve.shape[1]

    # Initial threshold rejections. The median is computed from the points inside
    # (min_mag, max_mag), and BOTH cuts are applied to the selection: previously the
    # min/max mask only entered the median and was never applied to the data.
    in_range = (lightcurve[1] < max_mag) & (lightcurve[1] > min_mag)
    median_mag = np.median(lightcurve[1][in_range])
    near_median = (lightcurve[1] < median_mag + median_cut) & (lightcurve[1] > median_mag - median_cut)
    lightcurve = lightcurve[:, in_range & near_median]

    # We start by defining the trusted regions of the parameter vector
    # [intercept, epoch, period, U1, U2/U1, U3/U1, U4/U1].
    lower = np.array((-np.inf, 0.0, period - per_ival, mins[0], mins[1], mins[2], mins[3]))
    upper = np.array((np.inf, 2.0, period + per_ival, maxs[0], maxs[1], maxs[2], maxs[3]))

    # Storage for the solutions and costs of the first round of fits. Sized by n_restarts:
    # the former fixed size of 5 overflowed for n_restarts > 5 and, for n_restarts < 5,
    # left zero-cost empty rows that np.argmin then selected as the "best" fit.
    first_fits = np.zeros((n_restarts, lower.size))
    costs = np.zeros(n_restarts)

    # We fit the models starting from different phases, then choose the best one to
    # be the starting point in the next step of the fitting; we also do a sigma clipping,
    # where sigma is estimated from the Median Absolute Deviation

    initial_intercept = np.median(lightcurve[1])
    for i in range(n_restarts):
        # Starting epochs are spread evenly over one period, centred on 1.0.
        # For n_restarts == 5 this is exactly the original 1.0 + (-2 + i) * period / 5.
        initial_epoch = 1.0 + (i - (n_restarts - 1) / 2.) * period / n_restarts
        parameters_to_fit = np.asarray((initial_intercept, initial_epoch, period, 0.7, 0., 0., 0.))
        regression = least_squares(return_residuals, x0=parameters_to_fit, args=(lightcurve[0], lightcurve[1]),
                                   loss='huber', f_scale=0.05, bounds=(lower, upper))
        first_fits[i] = regression.x
        costs[i] = regression.cost

    best_fit = first_fits[np.argmin(costs)]
    residuals = return_residuals(best_fit, lightcurve[0], lightcurve[1])
    # 1.4826 scales the median absolute residual to a Gaussian standard deviation.
    residuals_std = np.median(np.abs(residuals)) * 1.4826
    mask_sigma = (np.abs(residuals) < sigma_threshold * residuals_std)
    lightcurve = lightcurve[:, mask_sigma]

    # We refit the model on the remaining light-curve points, but starting from the
    # solution determined in the previous step (with the intercept reset to the new median)

    parameters_to_fit = np.copy(best_fit)
    parameters_to_fit[0] = np.median(lightcurve[1])
    regression = least_squares(return_residuals, x0=parameters_to_fit, args=(lightcurve[0], lightcurve[1]),
                               loss='huber', f_scale=0.05, bounds=(lower, upper))

    fitted_intercept = regression.x[0]
    fitted_epoch = regression.x[1]
    fitted_period = regression.x[2]

    # The vector U contains the fitted amplitudes of the individual principal components
    U = np.array((regression.x[3], regression.x[3] * regression.x[4], regression.x[3] * regression.x[5],
                  regression.x[3] * regression.x[6]))

    residuals_k = return_residuals(regression.x, lightcurve[0], lightcurve[1])
    rmse = np.sqrt(np.mean(residuals_k ** 2))

    # Phases of the retained observations, relative to time zero (not to the fitted epoch)
    phases_obs = return_lc_phases(fitted_period, lightcurve[0], 0.0)

    # Harmonic coefficients of the fitted model and its epoch expressed as a phase shift
    model_coefs = (U * harmonic_coef_k.T).T.sum(axis=0)
    phase_shift = fitted_epoch / fitted_period

    synlc = return_harmonic_lc(harmonic_order, 1.0, model_coefs, fitted_intercept, phase_grid - phase_shift)

    # Intensity-averaged magnitude from a dense, even sampling of one full cycle
    k_int_mean = \
        calc_int_average(
            return_harmonic_lc(harmonic_order, 1.0, model_coefs,
                               0.0, np.linspace(0.0, 0.995, 200) - phase_shift) +
            fitted_intercept)

    output['period'] = fitted_period
    output['times'] = lightcurve[0]
    output['magnitudes'] = lightcurve[1]
    output['int_mean'] = k_int_mean
    output['n_data'] = len(phases_obs)
    output['n_data_initial'] = n_k_initial
    output['regression'] = regression
    output['rmse'] = rmse
    output['U'] = U
    output['fitted_shape'] = synlc
    output['phases_obs'] = phases_obs
    output['cost'] = regression.cost
    output['cost/N'] = regression.cost / output['n_data']
    output['intercept'] = fitted_intercept

    return output


def predict_lightcurve(waveband, lightcurve, jd0, results, phase_grid):
    """
    Predict a light curve in one waveband from the shape of the light curve in another waveband
    using the method of Hajdu et al. (2018):
    :param waveband: string
        The waveband of the predicted light curve, either 'J' or 'H'.
    :param lightcurve: numpy.ndarray or None
        2-D array of the (sparse) light curve whose zero point is to be determined: row 0 holds the
        times, row 1 the magnitudes and row 2 the magnitude uncertainties (used as 1/sigma^2 weights).
        NOTE: `jd0` is subtracted from row 0 IN PLACE, i.e. the caller's array is modified.
        If None or empty, only NaN summary values and 'n_data' = 0 are returned.
    :param jd0: float
        Time zero point subtracted from the times.
    :param results: dict
        Output of `fit_lightcurve`; the entries 'U' and 'regression' are read.
    :param phase_grid: numpy.ndarray
        Phases at which the predicted light-curve shape is evaluated.
    :return: dict
        Always contains 'mag_mean', 'mag_wmean', 'mag_median', 'mag_std', 'int_wmean', 'int_median',
        'int_std' and 'n_data'; for a non-empty light curve also 'phases', 'predicted_shape' and
        'predicted_at_obs'.
    :raises NotImplementedError: if `waveband` is neither 'J' nor 'H'.
    """

    if waveband == 'J':
        harmonic_coef = harmonic_coef_j
    elif waveband == 'H':
        # The K-band coefficients are reused for the H band.
        harmonic_coef = harmonic_coef_k
    else:
        raise NotImplementedError('Prediction of {} light curve not implemented'.format(waveband))

    U = results['U']
    kreg = results['regression']

    output = {}

    # An empty array is treated like a missing light curve; it used to fail inside
    # np.average because the weights of zero points sum to zero.
    if lightcurve is None or len(lightcurve[0]) == 0:
        output['mag_mean'] = np.nan
        output['mag_wmean'] = np.nan
        output['mag_std'] = np.nan
        output['int_std'] = np.nan
        output['mag_median'] = np.nan
        output['int_wmean'] = np.nan
        output['int_median'] = np.nan
        output['n_data'] = 0
        return output

    # In-place shift of the caller's time array; kept because callers may rely on it.
    lightcurve[0] -= jd0

    fitted_epoch = kreg.x[1]
    fitted_period = kreg.x[2]
    model_coefs = (U * harmonic_coef.T).T.sum(axis=0)
    phase_shift = fitted_epoch / fitted_period

    output['phases'] = return_lc_phases(fitted_period, lightcurve[0], 0.0)
    output['predicted_shape'] = return_harmonic_lc(harmonic_order, 1.0, model_coefs,
                                                   0.0, phase_grid - phase_shift)

    output['predicted_at_obs'] = return_harmonic_lc(harmonic_order, fitted_period, model_coefs,
                                                    0.0, lightcurve[0] - fitted_epoch)

    # Every observation gives one estimate of the zero point of the zero-mean predicted shape.
    zero_points = lightcurve[1] - output['predicted_at_obs']

    output['mag_mean'] = np.mean(zero_points)
    output['mag_wmean'] = np.average(zero_points, weights=1 / lightcurve[2] ** 2)
    output['mag_median'] = np.median(zero_points)

    output['int_wmean'] = \
        calc_int_average(return_harmonic_lc(harmonic_order, 1.0, model_coefs,
                                            0.0, np.linspace(0.0, 0.995, 200) - phase_shift)
                         + output['mag_wmean'])

    # int_wmean is built on mag_wmean, so switching to the median zero point has to
    # remove mag_wmean. The previous code subtracted mag_mean, which left a spurious
    # (mag_wmean - mag_mean) offset whenever the weights were not uniform.
    output['int_median'] = output['int_wmean'] + output['mag_median'] - output['mag_wmean']

    output['n_data'] = len(lightcurve[0])

    if output['n_data'] > 1:
        # Standard deviations corrected for small sample sizes:
        small_sample_dof = output['n_data'] - 1.5 + 1. / (8 * (output['n_data'] - 1))

        output['mag_std'] = np.sqrt(np.sum((zero_points - output['mag_wmean']) ** 2) / small_sample_dof)

        # NOTE(review): this scatter is measured about int_wmean rather than mag_wmean,
        # exactly as in the original code -- confirm that this is intended.
        output['int_std'] = np.sqrt(np.sum((zero_points - output['int_wmean']) ** 2) / small_sample_dof)
    else:
        # With a single point the scatter cannot be estimated; its uncertainty is reported.
        output['mag_std'] = lightcurve[2][0]
        output['int_std'] = lightcurve[2][0]

    return output


def print_results(identifier, results):
    """
    Print the results of one object as a single line of space-separated columns.

    Column order: identifier; K period, intercept, int_mean; then for J and for H in turn
    n_data, mag_mean, mag_wmean, mag_std, int_wmean, int_std, mag_median, int_median;
    then the K values U[0..3], rmse, cost, cost/N, n_data, n_data_initial.

    NOTE: the H-band 'mag_median' and 'int_median' were always passed to the formatter
    but the format string had no placeholders for them, so `str.format` silently dropped
    them. They are now printed after the H 'int_std', which adds two columns to the line.

    :param identifier: str
        Identifier of the object.
    :param results: dict
        Dictionary with the keys 'K' (output of `fit_lightcurve`), 'J' and 'H'
        (outputs of `predict_lightcurve`).
    :return: None
    """
    k_res = results['K']

    columns = [('{:7s}', str(identifier)),
               ('{:9.7f}', k_res['period']),
               ('{:6.3f}', k_res['intercept']),
               ('{:6.3f}', k_res['int_mean'])]

    # The J and H blocks share the same layout.
    for band in ('J', 'H'):
        band_res = results[band]
        columns.append(('{:2d}', band_res['n_data']))
        for key in ('mag_mean', 'mag_wmean', 'mag_std', 'int_wmean', 'int_std', 'mag_median', 'int_median'):
            columns.append(('{:6.3f}', band_res[key]))

    for amplitude_index in range(4):
        columns.append(('{:9.6f}', k_res['U'][amplitude_index]))

    columns.extend([('{:6.4f}', k_res['rmse']),
                    ('{:6.4f}', k_res['cost']),
                    ('{:8.6f}', k_res['cost/N']),
                    ('{:4d}', k_res['n_data']),
                    ('{:4d}', k_res['n_data_initial'])])

    print(' '.join(fmt.format(value) for fmt, value in columns))


def convert_arg_line_to_args(arg_line):
    """
    Custom line parser for argparse.
    Splits a line on whitespace and yields its tokens one by one. Empty lines and lines
    starting with '#' yield nothing; parsing of a line stops at the first token that
    contains a '#' (that token is not yielded).
    :param arg_line: str
        One line of the input parameter file.
    :return: generator of str
        The arguments found on the line.
    """
    if not arg_line or arg_line[0] == '#':
        return
    for token in arg_line.split():
        # Everything from the first token containing '#' onwards is a comment.
        if '#' in token:
            return
        yield token


def argparser():
    """
    Creates an argparse.ArgumentParser object for reading in parameters from a file.
    Arguments prefixed with '@' are read from a file, whose lines are parsed by
    `convert_arg_line_to_args`. Argument names, types and default values are unchanged;
    only the description and help texts were corrected.
    :return: argparse.ArgumentParser
        The configured parser.
    """
    ap = argparse.ArgumentParser(fromfile_prefix_chars='@',
                                 description='PyFiNeR: fit K-band light curves with a principal-component '
                                             'model and predict the J- and H-band light curves.',
                                 epilog="")

    # use custom line parser for the parameter file
    ap.convert_arg_line_to_args = convert_arg_line_to_args

    ap.add_argument('--rootdir',
                    action='store',
                    type=str,
                    default=os.path.expanduser('~'),
                    help='Full path of the root directory '
                         '(all other directory and file names will be relative to this).')

    ap.add_argument('--input_list',
                    action='store',
                    type=str,
                    default='input.lst',
                    help='An ASCII file containing the identifiers and the periods of the stars, '
                         'one object per line. The file must start with the header `# id period`.')

    ap.add_argument('--k_lc_dir',
                    action='store',
                    type=str,
                    default='k_lc',
                    help='Name of the subdirectory containing the K light curves.')

    ap.add_argument('--j_lc_dir',
                    action='store',
                    type=str,
                    default='j_lc',
                    help='Name of the subdirectory containing the J light curves.')

    ap.add_argument('--h_lc_dir',
                    action='store',
                    type=str,
                    default='h_lc',
                    help='Name of the subdirectory containing the H light curves.')

    ap.add_argument('--k_lc_suffix',
                    action='store',
                    type=str,
                    default='_k.dat',
                    help='Suffix of the K light curve files in the scheme: <object_id><k_lc_suffix>')

    ap.add_argument('--j_lc_suffix',
                    action='store',
                    type=str,
                    default='_j.dat',
                    help='Suffix of the J light curve files in the scheme: <object_id><j_lc_suffix>')

    ap.add_argument('--h_lc_suffix',
                    action='store',
                    type=str,
                    default='_h.dat',
                    help='Suffix of the H light curve files in the scheme: <object_id><h_lc_suffix>')

    ap.add_argument('--plotdir',
                    action='store',
                    type=str,
                    default='figures',
                    help='Name of the subdirectory for storing the figures generated by PyFiNeR.')

    ap.add_argument('--figname_suffix',
                    action='store',
                    type=str,
                    default='_pyfiner',
                    help='Suffix for the generated figure files in the scheme: <object_id><figname_suffix>.<figformat>')

    ap.add_argument('--figformat',
                    action='store',
                    type=str,
                    default='pdf',
                    help='File format of the generated figures.')

    # ---------------------
    # ALGORITHM PARAMETERS:
    # ---------------------

    ap.add_argument('--fit_period',
                    action='store_true',
                    help='If set, the period will be fitted along with the rest of the light-curve parameters.')

    ap.add_argument('--per_ival',
                    action='store',
                    type=float,
                    default=0.001,
                    help='Half of the trusted region interval around the input value of the period '
                         '(used if --fit_period is set).')

    ap.add_argument('--min_mag',
                    action='store',
                    type=float,
                    default=9.0,
                    help='Minimum threshold value of the magnitudes in the K light curve.')

    ap.add_argument('--max_mag',
                    action='store',
                    type=float,
                    default=20.0,
                    help='Maximum threshold value of the magnitudes in the K light curve.')

    ap.add_argument('--median_cut',
                    action='store',
                    type=float,
                    default=0.5,
                    help='Magnitude threshold around the median, beyond which all points will be rejected.')

    ap.add_argument('--sigma_threshold',
                    action='store',
                    type=float,
                    default=3.5,
                    help='Rejection threshold in units of residual standard deviation.')

    ap.add_argument('--n_restarts',
                    action='store',
                    type=int,
                    default=5,
                    help='The number of times the K-band light-curve regression restarts '
                         '(with a new initial phase value).')

    return ap


def mywarn(*args, **kwargs):
    """
    Print a warning message to the standard error stream.
    The message is prefixed with 'WARNING: '; any keyword arguments are forwarded to `print`.
    :param args:
        The objects to be printed after the prefix.
    :param kwargs:
        Keyword arguments passed on to `print` (must not contain `file`).
    :return: None
    """
    prefixed = ('WARNING: ',) + args
    print(*prefixed, file=sys.stderr, **kwargs)