
Brute force method to find the optimal number of harmonics for WaveX, DMWaveX, and CMWaveX #1824

Open · wants to merge 25 commits into base: master
3 changes: 3 additions & 0 deletions CHANGELOG-unreleased.md
@@ -14,6 +14,8 @@ the released changes.
- `maskParameter.__repr__()` output now includes the frozen attribute.
- Changed default value of `FDJUMPLOG` to `Y`
- Bumped `black` version to 24.x
- Replaced `pint.utils.find_optimal_nharms` with a more general function, `pint.noise_analysis.find_optimal_nharms`, which optimizes `Nharms` for multiple noise components simultaneously.
- Updated the example `rednoise-fit-example.py`
### Added
- Type hints in `pint.derived_quantities`, `pint.modelutils`, `pint.binaryconvert`, `pint.config`,
`pint.erfautils`, `pint.fits_utils`, `pint.logging` and `pint.residuals`
@@ -28,6 +30,7 @@ the released changes.
- Documentation: Fixed empty descriptions in the timing model components table
- BIC implementation
- `event_optimize`: Fixed a bug that was causing the results.txt file to be written without the median values.
- SWX model now has SWXP_0001 frozen by default, and new segments should also have SWXP frozen
### Removed
- Removed the argument `--usepickle` in `event_optimize` as the `load_events_weights` function checks the events file type to see if the
file is a pickle file.
25 changes: 18 additions & 7 deletions docs/examples/rednoise-fit-example.py
@@ -18,12 +18,12 @@
# %%
from pint import DMconst
from pint.models import get_model
from pint.noise_analysis import find_optimal_nharms
from pint.simulation import make_fake_toas_uniform
from pint.logging import setup as setup_log
from pint.fitter import WLSFitter
from pint.utils import (
dmwavex_setup,
find_optimal_nharms,
wavex_setup,
plrednoise_from_wavex,
pldmnoise_from_dmwavex,
@@ -100,7 +100,11 @@
m1 = deepcopy(m)
m1.remove_component("PLRedNoise")

nharm_opt, d_aics = find_optimal_nharms(m1, t, "WaveX", 30)
aics, nharm_opt = find_optimal_nharms(
m1, t, include_components=["WaveX"], nharms_max=30
)
d_aics = aics - np.min(aics)
nharm_opt = nharm_opt[0]

print("Optimum no of harmonics = ", nharm_opt)

@@ -118,7 +122,7 @@
plt.ylabel("AIC - AIC$_\\min{} + 1$")
plt.legend()
plt.yscale("log")
# plt.savefig("sim3-aic.pdf")
plt.show()

# %%
# Now create a new model with the optimum number of harmonics
@@ -181,7 +185,7 @@
)
plt.axvline(fyr, color="black", ls="dotted")
plt.axhline(0, color="grey", ls="--")
plt.ylabel("Fourier coeffs ($\mu$s)")
plt.ylabel("Fourier coeffs ($\\mu$s)")
plt.xscale("log")
plt.legend(fontsize=8)

@@ -200,6 +204,7 @@
plt.xlabel("Frequency (Hz)")
plt.axvline(fyr, color="black", ls="dotted", label="1 yr$^{-1}$")
plt.legend()
plt.show()

# %% [markdown]
# Note the outlier in the 1 year^-1 bin. This is caused by the covariance with RA and DEC, which introduce a delay with the same frequency.
@@ -259,7 +264,12 @@

m2 = deepcopy(m1)

nharm_opt, d_aics = find_optimal_nharms(m2, t, "DMWaveX", 30)
aics, nharm_opt = find_optimal_nharms(
m2, t, include_components=["DMWaveX"], nharms_max=30
)
d_aics = aics - np.min(aics)
nharm_opt = nharm_opt[0]

print("Optimum no of harmonics = ", nharm_opt)

# %%
@@ -273,7 +283,7 @@
plt.ylabel("AIC - AIC$_\\min{} + 1$")
plt.legend()
plt.yscale("log")
# plt.savefig("sim3-aic.pdf")
plt.show()

# %%
# Now create a new model with the optimum number of
@@ -351,7 +361,7 @@
)
plt.axvline(fyr, color="black", ls="dotted")
plt.axhline(0, color="grey", ls="--")
plt.ylabel("Fourier coeffs ($\mu$s)")
plt.ylabel("Fourier coeffs ($\\mu$s)")
plt.xscale("log")
plt.legend(fontsize=8)

@@ -370,3 +380,4 @@
plt.xlabel("Frequency (Hz)")
plt.axvline(fyr, color="black", ls="dotted", label="1 yr$^{-1}$")
plt.legend()
plt.show()
1 change: 1 addition & 0 deletions requirements.txt
@@ -10,3 +10,4 @@ uncertainties
loguru
nestle>=0.2.0
Contributor:
should we discuss having a new requirement more broadly? If this is only needed for a subset of tasks, should it be optional?
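If it does end up optional, one way to express that (a sketch only, assuming the project keeps its current `setup.cfg` layout; the extra name `parallel` is hypothetical) would be an extras group:

```ini
# Hypothetical sketch: move joblib out of install_requires
# into an optional extras group, installable via `pip install pint-pulsar[parallel]`.
[options.extras_require]
parallel =
    joblib
```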

Member:
I'm personally fine with adding new requirements if the requirement is pure python and doesn't have a bunch of its own new requirements. nestle seems like it is of that (good) type.

Contributor:
I think this was actually adding joblib?

Contributor:
In particular, I wonder about how using joblib compares to the use of concurrent.futures? I see some discussion online. It seems like we might want to stick with one library for that functionality.

Contributor Author:
I will try concurrent.futures.
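For comparison, the `joblib.Parallel` call in `find_optimal_nharms` maps fairly directly onto the standard-library `concurrent.futures` API. A minimal sketch of that shape (the `compute_aic` stand-in here is a toy placeholder, not PINT's real fitting function):

```python
# Sketch: replacing joblib.Parallel with concurrent.futures.ProcessPoolExecutor
# for the brute-force grid search. compute_aic below is a toy stand-in for the
# real model-fitting function in pint.noise_analysis.
from concurrent.futures import ProcessPoolExecutor
from itertools import product
import os


def compute_aic(nharms):
    # Placeholder: the real function fits a timing model and returns its AIC.
    return sum(n * n for n in nharms)


def grid_search(nharms_max, ncomponents, max_workers=None):
    # Same grid as find_optimal_nharms: (nharms_max + 1) ** ncomponents points.
    idxs = list(product(range(nharms_max + 1), repeat=ncomponents))
    max_workers = max_workers or os.cpu_count()
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        # executor.map preserves input order, like joblib's Parallel does.
        aics = list(pool.map(compute_aic, idxs))
    return idxs, aics


if __name__ == "__main__":
    idxs, aics = grid_search(3, 2)
    print(len(aics))  # 16 grid points
```

One practical difference: `ProcessPoolExecutor` requires the mapped function to be picklable (a module-level function), whereas joblib can also ship closures via cloudpickle when its loky backend is used.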

numdifftools
joblib
1 change: 1 addition & 0 deletions setup.cfg
@@ -44,6 +44,7 @@ install_requires =
loguru
nestle>=0.2.0
numdifftools
joblib

[options.packages.find]
where = src
11 changes: 7 additions & 4 deletions src/pint/models/solar_wind_dispersion.py
@@ -526,7 +526,8 @@ class SolarWindDispersionX(SolarWindDispersionBase):
"""This class provides a SWX model - multiple Solar Wind segments.

This model lets the user specify time ranges and fit for a different
SWXDM (max solar wind DM) value and SWXP (radial power-law index) in each time range.
SWXDM (max solar wind DM) value with a different SWXP (radial power-law index)
in each time range.

The default radial power-law index value of 2 corresponds to the Edwards et al. model.
Other values are for the You et al./Hazboun et al. model.
@@ -537,6 +538,8 @@ class SolarWindDispersionX(SolarWindDispersionBase):
to the standard model: the standard model goes from opposition (min) to conjunction (max),
while this model goes from 0 to conjunction (max), so the scaling is ``((conjunction - opposition)/conjunction)``.

Also note that fitting for SWXP is possible but not advised.

See `Solar Wind Examples <examples/solar_wind.html>`_.

To compare against a standard model:
@@ -568,7 +571,7 @@
def __init__(self):
super().__init__()

self.add_swx_range(None, None, swxdm=0, swxp=2, frozen=False, index=1)
self.add_swx_range(None, None, swxdm=0, swxp=2, index=1)

self.set_special_params(["SWXDM_0001", "SWXP_0001", "SWXR1_0001", "SWXR2_0001"])
self.dm_value_funcs += [self.swx_dm]
@@ -698,7 +701,7 @@ def add_swx_range(
swxp : float or astropy.quantity.Quantity
Solar wind power-law index
frozen : bool
Indicates whether SWXDM and SWXP will be fit.
Indicates whether SWXDM will be fit.

Returns
-------
@@ -752,7 +755,7 @@
value=swxp,
description="Solar wind power-law index",
parameter_type="float",
frozen=frozen,
frozen=True,
tcb2tdb_scale_factor=u.Quantity(1),
)
)
230 changes: 230 additions & 0 deletions src/pint/noise_analysis.py
@@ -0,0 +1,230 @@
from copy import deepcopy
from typing import List, Optional, Tuple
from itertools import product as cartesian_product

from joblib import Parallel, cpu_count, delayed
import numpy as np
from astropy import units as u

from pint.models.chromatic_model import ChromaticCM
from pint.models.dispersion_model import DispersionDM
from pint.models.phase_offset import PhaseOffset
from pint.models.timing_model import TimingModel
from pint.toa import TOAs
from pint.logging import setup as setup_log
from pint.utils import (
akaike_information_criterion,
cmwavex_setup,
dmwavex_setup,
wavex_setup,
)


def find_optimal_nharms(
model: TimingModel,
toas: TOAs,
include_components: List[str] = ["WaveX", "DMWaveX", "CMWaveX"],
nharms_max: int = 45,
chromatic_index: float = 4,
num_parallel_jobs: Optional[int] = None,
) -> Tuple[np.ndarray, tuple]:
"""Find the optimal number of harmonics for `WaveX`/`DMWaveX`/`CMWaveX` using the
Akaike Information Criterion.

This function runs a brute-force search over a grid of harmonic numbers, from 0 to
`nharms_max`. The search is executed in multiple processes using the `joblib` library;
the number of processes is controlled through the `num_parallel_jobs` argument.

Please note that the execution time scales as `O(nharms_max**len(include_components))`,
which can quickly become large. Hence, if you are using large values of `nharms_max`, it
is recommended that this be run on a cluster with a large number of CPUs.

Parameters
----------
model: `pint.models.timing_model.TimingModel`
The timing model. Should not already contain `WaveX`/`DMWaveX` or `PLRedNoise`/`PLDMNoise`.
toas: `pint.toa.TOAs`
Input TOAs
include_components: list[str]
Component names; a non-empty sublist of ["WaveX", "DMWaveX", "CMWaveX"]
nharms_max: int, optional
Maximum number of harmonics (default is 45) for each component
chromatic_index: float
Chromatic index for `CMWaveX`
num_parallel_jobs: int, optional
Number of parallel processes. The default is the number of available CPU cores.

Returns
-------
aics: ndarray
Array of AIC values.
nharms_opt: tuple
Optimal numbers of harmonics
"""
assert len(set(include_components).intersection(set(model.components.keys()))) == 0
assert len(include_components) > 0

idxs = list(
cartesian_product(
*np.repeat([np.arange(nharms_max + 1)], len(include_components), axis=0)
)
)

if num_parallel_jobs is None:
num_parallel_jobs = cpu_count()

aics_flat = Parallel(n_jobs=num_parallel_jobs, verbose=13)(
delayed(
lambda ii: compute_aic(model, toas, include_components, ii, chromatic_index)
)(ii)
for ii in idxs
)

aics = np.reshape(aics_flat, [nharms_max + 1] * len(include_components))

assert np.isfinite(aics).all(), "Infs/NaNs found in AICs!"

return aics, np.unravel_index(np.argmin(aics), aics.shape)


def compute_aic(
model: TimingModel,
toas: TOAs,
include_components: List[str],
nharms: np.ndarray,
chromatic_index: float,
):
"""Given a pre-fit model and TOAs, add the `[CM|DM]WaveX` components to the model,
fit the model to the TOAs, and compute the Akaike Information criterion using the
post-fit timing model.

Parameters
----------
model: `pint.models.timing_model.TimingModel`
The pre-fit timing model. Should not already contain `WaveX`/`DMWaveX` or `PLRedNoise`/`PLDMNoise`.
toas: `pint.toa.TOAs`
Input TOAs
include_components: list[str]
Component names; a non-empty sublist of ["WaveX", "DMWaveX", "CMWaveX"]
nharms: ndarray
The number of harmonics for each component
chromatic_index: float
Chromatic index for `CMWaveX`

Returns
-------
aic: float
The AIC value.
"""
setup_log(level="WARNING")

model1 = prepare_model(
model, toas.get_Tspan(), include_components, nharms, chromatic_index
)

from pint.fitter import Fitter

# Downhill fitters don't work well here.
# TODO: Investigate this.
ftr = Fitter.auto(toas, model1, downhill=False)
ftr.fit_toas(maxiter=10)

return akaike_information_criterion(ftr.model, toas)


def prepare_model(
model: TimingModel,
Tspan: u.Quantity,
include_components: List[str],
nharms: np.ndarray,
chromatic_index: float,
):
"""Given a pre-fit model and TOAs, add the `[CM|DM]WaveX` components to the model. Also sets parameters like
`PHOFF` and `DM` and `CM` derivatives as free.

Parameters
----------
model: `pint.models.timing_model.TimingModel`
The pre-fit timing model. Should not already contain `WaveX`/`DMWaveX` or `PLRedNoise`/`PLDMNoise`.
Tspan: u.Quantity
The observation time span
include_components: list[str]
Component names; a non-empty sublist of ["WaveX", "DMWaveX", "CMWaveX"]
nharms: ndarray
The number of harmonics for each component
chromatic_index: float
Chromatic index for `CMWaveX`

Returns
-------
model1: `pint.models.timing_model.TimingModel`
The modified timing model with the requested components added.
"""

model1 = deepcopy(model)

for comp in ["PLRedNoise", "PLDMNoise", "PLCMNoise"]:
if comp in model1.components:
model1.remove_component(comp)

if "PhaseOffset" not in model1.components:
model1.add_component(PhaseOffset())

model1.PHOFF.frozen = False

for jj, comp in enumerate(include_components):
if comp == "WaveX":
nharms_wx = nharms[jj]
if nharms_wx > 0:
wavex_setup(model1, Tspan, n_freqs=nharms_wx, freeze_params=False)
elif comp == "DMWaveX":
nharms_dwx = nharms[jj]
if nharms_dwx > 0:
if "DispersionDM" not in model1.components:
model1.add_component(DispersionDM())


model1["DM"].frozen = False

if model1["DM1"].quantity is None:
model1["DM1"].quantity = 0 * model1["DM1"].units
model1["DM1"].frozen = False

if "DM2" not in model1.params:
model1.components["DispersionDM"].add_param(
model["DM1"].new_param(2)
)
if model1["DM2"].quantity is None:
model1["DM2"].quantity = 0 * model1["DM2"].units
model1["DM2"].frozen = False

if model1["DMEPOCH"].quantity is None:
model1["DMEPOCH"].quantity = model1["PEPOCH"].quantity

dmwavex_setup(model1, Tspan, n_freqs=nharms_dwx, freeze_params=False)
elif comp == "CMWaveX":
nharms_cwx = nharms[jj]
if nharms_cwx > 0:
if "ChromaticCM" not in model1.components:
model1.add_component(ChromaticCM())
model1["TNCHROMIDX"].value = chromatic_index

model1["CM"].frozen = False
if model1["CM1"].quantity is None:
model1["CM1"].quantity = 0 * model1["CM1"].units
model1["CM1"].frozen = False

if "CM2" not in model1.params:
model1.components["ChromaticCM"].add_param(
model1["CM1"].new_param(2)
)
if model1["CM2"].quantity is None:
model1["CM2"].quantity = 0 * model1["CM2"].units
model1["CM2"].frozen = False

if model1["CMEPOCH"].quantity is None:
model1["CMEPOCH"].quantity = model1["PEPOCH"].quantity

cmwavex_setup(model1, Tspan, n_freqs=nharms_cwx, freeze_params=False)
else:
raise ValueError(f"Unsupported component {comp}.")


return model1
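The return convention of `find_optimal_nharms` — a full grid of AIC values plus the unraveled argmin multi-index — can be illustrated in isolation with a toy AIC surface (a sketch; the numbers below are made up, not a real fit):

```python
import numpy as np

# Toy illustration of the return convention used by find_optimal_nharms:
# a grid of AIC values indexed by the number of harmonics per component,
# and the multi-index of its minimum. Two components, nharms_max = 4.
nharms_max = 4
grid = np.arange((nharms_max + 1) ** 2, dtype=float).reshape(5, 5)
grid[2, 3] = -1.0  # pretend the best model uses (2, 3) harmonics

nharms_opt = np.unravel_index(np.argmin(grid), grid.shape)
d_aics = grid - grid.min()  # as in the updated example: AIC relative to the best

print(nharms_opt)  # (2, 3)
```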