Merge pull request #131 from andreicuceu/new-metals

New metal modelling
andreicuceu · Dec 2, 2023 · 3d0486c · 3d0486c
2 parents 9ba6e10 + 542ed82
commit 3d0486c
Show file tree

Hide file tree

Showing 21 changed files with 1,573 additions and 536 deletions.
diff --git a/README.rst b/README.rst
@@ -153,7 +153,10 @@ You can run Vega interactively using Ipython or a Jupyter notebook. This `exampl
 
 This process is much more powerful compared to running in terminal as you directly have access to all the output, model components and fit results. Additionally, Vega was built in a modular structure with the aim of the user being able to call each module independently. Therefore, you have access to much more functionality this way. The `documentation`_ is the best source on how to run these modules independently, but if you can't find something there, please open an issue and we will try to help you and also improve the documentation.
 
+Vega also has a FitResults module for analysing the results of a fit. You can find example usage of it in this `notebook`_.
+
 .. _example: https://github.com/andreicuceu/Vega/blob/master/examples/Vega_tutorial.ipynb
+.. _notebook: https://github.com/andreicuceu/Vega/blob/master/examples/FitResultsTutorial.ipynb
 
 Credits
 -------

diff --git a/bin/run_vega.py b/bin/run_vega.py
@@ -1,71 +1,7 @@
 #!/usr/bin/env python
-from vega import VegaInterface
-from vega.minimizer import Minimizer
-import matplotlib.pyplot as plt
 import argparse
 
-
-def run_vega(config_path):
-    # Initialize Vega
-    vega = VegaInterface(config_path)
-
-    # Check if we need to run over a Monte Carlo mock
-    if 'control' in vega.main_config:
-        run_montecarlo = vega.main_config['control'].getboolean('run_montecarlo', False)
-        if run_montecarlo and vega.mc_config is not None:
-            # Get the MC seed and forecast flag
-            seed = vega.main_config['control'].getfloat('mc_seed', 0)
-            forecast = vega.main_config['control'].getboolean('forecast', False)
-
-            # Create the mocks
-            vega.monte_carlo_sim(vega.mc_config['params'], seed=seed, forecast=forecast)
-
-            # Set to sample the MC params
-            sampling_params = vega.mc_config['sample']
-            vega.minimizer = Minimizer(vega.chi2, sampling_params)
-        elif run_montecarlo:
-            raise ValueError('You asked to run over a Monte Carlo simulation,'
-                             ' but no "[monte carlo]" section provided.')
-
-    # # run compute_model once to initialize all the caches
-    # _ = vega.compute_model(run_init=False)
-
-    # Run minimizer
-    vega.minimize()
-
-    # Run chi2scan
-    scan_results = None
-    if 'chi2 scan' in vega.main_config:
-        scan_results = vega.analysis.chi2_scan()
-
-    # Write output
-    if vega.minimizer is not None:
-        for par, val in vega.bestfit.values.items():
-            vega.params[par] = val
-    corr_funcs = vega.compute_model(vega.params, run_init=False)
-    vega.output.write_results(corr_funcs, vega.params, vega.minimizer, scan_results, vega.models)
-
-    plt.rc('axes', labelsize=16)
-    plt.rc('axes', titlesize=16)
-    plt.rc('legend', fontsize=16)
-    plt.rc('xtick', labelsize=14)
-    plt.rc('ytick', labelsize=14)
-
-    num_pars = len(vega.sample_params['limits'])
-    for name in vega.plots.data:
-        bestfit_legend = f'Correlation: {name}, Total '
-        bestfit_legend += r'$\chi^2_\mathrm{best}/(N_\mathrm{data}-N_\mathrm{pars})$'
-        bestfit_legend += f': {vega.chisq:.1f}/({vega.total_data_size}-{num_pars}) '
-        bestfit_legend += f'= {vega.reduced_chisq:.3f}, PTE={vega.p_value:.2f}'
-        if not vega.bestfit.fmin.is_valid:
-            bestfit_legend = 'Invalid fit! Disregard these results.'
-
-        vega.plots.plot_4wedges(models=[vega.bestfit_model[name]], corr_name=name, title=None,
-                                mu_bin_labels=True, no_font=True, model_colors=['r'], xlim=None)
-        vega.plots.fig.suptitle(bestfit_legend, fontsize=18, y=1.03)
-        vega.plots.fig.savefig(f'{vega.output.outfile[:-5]}_{name}.png', dpi='figure',
-                               bbox_inches='tight', facecolor='white')
-
+from vega import run_vega
 
 if __name__ == '__main__':
     pars = argparse.ArgumentParser(

diff --git a/bin/run_vega_mc_mpi.py b/bin/run_vega_mc_mpi.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+from vega import VegaInterface
+from mpi4py import MPI
+from astropy.io import fits
+import argparse
+import sys
+
+
+if __name__ == '__main__':
+    pars = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description='Run Vega in parallel.')
+
+    pars.add_argument('config', type=str, default=None, help='Config file')
+    args = pars.parse_args()
+
+    mpi_comm = MPI.COMM_WORLD
+    cpu_rank = mpi_comm.Get_rank()
+    num_cpus = mpi_comm.Get_size()
+
+    def print_func(message):
+        if cpu_rank == 0:
+            print(message)
+        sys.stdout.flush()
+        mpi_comm.barrier()
+
+    print_func('Initializing Vega')
+
+    # Initialize Vega and get the sampling parameters
+    vega = VegaInterface(args.config)
+    sampling_params = vega.sample_params['limits']
+
+    print_func('Finished initializing Vega')
+
+    # Check if we need the distortion
+    use_distortion = vega.main_config['control'].getboolean('use_distortion', True)
+    if not use_distortion:
+        for key, data in vega.data.items():
+            data._distortion_mat = None
+        test_model = vega.compute_model(vega.params, run_init=True)
+
+    # Run monte carlo
+    run_montecarlo = vega.main_config['control'].getboolean('run_montecarlo', False)
+    if not run_montecarlo or (vega.mc_config is None):
+        raise ValueError('Warning: You called "run_vega_mc_mpi.py" without asking'
+                         ' for monte carlo. Add "run_montecarlo = True" to the "[control]" section.')
+
+    # Activate monte carlo mode
+    vega.monte_carlo = True
+
+    # Check if we need to run a forecast
+    forecast = vega.main_config['control'].getboolean('forecast', False)
+    if forecast:
+        print('Warning: You called "run_vega_mc_mpi.py" with forecast=True.')
+        # raise ValueError('You asked to run a forecast. Use run_vega.py instead.')
+
+    # Get the MC seed and the number of mocks to run
+    seed = vega.main_config['control'].getint('mc_seed', 0)
+    num_mc_mocks = vega.main_config['control'].getint('num_mc_mocks', 1)
+    num_local_mc = num_mc_mocks // num_cpus
+    if num_mc_mocks % num_cpus != 0:
+        num_local_mc += 1
+
+    # Get fiducial model
+    use_measured_fiducial = vega.main_config['control'].getboolean('use_measured_fiducial', False)
+    if use_measured_fiducial:
+        fiducial_model = {}
+        for name in vega.corr_items.keys():
+            fiducial_path = vega.main_config['control'].get(f'mc_fiducial_{name}')
+            with fits.open(fiducial_path) as hdul:
+                fiducial_model[name] = hdul[1].data['DA']
+    else:
+        fiducial_model = vega.compute_model(vega.mc_config['params'], run_init=False)
+
+    # Run the mocks
+    local_seed = int(seed + cpu_rank)
+    vega.analysis.run_monte_carlo(
+        fiducial_model, num_mocks=num_local_mc, seed=local_seed, forecast=forecast)
+
+    # Write output
+    if num_cpus > 1:
+        vega.output.write_monte_carlo(cpu_rank)
+    else:
+        vega.output.write_monte_carlo()
diff --git a/bin/run_vega_mpi.py b/bin/run_vega_mpi.py
@@ -55,10 +55,6 @@ def print_func(message):
         raise ValueError('You asked to run over a Monte Carlo simulation,'
                          ' but no "[monte carlo]" section provided.')
 
-    # run compute_model once to initialize all the caches
-    # vega.set_fast_metals()
-    # _ = vega.compute_model(run_init=False)
-
     # Run sampler
     if vega.has_sampler:
         print_func('Running the sampler')

diff --git a/examples/FitResultsTutorial.ipynb b/examples/FitResultsTutorial.ipynb
diff --git a/requirements.txt b/requirements.txt
@@ -10,3 +10,4 @@ cachetools
 matplotlib
 gitpython
 getdist
+picca
diff --git a/setup.py b/setup.py
@@ -13,7 +13,7 @@
 
 scripts = glob.glob('bin/*')
 
-requirements = ['numpy', 'scipy', 'astropy', 'numba', 'iminuit', 'h5py', 'mcfit',
+requirements = ['numpy', 'scipy', 'astropy', 'numba', 'iminuit', 'h5py', 'mcfit', 'picca',
                 'setuptools', 'cachetools', 'matplotlib', 'gitpython', 'getdist']
 
 setup_requirements = ['pytest-runner', ]

diff --git a/tests/test_data.py b/tests/test_data.py
@@ -30,10 +30,12 @@ def test_data():
 
         assert np.allclose(data.data_vec, hdul[1].data['DA'])
 
-        rp_rt_grid = corr_item.rp_rt_grid
-        assert np.allclose(rp_rt_grid[0], hdul[1].data['RP'])
-        assert np.allclose(rp_rt_grid[1], hdul[1].data['RT'])
-        assert np.allclose(corr_item.z_grid, hdul[1].data['Z'])
+        rp_grid = corr_item.model_coordinates.rp_grid
+        rt_grid = corr_item.model_coordinates.rt_grid
+        z_grid = corr_item.model_coordinates.z_grid
+        assert np.allclose(rp_grid, hdul[1].data['RP'])
+        assert np.allclose(rt_grid, hdul[1].data['RT'])
+        assert np.allclose(z_grid, hdul[1].data['Z'])
 
         hdul.close()
 

diff --git a/vega/__init__.py b/vega/__init__.py
@@ -12,3 +12,4 @@
 from vega.plots.rt_wedges import RtWedge
 from vega.plots.plot import VegaPlots
 from vega.postprocess.fit_results import FitResults
+from vega.scripts.run_vega import run_vega
diff --git a/vega/analysis.py b/vega/analysis.py
@@ -1,5 +1,9 @@
+import sys
+
 import numpy as np
 
+from vega.minimizer import Minimizer
+
 
 class Analysis:
     """Vega analysis class.
@@ -11,23 +15,32 @@ class Analysis:
     - Run FastMC analysis
     """
 
-    def __init__(self, minimizer, main_config, mc_config=None):
+    def __init__(self, chi2_func, sampler_params, main_config, corr_items, data, mc_config=None):
         """
 
         Parameters
         ----------
-        minimizer : Minimizer
-            Minimizer object initialized from the same vega instance
+        chi2_func : function
+            Chi2 function to minimize
+        sampler_params : dict
+            Dictionary with the sampling parameters
         main_config : ConfigParser
             Main config file
+        corr_items : dict
+            Dictionary with the correlation items
+        data : dict
+            Dictionary with the data
         mc_config : dict, optional
             Monte Carlo config with the model and sample parameters,
             by default None
         """
         self.config = main_config
-        self.minimizer = minimizer
+        self._chi2_func = chi2_func
+        self._scan_minimizer = Minimizer(chi2_func, sampler_params)
+        self._corr_items = corr_items
+        self._data = data
         self.mc_config = mc_config
-        pass
+        self.has_monte_carlo = False
 
     def chi2_scan(self):
         """Compute a chi^2 scan over one or two parameters.
@@ -75,9 +88,9 @@ def chi2_scan(self):
                 sample_params['values'][par1] = value
 
                 # Minimize and get bestfit values
-                self.minimizer.minimize(sample_params)
-                result = self.minimizer.values
-                result['fval'] = self.minimizer.fmin.fval
+                self._scan_minimizer.minimize(sample_params)
+                result = self._scan_minimizer.values
+                result['fval'] = self._scan_minimizer.fmin.fval
                 self.scan_results.append(result)
 
                 print('INFO: finished chi2scan iteration {} of {}'.format(
@@ -91,13 +104,119 @@ def chi2_scan(self):
                     sample_params['values'][par2] = value_2
 
                     # Minimize and get bestfit values
-                    self.minimizer.minimize(sample_params)
-                    result = self.minimizer.values
-                    result['fval'] = self.minimizer.fmin.fval
+                    self._scan_minimizer.minimize(sample_params)
+                    result = self._scan_minimizer.values
+                    result['fval'] = self._scan_minimizer.fmin.fval
                     self.scan_results.append(result)
 
                     print('INFO: finished chi2scan iteration {} of {}'.format(
                         i * len(self.grids[par2]) + j + 1,
                         len(self.grids[par1]) * len(self.grids[par2])))
 
         return self.scan_results
+
+    def create_monte_carlo_sim(self, fiducial_model, seed=None, scale=None, forecast=False):
+        """Compute Monte Carlo simulations for each Correlation item.
+
+        Parameters
+        ----------
+        fiducial_model : dict
+            Fiducial model for the correlation functions
+        seed : int, optional
+            Seed for the random number generator, by default None
+        scale : float/dict, optional
+            Scaling for the covariance, by default None
+        forecast : boolean, optional
+            Forecast option. If true, we don't add noise to the mock,
+            by default False
+
+        Returns
+        -------
+        dict
+            Dictionary with MC mocks for each item
+        """
+        mocks = {}
+        for name in self._corr_items:
+            # Get scale
+            if scale is None:
+                item_scale = self._corr_items[name].cov_rescale
+            elif type(scale) is float or type(scale) is int:
+                item_scale = scale
+            elif type(scale) is dict and name in scale:
+                item_scale = scale[name]
+            else:
+                item_scale = 1.
+
+            # Create the mock
+            mocks[name] = self._data[name].create_monte_carlo(
+                fiducial_model[name], item_scale, seed, forecast)
+
+        return mocks
+
+    def run_monte_carlo(self, fiducial_model, num_mocks=1, seed=0, scale=None, forecast=False):
+        """Run Monte Carlo simulations
+
+        Parameters
+        ----------
+        fiducial_model : dict
+            Fiducial model for the correlation functions
+        num_mocks : int, optional
+            Number of mocks to run, by default 1
+        seed : int, optional
+            Starting seed, by default 0
+        scale : float/dict, optional
+            Scaling for the covariance, by default None
+        """
+        assert self.mc_config is not None, 'No Monte Carlo config provided'
+
+        np.random.seed(seed)
+        sample_params = self.mc_config['sample']
+        minimizer = Minimizer(self._chi2_func, sample_params)
+
+        self.mc_bestfits = {}
+        self.mc_covariances = []
+        self.mc_chisq = []
+        self.mc_valid_minima = []
+        self.mc_valid_hesse = []
+        self.mc_mocks = {}
+        self.mc_failed_mask = []
+
+        for i in range(num_mocks):
+            print(f'INFO: Running Monte Carlo realization {i}')
+            sys.stdout.flush()
+
+            # Create the mocks
+            mocks = self.create_monte_carlo_sim(
+                fiducial_model, seed=None, scale=scale, forecast=forecast)
+
+            for name, cf_mock in mocks.items():
+                if name not in self.mc_mocks:
+                    self.mc_mocks[name] = []
+                self.mc_mocks[name].append(cf_mock)
+
+            try:
+                # Run minimizer
+                minimizer.minimize()
+                self.mc_failed_mask.append(False)
+            except ValueError:
+                print('WARNING: Minimizer failed for mock {}'.format(i))
+                self.mc_failed_mask.append(True)
+                self.mc_chisq.append(np.nan)
+                self.mc_valid_minima.append(False)
+                self.mc_valid_hesse.append(False)
+                continue
+
+            for param, value in minimizer.values.items():
+                if param not in self.mc_bestfits:
+                    self.mc_bestfits[param] = []
+                self.mc_bestfits[param].append([value, minimizer.errors[param]])
+
+            self.mc_covariances.append(minimizer.covariance)
+            self.mc_chisq.append(minimizer.fmin.fval)
+            self.mc_valid_minima.append(minimizer.fmin.is_valid)
+            self.mc_valid_hesse.append(not minimizer.fmin.hesse_failed)
+
+        for param in self.mc_bestfits.keys():
+            self.mc_bestfits[param] = np.array(self.mc_bestfits[param])
+
+        self.has_monte_carlo = True
-Original file line number
+Diff line change
@@ Expand Up / @@ -10,3 +10,4 @@ cachetools @@
     matplotlib
     gitpython
     getdist
+    picca