diff --git a/.gitignore b/.gitignore
index d486fd7f..d522b397 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,12 @@ __pycache__
 .github/workflows/.python-tests.yml.swp
 docs/build/
 docs/site/
+data/validation_figures/tests/
+data/cosmopower_models/*.dat
+data/cosmopower_models/*.txt
+data/cosmopower_models/*.npz
+data/DESI_cov/
+
+*.DS_Store
+build/
+data/NNmodels/testing_models/
\ No newline at end of file
diff --git a/config_files/config_predict.yaml b/config_files/config_predict.yaml
index 0ad223d3..ecd5a35b 100644
--- a/config_files/config_predict.yaml
+++ b/config_files/config_predict.yaml
@@ -3,8 +3,9 @@
## Emulator type: you can choose between a NN emulator or a GP emulator.
emulator_type: "NN"

-## Emulator label: if you want to train a predefined model, you can use the emulator_label. Other possible labels are in the README.md file.
-emulator_label: "Nyx_alphap_cov"
+## Emulator label: if you want to use a predefined model, you can use the emulator_label. Other possible labels are in the docs.
+emulator_label: "Cabayol23+"
+training_set: null

## If the emulator needs to be trained without a specific simulation.
drop_sim: null
@@ -25,3 +26,18 @@ average_over_z: false
## Where to save the trained model from the project root directory
save_plot_path: "data/validation_figures/tests/test_p1d_err.png"
save_predictions_path: null
+
+hyperparameters:
+  kmax_Mpc: 4
+  ndeg: 5
+  nepochs: 100
+  step_size: 75
+  drop_z: null
+  weighted_emulator: true
+  nhidden: 5
+  max_neurons: 50
+  seed: 32
+  lr0: 1e-3
+  batch_size: 100
+  weight_decay: 1e-4
+  z_max: 10
diff --git a/config_files/config_train.yaml b/config_files/config_train.yaml
index ef16c77a..d57548b3 100644
--- a/config_files/config_train.yaml
+++ b/config_files/config_train.yaml
@@ -4,22 +4,24 @@ emulator_type: "NN"

## Emulator label: if you want to train a predefined model, you can use the emulator_label. Other possible labels are in the README.md file.
-emulator_label: "Cabayol23+"
+emulator_label: null #"Cabayol23+"

## For the data, you can choose between loading an archive or a training set. You can only provide one of them.
### If you choose to load an archive, you can choose between Nyx or Gadget and a version.
archive:
-  file: Gadget # Can be either "Nyx" or "Gadget"
+  file: "Gadget" # Can be either "Nyx" or "Gadget"
  version: "Cabayol23" #nyx version or Gadget postprocessing version

training_set: null #Predefined training sets. You can find the options in the README.md file.
+drop_sim: null
+
## If no emulator_label is provided, you need to provide the hyperparameters for training a new model and the emulator parameters
emulator_params: ["Delta2_p", "n_p", "alpha_p", "sigT_Mpc", "gamma", "kF_Mpc"]

hyperparameters:
-  kmax_Mpc: 4
+  kmax_Mpc: 4.0
  ndeg: 5
-  nepochs: 100
+  nepochs: 1
  step_size: 75
  drop_sim: null
  drop_z: null
@@ -27,10 +29,10 @@ hyperparameters:
  nhidden: 5
  max_neurons: 50
  seed: 32
-  lr0: 1e-3
+  lr0: 0.001
  batch_size: 100
-  weight_decay: 1e-4
-  z_max: 10
+  weight_decay: 0.0001
+  z_max: 10.0

# Where to save the trained model from the project root directory
-save_path: "data/NNmodels/testing_models/test.pt"
+save_path: "data/NNmodels/testing_models/test.pt"
\ No newline at end of file
diff --git a/data/cosmopower_models/Pk_cp_NN_nrun.pkl b/data/cosmopower_models/Pk_cp_NN_nrun.pkl
new file mode 100644
index 00000000..03a9d08b
Binary files /dev/null and b/data/cosmopower_models/Pk_cp_NN_nrun.pkl differ
diff --git a/data/cosmopower_models/Pk_cp_NN_sumnu.pkl b/data/cosmopower_models/Pk_cp_NN_sumnu.pkl
index 2d4d56f9..921e39de 100644
Binary files a/data/cosmopower_models/Pk_cp_NN_sumnu.pkl and b/data/cosmopower_models/Pk_cp_NN_sumnu.pkl differ
diff --git a/docs/docs/developers/CreateNewEmulator.md b/docs/docs/developers/CreateNewEmulator.md
index 9c277d4c..7ff6d4b1 100644
--- a/docs/docs/developers/CreateNewEmulator.md
+++ b/docs/docs/developers/CreateNewEmulator.md
@@ -90,11 +90,14 @@ emulator = emulator_manager(emulator_label="New_Emulator"
```
In the first case, since you are specifying the `model_path`, there is no naming convention for the model file. However, in the second case, the saved models must be stored in the following way:
+
- The folder must be `data/NNmodels/` from the root of the repository.
- For a specific emulator label, you need to create a new folder, e.g. `New_Emulator`.
- For the emulator using all training simulations, the model file is named `New_Emulator.pt`.
- For the emulator using the training set excluding a given simulation, the model file is named `New_Emulator_drop_sim_{simulation suite}_{simulation index}.pt`. For example, if you exclude the 10th simulation from the mpg training set, the model file is named `New_Emulator_drop_sim_mpg_10.pt`.

+**For this reason, if the emulator label is provided, the model will be saved following this naming convention even if a different model path is specified.**
+
The emulator manager will automatically find the correct model file for the given emulator label. To set this up, you need to add the new emulator label to the `folder` dictionary in the `emulator_manager.py` file.
```python
folder = {
diff --git a/docs/docs/developers/trainingOptions.md b/docs/docs/developers/UnderDevelopment.md
similarity index 95%
rename from docs/docs/developers/trainingOptions.md
rename to docs/docs/developers/UnderDevelopment.md
index 6f6b9214..1df3ca15 100644
--- a/docs/docs/developers/trainingOptions.md
+++ b/docs/docs/developers/UnderDevelopment.md
@@ -1,4 +1,4 @@
-# TRAINING OPTIONS
+# UNDER DEVELOPMENT SOLUTIONS

There are several features that can be used to customize the training of the emulators. This tutorial will guide you through the process of training emulators with different options.

@@ -10,9 +10,9 @@ The emulator supports weighting the training simulations with a covariance matri
To train an emulator with a covariance matrix, you need to provide a covariance matrix for the training simulations. Currently, the emulator only supports a diagonal covariance matrix.
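+To use this feature, the relative errors must be stored as a JSON file, as described below. For reference, here is a compact sketch of the whole conversion (the array layout and redshift grid are assumptions to adapt):
+
+```python
+import json
+import numpy as np
+
+# Assumed layout: one row of relative errors per redshift, one column per k bin
+rel_err = np.load("data/DESI_cov/rel_err_DESI_Y1.npy")
+zs = np.arange(2.2, 4.8, 0.2)  # illustrative redshift grid
+
+rel_err_dict = {f"{z:.2f}": rel_err[i].tolist() for i, z in enumerate(zs)}
+with open("data/DESI_cov/rel_err_DESI_Y1.json", "w") as f:
+    json.dump(rel_err_dict, f)
+```
+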
It is important that the covariance matrix is given in the __k__ binning of the training simulations.
-The function '_load_DESIY1_err' in the `nn_emulator.py` file loads a covariance matrix. The covariance must be a json file with the relative error as a function of __z__ for each __k__ bin.
+The function `_load_DESIY1_err` in the `nn_emulator.py` file loads a covariance matrix. The covariance must be a JSON file with the relative error as a function of __z__ for each __k__ bin.

-From the relative error file in 'data/DESI_cov/rel_err_DESI_Y1.npy', we can generate the json file with the following steps:
+From the relative error file in `data/DESI_cov/rel_err_DESI_Y1.npy`, we can generate the JSON file with the following steps:

First we load the data from the relative error file:
diff --git a/docs/docs/developers/documentation.md b/docs/docs/developers/documentation.md
index 40b8c145..0a43cdfe 100644
--- a/docs/docs/developers/documentation.md
+++ b/docs/docs/developers/documentation.md
@@ -17,9 +17,9 @@ The `gh-pages` branch is automatically updated when a PR is merged into `main`.
In order to write documentation, you can use the following structure:

- `docs/docs/developers`: Documentation for developers
-- `docs/docs/`: Documentation for users
+- `docs/docs/users`: Documentation for users

-You can add new pages by adding a new `.md` file to the `docs/docs/` folder. Remember to add the new page to the `mkdocs.yml` file so that it is included in the documentation. The new page will automatically be added to the navigation menu.
-
-To have a cleaner structure, add the new page to the corresponding `index.md` file.
+To add a new page, create a new `.md` file in the `docs/docs/` folder.
+To define where the page appears and how the documentation is structured, add the new page to `mkdocs.yml`. The new page will automatically be added to the navigation menu.
+To have a cleaner structure, add the new page to the corresponding `index.md` file. The documentation is structured with an index file for each section.
diff --git a/docs/docs/developers/index.md b/docs/docs/developers/index.md
index b47b0721..fe6d52b4 100644
--- a/docs/docs/developers/index.md
+++ b/docs/docs/developers/index.md
@@ -5,7 +5,7 @@ Welcome to the LaCE developer documentation! This section contains information f
## Contents

- [Creating New Emulators](CreateNewEmulator.md): Learn how to create and add new emulator types to LaCE
-- [Training Options](trainingOptions.md): Implemented solutions to improve the emulators performance
+- [Under Development Solutions](UnderDevelopment.md): Solutions under development to improve the emulators' performance
- [Code Testing](advancedTesting.md): Information to maintain and extend the automated testing
- [Documentation](documentation.md): How to write and maintain documentation
diff --git a/docs/docs/users/Emulators_trainingSets.md b/docs/docs/users/Emulators_trainingSets.md
new file mode 100644
index 00000000..bb2dfbc8
--- /dev/null
+++ b/docs/docs/users/Emulators_trainingSets.md
@@ -0,0 +1,56 @@
+# PREDEFINED EMULATORS AND TRAINING SETS
+
+## PREDEFINED EMULATORS
+LaCE provides a set of predefined emulators that have been validated. These emulators are:
+
+- Neural network emulators:
+    - Gadget emulators:
+        - Cabayol23: Neural network emulating the optimal P1D of Gadget simulations by fitting coefficients to a 5th degree polynomial. It goes to scales of 4 Mpc^{-1} and z<=4.5.
+        - Cabayol23+: Neural network emulating the optimal P1D of Gadget simulations by fitting coefficients to a 5th degree polynomial. It goes to scales of 4 Mpc^{-1} and z<=4.5. Updated version compared to the Cabayol+23 paper.
+        - Cabayol23_extended: Neural network emulating the optimal P1D of Gadget simulations by fitting coefficients to a 7th degree polynomial. It goes to scales of 8 Mpc^{-1} and z<=4.5.
+        - Cabayol23+_extended: Neural network emulating the optimal P1D of Gadget simulations by fitting coefficients to a 7th degree polynomial. It goes to scales of 8 Mpc^{-1} and z<=4.5. Updated version compared to the Cabayol+23 paper.
+    - Nyx emulators:
+        - Nyx_v0: Neural network emulating the optimal P1D of Nyx simulations by fitting coefficients to a 6th degree polynomial. It goes to scales of 4 Mpc^{-1} and z<=4.5.
+        - Nyx_v0_extended: Neural network emulating the optimal P1D of Nyx simulations by fitting coefficients to a 6th degree polynomial. It goes to scales of 8 Mpc^{-1} and z<=4.5.
+        - Nyx_alphap: Neural network emulating the optimal P1D of Nyx simulations by fitting coefficients to a 6th degree polynomial. It goes to scales of 4 Mpc^{-1} and z<=4.5.
+        - Nyx_alphap_extended: Neural network emulating the optimal P1D of Nyx simulations by fitting coefficients to a 6th degree polynomial. It goes to scales of 8 Mpc^{-1} and z<=4.5.
+        - Nyx_alphap_cov: Neural network under testing for the Nyx_alphap emulator.
+
+- Gaussian Process emulators:
+    - Gadget emulators:
+        - "Pedersen21": Gaussian process emulating the optimal P1D of Gadget simulations. Pedersen+21 paper.
+        - "Pedersen23": Updated version of the Pedersen21 emulator. Pedersen+23 paper.
+        - "Pedersen21_ext": Extended version of the Pedersen21 emulator.
+        - "Pedersen21_ext8": Extended version of the Pedersen21 emulator up to k=8 Mpc^-1.
+        - "Pedersen23_ext": Extended version of the Pedersen23 emulator.
+        - "Pedersen23_ext8": Extended version of the Pedersen23 emulator up to k=8 Mpc^-1.
+
+## PREDEFINED TRAINING SETS
+
+Similarly, LaCE provides a set of predefined training sets that have been used to train the emulators. Each training set corresponds to a simulation suite, a post-processing, and whether mean-flux rescalings are included. The training sets are:
+
+- "Pedersen21": Training set used in the [Pedersen+21 paper](https://arxiv.org/abs/2103.05195). Gadget simulations without mean-flux rescalings.
+- "Cabayol23": Training set used in the [Cabayol+23 paper](https://arxiv.org/abs/2303.05195). Gadget simulations with mean-flux rescalings, measuring the P1D along the three principal axes of the simulation box.
+- "Nyx_Oct2023": Training set using the Nyx version from October 2023.
+- "Nyx_Jul2024": Training set using the Nyx version from July 2024.
+
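+For example, a predefined emulator can be loaded together with one of these training sets via `set_emulator` (a sketch following the [predictions guide](./emulatorPredictions.md); the import path is an assumption):
+
+```python
+from lace.emulator.emulator_manager import set_emulator
+
+# Load a predefined NN emulator with its default training set
+emulator = set_emulator(
+    emulator_label="Cabayol23+",
+    training_set="Cabayol23",
+)
+```
+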
+## CONNECTION BETWEEN PREDEFINED EMULATORS AND TRAINING SETS
+The following table shows the default training set for each predefined emulator.
+
+| Emulator | Training Set | Simulation | Type | Description |
+|----------|--------------|------------|------|-------------|
+| Cabayol23 | Cabayol23 | Gadget | NN | Neural network emulator trained on Gadget simulations with mean flux rescaling |
+| Cabayol23+ | Cabayol23 | Gadget | NN | Updated version of Cabayol23 emulator |
+| Cabayol23_extended | Cabayol23 | Gadget | NN | Extended version of Cabayol23 emulator (k up to 8 Mpc^-1) |
+| Cabayol23+_extended | Cabayol23 | Gadget | NN | Extended version of Cabayol23+ emulator (k up to 8 Mpc^-1) |
+| Nyx_v0 | Nyx_Oct2023 | Nyx | NN | Neural network emulator trained on Nyx simulations |
+| Nyx_v0_extended | Nyx_Oct2023 | Nyx | NN | Extended version of Nyx_v0 emulator (k up to 8 Mpc^-1) |
+| Nyx_alphap | Nyx_Oct2023 | Nyx | NN | Neural network emulator trained on updated Nyx simulations |
+| Nyx_alphap_extended | Nyx_Oct2023 | Nyx | NN | Extended version of Nyx_alphap emulator (k up to 8 Mpc^-1) |
+| Nyx_alphap_cov | Nyx_Jul2024 | Nyx | NN | Testing version of Nyx_alphap emulator |
+| Pedersen21 | Pedersen21 | Gadget | GP | GP emulator trained on Gadget simulations without mean flux rescaling |
+| Pedersen23 | Pedersen21 | Gadget | GP | Updated version of Pedersen21 GP emulator |
+| Pedersen21_ext | Pedersen21 | Gadget | GP | Extended version of Pedersen21 GP emulator |
+| Pedersen21_ext8 | Pedersen21 | Gadget | GP | Extended version of Pedersen21 GP emulator (k up to 8 Mpc^-1) |
+| Pedersen23_ext | Pedersen21 | Gadget | GP | Extended version of Pedersen23 GP emulator |
+| Pedersen23_ext8 | Pedersen21 | Gadget | GP | Extended version of Pedersen23 GP emulator (k up to 8 Mpc^-1) |
\ No newline at end of file
diff --git a/docs/docs/users/Simulations_list.md b/docs/docs/users/Simulations_list.md
new file mode 100644
index 00000000..8c8d4e32
--- /dev/null
+++ b/docs/docs/users/Simulations_list.md
@@ -0,0 +1,27 @@
+# AVAILABLE SIMULATIONS
+
+This section contains the list of simulations available in the archives.
+
+## Gadget simulations
+The Gadget simulations contain 30 training simulations, named "mpg_{x}", where x is an integer from 0 to 29.
+Additionally, there are 7 test simulations:
+
+- "mpg_central": The simulation parameters are at the center of the parameter space.
+- "mpg_neutrinos": The simulation contains massive neutrinos.
+- "mpg_running": The simulation has a non-zero running of the spectral index.
+- "mpg_growth": The growth factor of the simulation is different from that of the training set.
+- "mpg_reio": The reionization history is different from that of the training set.
+- "mpg_seed": Identical to the central simulation with different initial conditions. Meant to test the impact of cosmic variance.
+- "mpg_curved": The simulation has non-zero curvature, unlike the training simulations.
+
+Information about the simulation parameters can be found in [Pedersen+21](https://arxiv.org/abs/2103.05195).
+
+## Nyx simulations
+
+The Nyx simulation suite contains 18 training simulations, named "nyx_{x}", where x is an integer from 0 to 17. Additionally, there are 2 test simulations:
+
+- "nyx_central": The simulation parameters are at the center of the parameter space.
+- "nyx_seed": Identical to the central simulation with different initial conditions. Meant to test the impact of cosmic variance.
+
+
+Information about the simulation parameters can be found in [TBD](..).
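+
+A short sketch of how these labels are used in practice (the import path is inferred from the [archive guide](./archive.md); the Cabayol23 post-processing is just an example):
+
+```python
+from lace.archive.gadget_archive import GadgetArchive
+
+archive = GadgetArchive(postproc="Cabayol23")
+# Any of the test labels above works here, e.g. the seed simulation:
+test_data = archive.get_testing_data(sim_label="mpg_seed")
+```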
\ No newline at end of file
diff --git a/docs/docs/users/archive.md b/docs/docs/users/archive.md
index d23a0c00..24b3a4ad 100644
--- a/docs/docs/users/archive.md
+++ b/docs/docs/users/archive.md
@@ -1,11 +1,12 @@
-# ARCHIVE
+# ARCHIVES

The LaCE emulators support two types of archives:
+
- Gadget archive: Contains the P1D of Gadget simulations described in [Pedersen+21](https://arxiv.org/abs/2011.15127).
- Nyx archive: Contains the P1D of Nyx simulations described in (In prep.)

## Loading a Gadget Archive
-The Gadget archive contains 30 training simulations and seven test simulations. Each simulation contains 11 snapshotts covering redshifts from 2 to 4.5 in steps of 0.25
+The Gadget archive contains 30 training simulations and 7 test simulations. Each simulation contains 11 snapshots covering redshifts from 2 to 4.5 in steps of 0.25.

To load a Gadget archive, you can use the `GadgetArchive` class:

```python
@@ -17,16 +18,23 @@ The P1D from the Gadget archive with the Pedersen+21 post-processing can be acce
```python
archive = GadgetArchive(postproc='Pedersen21')
```
-This post-processing measures the P1D along one of the three box axes and contains three mean-flux rescaling per snapshot.
+This post-processing measures the P1D along one of the three box axes and does not contain mean-flux rescalings.
+
+On the other hand, the P1D from the Gadget archive with the Cabayol+23 post-processing can be accessed as follows:
+```python
+archive = GadgetArchive(postproc='Cabayol23')
+```
+This post-processing measures the P1D along the three principal axes of the box and contains five mean-flux rescalings per snapshot.

## Loading a Nyx Archive
To load the Nyx archive, you can use the `NyxArchive` class:
+
```python
from lace.archive.nyx_archive import NyxArchive
```
-Since the Nyx archive is not publicly available yet, you need to set the `NYX_PATH` environment variable to the path to the Nyx files on your local computer.
+Since the Nyx archive is not publicly available yet, **you need to set the `NYX_PATH` environment variable to the path to the Nyx files** on your local computer (or the cluster where you are running the code).

-There are two versions of the Nyx archive available: `Oct2023` and `Jul2024`. The first one contains 17 training simulations and 4 test simulations, and the second one contains 17 training simulations and 3 test simulations. Each simulation contains 14 snapshotts covering redshifts from 2.2 to 4.8 in steps of 0.2 plus additional snapshotts at higher redshifts for some of the simulations. In both cases, it is not recommended to use simulation number 14.
+There are two versions of the Nyx archive available: Oct2023 and Jul2024. The first one contains 17 training simulations and 4 test simulations, and the second one contains 17 training simulations and 3 test simulations (the simulations are described in more detail [here](./Simulations_list.md)). Each simulation contains 14 snapshots covering redshifts from 2.2 to 4.8 in steps of 0.2, plus additional snapshots at higher redshifts for some of the simulations. In both cases, it is not recommended to use simulation number 14.

The P1D from the Nyx archive with the Oct2023 version can be accessed as follows:
```python
@@ -50,17 +58,4 @@ For the test set, the equivalent function is:
```python
archive.get_testing_data(sim_label='mpg_central')
```
-where you can replace `sim_label` by any of the test simulation labels available in the archive. This will only load the fiducial snapshots without mean flux rescalings.
-
-## Key keywords in the archive
-The archive contains many keywords that can be used to access specific data. Here is a non-exhaustive list of the most important ones:
-
-- `sim_label`: The label of the simulation. It can be any of the test simulation labels available in the archive.
-- `z`: The snapshot redshift.
-- `ind_axis`: Indicates the axis along which the P1D is measured. It can be 0, 1, 2 or 'average'
-- `ind_rescaling`: The index of mean-flux rescaling of the P1D.
-- `val_scaling`: The value of mean-flux rescaling of the P1D.
-- `cosmo_params`: A dictionary containing the cosmological parameters of the simulation.
-- `p1d_Mpc`: The P1D in Mpc.
-- `k_Mpc`: The wavevector in Mpc.
-
\ No newline at end of file
+where you can replace `sim_label` by any of the test simulation labels available in the archive (see [here](./Simulations_list.md)). This will only load the fiducial snapshots without mean flux rescalings.
\ No newline at end of file
diff --git a/docs/docs/users/compressedParameters.md b/docs/docs/users/compressedParameters.md
index fac5350b..833038ab 100644
--- a/docs/docs/users/compressedParameters.md
+++ b/docs/docs/users/compressedParameters.md
@@ -9,9 +9,9 @@ To estimate the compressed parameters with cosmopower, one needs to follow the [
3. [Training your own cosmopower emulator](#training-your-own-cosmopower-emulator)

## Making predictions with cosmopower
-To see examples of how to make predictions with cosmopower, one can follow the tutorial notebook in the notebooks folder.
+To see examples of how to make predictions with cosmopower, one can follow the tutorial notebook in the notebooks folder (Tutorial_compressed_parameters.ipynb).

-Cosmopower emulates the linear matter power spectrum. To use the emulator, one needs to provide a set of cosmological parameters that depend on the comsological parameters used to train the emulator.
+Cosmopower emulates the linear matter power spectrum. To use the emulator, one needs to provide a set of cosmological parameters that depend on the cosmological parameters used to train the emulator.

LaCE contains several trained cosmopower emulators, and you can also train your own emulator as described in [Training your own cosmopower emulator](#training-your-own-cosmopower-emulator). To load an emulator, you need to do:

```python
cp_nn = cp.cosmopower_NN(restore=True,
                         restore_filename=emu_path)
```
-There are two trained cosmopower emulators that you can find in the data folder:
+There are three trained cosmopower emulators that you can find in the data folder:
- `Pk_cp_NN.pkl`: $\Lambda$CDM emulator.
- `Pk_cp_NN_sumnu.pkl`: $\Lambda$ CDM + $\sum m_\nu$ emulator.
+- `Pk_cp_NN_nrun.pkl`: $\Lambda$CDM + $\sum m_\nu$ + $n_{\rm run}$ emulator.

When providing the path to the emulator, you need to give the path to the `Pk_cp_NN.pkl` or `Pk_cp_NN_sumnu.pkl` file __without__ the `.pkl` extension.
@@ -30,7 +31,15 @@ To know the parameters that the emulator uses, you can do:
```python
print(cp_nn.parameters())
```
-And then to make predictions, you need to provide a dictionary with the parameters that the emulator uses.
+And to access the k values, you can do:
+
+```python
+print(cp_nn.modes)
+```
+
+### Making predictions
+
+To make predictions with cosmopower, you need to provide a dictionary with the parameters that the emulator uses.
# Define the cosmology dictionary
```python
@@ -39,21 +48,20 @@ cosmo = {'H0': [cosmo_params["H0"]],
         'mnu': [cosmo_params["mnu"]],
         'Omega_m': [(cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
         'Omega_Lambda': [1- (cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
-         'omega_cdm': [cosmo_params["omch2"]],
-         'omega_b': [cosmo_params["ombh2"]],
+         'omch2': [cosmo_params["omch2"]],
+         'ombh2': [cosmo_params["ombh2"]],
         'As': [cosmo_params["As"]],
-         'ns': [cosmo_params["ns"]]}
+         'ns': [cosmo_params["ns"]],
+         'nrun': [cosmo_params["nrun"]]}
```

-And call the emulator with:
+Some of these parameters are not used by the emulator itself, but they are needed for the conversion to km/s. When called, the emulator will report any parameters it does not use.
+
+To call the emulator:
```python
Pk_Mpc = cp_nn.ten_to_predictions_np(cosmo)
```

-To access the __k__ values, you can do:
-```python
-k_Mpc = cp_nn.modes
-```

Then to convert to km/s, you can do:
```python
@@ -66,14 +74,9 @@ k_kms, Pk_kms = linPCosmologyCosmopower.convert_to_kms(cosmo,

## Estimating compressed parameters
### For individual cosmologies
-Once you have the predictions, you can estimate the compressed parameters with:
-```python
-# call the class.
-linP_Cosmology_Cosmopower = linPCosmologyCosmopower()
-```
-Fit the polynomial to the power spectrum:
+Once you have the predictions, you can estimate the compressed parameters by fitting a polynomial to the power spectrum:
```python
-linP_kms = linP_Cosmology_Cosmopower.fit_polynomial(
+linP_kms = linPCosmologyCosmopower.fit_polynomial(
    xmin = kmin_kms / kp_kms,
    xmax= kmax_kms / kp_kms,
    x = k_kms / kp_kms,
@@ -81,11 +84,10 @@ linP_kms = linP_Cosmology_Cosmopower.fit_polynomial(
    deg=2
    )
```
-
-And then estimate the star parameters:
+and then estimate the star parameters:

```python
-starparams_CP = linP_Cosmology_Cosmopower.get_star_params(linP_kms = linP_kms,
+starparams_CP = linPCosmologyCosmopower.get_star_params(linP_kms = linP_kms,
                                                          kp_kms = kp_kms)
```

@@ -94,7 +96,7 @@ where `linP_kms` are the linear matter power spectrum predictions in km/s and `k
### For a cosmology chain
To estimate the parameters for a cosmology chain, you first need to call the class:
```python
-fitter_compressed_params = linPCosmologyCosmopower()
+fitter_compressed_params = linPCosmologyCosmopower(cosmopower_model = cosmopower_model)
```

Then check the expected parameters of the cosmopower model:
@@ -102,39 +104,47 @@
print(fitter_compressed_params.cp_emulator.cp_emulator.parameters())
```

-And create a dictionary with the naming convertion between the parameters in your chain and the parameters used by the cosmopower model. We need additional parameters to convert to km/s.
+And create a dictionary with the naming convention between the parameters in your chain and the parameters used by the cosmopower model. We need additional parameters to convert to km/s. The dictionary keys are the parameters used by the cosmopower model and the values are the parameters in your chain.
```python
param_mapping = {
    'h': 'h',
    'm_ncdm': 'm_ncdm',
-    'omega_cdm': 'omega_cdm',
+    'omch2': 'omega_cdm',
    'Omega_m': 'Omega_m',
    'Omega_Lambda': 'Omega_Lambda',
    'ln_A_s_1e10': 'ln_A_s_1e10',
-    'n_s': 'n_s'
+    'ns': 'n_s',
+    'nrun': 'nrun'
}
+
```

-And then call the function to estimate the parameters:
+To estimate the parameters, you can do:
```python
linP_cosmology_results = fitter_compressed_params.fit_linP_cosmology(chains_df = df,
                                                                     param_mapping = param_mapping)
```

-By default in loads the $\Lambda$CDM + \sum m_\nu emulator. If you want to use another emualtor, specify the name in the class argument 'cosmopower_model' and save the model in the data/cosmopower_models folder. The model needs to be a `.pkl` file and the it must be called without the `.pkl` extension.
+By default, it loads the $\Lambda$CDM + $\sum m_\nu$ + $n_{\rm run}$ emulator. If you want to use another emulator, specify the name in the class argument `cosmopower_model` and save the model in the `data/cosmopower_models` folder. The model must be a `.pkl` file, and it must be referenced without the `.pkl` extension.

## Training your own cosmopower emulator
To train your own cosmopower emulator, you can follow the tutorial notebook.
+
First, one needs to create a LH sampler with the parameters of interest. For example, to create a LH sampler with the $\Lambda$CDM + $\sum m_\nu$ emulator, you can do:

```python
-dict_params_ranges = {
-    'ombh2': [0.015, 0.03],
-    'omch2': [0.05, 0.3],
-    'H0': [60, 80],
-    'ns': [0.8, 1.2],
-    'As': [5e-10, 4e-9],
-    'mnu': [0, 2],}
+# Define the parameters used by the emulator and their ranges
+params = ["ombh2", "omch2", "H0", "ns", "As", "mnu", "nrun"]
+ranges = [
+    [0.015, 0.03],
+    [0.05, 0.16],
+    [60, 80],
+    [0.8, 1.2],
+    [5e-10, 4e-9],
+    [0, 2],
+    [0, 0.05]
+]
+dict_params_ranges = dict(zip(params, ranges))
```

And create the LH sample as:
@@ -157,12 +167,15 @@ Make sure that the input file is the one you created in the previous step. This

Once the power spectrum is generated, you can train the emulator with:
```python
-cosmopower_prepare_training(params = ["H0", "mnu", "omega_cdm", "omega_b", "As", "ns"],
-                            Pk_filename = "linear_sumnu.dat")
+cosmopower_prepare_training(params = params,
+                            Pk_filename = "linear_test.dat")
```
followed by:
```python
-cosmopower_train_model(model_save_filename = "Pk_cp_NN_test")
+cosmopower_train_model(
+    model_save_filename = "Pk_cp_NN_test",
+    model_params = params
+)
```
diff --git a/docs/docs/users/emulatorPredictions.md b/docs/docs/users/emulatorPredictions.md
index ba06c854..82681e97 100644
--- a/docs/docs/users/emulatorPredictions.md
+++ b/docs/docs/users/emulatorPredictions.md
@@ -1,7 +1,7 @@
# MAKING PREDICTIONS WITH LACE

-## Loading an emulator
-The easiest way to load an emulator is to use the `set_emulator` class. This can be done with an [archive](archive.md) or a training set. This will load a trained emulator with the specifications of the emulator label.
+## Loading a predefined emulator
+The easiest way to load an emulator is to use the `set_emulator` class. This can be done with an [archive](archive.md) or a training set. This will load a trained emulator with the specifications of the [emulator label](Emulators_trainingSets.md).

```python
archive = gadget_archive.GadgetArchive(postproc="Pedersen21")
@@ -19,24 +19,9 @@ emulator = set_emulator(
)
```

-The supported emulators are:
-
-- "Pedersen21": Gaussian process emulating the optimal P1D of Gadget simulations. Pedersen+21 paper.
-- "Pedersen23": Updated version of Pedersen21 emulator. Pedersen+23 paper.
-- "Pedersen21_ext": Extended version of Pedersen21 emulator.
-- "Pedersen21_ext8": Extended version of Pedersen21 emulator up to k=8 Mpc^-1.
-- "Pedersen23_ext": Extended version of Pedersen23 emulator.
-- "Pedersen23_ext8": Extended version of Pedersen23 emulator up to k=8 Mpc^-1.
-- "CH24": Emulator from Chabanier & Haehnelt 2024 paper.
-- "Cabayol23": Neural network emulating the optimal P1D of Gadget simulations from Cabayol+23 paper.
-- "Cabayol23+": Updated version of Cabayol23 emulator.
-- "Cabayol23_extended": Extended version of Cabayol23 emulator to k=8 Mpc^-1.
-- "Cabayol23+_extended": Extended version of Cabayol23+ emulator to k=8 Mpc^-1.
-- "Nyx_v0": Neural network emulating the optimal P1D of Nyx simulations.
-- "Nyx_v0_extended": Extended version of Nyx_v0 emulator to k=8 Mpc^-1.
-- "Nyx_alphap": Nyx emulator including alpha_p parameter.
-- "Nyx_alphap_extended": Extended version of Nyx_alphap emulator to k=8 Mpc^-1.
-- "Nyx_alphap_cov": Nyx emulator under testing using the covariance matrix from DESI Y1.
+The supported emulators can be found in the [Emulators_trainingSets.md](./Emulators_trainingSets.md) file.
+
+## Loading a custom emulator

Another option is to load an emulator model that does not correspond to a predefined emulator label. This can be done by, for example:

@@ -50,27 +35,46 @@ emulator = NNEmulator(training_set='Cabayol23',

where `model_path` is the path to the `.pt` file containing the trained model and `train=False` indicates that the model is not being trained. If the model you are loading has been trained by dropping simulations, you should specify which simulations to drop using the `drop_sim` argument.

## Making predictions
+
To emulate the P1D of a simulation, you can use the `emulate_p1d_Mpc` method. This method requires a dictionary containing the simulation parameters.

```python
p1d = emulator.emulate_p1d_Mpc(sim_params, k_Mpc)
```

+## Predicting from a config file
+
One can also make predictions and plot them by using the `predict.py` script in the `scripts` folder. This script allows you to make predictions on a test set, plot the P1D errors and save the predictions.
An example of how to use this script is:

```bash
python bin/predict.py --config config_files/config_predict.yaml
```

-The config file accepts the following fields:
+Similarly to the [training script](emulatorTraining.md), the config file accepts the following fields.
+
+There are two ways of loading an emulator:
+
+1. By providing an emulator label (see the list of supported emulators [here](./Emulators_trainingSets.md)):
+
+- `emulator_type`: Choose between "NN" (neural network) or "GP" (Gaussian process) emulator
+- `emulator_label`: Label of the predefined model to use (see the list of supported emulators [here](./Emulators_trainingSets.md))
+- `drop_sim`: Simulation to exclude from training (optional)
+- `archive`: Configuration for loading the simulation archive
+    - `file`: "Nyx" or "Gadget"
+    - `version`: Version of the simulation archive
+    - `sim_test`: Label of the test simulation to use for predictions. See the list of available simulations [here](./Simulations_list.md).
+- `average_over_z`: Whether to average P1D errors over redshift (true/false)
+- `save_plot_path`: Path where to save the validation plot. If None, the plot is not saved.
+- `save_predictions_path`: Path where to save the predictions. If None, the predictions are not saved.
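+A minimal sketch for this first option, mirroring `config_files/config_predict.yaml` (values are illustrative):
+
+```yaml
+emulator_type: "NN"
+emulator_label: "Cabayol23+"
+drop_sim: null
+archive:
+  file: "Gadget"
+  version: "Cabayol23"
+  sim_test: "mpg_central"
+average_over_z: false
+save_plot_path: "data/validation_figures/tests/test_p1d_err.png"
+save_predictions_path: null
+```
+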
+2. By providing a path to the directory containing the trained model (this directory should contain the `.pt` file):
+
- `emulator_type`: Choose between "NN" (neural network) or "GP" (Gaussian process) emulator
-- `emulator_label`: Label of the predefined model to use (see list of supported emulators above)
- `drop_sim`: Simulation to exclude from training (optional)
- `archive`: Configuration for loading simulation archive
    - `file`: "Nyx" or "Gadget"
    - `version`: Version of the simulation archive
-    - `sim_test`: Label of the test simulation to use for predictions
-- `emulator_params`: List of parameters used by the emulator
+    - `sim_test`: Label of the test simulation to use for predictions. See the list of available simulations [here](./Simulations_list.md).
+- `emulator_params`: List of parameters used by the emulator. The default is `["Delta2_p", "n_p", "alpha_p", "sigT_Mpc", "gamma", "kF_Mpc"]`.
- `average_over_z`: Whether to average P1D errors over redshift (true/false)
- `save_plot_path`: Path where to save the validation plot. If None, the plot is not saved.
- `save_predictions_path`: Path where to save the predictions. If None, the predictions are not saved.
+- `hyperparameters`: Dictionary containing the hyperparameters of the emulator. This will be used **ONLY** if `emulator_label` is not provided.
+- `model_path`: Path to the directory containing the trained model (this directory should contain the `.pt` file).
diff --git a/docs/docs/users/emulatorTraining.md b/docs/docs/users/emulatorTraining.md
index 6e0a9cab..7d5d34d2 100644
--- a/docs/docs/users/emulatorTraining.md
+++ b/docs/docs/users/emulatorTraining.md
@@ -1,58 +1,75 @@
-# NN - EMULATOR TRAINING
+# EMULATOR TRAINING

The training of the emulators is done with the code `train.py`, which is in the `scripts` folder. This code is used to train the emulators with the data available in the repository.

-In order to train the emulator, one needs to specify the training configuration file. This file is a .yaml file that contains the parameters for the training. An example of this file is `config.yaml` in the `scripts` folder.
+In order to train the emulator, one needs to specify the training configuration file. This file is a .yaml file that contains the parameters for the training. An example of this file is `config.yaml` in the `config_files` folder.

To run the training, one needs to run the following command:
-```
+```bash
python scripts/train.py --config=path/to/config.yaml
```

+## Configuration file instructions
+
The configuration file contains the following parameters:

1. `emulator_type`: Specifies the emulator type, either "NN" or "GP".

-2. `emulator_label`: Specifies the predefined model to train. For example, "Cabayol23+". This parameter is optional, if it is not provided, the code will train a new model based on the provided hyperparameters. The options from the emulator_label are:
-   - Cabayol23: Neural network emulating the optimal P1D of Gadget simulations fitting coefficients to a 5th degree polynomial. It goes to scales of 4Mpc^{-1} and z<=4.5.
-   - Cabayol23+: Neural network emulating the optimal P1D of Gadget simulations fitting coefficients to a 5th degree polynomial. It goes to scales of 4Mpc^{-1} and z<=4.5. Updated version compared to Cabayol+23 paper.
-   - Cabayol23_extended: Neural network emulating the optimal P1D of Gadget simulations fitting coefficients to a 7th degree polynomial. It goes to scales of 8Mpc^{-1} and z<=4.5.
-   - Cabayol23+_extended: Neural network emulating the optimal P1D of Gadget simulations fitting coefficients to a 5th degree polynomial. It goes to scales of 4Mpc^{-1} and z<=4.5. Updated version compared to Cabayol+23 paper.
-   - Nyx_v0: Neural network emulating the optimal P1D of Nyx simulations fitting coefficients to a 6th degree polynomial. It goes to scales of 4Mpc^{-1} and z<=4.5.
-   - Nyx_v0_extended: Neural network emulating the optimal P1D of Nyx simulations fitting coefficients to a 6th degree polynomial. It goes to scales of 8Mpc^{-1} and z<=4.5.
-   - Nyx_alphap: Neural network emulating the optimal P1D of Nyx simulations fitting coefficients to a 6th degree polynomial. It goes to scales of 4Mpc^{-1} and z<=4.5.
-   - Nyx_alphap_extended: Neural network emulating the optimal P1D of Nyx simulations fitting coefficients to a 6th degree polynomial. It goes to scales of 8Mpc^{-1} and z<=4.5.
-   - Nyx_alphap_cov: Neural network under testing for the Nyx_alphap emulator.
-
-3. Data source (choose one):
+2. `emulator_label`: Specifies the predefined model to train. For example, "Cabayol23+". This parameter is optional; if it is not provided, the code will train a new model based on the provided hyperparameters. The emulator_label options are defined in the [Emulators_trainingSets.md](./Emulators_trainingSets.md) file.
+
+3. Data source (choose one): The data source can be either an archive or a predefined training set. For an archive, the following options are available:

   a. `archive`:
      - `file`: Specifies the simulation type, either "Nyx" or "Gadget".
      - `version`: Specifies the Nyx version or Gadget postprocessing version. Options are:
-        - "Pedersen21": Gadget postprocessing in Pedersen+21 paper.
-        - "Cabayol23": Gadget postprocessing in Cabayol+23 paper.
+        - "Pedersen21": Gadget postprocessing in the [Pedersen+21 paper](https://arxiv.org/abs/2103.05195).
+        - "Cabayol23": Gadget postprocessing in the [Cabayol+23 paper](https://arxiv.org/abs/2303.05195).
        - "Nyx23_Oct2023": Nyx version from October 2023.
        - "Nyx23_Jul2024": Nyx version from July 2024.

-   b. `training_set`: Specifies a predefined training set. Set to null if using an archive. The options are:
-      - "Pedersen21": Gadget postprocessing in Pedersen+21 paper.
-      - "Cabayol23": Gadget postprocessing in Cabayol+23 paper.
-      - "Oct2023": Nyx version from October 2023.
-      - "Jul2024": Nyx version from July 2024.
+   b. `training_set`: Specifies a predefined training set. The options are defined in the [Emulators_trainingSets.md](./Emulators_trainingSets.md) file.
+
+   There is the option of dropping simulations from the training set or archive. This is done by providing a simulation label to the `drop_sim` parameter. The simulations are listed in [Simulations_list.md](./Simulations_list.md).

4. `emulator_params`: List of parameters the emulator will use for predictions. By default, it uses ['Delta2_p', 'n_p','mF', 'sigT_Mpc', 'gamma', 'kF_Mpc']. Nyx_alphap emulators use ['Delta2_p', 'n_p','mF', 'sigT_Mpc', 'gamma', 'kF_Mpc', 'alpha_p'].

-5. `hyperparameters`: Neural network and training configuration:
-   - `kmax_Mpc`: Maximum k value in Mpc^-1 to consider.
-   - `ndeg`: Degree of the polynomial fit.
-   - `nepochs`: Number of training epochs.
-   - `step_size`: Number of epochs between learning rate adjustments.
-   - `drop_sim`: Simulation to exclude from training (optional).
-   - `drop_z`: Redshift to exclude from training (optional).
-   - `nhidden`: Number of hidden layers.
-   - `max_neurons`: Maximum number of neurons per layer.
-   - `seed`: Random seed for reproducibility.
-   - `lr0`: Initial learning rate.
-   - `batch_size`: Number of samples per batch.
-   - `weight_decay`: L2 regularization factor.
-   - `z_max`: Maximum redshift to consider.
-
-5. `save_path`: Location to save the trained model, relative to the project root directory.
+5. `drop_sim`: Simulation to exclude from training (optional).
+
+6. `training_set`: Predefined training set to use for training. The options are defined in the [Emulators_trainingSets.md](./Emulators_trainingSets.md) file.
+
+7. `save_path`: Location to save the trained model, relative to the project root directory.
+
+8. `hyperparameters`: Neural network and training configuration. These parameters need to be specified when the emulator_label is not provided. If the emulator_label is provided, the hyperparameters will be taken from the predefined emulator. The parameters are:
+
+   - `kmax_Mpc`: Maximum k value in Mpc^-1 to consider.
+   - `ndeg`: Degree of the polynomial fit.
+   - `nepochs`: Number of training epochs.
+   - `step_size`: Number of epochs between learning rate adjustments.
+   - `drop_sim`: Simulation to exclude from training (optional).
+   - `drop_z`: Redshift to exclude from training (optional).
+   - `nhidden`: Number of hidden layers.
+   - `max_neurons`: Maximum number of neurons per layer.
+   - `seed`: Random seed for reproducibility.
+   - `lr0`: Initial learning rate.
+   - `batch_size`: Number of samples per batch.
+   - `weight_decay`: L2 regularization factor.
+   - `z_max`: Maximum redshift to consider.
+
+To train the **GP emulator**, the `emulator_type` parameter is set to "GP" and the `emulator_label` to one of the predefined emulators in the [Emulators_trainingSets.md](./Emulators_trainingSets.md) file. **The GP emulator on the Nyx archive is not supported**. The hyperparameters used by the GP emulator are:
+
+   - `kmax_Mpc`: Maximum k value in Mpc^-1 to consider.
+   - `ndeg`: Degree of the polynomial fit.
+   - `drop_sim`: Simulation to exclude from training (optional).
+   - `z_max`: Maximum redshift to consider.
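+Whether training a NN or a GP emulator, the configuration follows the same layout. For orientation, here is a minimal sketch mirroring `config_files/config_train.yaml` (values are illustrative):
+
+```yaml
+emulator_type: "NN"
+emulator_label: "Cabayol23+"
+archive:
+  file: "Gadget"
+  version: "Cabayol23"
+training_set: null
+drop_sim: null
+emulator_params: ["Delta2_p", "n_p", "alpha_p", "sigT_Mpc", "gamma", "kF_Mpc"]
+save_path: "data/NNmodels/testing_models/test.pt"
+```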
This section contains information for us ## Contents -- [Archive](archive.md) -- [Emulator Predictions](emulatorPredictions.md) -- [Emulator Training](emulatorTraining.md) +- [Available simulations](Simulations_list.md) +- [Predefined emulators and training sets](Emulators_trainingSets.md) +- [Loading data from archive](archive.md) +- [Making predictions with emulators](emulatorPredictions.md) +- [Training new emulators](emulatorTraining.md) - [Compressed Parameters](compressedParameters.md) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c627731a..40f28735 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -8,14 +8,17 @@ nav: - Installation: installation.md - User Guide: - Overview: users/index.md + - Available Simulations: users/Simulations_list.md + - Emulators and Training Sets: users/Emulators_trainingSets.md - Archive: users/archive.md - - Emulator Predictions: users/emulatorPredictions.md - Emulator Training: users/emulatorTraining.md + - Emulator Predictions: users/emulatorPredictions.md - Compressed Parameters: users/compressedParameters.md + - Developer Guide: - Overview: developers/index.md - Creating New Emulators: developers/CreateNewEmulator.md - - Training Options: developers/trainingOptions.md + - Under development solutions: developers/UnderDevelopment.md - Testing: developers/advancedTesting.md - Documentation: developers/documentation.md diff --git a/lace/cosmo/fit_linP_cosmopower.py b/lace/cosmo/fit_linP_cosmopower.py index 0499f273..24ebe3a4 100644 --- a/lace/cosmo/fit_linP_cosmopower.py +++ b/lace/cosmo/fit_linP_cosmopower.py @@ -17,7 +17,7 @@ class linPCosmologyCosmopower: z_star : float = 3 fit_min_kms : float = 0.5 fit_max_kms : float = 2 - cosmopower_model : str = "Pk_cp_NN_sumnu" + cosmopower_model : str = "Pk_cp_NN_nrun" def __post_init__(self): self.cp_emulator = cp.cosmopower_NN(restore=True, @@ -40,12 +40,18 @@ def measure_Hz(params: dict, zstar: float): Omega_bc = np.array(params["Omega_m"]) Omega_lambda = np.array(params["Omega_Lambda"]) - #Omega_nu = 3*np.array(params["omega_ncdm"]) H0 = np.array(params["h"])*100 - Omega_r = 1 - Omega_bc - Omega_lambda - Hz = H0 * np.sqrt((Omega_bc*(1+zstar)**3 + Omega_r*(1+zstar)**4 + Omega_lambda)) + h = np.array(params["h"]) + Omega_nu = np.array(params["omnuh2"]) / h**2 + + # Compute H(z) + Hz = H0 * np.sqrt( + (Omega_bc+Omega_nu) * (1 + zstar)**3 + + Omega_lambda + ) return Hz + @staticmethod def convert_to_hMpc(Pk_Mpc: np.ndarray, k_Mpc: np.ndarray, @@ -76,17 +82,18 @@ def get_star_params(linP_kms: np.poly1d, alpha_star = 2.0 * linP_kms[2] results = { - "Delta2_star": Delta2_star, - "n_star": n_star, - "alpha_star": alpha_star, + "Delta2_star_cp": Delta2_star, + "n_star_cp": n_star, + "alpha_star_cp": alpha_star, } return results - def fit_linP_cosmology(self, chains_df: pd.DataFrame, - param_mapping: dict): + def fit_linP_cosmology(self, + chains_df: pd.DataFrame, + param_mapping: dict, + chunk_size: int = 100_000): linP_cosmology_results = [] # Calculate number of chunks needed - chunk_size = 100_000 n_chunks = len(chains_df) // chunk_size + (1 if len(chains_df) % chunk_size != 0 else 0) # Split DataFrame into chunks @@ -94,28 +101,33 @@ def fit_linP_cosmology(self, chains_df: pd.DataFrame, logger.info(f"Fitting linear power to cosmology in {n_chunks} chunks") param_descriptions = { 'h': 'Reduced Hubble parameter', - 'm_ncdm': 'Sum of neutrino masses', - 'omega_cdm': 'Total CDM density / h^2', - 'Omega_m': 'Total matter density', + 'mnu': 'Sum of neutrino masses', + 'ombh2': 'Total CDM density / h^2', + 
+            'omch2': 'Physical CDM density (Omega_c h^2)',
            'ln_A_s_1e10': 'log(As/1e10)',
-            'n_s': 'spectral index'
+            'ns': 'spectral index',
+            'nrun': 'running of the spectral index'
        }

        param_info = [f"{value}: ({param_descriptions.get(key, 'The emulator does not use this parameter.')})" for key, value in param_mapping.items()]
        logger.info(f"Using parameter mapping: {param_mapping}\n " + "\n ".join(param_info))
        for chunk in chunks:
            # create a dict of cosmological parameters
-            params = {
-                'H0': [chunk[param_mapping['h']].values[ii]*100 for ii in range(len(chunk))],
-                'h': [chunk[param_mapping['h']].values[ii] for ii in range(len(chunk))],
-                'mnu': [3*chunk[param_mapping['m_ncdm']].values[ii] for ii in range(len(chunk))],
-                'omega_cdm': [chunk[param_mapping['omega_cdm']].values[ii] for ii in range(len(chunk))],
-                'omega_b': [(chunk[param_mapping['Omega_m']].values[ii] - chunk[param_mapping['omega_cdm']].values[ii]/chunk[param_mapping['h']].values[ii]**2) * chunk[param_mapping['h']].values[ii]**2 for ii in range(len(chunk))],
-                'Omega_m': [chunk[param_mapping['Omega_m']].values[ii] for ii in range(len(chunk))],
-                'Omega_Lambda': [chunk[param_mapping['Omega_Lambda']].values[ii] for ii in range(len(chunk))],
-                'As': [np.exp(chunk[param_mapping['ln_A_s_1e10']].values[ii])/1e10 for ii in range(len(chunk))],
-                'ns': [chunk[param_mapping['n_s']].values[ii] for ii in range(len(chunk))]
-            }
+            params = {}
+            for param, mapping in param_mapping.items():
+                if param == 'h':
+                    params['H0'] = (chunk[mapping] * 100).tolist()
+                    params['h'] = chunk[mapping].tolist()
+                elif param == 'ln_A_s_1e10':
+                    params['As'] = (np.exp(chunk[mapping]) / 1e10).tolist()
+                elif param == 'omch2' and 'Omega_m' in param_mapping and 'h' in param_mapping:
+                    h = chunk[param_mapping['h']]
+                    params['ombh2'] = ((chunk[param_mapping['Omega_m']] - chunk[mapping] / h**2) * h**2).tolist()
+                    params['omch2'] = chunk[mapping].tolist()
+                else:
+                    # any other mapped parameter (e.g. mnu, ns, nrun) is passed through as-is
+                    params[param] = chunk[mapping].tolist()
+
            Pk_Mpc = self.cp_emulator.ten_to_predictions_np(params)
            k_Mpc = self.cp_emulator.modes
diff --git a/lace/cosmo/train_linP_cosmopower.py b/lace/cosmo/train_linP_cosmopower.py
index 54f2a1a6..69bc1376 100644
--- a/lace/cosmo/train_linP_cosmopower.py
+++ b/lace/cosmo/train_linP_cosmopower.py
@@ -171,15 +171,11 @@ def create_LH_sample(dict_params_ranges: dict,
                     nsamples: int=400000,
                     filename: str="LHS_params.npz"):

-    ombh2 = np.linspace(dict_params_ranges['ombh2'][0], dict_params_ranges['ombh2'][1], nsamples)
-    omch2 = np.linspace(dict_params_ranges['omch2'][0], dict_params_ranges['omch2'][1], nsamples)
-    H0 = np.linspace(dict_params_ranges['H0'][0], dict_params_ranges['H0'][1], nsamples)
-    ns = np.linspace(dict_params_ranges['ns'][0], dict_params_ranges['ns'][1], nsamples)
-    As = np.linspace(dict_params_ranges['As'][0], dict_params_ranges['As'][1], nsamples)
-    mnu = np.linspace(dict_params_ranges['mnu'][0], dict_params_ranges['mnu'][1], nsamples)
-
+    params = {}
+    for param, range_values in dict_params_ranges.items():
+        params[param] = np.linspace(range_values[0], range_values[1], nsamples)

-    AllParams = np.vstack([ombh2, omch2, H0, ns, As, mnu])
+    AllParams = np.vstack(list(params.values()))
    n_params = len(AllParams)

    lhd = pyDOE.lhs(n_params, samples=nsamples, criterion=None)
@@ -189,13 +185,9 @@ def create_LH_sample(dict_params_ranges: dict,
    for i in range(n_params):
        AllCombinations[:, i] = AllParams[i][idx[:, i]]

-    params = {'omega_b': AllCombinations[:, 0],
-              'omega_cdm': AllCombinations[:, 1],
-              'H0': AllCombinations[:, 2],
-              'ns': AllCombinations[:, 3],
-              'As': AllCombinations[:, 4],
-              'mnu': AllCombinations[:, 5],
-              }
+    params = {}
+    for i, param in enumerate(dict_params_ranges.keys()):
+        params[param] = AllCombinations[:, i]

    np.savez(PROJ_ROOT / 'data' / 'cosmopower_models' / filename, **params)

@@ -209,16 +201,11 @@ def generate_training_spectra(input_LH_filename: str,
    if os.path.exists(PROJ_ROOT / 'data' / 'cosmopower_models' / output_filename):
        os.remove(PROJ_ROOT / 'data' / 'cosmopower_models' / output_filename)

-    for ii in tqdm(range(len(LH_params["H0"]))):
-        cosmo = get_cosmology(
-            H0=LH_params["H0"][ii],
-            mnu=LH_params["mnu"][ii],
-            omch2=LH_params["omega_cdm"][ii],
-            ombh2=LH_params["omega_b"][ii],
-            omk=0,
-            As=LH_params["As"][ii],
-            ns=LH_params["ns"][ii]
-        )
+    params_list = list(LH_params.keys())
+    for ii in tqdm(range(len(LH_params[params_list[0]]))):
+        cosmo_params = {param: LH_params[param][ii] for param in params_list}
+        cosmo_params["omk"] = 0
+        cosmo = get_cosmology(**cosmo_params)

        camb_results = get_camb_results(cosmo, zs=[3])

@@ -231,7 +218,7 @@
        )

        params_lhs = np.array([
-            LH_params[param][ii] for param in ["H0", "mnu", "omega_cdm", "omega_b", "As", "ns"] ])
+            LH_params[param][ii] for param in params_list ])

        cosmo_array = np.hstack((params_lhs, linP_Mpc.flatten()))

@@ -267,14 +254,14 @@ def cosmopower_prepare_training(params : List = ["H0", "mnu", "omega_cdm", "omeg

    logger.info(f"Number of valid spectra: {len(linear_log_spectra)}")

-    linear_parameters_dict = {params[i]: linear_parameters[:, i] for i in range(len(params))}
+    linear_parameters_dict = {params[i]: linear_parameters[:, i] for i in range(n_params)}
    linear_log_spectra_dict = {'modes': k_modes, 'features': linear_log_spectra}

    np.savez(PROJ_ROOT / "data" / "cosmopower_models" / "camb_linear_params.npz", **linear_parameters_dict)
    np.savez(PROJ_ROOT / "data" / "cosmopower_models" / "camb_linear_logpower.npz", **linear_log_spectra_dict)

-def cosmopower_train_model(model_params: List = ["H0", "mnu", "omega_cdm", "omega_b", "As", "ns"],
+def cosmopower_train_model(model_params: List,
                           model_save_filename: str = "Pk_cp_NN"):

    training_parameters = np.load(PROJ_ROOT / "data" / "cosmopower_models" / "camb_linear_params.npz")
diff --git a/lace/emulator/nn_emulator.py b/lace/emulator/nn_emulator.py
index daf1459b..9a126127 100644
--- a/lace/emulator/nn_emulator.py
+++ b/lace/emulator/nn_emulator.py
@@ -620,7 +620,23 @@ def save_emulator(self):
            "metadata": metadata,
        }

-        torch.save(model_with_metadata, self.save_path)
+        if not self.emulator_label:
+            save_path = self.save_path
+        else:
+            # Create model directory under data/NNmodels/{emulator_label}
+            dir_path = PROJ_ROOT / "data" / "NNmodels" / self.emulator_label
+            dir_path.mkdir(parents=True, exist_ok=True)
+            self.print(f"Creating and using model directory: {dir_path}")
+
+            # Build the filename following the documented naming convention
+            filename = f"{self.emulator_label}"
+            if self.drop_sim is not None:
+                filename += f"_drop_sim_{self.drop_sim}"
+                if self.drop_z is not None:
+                    filename += f"_{self.drop_z}"
+            filename += ".pt"
+
+            save_path = dir_path / filename
+
+        torch.save(model_with_metadata, save_path)

    def emulate_p1d_Mpc(self, model, k_Mpc, return_covar=False, z=None):
        """
diff --git a/lace/utils/plotting_functions.py b/lace/utils/plotting_functions.py
index 2893c813..e8a5f106 100644
--- a/lace/utils/plotting_functions.py
+++ b/lace/utils/plotting_functions.py
@@ -3,7 +3,6 @@
import matplotlib.pyplot as plt
from lace.utils import poly_p1d
import matplotlib.cm as cm
-import corner

def plot_p1d_vs_emulator(testing_data, emulator, save_path=None):
    """
@@ -87,11 +86,11 @@ def plot_p1d_vs_emulator(testing_data, emulator, save_path=None):
    plt.close()

def create_corner_plot(
-    main_data_dict,
+    list_of_dfs,
+    params_to_plot,
    labels=None,
    figsize=(8, 8),
-    additional_data_dicts=None,
-    additional_colors=None,
+    colors=None,
    legend_labels=None,
    truth_values=None,  # Add truth values as a parameter
    save_path=None
@@ -112,13 +111,26 @@ def create_corner_plot(
    Returns:
        matplotlib.figure.Figure: The generated corner plot figure.
    """
+    import corner  # imported here so corner is only required when plotting
+
+    # Build one {param: values} dictionary per DataFrame
+    dirs_list = []
+    for df in list_of_dfs:
+        dict_ = {}
+        for param in params_to_plot:
+            dict_[f"{param}"] = df.loc[:, f'{param}'].values
+        dirs_list.append(dict_)
+
+    if colors is None:
+        # Generate default colors if not specified (one per dataset)
+        colors = [f"C{i}" for i in range(len(dirs_list))]

    # Convert the main dictionary to a 2D array
-    main_data = np.array(list(main_data_dict.values())).T  # Shape (N_samples, N_parameters)
+    main_data = np.array(list(dirs_list[0].values())).T  # Shape (N_samples, N_parameters)

    # Default labels to dictionary keys if not provided
    if labels is None:
-        labels = list(main_data_dict.keys())
+        labels = list(dirs_list[0].keys())

    # Generate the base corner plot with the main data
    figure = corner.corner(
@@ -128,11 +140,12 @@
        show_titles=False,
        plot_density=False,
        plot_datapoints=False,
-        color="steelblue",
+        color=colors[0],
        fill_contours=False,
        levels=(0.68, 0.95),  # 1-sigma and 2-sigma contours
        smooth=1.0,  # Smoothing for the KDE
        scale_hist=True,
+        hist_kwargs={'density': True},  # Normalize the histograms
        figsize=figsize,
    )
@@ -145,34 +158,17 @@
            ax = axes[i, i]
            ax.axvline(truth_values[i], color="black")

-    """# Loop over the histograms
-    for yi in range(ndim):
-        for xi in range(yi):
-            ax = axes[yi, xi]
-            ax.axvline(truth_values[xi], color="black")
-            ax.axvline(truth_values[xi], color="black")
-            ax.axhline(truth_values[yi], color="black")
-            ax.axhline(truth_values[yi], color="black")
-            ax.plot(truth_values[xi], truth_values[yi], "black")
-            ax.plot(truth_values[xi], truth_values[yi], "black")"""
-
    # Overlay contours for additional datasets if provided
    legend_handles = []
-    if additional_data_dicts:
-        if additional_colors is None:
-            # Generate random colors if not specified
-            additional_colors = [f"C{i}" for i in range(len(additional_data_dicts))]
-
-
-
+    if len(dirs_list)>1:
        if legend_labels is None:
            # Generate default labels if not specified
-            legend_labels = [f"Dataset {i+1}" for i in range(len(additional_data_dicts))]
+            legend_labels = [f"Dataset {i+1}" for i in range(len(dirs_list[1:]))]

-        for idx, additional_data_dict in enumerate(additional_data_dicts):
+        for idx, additional_data_dict in enumerate(dirs_list[1:]):
            # Convert the additional dictionary to a 2D array
            additional_data = np.array(list(additional_data_dict.values())).T  # Shape (N_samples, N_parameters)
-            color = additional_colors[idx % len(additional_colors)]
+            color = colors[(idx + 1) % len(colors)]

            # Overlay contours on existing axes
            figure = corner.corner(
@@ -184,6 +180,7 @@
                plot_datapoints=False,
                fill_contours=False,  # No fill for overlays
                levels=(0.68, 0.95),  # 1-sigma and 2-sigma contours
+                hist_kwargs={'density': True},  # Normalize the histograms
                smooth=1.0,
            )
diff --git a/notebooks/Chains_postprocessing.py b/notebooks/Chains_postprocessing.py
deleted file mode 100644
index a952b1c8..00000000
-# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: light -# format_version: '1.5' -# jupytext_version: 1.16.4 -# kernelspec: -# display_name: emulators2 -# language: python -# name: emulators2 -# --- - -# # THIS NOTEBOOK PLOTS COSMOLOGICAL PARAMETERS FROM MCMC CHAINS - -# %load_ext autoreload -# %autoreload 2 - -# + -import corner -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -from lace.utils.plotting_functions import create_corner_plot - - -import matplotlib as mpl -mpl.rcParams.update(mpl.rcParamsDefault) -plt.rcParams["font.family"] = "serif" -# - - -# ## DEFINE PATHS TO CHAINS TO BE PLOTTED - -file_dirs = [ - "/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains_H067/mock_v2.1.txt", - "/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains_H074/mock_v2.1.txt"] - - -# ## OPEN AND SAVE CHAINS IN DATAFRAMES - -with open(file_dirs[0], 'r') as file: - for line in file: - if line.strip().startswith("#"): # Check for a commented line - first_comment = line.strip() - break - parameters_list = first_comment.lstrip("#").split() - -dfs = [] -for file in file_dirs: - chains = np.loadtxt(file) - df = pd.DataFrame(chains, columns = parameters_list) - dfs.append(df) - -# ## PLOT COSMOLOGICAL PARAMETERS - -# ### DEFINE PARAMETERS TO BE PLOTTED - -parameters_of_interest = ["sigma8", "Omega_m", "h"]#omega_cdm -labels = [r"$\sigma_8$", r"$\Omega_m$", r"$h$"]#\Omega_{\rm cdm} h^2 - -dirs_list = [] -for df in dfs: - dict_ = {} - for param in parameters_of_interest: - dict_[f"{param}"] = df.loc[:,f'{param}'].values - dirs_list.append(dict_) - -create_corner_plot(dirs_list[0], - labels, - additional_data_dicts = dirs_list[1:], - additional_colors = ['crimson'], - legend_labels = [r"$H_0 = 67$ km s$^{−1}$ Mpc$^{−1}$",r"$H_0 = 74 $km s$^{−1}$ Mpc$^{−1}$"]) - #legend_labels = [r"$\Omega_{\rm cdm} h^2$=0.117",r"$\Omega_{\rm cdm} h^2$=0.13"]) - -# ## COMPRESSED PARAMETERS WITH COSMOPOWER - -from lace.cosmo.fit_linP_cosmopower import linPCosmologyCosmopower - - -fitter_compressed_params = linPCosmologyCosmopower() - -# DEFINES THE MAPPING BETWEEN PARAMETER NAMES AND INTERNAL NAMING -param_mapping = { - 'h': 'h', - 'm_ncdm': 'm_ncdm', - 'omega_cdm': 'omega_cdm', - 'Omega_m': 'Omega_m', - 'ln_A_s_1e10': 'ln_A_s_1e10', - 'n_s': 'n_s' -} - -for ii, df in enumerate(dfs): - df['m_ncdm'] = np.zeros(shape = len(df)) - df['ln_A_s_1e10'] = np.log(df.A_s*1e10) - linP_cosmology_results = fitter_compressed_params.fit_linP_cosmology(chains_df = df, - param_mapping = param_mapping) - df_star_params = pd.DataFrame(linP_cosmology_results) - df = pd.concat((df, df_star_params),axis=1) - dfs[ii] = df - -# ## PLOT COMPRESSED PARAMETERS - -parameters_of_interest = ["Delta2_star", "n_star", "alpha_star"] -labels = [r"$\Delta^2_*$", r"$n_*$", r"$\alpha_*$"] -true_vals = [0.36,-2.3, -0.21] - -dirs_list = [] -for df in dfs: - dict_ = {} - for param in parameters_of_interest: - dict_[f"{param}"] = df.loc[:,f'{param}'].values - dirs_list.append(dict_) - -create_corner_plot(dirs_list[0], - labels, - additional_data_dicts = dirs_list[1:], - additional_colors = ['crimson'], - truth_values = true_vals, - legend_labels = [r"$H_0 = 67$ km s$^{−1}$ Mpc$^{−1}$",r"$H_0 = 74$km s$^{−1}$ Mpc$^{−1}$"]) diff --git a/notebooks/compressed_parameters/Chains_postprocessing.py b/notebooks/compressed_parameters/Chains_postprocessing.py new file mode 100644 index 00000000..47400f8e --- /dev/null +++ b/notebooks/compressed_parameters/Chains_postprocessing.py 
@@ -0,0 +1,189 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.16.4
+#   kernelspec:
+#     display_name: lace_test
+#     language: python
+#     name: python3
+# ---
+
+# # THIS NOTEBOOK PLOTS COSMOLOGICAL PARAMETERS FROM MCMC CHAINS
+
+# %load_ext autoreload
+# %autoreload 2
+
+# +
+import corner
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from lace.utils.plotting_functions import create_corner_plot
+from lace.cosmo.fit_linP_cosmopower import linPCosmologyCosmopower
+
+from cup1d.utils.utils import purge_chains
+
+import matplotlib as mpl
+mpl.rcParams.update(mpl.rcParamsDefault)
+plt.rcParams["font.family"] = "serif"
+
+
+# -
+
+def open_Chain(fname, source_code):
+    # access parameters
+    if source_code == "lym1d":
+        with open(fname, 'r') as file:
+            for line in file:
+                if line.strip().startswith("#"):
+                    first_comment = line.strip()
+                    break
+        parameters_list = first_comment.lstrip("#").split()
+
+        chains = np.loadtxt(fname)
+        df = pd.DataFrame(chains, columns = parameters_list)
+        # The emulator expects the sum of the neutrino masses; these chains assume massless neutrinos
+        df['m_ncdm'] = 0
+        df['nrun'] = 0
+        df['ln_A_s_1e10'] = np.log(df.A_s*1e10)
+        df['omnuh2'] = df.m_ncdm / 94.07
+
+
+    elif source_code=='cup1d_old':
+        f = np.load(fname, allow_pickle=True)
+        df = pd.DataFrame(f.tolist())
+
+        df = df[df.lnprob > -14]
+        df['ln_A_s_1e10'] = np.log(df.As*1e10)
+        df["Omega_m"] = 0.316329
+        df["Omega_Lambda"] = 1 - df.Omega_m.values
+        df["h"] = df.H0.values / 100
+        df["ombh2"] = 0.049009 * df.h**2
+        df["omch2"] = (df.Omega_m - 0.049009) * df.h**2
+        df['mnu'] = 0
+        df['nrun'] = 0
+        df['omnuh2'] = df.mnu / 94.07
+
+    elif source_code=='cup1d':
+
+
+        data = np.load(fname, allow_pickle=True).item()
+        keep, discard = purge_chains(data["posterior"]["lnprob"], abs_diff=2, nsplit=8)
+        posterior_dict = data['posterior']
+        param_not = ['param_names', 'param_names_latex', 'param_percen', 'param_mle', 'lnprob_mle']
+        df_dict = {key: posterior_dict[key][:, keep].flatten() for key in posterior_dict.keys() if key not in param_not}
+        df = pd.DataFrame(df_dict)
+
+        df['ln_A_s_1e10'] = np.log(df.As*1e10)
+        df["h"] = data["like"]["cosmo_fid_label"]["cosmo"]["H0"]/100
+        df['mnu'] = data["like"]["cosmo_fid_label"]["cosmo"]["mnu"]
+        df['omnuh2'] = df.mnu / 94.07
+        df["ombh2"] = data["like"]["cosmo_fid_label"]["cosmo"]["ombh2"]
+        df["omch2"] = data["like"]["cosmo_fid_label"]["cosmo"]["omch2"]
+        df["Omega_m"] = (df.omch2.values + df.ombh2.values + df.omnuh2.values) / df.h**2
+        df["Omega_Lambda"] = 1 - df.Omega_m.values
+        #df['nrun'] = data["like"]["cosmo_fid_label"]["cosmo"]["nrun"]
+
+    return df
+
+
+# ## DEFINE PATHS TO CHAINS TO BE PLOTTED
+
+file_dirs = [
+    #["/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains_fiducialMockChallenge/mock_v2.1.txt","lym1d"],
+    #"/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains_Omh2_0.132/mock_v2.1.txt",
+    #"/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains/mock_v2.1.txt"]
+    #["/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains_H067/mock_v2.1.txt","lym1d"],
+    #["/pscratch/sd/l/lcabayol/P1D/mockChallengeLym1d/mock_challenge_v0/chains_H074/mock_v2.1.txt","lym1d"]]
+    ["/Users/lauracabayol/Documents/DESI/cup1d_chains/sampler_results_planck.npy","cup1d"],
+    ["/Users/lauracabayol/Documents/DESI/cup1d_chains/sampler_results_h74.npy","cup1d"]]
+
+
+# ## OPEN AND SAVE CHAINS IN DATAFRAMES
+
+dfs = []
+for file in file_dirs:
+    df = open_Chain(file[0], file[1])
+    dfs.append(df)
+
+fitter_compressed_params = linPCosmologyCosmopower(cosmopower_model = "Pk_cp_NN_nrun")
+
+# +
+# The model expects the following parameters: h, mnu, omch2, Omega_m, Omega_Lambda, ln_A_s_1e10, ns, nrun, omnuh2.
+
+# Use the following mapping to convert the parameter names in the dataframe to the expected parameter names.
+# If you are using an emulator with other parameters, modify the dictionary and add only the parameters used by the model (e.g. if your emulator is **not** using neutrino masses, you should not include mnu in the dictionary).
+
+# parameter expected by the module: parameter name in your dataframe
+
+
+param_mapping = {
+    'h': 'h',
+    'mnu': 'mnu',
+    'omch2': 'omch2',
+    'Omega_m': 'Omega_m',
+    'Omega_Lambda': 'Omega_Lambda',
+    'ln_A_s_1e10': 'ln_A_s_1e10',
+    'ns': 'ns',
+    'nrun': 'nrun',
+    'omnuh2': 'omnuh2'
+}
+# -
+
+for ii, df in enumerate(dfs):
+    linP_cosmology_results = fitter_compressed_params.fit_linP_cosmology(chains_df = df,
+                                                                         param_mapping = param_mapping)
+    df_star_params = pd.DataFrame(linP_cosmology_results)
+    df = pd.concat((df, df_star_params), axis=1)
+    dfs[ii] = df
+
+# ## PLOT COSMOLOGICAL PARAMETERS
+
+# ### DEFINE PARAMETERS TO BE PLOTTED
+
+#parameters_of_interest = ["sigma8", "Omega_m", "h", "omega_cdm"]
+parameters_of_interest = ["As", "ns"]
+#labels = [r"$\sigma_8$", r"$\Omega_m$", r"$h$",r"$\Omega_{\rm cdm} h^2$"]
+labels = ["$A_s$", "$n_s$"]
+
+
+# +
+create_corner_plot(list_of_dfs = dfs,
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue','crimson'],
+                   legend_labels = [r"h = 0.67", r"h = 0.74"])
+
+
+# -
+
+
+
+# +
+# Check that CP derives the same parameters as those already stored in the chain.
+
+parameters_of_interest = ["Delta2_star", "n_star", "alpha_star"]
+labels = [r"$\Delta^2_*$", r"$n_*$", r"$\alpha_*$"]
+
+create_corner_plot(list_of_dfs = [df_star_params.rename(columns = {"Delta2_star_cp": "Delta2_star", "n_star_cp": "n_star", "alpha_star_cp": "alpha_star"}), dfs[1]],
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue','crimson'],
+                   legend_labels = [r"CP", r"Scaling CAMB"])
+
+# +
+parameters_of_interest = ["Delta2_star", "n_star", "alpha_star"]
+labels = [r"$\Delta^2_*$", r"$n_*$", r"$\alpha_*$"]
+
+create_corner_plot(list_of_dfs = dfs,
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue', 'crimson'],
+                   legend_labels = [r"h=0.67", r"h=0.74"])
+# -
+
+
diff --git a/notebooks/Tutorial_compressedParams.py b/notebooks/compressed_parameters/Tutorial_compressedParams.py
similarity index 60%
rename from notebooks/Tutorial_compressedParams.py
rename to notebooks/compressed_parameters/Tutorial_compressedParams.py
index cce479c1..c60666ae 100644
--- a/notebooks/Tutorial_compressedParams.py
+++ b/notebooks/compressed_parameters/Tutorial_compressedParams.py
@@ -20,11 +20,12 @@
 #
 
 # + [markdown] vscode={"languageId": "plaintext"}
-# ##### DESCLAIMER: Cosmopower is not installed by default in the LaCE package. You can install it as pip install cosmopower.
+# ##### DISCLAIMER: Cosmopower is not installed by default in the LaCE package. You can install it with pip install cosmopower.
It requires the explicit installation of its dependencies (see [installation instructions](https://igmhub.github.io/LaCE/installation/))
 #
 
 # + [markdown] vscode={"languageId": "plaintext"}
-# Additional documentation on how to use cosmopower can be found [here](https://igmhub.github.io/LaCE/compressedParameters/)
+# Additional documentation on how to use cosmopower can be found [here](https://igmhub.github.io/LaCE/users/compressedParameters/)
+#
 
 # +
 import numpy as np
@@ -48,7 +49,7 @@
 
 # ### 1.1. LOAD THE GADGET TEST SIMULATION
 
 test_sim = "mpg_neutrinos"
 emulator_mnu = True
 
 archive = gadget_archive.GadgetArchive(postproc="Cabayol23")
@@ -58,12 +59,13 @@
 
 cosmo_params = testing_data[0]['cosmo_params']
 star_params = testing_data[0]['star_params']
 
-star_params
+cosmo_params
 
 # ### 1.2. FIT THE COMPRESSED PARAMETERS WITH COSMOPOWER
 #
 
 # +
+# Define fitting parameters
 kp_kms = 0.009
 z_star = 3
 fit_min=0.5
@@ -73,34 +75,39 @@
 kmax_kms = fit_max * kp_kms
 # -
 
-emu_path = (PROJ_ROOT / "data" / "cosmopower_models" / "Pk_cp_NN_sumnu").as_posix()
+# Load the emulator
+emu_path = (PROJ_ROOT / "data" / "cosmopower_models" / "Pk_cp_NN_nrun").as_posix()
 cp_nn = cp.cosmopower_NN(restore=True,
                          restore_filename=emu_path)
 
+# Access the emulator parameters and the k modes
 k_Mpc = cp_nn.modes
 logger.info(f"Emulator parameters: {cp_nn.parameters}")
 
-# Define the cosmology dictionary
+# Define the cosmology dictionary. Some parameters are used by the emulator (e.g. parameters from the previous cell) and others are used to convert to kms
 cosmo = {'H0': [cosmo_params["H0"]],
          'h': [cosmo_params["H0"]/100],
         'mnu': [cosmo_params["mnu"]],
        'Omega_m': [(cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
        'Omega_Lambda': [1 - (cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
-        'omega_cdm': [cosmo_params["omch2"]],
-        'omega_b': [cosmo_params["ombh2"]],
+        'omch2': [cosmo_params["omch2"]],
+        'ombh2': [cosmo_params["ombh2"]],
         'As': [cosmo_params["As"]],
-        'ns': [cosmo_params["ns"]]}
+        'ns': [cosmo_params["ns"]],
+        'nrun': [cosmo_params["nrun"]]}
+
 
-# Compute the power spectrum
+# Emulate the power spectrum with cosmopower
 Pk_Mpc = cp_nn.ten_to_predictions_np(cosmo)
 k_Mpc = cp_nn.modes.reshape(1,len(cp_nn.modes))
 
+# +
 # Call the class to fit the compressed parameters
 linP_Cosmology_Cosmopower = linPCosmologyCosmopower()
-
-k_kms, Pk_kms = linPCosmologyCosmopower.convert_to_kms(cosmo, k_Mpc, Pk_Mpc, z_star = z_star)
-
+# Convert to kms
+k_kms, Pk_kms = linP_Cosmology_Cosmopower.convert_to_kms(cosmo, k_Mpc, Pk_Mpc, z_star = z_star)
+# Fit the polynomial
 linP_kms = linP_Cosmology_Cosmopower.fit_polynomial(
     xmin = kmin_kms / kp_kms,
     xmax= kmax_kms / kp_kms,
@@ -111,6 +118,7 @@
 
 starparams_CP = linP_Cosmology_Cosmopower.get_star_params(linP_kms = linP_kms,
                                                           kp_kms = kp_kms)
+# -
 
 logger.info(f"Star parameters of: {test_sim} with CP emulator with neutrino masses {emulator_mnu}")
 logger.info(f"Percent relative error [%] on Delta2_star: {(starparams_CP['Delta2_star'] / star_params['Delta2_star'] - 1)*100:.2f}%")
@@ -119,60 +127,90 @@
 
 # ## 2. MEASURING COMPRESSED PARAMETERS FROM COSMOLOGICAL CHAINS
 
-df = pd.read_csv(PROJ_ROOT / "data/utils" / "mini_chain_test.csv")
+# The previous section shows how to fit the compressed parameters for a single cosmology. Let's now see how to do it for a whole chain.
-df.rename(columns = {'omega_cdm': 'omega_c'}, inplace = True)
+# Open the chain and define the parameters used by the emulator that are not present in the chain
+df = pd.read_csv(PROJ_ROOT / "data/utils" / "mini_chain_test.csv")
+df['nrun'] = 0
 
-# ##### We need to provide a dictionary that maps the parameter names expected by the emulator to the column names of the dataframe.
-# ##### These parameters must be in the dataframe. They are used either to emulate P(k) or to convert to s/km.
+# We need to provide a dictionary that maps the parameter names expected by the emulator to the column names of the dataframe.
 #
+# These parameters must be in the dataframe. They are used either to emulate P(k) or to convert to kms.
 
-#parameter expected:parameter name in the dataframe
+# +
+# The model expects the following parameters: h, m_ncdm, omch2, Omega_m, Omega_Lambda, ln_A_s_1e10, ns, nrun.
+
+# Use the following mapping to convert the parameter names in the dataframe to the expected parameter names.
+# If you are using an emulator with other parameters, modify the dictionary and add only the parameters used by the model (e.g. if your emulator is **not** using neutrino masses, you should not include mnu in the dictionary).
+
+# parameter expected by the module: parameter name in your dataframe
 param_mapping = {
     'h': 'h',
     'm_ncdm': 'm_ncdm',
-    'omega_cdm': 'omega_c',
+    'omch2': 'omega_cdm',
     'Omega_m': 'Omega_m',
     'Omega_Lambda': 'Omega_Lambda',
     'ln_A_s_1e10': 'ln_A_s_1e10',
-    'n_s': 'n_s'
+    'ns': 'n_s',
+    'nrun': 'nrun'
 }
+# -
 
-fitter_compressed_params = linPCosmologyCosmopower()
+fitter_compressed_params = linPCosmologyCosmopower(cosmopower_model = "Pk_cp_NN_nrun")
 
 linP_cosmology_results = fitter_compressed_params.fit_linP_cosmology(chains_df = df,
                                                                      param_mapping = param_mapping)
 
+linP_cosmology_results
+
 # ## 3. TRAIN COSMOPOWER MODELS
 
 # ### 2.1. CREATE THE LATIN HYPERCUBE OF PARAMETERS
 #
 #
 
-dict_params_ranges = {
-    'ombh2': [0.015, 0.03],
-    'omch2': [0.05, 0.16],
-    'H0': [60, 80],
-    'ns': [0.8, 1.2],
-    'As': [5e-10, 4e-9],
-    'mnu': [0, 2],}
-
+# Define the parameters used by the emulator and their ranges
+params = ["ombh2", "omch2", "H0", "ns", "As", "mnu", "nrun"]
+ranges = [
+    [0.015, 0.03],
+    [0.05, 0.16],
+    [60, 80],
+    [0.8, 1.2],
+    [5e-10, 4e-9],
+    [0, 2],
+    [0, 0.05]
+]
+dict_params_ranges = dict(zip(params, ranges))
+
+# Create the Latin hypercube sample
 create_LH_sample(dict_params_ranges = dict_params_ranges,
                  nsamples = 10,
                  filename = "LHS_params_test.npz")
 
 # ### 2.2 GENERATE THE TRAINING SPECTRA
 
+# Generate the training power spectra with CAMB
 generate_training_spectra(input_LH_filename = 'LHS_params_test.npz',
                           output_filename = "linear_test.dat")
 
 # ### 2.3 PREPARE THE TRAINING FILES FOR COSMOPOWER
 
-cosmopower_prepare_training(params = ["H0", "mnu", "omega_cdm", "omega_b", "As", "ns"],
-                            Pk_filename = "linear_test.dat")
+# Prepare the training files for cosmopower. The input Pk file must be the one generated in the previous step.
+cosmopower_prepare_training(params = params,
+                            Pk_filename = "linear_test.dat")
 
 # ### 2.4 TRAIN THE COSMOPOWER MODEL
 #
 
-cosmopower_train_model(model_save_filename = "Pk_cp_NN_test")
+# Note that the training always uses the files generated in the previous step.
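+# A sketch of the artifact each step of this section produces (filenames as used above):
+#   create_LH_sample(...)            -> LHS_params_test.npz (Latin hypercube of input cosmologies)
+#   generate_training_spectra(...)   -> linear_test.dat (linear P(k) computed with CAMB per sample)
+#   cosmopower_prepare_training(...) -> training files formatted for cosmopower
+#   cosmopower_train_model(...)      -> the trained Pk_cp_NN_test model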
+
+cosmopower_train_model(
+    model_save_filename = "Pk_cp_NN_test",
+    model_params = params
+)
 #
diff --git a/notebooks/compressed_parameters/cosmopowerCAMB_comparison.py b/notebooks/compressed_parameters/cosmopowerCAMB_comparison.py
new file mode 100644
index 00000000..768f8771
--- /dev/null
+++ b/notebooks/compressed_parameters/cosmopowerCAMB_comparison.py
@@ -0,0 +1,188 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.16.4
+#   kernelspec:
+#     display_name: lace_test
+#     language: python
+#     name: python3
+# ---
+
+# %load_ext autoreload
+# %autoreload 2
+
+# # COSMOPOWER vs CAMB COMPARISON
+
+# +
+import numpy as np
+import cosmopower as cp
+import pandas as pd
+import logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+from lace.archive import gadget_archive
+from lace.cosmo.fit_linP_cosmopower import linPCosmologyCosmopower
+from lace.emulator.constants import PROJ_ROOT
+
+# -
+
+# # LOAD DATA
+
+test_sim = "mpg_central"
+z_star = 3
+
+
+archive = gadget_archive.GadgetArchive(postproc="Cabayol23")
+
+testing_data = archive.get_testing_data(sim_label=test_sim)
+
+cosmo_params = testing_data[0]['cosmo_params']
+cosmo_params["omnuh2"] = cosmo_params["mnu"] / 94.07
+star_params = testing_data[0]['star_params']
+
+# ### 1.2. P(K) WITH COSMOPOWER
+# +
+# Load the emulator
+emu_path = (PROJ_ROOT / "data" / "cosmopower_models" / "Pk_cp_NN_nrun").as_posix()
+cp_nn = cp.cosmopower_NN(restore=True,
+                         restore_filename=emu_path)
+
+# Access the emulator parameters and the k modes
+k_Mpc = cp_nn.modes
+logger.info(f"Emulator parameters: {cp_nn.parameters}")
+
+# Define the cosmology dictionary. Some parameters are used by the emulator (e.g. parameters from the previous cell) and others are used to convert to kms
+cosmo_cp = {'H0': [cosmo_params["H0"]],
+            'h': [cosmo_params["H0"]/100],
+            'mnu': [cosmo_params["mnu"]],
+            'Omega_m': [(cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
+            'Omega_Lambda': [1 - (cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
+            'omch2': [cosmo_params["omch2"]],
+            'ombh2': [cosmo_params["ombh2"]],
+            'omnuh2': [cosmo_params["omnuh2"]],
+            'As': [cosmo_params["As"]],
+            'ns': [cosmo_params["ns"]],
+            'nrun': [cosmo_params["nrun"]]}
+
+
+# Emulate the power spectrum with cosmopower
+Pk_Mpc_cp = cp_nn.ten_to_predictions_np(cosmo_cp)
+k_Mpc_cp = cp_nn.modes.reshape(1,len(cp_nn.modes))
+
+# Convert to kms
+k_kms_cp, Pk_kms_cp = linPCosmologyCosmopower.convert_to_kms(cosmo_cp, k_Mpc_cp, Pk_Mpc_cp, z_star = z_star)
+
+# ### 1.3. P(K) WITH CAMB
+#
+
+from lace.cosmo import camb_cosmo
+from cup1d.likelihood import CAMB_model
+
+cosmo_camb = camb_cosmo.get_cosmology(
+    H0=cosmo_params["H0"],
+    mnu=cosmo_params["mnu"],
+    omch2=cosmo_params["omch2"],
+    ombh2=cosmo_params["ombh2"],
+    omk=cosmo_params["omk"],
+    As=cosmo_params["As"],
+    ns=cosmo_params["ns"],
+    nrun=cosmo_params["nrun"]
+)
+
+fun_cosmo = CAMB_model.CAMBModel(
+    zs= [3],
+    cosmo=cosmo_camb,
+    z_star=z_star,
+    kp_kms=0.009,
+)
+
+k_Mpc_camb, zs, linP_Mpc_camb = fun_cosmo.get_linP_Mpc()
+
+k_kms_camb, Pk_kms_camb = linPCosmologyCosmopower.convert_to_kms(cosmo_cp, k_Mpc_camb, linP_Mpc_camb, z_star = z_star)
+
+# +
+import matplotlib.pyplot as plt
+import logging
+import numpy as np
+
+# Disable matplotlib debug logging
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+
+plt.rcParams.update({
+    'font.family': 'serif',
+    'font.size': 12,
+    'axes.labelsize': 14,
+    'axes.titlesize': 16,
+    'xtick.labelsize': 12,
+    'ytick.labelsize': 12,
+    'legend.fontsize': 12,
+    'figure.figsize': [8, 8],
+    'axes.prop_cycle': plt.cycler(color=['#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'])
+})
+
+fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, sharex=True)
+
+# Main panel
+ax1.set_title(f"Simulation: {test_sim}")
+ax1.loglog(k_Mpc_camb, linP_Mpc_camb[0], label='CAMB')
+ax1.loglog(k_Mpc_cp[0], Pk_Mpc_cp[0], label='CP', ls=':')
+ax1.set_ylabel(r"$P(k)$ [Mpc]")
+ax1.legend()
+
+# Ratio panel
+ratio = np.interp(k_Mpc_camb, k_Mpc_cp[0], Pk_Mpc_cp[0]) / linP_Mpc_camb[0]
+ax2.semilogx(k_Mpc_camb, ratio-1)
+ax2.set_ylabel('CP / CAMB - 1')
+ax2.set_xlabel(r"$k$ [1/Mpc]")
+ax2.axhline(y=0, color='k', linestyle='--', alpha=0.5)
+
+plt.tight_layout()
+plt.show()
+
+# +
+import matplotlib.pyplot as plt
+import logging
+import numpy as np
+
+# Disable matplotlib debug logging
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+
+plt.rcParams.update({
+    'font.family': 'serif',
+    'font.size': 12,
+    'axes.labelsize': 14,
+    'axes.titlesize': 16,
+    'xtick.labelsize': 12,
+    'ytick.labelsize': 12,
+    'legend.fontsize': 12,
+    'figure.figsize': [8, 8],
+    'axes.prop_cycle': plt.cycler(color=['#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'])
+})
+
+fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, sharex=True)
+
+# Main panel
+ax1.set_title(f"Simulation: {test_sim}")
+ax1.loglog(k_kms_camb[0], Pk_kms_camb[0], label='CAMB')
+ax1.loglog(k_kms_cp[0], Pk_kms_cp[0], label='CP', ls=':')
+ax1.set_ylabel(r"$P(k)$ [kms]")
+ax1.legend()
+
+# Ratio panel
+ratio = np.interp(k_kms_camb[0], k_kms_cp[0], Pk_kms_cp[0]) / Pk_kms_camb[0]
+ax2.semilogx(k_kms_camb[0], ratio)
+ax2.set_ylabel('CP / CAMB')
+ax2.set_xlabel(r"$k$ [s/km]")
+ax2.axhline(y=1, color='k', linestyle='--', alpha=0.5)
+
+plt.tight_layout()
+plt.show()
+# -
+
+
diff --git a/notebooks/compressed_parameters/optimize_kp.py b/notebooks/compressed_parameters/optimize_kp.py
new file mode 100644
index 00000000..04c199ff
--- /dev/null
+++ b/notebooks/compressed_parameters/optimize_kp.py
@@ -0,0 +1,382 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.16.4
+#   kernelspec:
+#     display_name: lace_test
+#     language: python
+#     name: python3
+# ---
+
+# # OPTIMIZATION OF k*
+
+# %load_ext autoreload
+# %autoreload 2
+
+# +
+import corner
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import cosmopower as cp
+
+from lace.utils.plotting_functions import create_corner_plot
+from lace.cosmo.fit_linP_cosmopower import linPCosmologyCosmopower
+from lace.archive import gadget_archive
+from lace.emulator.constants import PROJ_ROOT
+
+from lace.cosmo import camb_cosmo
+from cup1d.likelihood import CAMB_model
+
+import logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+import matplotlib as mpl
+mpl.rcParams.update(mpl.rcParamsDefault)
+plt.rcParams["font.family"] = "serif"
+
+
+# -
+
+def open_Chain(fname, source_code):
+    # access parameters
+    if source_code == "lym1d":
+        with open(fname, 'r') as file:
+            for line in file:
+                if line.strip().startswith("#"):
+                    first_comment = line.strip()
+                    break
+        parameters_list = first_comment.lstrip("#").split()
+
+        chains = np.loadtxt(fname)
+        df = pd.DataFrame(chains, columns = parameters_list)
+        df['m_ncdm'] = 0
+        df['nrun'] = 0
+        df['ln_A_s_1e10'] = np.log(df.A_s*1e10)
+
+    elif source_code=='cup1d':
+        f = np.load(fname, allow_pickle=True)
+        df = pd.DataFrame(f.tolist())
+
+        df = df[df.lnprob>-14]
+        df['ln_A_s_1e10'] = np.log(df.As*1e10)
+        df["Omega_m"] = 0.316329
+        df["Omega_Lambda"] = 1 - df.Omega_m.values
+        df["h"] = df.H0.values / 100
+        df["ombh2"] = 0.049009 * df.h**2
+        df["omch2"] = (df.Omega_m - 0.049009) * df.h**2
+        df['mnu'] = 0
+        df["omnuh2"] = df.mnu / 94.07
+        df['nrun'] = 0
+    return df
+
+
+
+file_dirs = [["/Users/lauracabayol/Documents/DESI/cup1d_chains/chain.npy", "cup1d"]]
+
+# ## OPEN AND SAVE CHAINS IN DATAFRAMES
+
+dfs = []
+for file in file_dirs:
+    df = open_Chain(file[0], file[1])
+    #df = df.sample(100_000)
+    dfs.append(df)
+
+# +
+# DEFINES THE MAPPING BETWEEN PARAMETER NAMES AND INTERNAL NAMING
+param_mapping = {
+    'h': 'h',
+    'mnu': 'mnu',
+    'omch2': 'omch2',
+    'Omega_m': 'Omega_m',
+    'Omega_Lambda': 'Omega_Lambda',
+    'ln_A_s_1e10': 'ln_A_s_1e10',
+    'ns': 'ns',
+    'omnuh2': 'omnuh2',
+    'nrun': 'nrun'
+}
+
+# ## LOOP OVER Kp
+
+# +
+#kps = np.arange(0.003,0.02,0.001)
+kps = np.arange(0.003,0.011,0.001)
+
+parameters_of_interest = ["Delta2_star_cp", "n_star_cp", "alpha_star_cp"]
+labels = [r"$\Delta^2_*$", r"$n_*$", r"$\alpha_*$"]
+
+
+# +
+# to estimate the true value of the compressed parameters
+from lace.cosmo import camb_cosmo
+from cup1d.likelihood import CAMB_model
+
+
+z_star = 3
+cosmo_camb = camb_cosmo.get_cosmology(
+    H0=0.6732117*100,
+    mnu=0,
+    omch2=0.12027891481623526,
+    ombh2=0.02221156458376476,
+    omk=0,
+    As=2.1e-09,
+    ns=0.966,
+    nrun=0
+)
+
+# +
+df_dict = {}
+correlations = {}
+sigma_68_cp = {}
+for ii, kp in enumerate(kps):
+    dict_ = {}
+    fitter_compressed_params = linPCosmologyCosmopower(cosmopower_model = "Pk_cp_NN_nrun",
+                                                       fit_min_kms = 0.5,
+                                                       fit_max_kms = 2,
+                                                       kp_kms = kp)
+    linP_cosmology_results = fitter_compressed_params.fit_linP_cosmology(chains_df = df,
+                                                                         param_mapping = param_mapping)
+
+    df_star_params = pd.DataFrame(linP_cosmology_results)
+
+    corrs = np.corrcoef(df_star_params.values, rowvar=False)
+    corr_delta_n = corrs[0,1]
+    corr_delta_alpha = corrs[0,2]
+    corr_n_alpha = corrs[1,2]
+
+    correlations[f"{np.round(kp,4)}"] = np.c_[corr_delta_n,corr_delta_alpha,corr_n_alpha]
+    df_dict[f"{np.round(kp,4)}"] = df_star_params
+
+    fun_cosmo = CAMB_model.CAMBModel(
+        zs= [3],
+        cosmo=cosmo_camb,
+        z_star=z_star,
+        kp_kms=kp,
+    )
+    results_camb = fun_cosmo.get_linP_params()
+    q16, q50, q84 = np.percentile(df_star_params["Delta2_star_cp"], [16, 50, 84])
+    sigma_68 = 0.5*(q84 - q16)
+
+    sigma_68_cp[f"{np.round(kp,4)}"] = sigma_68 / results_camb["Delta2_star"]
+
+    if np.round(kp,4) == 0.009:
+        q16, q50, q84 = np.percentile(df["Delta2_star"], [16, 50, 84])
+        sigma_68_chain = 0.5*(q84 - q16) / results_camb["Delta2_star"]
+
+
+
+
+# +
+keys = sorted(correlations.keys(), key=lambda x: float(x))
+keys_float = [float(k) for k in keys]
+
+values = [correlations[k][0] for k in keys]
+columns = np.array(values).T
+
+labels = [r"corr($\Delta^2_* - n_*$)", r"corr($\Delta^2_* - \alpha_*$)", r"corr($\alpha_* - n_*$)"]
+colors = ["crimson", "navy", "goldenrod"]
+
+# Create a figure with two subplots
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10), gridspec_kw={'height_ratios': [2, 1]})
+
+# Plot correlations
+for i, column in enumerate(columns):
+    ax1.plot(keys_float, np.abs(column), label=f'{labels[i]}', marker='.', color=colors[i])
+
+# Customize the first subplot
+ax1.set_xlabel('kp [s/km]', fontsize=12)
+ax1.set_ylabel('abs(Correlation)', fontsize=12)
+ax1.legend()
+ax1.grid(True)
+ax1.set_title('Correlations between parameters', fontsize=14)
+
+# Plot errors
+error_keys = sorted(sigma_68_cp.keys(), key=lambda x: float(x))
+error_values = [sigma_68_cp[k] for k in error_keys]
+ax2.plot([float(k) for k in error_keys], error_values, marker='.', color='green')
+
+# Add sigma68 from chain as black cross
+ax2.scatter(0.009, sigma_68_chain, marker='x', color='black', label='Chain', s=80)
+ax2.legend()
+
+# Customize the second subplot
+ax2.set_xlabel('kp [s/km]', fontsize=12)
+ax2.set_ylabel(r'$\sigma_{68}[\Delta^2_{\star, \rm CP}]$ / $\Delta^2_{\star, \rm CAMB}$', fontsize=12)
+ax2.grid(True)
+
+plt.tight_layout()
+plt.show()
+# -
+
+parameters_of_interest = ["Delta2_star", "n_star"]
+labels = [r"$\Delta^2_*$", r"$n_*$"]
+create_corner_plot(list_of_dfs = [df_dict["0.009"].rename(columns = {"Delta2_star_cp":"Delta2_star",
+                                                                     "n_star_cp":"n_star",
+                                                                     "alpha_star_cp":"alpha_star"}),
+                                  df],
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue', 'crimson'],
+                   legend_labels = [r"$k_p=0.009$, CP", r"$k_p=0.009$, CAMB"])
+
+# ## PLOT ELLIPSES
+
+parameters_of_interest = ["Delta2_star_cp", "n_star_cp", "alpha_star_cp"]
+labels = [r"$\Delta^2_*$", r"$n_*$", r"$\alpha_*$"]
+
+create_corner_plot(list_of_dfs = [df_dict["0.004"]],
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue'],
+                   legend_labels = [r"$k_p=0.004$"])
+
+create_corner_plot(list_of_dfs = [df_dict["0.009"]],
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue'],
+                   legend_labels = [r"$k_p=0.009$"])
+
+create_corner_plot(list_of_dfs = [df_dict["0.01"]],
+                   params_to_plot = parameters_of_interest,
+                   labels = labels,
+                   colors = ['steelblue'],
+                   legend_labels = [r"$k_p=0.01$"])
+
+# ## DELTA^2* PRECISION VS k*
+
+# ### In the mpg neutrinos simulation
+
+test_sim = "mpg_neutrinos"
+archive = gadget_archive.GadgetArchive(postproc="Cabayol23")
+testing_data = archive.get_testing_data(sim_label=test_sim)
+
+z_star = 3
+fit_min = 0.5
+fit_max = 2
+
+cosmo_params = testing_data[0]['cosmo_params']
+cosmo_params["omnuh2"] = cosmo_params["mnu"] / 94.07
+star_params = testing_data[0]['star_params']
+
+# Load cosmopower emulator
+emu_path = (PROJ_ROOT / "data" / "cosmopower_models" / "Pk_cp_NN_nrun").as_posix()
+cp_nn = cp.cosmopower_NN(restore=True,
+                         restore_filename=emu_path)
+logger.info(f"Emulator parameters: {cp_nn.parameters}")
+
+cosmo_cp = {'H0': [cosmo_params["H0"]],
+            'h': [cosmo_params["H0"]/100],
+            'mnu': [cosmo_params["mnu"]],
+            'Omega_m': [(cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
+            'Omega_Lambda': [1 - (cosmo_params["omch2"] + cosmo_params["ombh2"]) / (cosmo_params["H0"]/100)**2],
+            'omch2': [cosmo_params["omch2"]],
+            'ombh2': [cosmo_params["ombh2"]],
+            'omnuh2': [cosmo_params["omnuh2"]],
+            'As': [cosmo_params["As"]],
+            'ns': [cosmo_params["ns"]],
+            'nrun': [cosmo_params["nrun"]]}
+
+# +
+cosmo_camb = camb_cosmo.get_cosmology(
+    H0=cosmo_params["H0"],
+    mnu=cosmo_params["mnu"],
+    omch2=cosmo_params["omch2"],
+    ombh2=cosmo_params["ombh2"],
+    omk=cosmo_params["omk"],
+    As=cosmo_params["As"],
+    ns=cosmo_params["ns"],
+    nrun=cosmo_params["nrun"]
+)
+
+
+# -
+
+# Emulate the power spectrum with cosmopower
+Pk_Mpc_cp = cp_nn.ten_to_predictions_np(cosmo_cp)
+k_Mpc_cp = cp_nn.modes.reshape(1,len(cp_nn.modes))
+k_kms_cp, Pk_kms_cp = linPCosmologyCosmopower.convert_to_kms(cosmo_cp, k_Mpc_cp, Pk_Mpc_cp, z_star = z_star)
+
+
+# +
+kps = np.arange(0.003,0.011,0.001)
+#kps = np.arange(0.008,0.015,0.001)
+
+errors = {}
+#kps = [0.003]
+
+for ii, kp in enumerate(kps):
+
+    kmin_kms = fit_min * kp
+    kmax_kms = fit_max * kp
+
+    poly_linP = linPCosmologyCosmopower.fit_polynomial(
+        xmin = kmin_kms / kp,
+        xmax = kmax_kms / kp,
+        x = k_kms_cp / kp,
+        y = Pk_kms_cp,
+        deg=2
+    )
+
+    results = linPCosmologyCosmopower.get_star_params(linP_kms = poly_linP,
+                                                      kp_kms = kp)
+
+    fun_cosmo = CAMB_model.CAMBModel(
+        zs= [3],
+        cosmo=cosmo_camb,
+        z_star=z_star,
+        kp_kms=kp,
+    )
+    results_camb = fun_cosmo.get_linP_params()
+
+    err_delta2 = np.abs(results["Delta2_star_cp"] - results_camb["Delta2_star"]) / results_camb["Delta2_star"]
+
+    errors[f"{np.round(kp,4)}"] = err_delta2
+
+
+
+# +
+keys = sorted(correlations.keys(), key=lambda x: float(x))
+keys_float = [float(k) for k in keys]
+
+values = [correlations[k][0] for k in keys]
+columns = np.array(values).T
+
+labels = [r"corr($\Delta^2_* - n_*$)", r"corr($\Delta^2_* - \alpha_*$)", r"corr($\alpha_* - n_*$)"]
+colors = ["crimson", "navy", "goldenrod"]
+
+# Create a figure with two subplots
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10), gridspec_kw={'height_ratios': [2, 1]})
+
+# Plot correlations
+for i, column in enumerate(columns):
+    ax1.plot(keys_float, np.abs(column), label=f'{labels[i]}', marker='o', color=colors[i])
+
+# Customize the first subplot
+ax1.set_xlabel('kp [s/km]', fontsize=12)
+ax1.set_ylabel('abs(Correlation)', fontsize=12)
+ax1.legend()
+ax1.grid(True)
+ax1.set_title('Correlations between parameters', fontsize=14)
+
+# Plot errors
+error_keys = sorted(errors.keys(), key=lambda x: float(x))
+error_values = [errors[k] for k in error_keys]
+ax2.plot(error_keys, error_values, marker='o', color='green')
+
+# Customize the second subplot
+ax2.set_xlabel('kp [s/km]', fontsize=12)
+ax2.set_ylabel(r'$|\Delta^2_{\star, \rm CP}\ /\ \Delta^2_{\star, \rm CAMB} - 1|$', fontsize=12)
+ax2.grid(True)
+ax2.set_title(r'Relative error in $\Delta^2_*$ for the MPG neutrinos simulation', fontsize=14)
+
+plt.tight_layout()
+plt.show()
+# -
+
+#
diff --git a/scripts/predict.py b/scripts/predict.py
index db4929c9..daaa6c56 100644
--- a/scripts/predict.py
+++ b/scripts/predict.py
@@ -101,6 +101,10 @@ def make_p1d_err_plot(p1ds_err: np.ndarray, kMpc_test: np.ndarray, zs_test: np.n
     plt.tight_layout()
 
     if config["save_plot_path"] is not None:
+        save_path = PROJ_ROOT / config["save_plot_path"]
+        if not save_path.parent.exists():
+            logger.warning(f"Creating directory {save_path.parent} as it does not exist")
+            save_path.parent.mkdir(parents=True)
         plt.savefig(PROJ_ROOT / config["save_plot_path"], bbox_inches='tight')
         plt.close()
         logger.info(f"P1D error plot saved to {PROJ_ROOT / config['save_plot_path']}")
@@ -109,17 +113,32 @@ def main():
     args = parse_args()
     config = load_config(args.config)
+    hyperparameters = {
+        key: (int(value) if key in ['ndeg', 'nepochs', 'step_size', 'nhidden', 'max_neurons', 'seed', 'batch_size']
+              else float(value) if key in ['kmax_Mpc', 'lr0', 'weight_decay', 'z_max']
+              else bool(value) if key == 'weighted_emulator'
+              else value)
+        for key, value in config["hyperparameters"].items()
+    }
 
     emu_params = config["emulator_params"]
     logger.info(f"Emulator parameters: {emu_params}")
 
-    archive = create_archive(config["archive"])
-
+    if config.get("archive") is not None:
+        archive = create_archive(config["archive"])
+
     logger.info("Setting up emulator")
-    emulator = set_emulator(
-        emulator_label=config["emulator_label"],
-        archive=archive,
-        drop_sim=config["drop_sim"])
+    if config["emulator_label"] is not None:
+        emulator = set_emulator(
+            emulator_label=config["emulator_label"],
+            archive=archive,
+            drop_sim=config["drop_sim"])
+    else:
+        emulator = NNEmulator(
+            archive=archive,
+            train=False,
+            models_dir=config["model_path"],
+            **hyperparameters)
 
     logger.info("Getting testing data")
     test_data = archive.get_testing_data(sim_label=config["sim_test"])
@@ -129,7 +148,12 @@ def main():
     make_p1d_err_plot(p1ds_err, kMpc_test, zs, config)
 
     if config["save_predictions_path"] is not None:
-        json.dump(predicted_p1d, PROJ_ROOT / open(PROJ_ROOT / config["save_predictions_path"], "w"))
+        save_path = PROJ_ROOT / config["save_predictions_path"]
+        if not save_path.parent.exists():
+            logger.warning(f"Creating directory {save_path.parent} as it does not exist")
+            save_path.parent.mkdir(parents=True)
+        with open(save_path, "w") as f:
+            json.dump(predicted_p1d, f)
 
     logger.info("Main function completed")
 
diff --git a/scripts/train.py b/scripts/train.py
index c347607e..a08ef927 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -1,6 +1,9 @@
+#!/usr/bin/env python3
+
 import argparse
 import yaml
 import torch
+import logging
 from pathlib import Path
 # our modules
 from lace.archive import (gadget_archive,
@@ -9,6 +12,8 @@
 from lace.emulator.gp_emulator import GPEmulator
 from lace.emulator.constants import PROJ_ROOT
 
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
 def parse_args():
     parser = argparse.ArgumentParser(description='Train emulators from config')
     parser.add_argument('--config', type=str, required=True,
@@ -16,6 +21,7 @@
     return parser.parse_args()
 
 def load_config(config_path: Path) -> dict:
+    logging.info(f"Loading configuration from {config_path}")
     with open(config_path, 'r') as f:
         return yaml.safe_load(f)
 
@@ -24,9 +30,16 @@ def main():
     args = parse_args()
     config = load_config(args.config)
 
+    save_path = PROJ_ROOT / config["save_path"]
+    if not save_path.parent.exists():
+        logging.warning(f"Creating directory {save_path.parent} as it does not exist")
+        save_path.parent.mkdir(parents=True)
+
     emu_params = config["emulator_params"]
+    logging.info(f"Emulator parameters: {emu_params}")
 
     def create_archive(archive_config):
+        logging.info(f"Creating archive with config: {archive_config}")
         if archive_config["file"] == "Nyx":
             return nyx_archive.NyxArchive(nyx_version=archive_config["version"])
         elif archive_config["file"] == "Gadget":
@@ -35,32 +48,44 @@
             raise ValueError(f"Archive {archive_config['file']} not supported")
 
     def create_nn_emulator_with_label(config):
+        logging.info("Creating NN emulator with label")
         if config.get("training_set"):
             return NNEmulator(training_set=config["training_set"],
                               emulator_label=config["emulator_label"],
+                              drop_sim=config.get("drop_sim"),
                               save_path=PROJ_ROOT / config["save_path"])
         elif config.get("archive"):
             archive = create_archive(config["archive"])
             return NNEmulator(archive=archive,
                               emulator_label=config["emulator_label"],
+                              drop_sim=config.get("drop_sim"),
                               save_path=PROJ_ROOT / config["save_path"])
         else:
             raise ValueError("Either training_set or archive must be provided")
 
     def create_gp_emulator_with_label(config):
+        logging.info("Creating GP emulator with label")
        if config.get("training_set"):
             return GPEmulator(training_set=config["training_set"],
-                              emulator_label=config["emulator_label"])
+                              emulator_label=config["emulator_label"],
+                              drop_sim=config.get("drop_sim"))
         elif config.get("archive"):
             archive = create_archive(config["archive"])
             return GPEmulator(archive=archive,
                               emulator_label=config["emulator_label"],
-                              )
+                              drop_sim=config.get("drop_sim"))
         else:
             raise ValueError("Either training_set or archive must be provided")
 
     def create_nn_emulator_without_label(config):
-        hyperparameters = config["hyperparameters"]
+        logging.info("Creating NN emulator without label")
+        hyperparameters = {
+            key: (int(value) if key in ['ndeg', 'nepochs', 'step_size', 'nhidden', 'max_neurons', 'seed', 'batch_size']
+                  else float(value) if key in ['kmax_Mpc', 'lr0', 'weight_decay', 'z_max']
+                  else bool(value) if key == 'weighted_emulator'
+                  else value)
+            for key, value in config["hyperparameters"].items()
+        }
         if config.get("archive"):
             archive = create_archive(config["archive"])
             return NNEmulator(archive=archive,
@@ -74,21 +99,30 @@
             raise ValueError("Either archive or training_set must be provided")
 
     def create_gp_emulator_without_label(config):
-        hyperparameters = config["hyperparameters"]
+        logging.info("Creating GP emulator without label")
+        hyperparameters = {
+            key: float(value) if key in ['kmax_Mpc', 'lr0', 'weight_decay', 'z_max']
+            else value
+            for key, value in config["hyperparameters"].items()
+        }
         if config.get("archive"):
             archive = create_archive(config["archive"])
-            return GPEmulator(archive=archive)
+            return GPEmulator(archive=archive,
+                              drop_sim=config.get("drop_sim"))
         elif config.get("training_set"):
-            return GPEmulator(training_set=config["training_set"])
+            return GPEmulator(training_set=config["training_set"],
+                              drop_sim=config.get("drop_sim"))
         else:
             raise ValueError("Either archive or training_set must be provided")
 
     if config["emulator_type"] == "NN":
+        logging.info("Creating NN emulator")
         if config.get("emulator_label"):
             emulator = create_nn_emulator_with_label(config)
         else:
             emulator = create_nn_emulator_without_label(config)
     elif config["emulator_type"] == "GP":
+        logging.info("Creating GP emulator")
         if config.get("emulator_label"):
             emulator = create_gp_emulator_with_label(config)
         else:
@@ -96,6 +130,7 @@
     else:
         raise ValueError("emulator_type must be either 'NN' or 'GP'")
 
+    logging.info("Emulator created successfully")
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
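For reviewers, a self-contained sketch of what the hyperparameter-casting comprehension added to `scripts/train.py` and `scripts/predict.py` does (the YAML snippet below is illustrative, not a file from the repository). Note that PyYAML parses a scalar like `1e-3` as a string, since YAML 1.1 floats require a decimal point, which is exactly what the explicit `float()` casts guard against:

```python
import yaml

config = yaml.safe_load("""
hyperparameters:
  kmax_Mpc: 4
  nepochs: 100
  lr0: 1e-3
  weighted_emulator: true
""")

int_keys = ['ndeg', 'nepochs', 'step_size', 'nhidden', 'max_neurons', 'seed', 'batch_size']
float_keys = ['kmax_Mpc', 'lr0', 'weight_decay', 'z_max']

# Same casting logic as in the scripts above
hyperparameters = {
    key: (int(value) if key in int_keys
          else float(value) if key in float_keys
          else bool(value) if key == 'weighted_emulator'
          else value)
    for key, value in config["hyperparameters"].items()
}

print(hyperparameters)
# {'kmax_Mpc': 4.0, 'nepochs': 100, 'lr0': 0.001, 'weighted_emulator': True}
```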