From ab3bf4ea9432f08d527de0ef5a02f7796c3f4d99 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Thu, 12 Jan 2023 06:30:10 +0100 Subject: [PATCH 01/26] update --- ronswanson/database.py | 138 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 5 deletions(-) diff --git a/ronswanson/database.py b/ronswanson/database.py index a0c7f2a..706d5ad 100644 --- a/ronswanson/database.py +++ b/ronswanson/database.py @@ -2,16 +2,19 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Union from pathlib import Path - +import collections import h5py import numpy as np import plotly.graph_objects as go from astromodels import TemplateModel, TemplateModelFactory +from astromodels.functions.template_model import TemplateFile from astromodels.utils.logging import silence_console_log +from astromodels.utils import get_user_data_path from tqdm.auto import tqdm from ronswanson.grids import Parameter, ParameterGrid +from ronswanson.utils.cartesian_product import cartesian_jit from ronswanson.utils.color import Colors from .utils.logging import setup_logger @@ -66,6 +69,12 @@ def __init__( self._parameter_names: List[str] = parameter_names + for i, name in enumerate(self._parameter_names): + + if not isinstance(name, str): + + self._parameter_names[i] = name.decode() + self._energy_grid: np.ndarray = energy_grid # self._values: Dict[str, np.ndarray] = values @@ -275,6 +284,47 @@ def from_file(cls, file_name: str, output: int = 0) -> "Database": meta_data=meta_data, ) + def to_hdf5( + self, file_name: Union[str, Path], overwrite: bool = False + ) -> None: + + path = Path(file_name).absolute() + + if path.exists() and (not overwrite): + + msg = f"{path} exists!" 
+ + log.error(msg) + + raise RuntimeError(msg) + + with h5py.File(path.as_posix(), "w") as f: + + energy_grp: h5py.Group = f.create_group("energy_grid") + + energy_grp.create_dataset("energy_grid_0", data=self._energy_grid) + + values_grp = f.create_group("values") + + values_grp.create_dataset("output_0", data=self._values) + + par_name_grp = f.create_group("parameter_names") + + for i, name in enumerate(self._parameter_names): + + par_name_grp.attrs[f"par{i}"] = name + + f.create_dataset("parameters", data=self._grid_points) + + f.create_dataset("run_time", data=self._run_time) + + if self._meta_data is not None: + + meta_grp = f.create_group("meta") + + for k, v in self._meta_data.items(): + + meta_grp.create_dataset(k, data=v) def replace_nan_inf_with(self, value: float = 0.0) -> None: @@ -302,7 +352,7 @@ def _get_sub_selection( parameter_selection[k] = np.ones(len(v), dtype=bool) - for k, v in kwargs.items(): + for k, v in selections_dict.items(): if k in self._parameter_names: @@ -340,7 +390,7 @@ def _get_sub_selection( sub_grid=sub_grid, sub_values=sub_values, sub_range=sub_parameter_ranges, - selection=selection + selection=selection, ) # @classmethod @@ -350,8 +400,6 @@ def _get_sub_selection( # return Database() - - def to_3ml( self, name: str, @@ -411,6 +459,86 @@ def to_3ml( return TemplateModel(name) + @classmethod + def from_astromodels(cls, model_name: str) -> "Database": + # Get the data directory + + data_dir_path: Path = get_user_data_path() + + # Sanitize the data file + + filename_sanitized = data_dir_path.absolute() / f"{model_name}.h5" + + if not filename_sanitized.exists(): + + msg = f"The data file {filename_sanitized} does not exists. Did you use the TemplateFactory?" 
+ + log.error(msg) + + raise RuntimeError(msg) + + # Open the template definition and read from it + + data_file: Path = filename_sanitized + + # use the file shadow to read + + template_file: TemplateFile = TemplateFile.from_file( + filename_sanitized.as_posix() + ) + + parameters_grids = [] + + for key in template_file.parameter_order: + + try: + + # sometimes this is + # stored binary + + k = key.decode() + + except (AttributeError): + + # if not, then we + # load as a normal str + + k = key + + parameters_grids.append(np.array(template_file.parameters[key])) + + parameter_grid_cart = cartesian_jit(parameters_grids) + + energies = template_file.energies + + shape = 1 + for dim in template_file.grid.shape[:-1]: + shape *= dim + + values = template_file.grid.reshape(shape, template_file.grid.shape[-1]) + + return cls( + grid_points=parameter_grid_cart, + parameter_names=template_file.parameter_order, + energy_grid=energies, + run_time=np.zeros(parameter_grid_cart.shape[0]), + values=values, + ) + + def new_from_selections(self, **selections) -> "Database": + + selection_container: SelectionContainer = self._get_sub_selection( + selections + ) + + return Database( + selection_container.sub_grid, + self.parameter_names, + self.energy_grid, + self._run_time[selection_container.selection], + selection_container.sub_values, + ) + def check_for_missing_parameters( self, parameter_grid: ParameterGrid, create_new_grid: bool = False ) -> None: From a1965eabe9e5dd01d3626f99cf755d68f3e67227 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Fri, 10 Mar 2023 12:52:16 +0100 Subject: [PATCH 02/26] added the ability to do LHS sampling of points --- ronswanson/database.py | 75 ++++++++++++++++++++++++++++++-- ronswanson/grids.py | 11 +++++ ronswanson/script_generator.py | 19 +++++++- ronswanson/simulation_builder.py | 59 ++++++++++++++++++++----- 4 files changed, 149 insertions(+), 15 deletions(-) diff --git a/ronswanson/database.py b/ronswanson/database.py index 706d5ad..c0bfbf0 100644 --- a/ronswanson/database.py +++ b/ronswanson/database.py @@ -393,6 +393,22 @@ def _get_sub_selection( selection=selection, ) + def _get_sub_selection_via_index( + self, selection_index: np.ndarray + ) -> SelectionContainer: + + sub_grid = self._grid_points[selection_index, ...] + sub_values = self._values[selection_index, ...] + + sub_parameter_ranges = {} + + return SelectionContainer( + sub_grid=sub_grid, + sub_values=sub_values, + sub_range=sub_parameter_ranges, + selection=selection_index, + ) + # @classmethod # def create_sub_selected_database(self, **selection) -> "Database": @@ -525,11 +541,21 @@ def from_astromodels(cls, model_name: str) -> "Database": values=values, ) - def new_from_selections(self, **selections) -> "Database": + def new_from_selections( + self, selection_index: Optional[np.ndarray] = None, **selections + ) -> "Database": - selection_container: SelectionContainer = self._get_sub_selection( - selections - ) + if selection_index is None: + + selection_container: SelectionContainer = self._get_sub_selection( + selections + ) + + else: + + selection_container: SelectionContainer = ( + self._get_sub_selection_via_index(selection_index) + ) return Database( selection_container.sub_grid, @@ -761,6 +787,47 @@ def update_database( f["run_time"][idx] = r.attrs["run_time"] +def merge_outputs( + *files_names: List[Union[str, Path]], out_file_name: Union[str, Path] +) -> None: + + with h5py.File(out_file_name, "w") as out_file: + + energy_grp: h5py.Group = 
out_file.create_group("energy_grid") + values_grp = out_file.create_group("values") + par_name_grp = out_file.create_group("parameter_names") + + for n_output, file_name in enumerate(files_names): + + with h5py.File(file_name, "r") as f: + + if n_output == 0: + + for k, v in f["parameter_names"].attrs.items(): + + par_name_grp.attrs[k] = v + + if "meta" in f.keys(): + + meta_grp = out_file.create_group("meta") + + for key in list(f["meta"].keys()): + + meta_grp.create_dataset(key, data=f[f"meta/{key}"]) + + out_file.create_dataset("run_time", data=f["run_time"]) + + out_file.create_dataset("parameters", data=f["parameters"]) + + energy_grp.create_dataset( + f"energy_grid_{n_output}", + data=f["energy_grid/energy_grid_0"], + ) + values_grp.create_dataset( + f"output_{n_output}", data=f["values/output_0"] + ) + + def merge_databases( *file_names: List[str], new_name: str = "merged_db.h5" ) -> None: diff --git a/ronswanson/grids.py b/ronswanson/grids.py index f84aa4f..383f0d7 100644 --- a/ronswanson/grids.py +++ b/ronswanson/grids.py @@ -258,6 +258,17 @@ def __post_init__(self): cartesian_jit([p.grid for p in self.parameter_list]), ) + @property + def min_max_values(self) -> np.ndarray: + + out = [] + + for p in self.parameter_list: + + out.append([min(p.grid), max(p.grid)]) + + return np.array(out) + @property def n_points(self) -> int: diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 89ecda7..8f7bfba 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -25,6 +25,8 @@ def __init__( has_complete_params: bool = False, current_size: int = 0, clean: bool = True, + lhs_sampling: bool = False, + lhs_points_file: Optional[str] = None, ) -> None: """ @@ -60,6 +62,8 @@ def __init__( self._has_complete_params: bool = has_complete_params self._current_size: int = current_size self._clean: bool = clean + self._lhs_sampling: bool = lhs_sampling + self._lhs_points_file: Optional[str] = lhs_points_file 
super().__init__(file_name) @@ -93,8 +97,21 @@ def _build_script(self) -> None: f"pg = ParameterGrid.from_yaml('{self._parameter_file}')" ) + if self._lhs_sampling: + + self._add_line( + f"with h5py.File('{self._lhs_points_file}', 'r') as f:" + ) + self._add_line("lhs_params = f['lhs_points'][()]", indent_level=1) + self._add_line("def func(i):") - self._add_line("params = pg.at_index(i)", indent_level=1) + + if self._lhs_sampling: + + self._add_line("params = lhs_params[i]", indent_level=1) + + else: + self._add_line("params = pg.at_index(i)", indent_level=1) if self._has_complete_params: diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 1e2d2f0..42d33cb 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -11,6 +11,8 @@ import yaml from omegaconf import MISSING, OmegaConf from tqdm.auto import tqdm +from smt.sampling_methods import LHS + from ronswanson.utils.color import Colors from ronswanson.utils.check_complete import check_complete_ids @@ -88,6 +90,8 @@ class YAMLStructure: gather: Optional[GatherConfigStructure] = None num_meta_parameters: Optional[int] = None finish_missing: bool = False + lhs_sampling: bool = False + n_lhs_points: int = 10 class SimulationBuilder: @@ -107,6 +111,8 @@ def __init__( num_meta_parameters: Optional[int] = None, clean: bool = True, finish_missing: bool = False, + lhs_sampling: bool = False, + n_lhs_points: int = 10, ): """TODO describe function @@ -152,12 +158,22 @@ def __init__( self._clean: bool = clean - self._n_iterations: int = parameter_grid.n_points - self._current_database_size: int = 0 self._finish_missing: bool = finish_missing + self._lhs_sampling: bool = lhs_sampling + + self._n_lhs_points: int = n_lhs_points + + if self._lhs_sampling: + + self._n_iterations: int = self._n_lhs_points + + else: + + self._n_iterations = parameter_grid.n_points + if not self._finish_missing: self._initialize_database() @@ -275,16 +291,39 @@ def from_yaml(cls, 
file_name: str) -> "SimulationBuilder": **inputs, ) + def _compute_lhs_sampling(self): + + pg = ParameterGrid.from_yaml(self._parameter_file) + + sampling = LHS(xlimits=pg.min_max_values, criterion="maximin") + + points = sampling(self._n_lhs_points) + + with h5py.File("lhs_points.h5", "w") as f: + + f.create_dataset("lhs_points", data = points, compression="gzip") + + + + def _initialize_database(self) -> None: + pg = ParameterGrid.from_yaml(self._parameter_file) + + if self._lhs_sampling: + + n_points = self._n_lhs_points + + else: + + n_points = pg.n_points + if not Path(self._out_file).exists(): with h5py.File(self._out_file, "w") as f: f.attrs["has_been_touched"] = False - pg = ParameterGrid.from_yaml(self._parameter_file) - # store the parameter names p_name_group = f.create_group("parameter_names") @@ -309,7 +348,7 @@ def _initialize_database(self) -> None: f.create_dataset( "parameters", - shape=(pg.n_points,) + np.array(pg.parameter_names).shape, + shape=(n_points,) + np.array(pg.parameter_names).shape, maxshape=(None,) + np.array(pg.parameter_names).shape, # compression="gzip", ) @@ -322,13 +361,13 @@ def _initialize_database(self) -> None: val_grp.create_dataset( f"output_{i}", - shape=(pg.n_points,) + pg.energy_grid[i].grid.shape, + shape=(n_points,) + pg.energy_grid[i].grid.shape, maxshape=(None,) + pg.energy_grid[i].grid.shape, # compression="gzip", ) f.create_dataset( - "run_time", shape=(pg.n_points,), maxshape=(None,) + "run_time", shape=(n_points,), maxshape=(None,) ) if self._num_meta_parameters is not None: @@ -340,7 +379,7 @@ def _initialize_database(self) -> None: for i in range(self._num_meta_parameters): meta_grp.create_dataset( - f"meta_{i}", shape=(pg.n_points,), maxshape=(None,) + f"meta_{i}", shape=(n_points,), maxshape=(None,) ) else: @@ -376,8 +415,6 @@ def _initialize_database(self) -> None: with h5py.File(self._out_file, "a") as f: - pg = ParameterGrid.from_yaml(self._parameter_file) - dataset: h5py.Dataset = f["parameters"] 
self._current_database_size = dataset.shape[0] @@ -596,6 +633,8 @@ def _generate_python_script(self) -> None: self._has_complete_params, self._current_database_size, clean=self._clean, + lhs_sampling=self._lhs_sampling, + lhs_points_file=str(self._base_dir / "lhs_points.h5") ) py_gen.write(str(self._base_dir)) From a339b0de9c0a210e8541889f89b972ea760233e9 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Fri, 10 Mar 2023 12:58:51 +0100 Subject: [PATCH 03/26] add call for computation --- ronswanson/simulation_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 42d33cb..5754514 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -169,6 +169,7 @@ def __init__( if self._lhs_sampling: self._n_iterations: int = self._n_lhs_points + self._compute_lhs_sampling() else: From 8a9ea1e67ff373fe51d6a21e158b52a71e976f3c Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Fri, 10 Mar 2023 13:08:51 +0100 Subject: [PATCH 04/26] add h5py import --- ronswanson/script_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 8f7bfba..026753a 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -72,6 +72,7 @@ def _build_script(self) -> None: self._add_line(self._import_line) self._add_line("from joblib import Parallel, delayed") self._add_line("import json") + self._add_line("import h5py") self._add_line("import numpy as np") self._add_line("from tqdm.auto import tqdm") self._add_line("from ronswanson import ParameterGrid") From c087cb9d1e29ee86abff56d8527297e5134bf356 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Fri, 10 Mar 2023 13:15:21 +0100 Subject: [PATCH 05/26] make parameter dicts --- ronswanson/script_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 026753a..0e12b4f 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -109,7 +109,7 @@ def _build_script(self) -> None: if self._lhs_sampling: - self._add_line("params = lhs_params[i]", indent_level=1) + self._add_line("params = {k:v for k,v in zip(pg.parameter_names,lhs_params[i])}", indent_level=1) else: self._add_line("params = pg.at_index(i)", indent_level=1) From e4fed73765347164a950bc34196228af3d3c301d Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Fri, 10 Mar 2023 13:28:14 +0100 Subject: [PATCH 06/26] fix n_points for LHS --- ronswanson/script_generator.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 0e12b4f..b32171b 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -105,6 +105,14 @@ def _build_script(self) -> None: ) self._add_line("lhs_params = f['lhs_points'][()]", indent_level=1) + + self._add_line("n_points = len(lhs_params)") + + else: + + self._add_line("n_points = pg.n_points") + + self._add_line("def func(i):") if self._lhs_sampling: From b272ae2d784fdbd77b4d731daf35c7ceda1f7ce0 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Fri, 10 Mar 2023 13:34:37 +0100 Subject: [PATCH 07/26] fix typo --- ronswanson/script_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index b32171b..3b80e36 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -142,7 +142,7 @@ def _build_script(self) -> None: if self._n_nodes is None: - self._add_line("iteration = [i for i in range(0, pg.n_points)]") + self._add_line("iteration = [i for i in range(0, n_points)]") else: From e7452ae23cb4ba6a01b58507d4e2f134f4fa0fa8 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Fri, 17 Mar 2023 10:22:05 +0100 Subject: [PATCH 08/26] fixed splits --- ronswanson/simulation_builder.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 5754514..027b2a3 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -113,6 +113,7 @@ def __init__( finish_missing: bool = False, lhs_sampling: bool = False, n_lhs_points: int = 10, + n_lhs_split: Optional[int] = None, ): """TODO describe function @@ -166,6 +167,8 @@ def __init__( self._n_lhs_points: int = n_lhs_points + self._n_lhs_split: Optional[int] = n_lhs_split + if self._lhs_sampling: self._n_iterations: int = self._n_lhs_points @@ -298,15 +301,30 @@ def _compute_lhs_sampling(self): sampling = LHS(xlimits=pg.min_max_values, criterion="maximin") - points = sampling(self._n_lhs_points) + if self._n_lhs_split is None: - with h5py.File("lhs_points.h5", "w") as f: + points = sampling(self._n_lhs_points) + + else: + + total_points: int = self._n_lhs_points + + points_per_split: int = self._n_lhs_points // self._n_lhs_split - f.create_dataset("lhs_points", data = points, compression="gzip") + current_n_points: int = points_per_split + points = sampling(points_per_split) + + while current_n_points 
< total_points: + + points = sampling.expand_lhs(points, points_per_split) + + current_n_points += points_per_split + + with h5py.File("lhs_points.h5", "w") as f: + f.create_dataset("lhs_points", data=points, compression="gzip") - def _initialize_database(self) -> None: pg = ParameterGrid.from_yaml(self._parameter_file) @@ -635,7 +653,7 @@ def _generate_python_script(self) -> None: self._current_database_size, clean=self._clean, lhs_sampling=self._lhs_sampling, - lhs_points_file=str(self._base_dir / "lhs_points.h5") + lhs_points_file=str(self._base_dir / "lhs_points.h5"), ) py_gen.write(str(self._base_dir)) From 422d304a0e05dff6794c1c6b056df4fea5c26ead Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Fri, 17 Mar 2023 10:38:44 +0100 Subject: [PATCH 09/26] add config --- ronswanson/simulation_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 027b2a3..ae195c6 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -92,6 +92,7 @@ class YAMLStructure: finish_missing: bool = False lhs_sampling: bool = False n_lhs_points: int = 10 + n_lhs_split: Optional[int] = None class SimulationBuilder: From beb2ed86d6d9b0146320f7f285094028144edff5 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Mon, 20 Mar 2023 11:32:17 +0100 Subject: [PATCH 10/26] switch to using scipy over smt --- ronswanson/simulation_builder.py | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index ae195c6..e3790e9 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -11,7 +11,7 @@ import yaml from omegaconf import MISSING, OmegaConf from tqdm.auto import tqdm -from smt.sampling_methods import LHS +from scipy.stats import qmc from ronswanson.utils.color import Colors @@ -300,27 +300,39 @@ def _compute_lhs_sampling(self): pg = ParameterGrid.from_yaml(self._parameter_file) - sampling = LHS(xlimits=pg.min_max_values, criterion="maximin") + sampling = qmc.LatinHypercube( + d=pg.n_parameters, optimization="random-cd" + ) - if self._n_lhs_split is None: + l_bounds = pg.min_max_values[:, 0] - points = sampling(self._n_lhs_points) + u_bounds = pg.min_max_values[:, 1] - else: + samples = qmc.random(n=self._n_lhs_points) + + points = qmc.scale(samples, l_bounds, u_bounds) + + # sampling = LHS(xlimits=pg.min_max_values, criterion="maximin") + + # if self._n_lhs_split is None: + + # points = sampling(self._n_lhs_points) + + # else: - total_points: int = self._n_lhs_points + # total_points: int = self._n_lhs_points - points_per_split: int = self._n_lhs_points // self._n_lhs_split + # points_per_split: int = self._n_lhs_points // self._n_lhs_split - current_n_points: int = points_per_split + # current_n_points: int = points_per_split - points = sampling(points_per_split) + # points = sampling(points_per_split) - while current_n_points < total_points: + # while current_n_points < total_points: - points = sampling.expand_lhs(points, points_per_split) + # points = sampling.expand_lhs(points, points_per_split) - current_n_points += points_per_split + # current_n_points += points_per_split with h5py.File("lhs_points.h5", "w") as 
f: From aad106e346bcc55b76bdc2e7709f24e2c0c77711 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 21 Mar 2023 11:19:17 +0100 Subject: [PATCH 11/26] just try without f-ing optimization --- ronswanson/simulation_builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index e3790e9..5162cc9 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -301,7 +301,8 @@ def _compute_lhs_sampling(self): pg = ParameterGrid.from_yaml(self._parameter_file) sampling = qmc.LatinHypercube( - d=pg.n_parameters, optimization="random-cd" + d=pg.n_parameters, + #optimization="random-cd" ) l_bounds = pg.min_max_values[:, 0] From ba5326ca6e983148a043cc78a48846f6574ae409 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 21 Mar 2023 11:20:23 +0100 Subject: [PATCH 12/26] OR be smart --- ronswanson/simulation_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 5162cc9..2705e1f 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -302,7 +302,7 @@ def _compute_lhs_sampling(self): sampling = qmc.LatinHypercube( d=pg.n_parameters, - #optimization="random-cd" + optimization="random-cd" ) l_bounds = pg.min_max_values[:, 0] From 370ec707d16c8344871b7936dbc2f4625c53c896 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Tue, 21 Mar 2023 11:21:48 +0100 Subject: [PATCH 13/26] and try with no typos --- ronswanson/simulation_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 2705e1f..5c212db 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -309,7 +309,7 @@ def _compute_lhs_sampling(self): u_bounds = pg.min_max_values[:, 1] - samples = qmc.random(n=self._n_lhs_points) + samples = sampling.random(n=self._n_lhs_points) points = qmc.scale(samples, l_bounds, u_bounds) From f3787c1f367ec9cebd57c8e83d3ad65ff5be1fad Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Wed, 22 Mar 2023 07:47:43 +0100 Subject: [PATCH 14/26] allow parameter file to be skipped --- ronswanson/simulation_builder.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 5c212db..ba9ec4b 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -92,7 +92,7 @@ class YAMLStructure: finish_missing: bool = False lhs_sampling: bool = False n_lhs_points: int = 10 - n_lhs_split: Optional[int] = None + skip_lhs_generator: bool = False class SimulationBuilder: @@ -114,7 +114,7 @@ def __init__( finish_missing: bool = False, lhs_sampling: bool = False, n_lhs_points: int = 10, - n_lhs_split: Optional[int] = None, + skip_lhs_generator: bool = False, ): """TODO describe function @@ -168,12 +168,15 @@ def __init__( self._n_lhs_points: int = n_lhs_points - self._n_lhs_split: Optional[int] = n_lhs_split + self._skip_lhs_generator: bool = skip_lhs_generator if self._lhs_sampling: self._n_iterations: int = self._n_lhs_points - self._compute_lhs_sampling() + + if not self._skip_lhs_generator: + + self._compute_lhs_sampling() else: @@ -301,8 +304,7 @@ def _compute_lhs_sampling(self): pg = ParameterGrid.from_yaml(self._parameter_file) sampling = 
qmc.LatinHypercube( - d=pg.n_parameters, - optimization="random-cd" + d=pg.n_parameters, optimization="random-cd" ) l_bounds = pg.min_max_values[:, 0] From 0dc4dc52d2373334d35cbff9e69f37506892da4f Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 16 May 2023 14:26:19 +0200 Subject: [PATCH 15/26] add debug info --- ronswanson/simulation_builder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index b9d0924..31b4ad5 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -255,6 +255,9 @@ def from_yaml(cls, file_name: str) -> "SimulationBuilder": parameter_grid = ParameterGrid.from_yaml(inputs.pop("parameter_grid")) + log.debug("read parameter grid") + + simulation_input = inputs.pop("simulation") if "time" in simulation_input: From cb3ab2c38b43e52e598991e0b22b1e41399ddfc0 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 16 May 2023 14:30:35 +0200 Subject: [PATCH 16/26] more debug output --- ronswanson/grids.py | 4 ++++ ronswanson/simulation_builder.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ronswanson/grids.py b/ronswanson/grids.py index 383f0d7..368d6ce 100644 --- a/ronswanson/grids.py +++ b/ronswanson/grids.py @@ -307,6 +307,8 @@ def from_dict(cls, d: Dict[str, Dict[str, Any]]) -> "ParameterGrid": is_multi_output = True + log.debug(f"found {n_energy_grids} energy grids") + if not is_multi_output: energy_grid = [EnergyGrid.from_dict(d.pop("energy_grid"))] @@ -331,6 +333,8 @@ def from_dict(cls, d: Dict[str, Dict[str, Any]]) -> "ParameterGrid": @classmethod def from_yaml(cls, file_name: str) -> "ParameterGrid": + log.debug(f"reading: {file_name}") + with open(file_name, 'r') as f: inputs = yaml.load(stream=f, Loader=yaml.SafeLoader) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 31b4ad5..6f94b95 100644 --- a/ronswanson/simulation_builder.py +++ 
b/ronswanson/simulation_builder.py @@ -257,7 +257,6 @@ def from_yaml(cls, file_name: str) -> "SimulationBuilder": log.debug("read parameter grid") - simulation_input = inputs.pop("simulation") if "time" in simulation_input: From 0793bb74ea46d83a5dcbfb6ac9e8ad71961dc313 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 16 May 2023 14:34:14 +0200 Subject: [PATCH 17/26] update output --- ronswanson/grids.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ronswanson/grids.py b/ronswanson/grids.py index 368d6ce..c08c3dd 100644 --- a/ronswanson/grids.py +++ b/ronswanson/grids.py @@ -209,6 +209,9 @@ def grid(self) -> np.ndarray: @classmethod def from_dict(cls, name: str, d: Dict[str, Any]) -> "Parameter": + log.debug(f"read parameter: {name}") + log.debug(f"inputs: {d}") + inputs = {} inputs["custom"] = d["custom"] @@ -320,6 +323,8 @@ def from_dict(cls, d: Dict[str, Dict[str, Any]]) -> "ParameterGrid": for i in range(n_energy_grids) ] + log.debug("now reading parameters") + pars = list(d.keys()) pars.sort() From 34e8dcd89d1fb12038080b0149a55e5d210ea8ea Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Tue, 16 May 2023 14:52:36 +0200 Subject: [PATCH 18/26] try without full grid --- ronswanson/grids.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ronswanson/grids.py b/ronswanson/grids.py index c08c3dd..025c267 100644 --- a/ronswanson/grids.py +++ b/ronswanson/grids.py @@ -255,11 +255,13 @@ class ParameterGrid: def __post_init__(self): - object.__setattr__( - self, - 'full_grid', - cartesian_jit([p.grid for p in self.parameter_list]), - ) + # object.__setattr__( + # self, + # 'full_grid', + # cartesian_jit([p.grid for p in self.parameter_list]), + # ) + + pass @property def min_max_values(self) -> np.ndarray: @@ -333,6 +335,8 @@ def from_dict(cls, d: Dict[str, Dict[str, Any]]) -> "ParameterGrid": Parameter.from_dict(par_name, d[par_name]) for par_name in pars ] + log.debug("parameters have been read in") + return cls(par_list, energy_grid) @classmethod From ef042608921ef654bab6b4fe940e92807c2c6a50 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 16 May 2023 15:02:27 +0200 Subject: [PATCH 19/26] put back grid --- ronswanson/grids.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ronswanson/grids.py b/ronswanson/grids.py index 025c267..c94886e 100644 --- a/ronswanson/grids.py +++ b/ronswanson/grids.py @@ -255,11 +255,11 @@ class ParameterGrid: def __post_init__(self): - # object.__setattr__( - # self, - # 'full_grid', - # cartesian_jit([p.grid for p in self.parameter_list]), - # ) + object.__setattr__( + self, + 'full_grid', + cartesian_jit([p.grid for p in self.parameter_list]), + ) pass From e8418173d65c558e4bc315881e61dfb1647d2c7c Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Fri, 26 May 2023 10:55:00 +0200 Subject: [PATCH 20/26] added LHS unit generator --- ronswanson/__init__.py | 2 +- ronswanson/simulation_builder.py | 42 ++++++++++++-------------- ronswanson/utils/__init__.py | 1 + ronswanson/utils/configuration.py | 6 ++-- ronswanson/utils/lhs_unit_generator.py | 29 ++++++++++++++++++ 5 files changed, 53 insertions(+), 27 deletions(-) create mode 100644 ronswanson/utils/lhs_unit_generator.py diff --git a/ronswanson/__init__.py b/ronswanson/__init__.py index 3cf1a3b..13cd747 100644 --- a/ronswanson/__init__.py +++ b/ronswanson/__init__.py @@ -10,7 +10,7 @@ from .simulation import Simulation from .simulation_builder import SimulationBuilder from .grids import ParameterGrid -from .utils import ronswanson_config, show_configuration +from .utils import ronswanson_config, show_configuration, generate_lhs_unit_cube from .utils.logging import update_logging_level __all__ = [ diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 6f94b95..1667dce 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -1,7 +1,7 @@ import json import shutil import time -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Optional @@ -86,13 +86,16 @@ class YAMLStructure: parameter_grid: str = MISSING out_file: str = MISSING clean: bool = True - simulation: SimulationConfigStructure = SimulationConfigStructure() + simulation: SimulationConfigStructure = field( + default_factory=SimulationConfigStructure + ) gather: Optional[GatherConfigStructure] = None num_meta_parameters: Optional[int] = None finish_missing: bool = False lhs_sampling: bool = False n_lhs_points: int = 10 skip_lhs_generator: bool = False + lhs_unit_file: Optional[str] = None class SimulationBuilder: @@ -115,6 +118,7 @@ def __init__( lhs_sampling: bool = False, n_lhs_points: int = 10, skip_lhs_generator: bool = False, + lhs_unit_file: 
Optional[str] = None, ): """TODO describe function @@ -170,6 +174,8 @@ def __init__( self._skip_lhs_generator: bool = skip_lhs_generator + self._lhs_unit_file: Optional[str] = lhs_unit_file + if self._lhs_sampling: self._n_iterations: int = self._n_lhs_points @@ -305,39 +311,29 @@ def _compute_lhs_sampling(self): pg = ParameterGrid.from_yaml(self._parameter_file) - sampling = qmc.LatinHypercube( - d=pg.n_parameters, optimization="random-cd" - ) - l_bounds = pg.min_max_values[:, 0] u_bounds = pg.min_max_values[:, 1] - samples = sampling.random(n=self._n_lhs_points) - - points = qmc.scale(samples, l_bounds, u_bounds) - - # sampling = LHS(xlimits=pg.min_max_values, criterion="maximin") + if self._lhs_unit_file is None: - # if self._n_lhs_split is None: + log.info("Sampling LHS points") - # points = sampling(self._n_lhs_points) - - # else: - - # total_points: int = self._n_lhs_points + sampling = qmc.LatinHypercube( + d=pg.n_parameters, optimization="random-cd" + ) - # points_per_split: int = self._n_lhs_points // self._n_lhs_split + samples = sampling.random(n=self._n_lhs_points) - # current_n_points: int = points_per_split + else: - # points = sampling(points_per_split) + log.info(f"reading LHS points from {self._lhs_unit_file}") - # while current_n_points < total_points: + with h5py.File(self._lhs_unit_file, "r") as f: - # points = sampling.expand_lhs(points, points_per_split) + samples = f["lhs_points"][()] - # current_n_points += points_per_split + points = qmc.scale(samples, l_bounds, u_bounds) with h5py.File("lhs_points.h5", "w") as f: diff --git a/ronswanson/utils/__init__.py b/ronswanson/utils/__init__.py index c7f2d77..f6bb884 100644 --- a/ronswanson/utils/__init__.py +++ b/ronswanson/utils/__init__.py @@ -1,2 +1,3 @@ from .configuration import ronswanson_config, show_configuration from .color import Colors +from .lhs_unit_generator import generate_lhs_unit_cube diff --git a/ronswanson/utils/configuration.py b/ronswanson/utils/configuration.py index 
b1d78c7..89f369f 100644 --- a/ronswanson/utils/configuration.py +++ b/ronswanson/utils/configuration.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import List, Optional @@ -36,8 +36,8 @@ class SLURM: @dataclass class RonSwansonConfig: - logging: Logging = Logging() - slurm: SLURM = SLURM() + logging: Logging = field(default_factory = Logging) + slurm: SLURM = field(default_factory = SLURM) # Read the default config diff --git a/ronswanson/utils/lhs_unit_generator.py b/ronswanson/utils/lhs_unit_generator.py new file mode 100644 index 0000000..a8e3b2d --- /dev/null +++ b/ronswanson/utils/lhs_unit_generator.py @@ -0,0 +1,29 @@ +import h5py +from scipy.stats import qmc + + +def generate_lhs_unit_cube( + n_parameters: int, n_samples: int, file_name: str +) -> None: + """ + generate a unit latin hypercube for faster generation + of points to be scaled later + + :param n_parameters: + :type n_parameters: int + :param n_samples: + :type n_samples: int + :param file_name: + :type file_name: str + :returns: + + """ + + + sampling = qmc.LatinHypercube(d=n_parameters, optimization="random-cd") + + samples = sampling.random(n_samples) + + with h5py.File(file_name, "w") as f: + + f.create_dataset("lhs_points", data=samples, compression="gzip") From 4381f720c21a78fbdbb0d459f93b571d597358ab Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Tue, 30 May 2023 15:53:17 +0200 Subject: [PATCH 21/26] small change in script --- ronswanson/script_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 3b80e36..62e53e7 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -218,6 +218,7 @@ def _build_script(self) -> None: self._add_line('import h5py') self._add_line('import sys') self._add_line('from pathlib import Path') + self._add_line('from tqdm.auto import tqdm') self._end_line() self._end_line() self._add_line('rank = MPI.COMM_WORLD.rank') @@ -323,7 +324,7 @@ def _build_script(self) -> None: self._end_line() self._end_line() self._end_line() - self._add_line('f.close()') + self._add_line('database.close()') class SLURMGenerator(ScriptGenerator): From 582c8a6e88e5d04a65d771e0da0c1f72f2aefaf0 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Tue, 30 May 2023 15:53:45 +0200 Subject: [PATCH 22/26] and another --- ronswanson/script_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 62e53e7..d67f02f 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -283,7 +283,7 @@ def _build_script(self) -> None: self._end_line() self._end_line() - self._add_line('for sim_id in sim_ids:') + self._add_line('for sim_id in tqdm(sim_ids):') self._end_line() self._add_line( 'this_file: Path = multi_file_dir / f"sim_store_{sim_id}.h5"', From 21e1c05aaa5c5ed8b509d655f5d3ad955a8c3d65 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Tue, 30 May 2023 19:04:23 +0200 Subject: [PATCH 23/26] specify dtype --- ronswanson/simulation_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 1667dce..6f81587 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -396,6 +396,7 @@ def _initialize_database(self) -> None: f"output_{i}", shape=(n_points,) + pg.energy_grid[i].grid.shape, maxshape=(None,) + pg.energy_grid[i].grid.shape, + dtype="float64" # compression="gzip", ) From c06bb72c4ad1613236b2443dc3f48351b0c06b31 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Thu, 1 Jun 2023 10:32:21 +0200 Subject: [PATCH 24/26] make linear exceution loud --- ronswanson/script_generator.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index d67f02f..8f47871 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -113,7 +113,7 @@ def _build_script(self) -> None: self._add_line("n_points = pg.n_points") - self._add_line("def func(i):") + self._add_line("def func(i, silent: bool=True):") if self._lhs_sampling: @@ -121,6 +121,9 @@ def _build_script(self) -> None: else: self._add_line("params = pg.at_index(i)", indent_level=1) + self._add_line("if not silent:", indent_level=1) + self._add_line("log.info(f'{params}')", indent_level=2) + if self._has_complete_params: @@ -161,7 +164,7 @@ def _build_script(self) -> None: # just do a straight for loop self._add_line("for i in tqdm(iteration):") - self._add_line("func(i)", indent_level=1) + self._add_line("func(i, silent=False)", indent_level=1) self._add_line( f"gather('{self._database_file}', {self._current_size}, clean=True)" From 2c4cf9780905684a51f72eae99b8b50376b1fcb4 Mon Sep 17 00:00:00 2001 From: "J. 
Michael Burgess" Date: Thu, 1 Jun 2023 10:37:57 +0200 Subject: [PATCH 25/26] fix typo --- ronswanson/script_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ronswanson/script_generator.py b/ronswanson/script_generator.py index 8f47871..b1e7aeb 100644 --- a/ronswanson/script_generator.py +++ b/ronswanson/script_generator.py @@ -121,8 +121,8 @@ def _build_script(self) -> None: else: self._add_line("params = pg.at_index(i)", indent_level=1) - self._add_line("if not silent:", indent_level=1) - self._add_line("log.info(f'{params}')", indent_level=2) + self._add_line("if not silent:", indent_level=1) + self._add_line("log.info(f'{params}')", indent_level=2) if self._has_complete_params: From 4d510de65aa1c5c48a2244b58268dc4d2457c087 Mon Sep 17 00:00:00 2001 From: "J. Michael Burgess" Date: Wed, 26 Jul 2023 10:54:02 +0200 Subject: [PATCH 26/26] check LHS point in generation --- ronswanson/database.py | 16 +++++++++++----- ronswanson/simulation_builder.py | 3 +++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/ronswanson/database.py b/ronswanson/database.py index c0bfbf0..390f717 100644 --- a/ronswanson/database.py +++ b/ronswanson/database.py @@ -1,16 +1,18 @@ +import collections from collections import OrderedDict from dataclasses import dataclass -from typing import Dict, List, Optional, Union from pathlib import Path -import collections +from typing import Dict, List, Optional, Union +import astropy.units as u import h5py import numpy as np import plotly.graph_objects as go from astromodels import TemplateModel, TemplateModelFactory from astromodels.functions.template_model import TemplateFile -from astromodels.utils.logging import silence_console_log from astromodels.utils import get_user_data_path +from astromodels.utils.logging import silence_console_log +from joblib import Parallel, delayed from tqdm.auto import tqdm from ronswanson.grids import Parameter, ParameterGrid @@ -19,8 +21,6 @@ from .utils.logging import 
setup_logger -from joblib import Parallel, delayed - log = setup_logger(__name__) @@ -166,6 +166,12 @@ def energy_grid(self) -> np.ndarray: return self._energy_grid + @property + def energy_grid_nu(self) -> np.ndarray: + + return (self._energy_grid * u.keV).to("Hz", equivalencies = u.spectral()) + + @property def meta_data(self) -> Optional[Dict[str, np.ndarray]]: diff --git a/ronswanson/simulation_builder.py b/ronswanson/simulation_builder.py index 6f81587..2ca6ede 100644 --- a/ronswanson/simulation_builder.py +++ b/ronswanson/simulation_builder.py @@ -315,6 +315,9 @@ def _compute_lhs_sampling(self): u_bounds = pg.min_max_values[:, 1] + log.info(f"LHS min values: {l_bounds}") + log.info(f"LHS max values: {u_bounds}") + if self._lhs_unit_file is None: log.info("Sampling LHS points")