Merge pull request #170 from madeline-scyphers/develop

0.10.3 Release
madeline-scyphers · Jul 10, 2024 · 9c3c06a · 9c3c06a
2 parents 907a941 + 5dad648
commit 9c3c06a
Show file tree

Hide file tree

Showing 26 changed files with 292 additions and 91 deletions.
diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
@@ -34,6 +34,24 @@ jobs:
       fail-fast: false
       matrix:
         include:
+#          # A change introduced in Ax 0.3.7 causes both run_n_trials and run_all_trials to not run
+#          # the expected number of trials. Support for 0.3.7 and above is disabled until the cause is identified.
+#          - os: ubuntu
+#            label: linux-64-0.4.0
+#            prefix: /usr/share/miniconda3/envs/boa
+#            ax_version: 0.4.0
+#          - os: ubuntu
+#            label: linux-64-0.3.7
+#            prefix: /usr/share/miniconda3/envs/boa
+#            ax_version: 0.3.7
+          - os: ubuntu
+            label: linux-64-0.3.6
+            prefix: /usr/share/miniconda3/envs/boa
+            ax_version: 0.3.6
+          - os: ubuntu
+            label: linux-64-0.3.5
+            prefix: /usr/share/miniconda3/envs/boa
+            ax_version: 0.3.5
           - os: ubuntu
             label: linux-64-0.3.4
             prefix: /usr/share/miniconda3/envs/boa
@@ -69,7 +87,7 @@ jobs:
         with:
             miniforge-variant: Mambaforge
             miniforge-version: latest
-            python-version: 3.9
+            python-version: '3.11'
             activate-environment: boa
             use-mamba: true
 
@@ -84,7 +102,7 @@ jobs:
 
       - name: Update base environment
         if: steps.cache.outputs.cache-hit != 'true'
-        run: mamba env update -n boa -f environment_dev.yml --prune
+        run: mamba env update -n boa -f environment_dev.yml
 
       - name: update ax versions
         if: ${{ matrix.ax_version }} # only update when ax_version is set

diff --git a/boa/async_opt.py b/boa/async_opt.py
@@ -130,7 +130,7 @@ def run(config_path, scheduler_path, num_trials, experiment_dir=None):
             scheduler.options = dataclasses.replace(scheduler.options, total_trials=num_trials)
     else:
         controller = Controller(config_path=config_path, wrapper=SyntheticWrapper(config=config))
-        controller.initialize_scheduler()
+        controller.initialize_scheduler(get_exp_kw={"check_for_nans": False})
         scheduler = controller.scheduler
 
     if not scheduler.opt_csv.exists() and scheduler.experiment.trials:
@@ -140,7 +140,7 @@ def run(config_path, scheduler_path, num_trials, experiment_dir=None):
         )
 
     if scheduler.opt_csv.exists():
-        exp_attach_data_from_opt_csv(list(config.objective.metric_names), scheduler)
+        exp_attach_data_from_opt_csv(config.objective.metric_names, scheduler)
 
     generator_runs = scheduler.generation_strategy._gen_multiple(
         experiment=scheduler.experiment, num_generator_runs=scheduler.wrapper.config.trials
@@ -156,20 +156,19 @@ def run(config_path, scheduler_path, num_trials, experiment_dir=None):
     if scheduler.experiment.fetch_data().df.empty:
         trials = scheduler.experiment.trials
         metrics = scheduler.experiment.metrics
-        for metric in metrics.keys():
-            scheduler.experiment.attach_data(
-                Data(
-                    df=pd.DataFrame.from_records(
-                        dict(
-                            trial_index=list(trials.keys()),
-                            arm_name=[f"{i}_0" for i in trials.keys()],
-                            metric_name=metric,
-                            mean=None,
-                            sem=0.0,
-                        )
+        scheduler.experiment.attach_data(
+            Data(
+                df=pd.DataFrame(
+                    dict(
+                        trial_index=[i for i in trials.keys() for m in metrics.keys()],
+                        arm_name=[f"{i}_0" for i in trials.keys() for m in metrics.keys()],
+                        metric_name=[m for i in trials.keys() for m in metrics.keys()],
+                        mean=None,
+                        sem=0.0,
                     )
                 )
             )
+        )
 
     scheduler.save_data(metrics_to_end=True, ax_kwargs=dict(always_include_field_columns=True))
     return scheduler
@@ -187,7 +186,7 @@ def exp_attach_data_from_opt_csv(metric_names, scheduler):
     new_data = df.loc[df["trial_index"].isin(nan_trials)]
     if new_data.empty:
         return
-    metric_data = new_data[metric_names].to_dict()
+    metric_data = new_data[list(metric_names)].to_dict()
     if check_min_package_version("ax-platform", "0.3.3"):
         kw = dict(combine_with_last_data=True)
     else:

diff --git a/boa/ax_instantiation_utils.py b/boa/ax_instantiation_utils.py
@@ -87,13 +87,10 @@ def get_experiment(config: BOAConfig, runner: Runner, wrapper: BaseWrapper = Non
     search_space = instantiate_search_space_from_json(config.parameters, config.parameter_constraints)
 
     info_only_metrics = BoaInstantiationBase.get_metrics_from_obj_config(
-        config.objective, wrapper=wrapper, info_only=True
+        config.objective, wrapper=wrapper, info_only=True, **kwargs
     )
 
-    optimization_config = BoaInstantiationBase.make_optimization_config(
-        config.objective,
-        wrapper=wrapper,
-    )
+    optimization_config = BoaInstantiationBase.make_optimization_config(config.objective, wrapper=wrapper, **kwargs)
 
     exp = Experiment(
         search_space=search_space,

diff --git a/boa/config/config.py b/boa/config/config.py
@@ -289,7 +289,7 @@ def __init__(self, **config):
 
     @property
     def metric_names(self):
-        return (metric.name for metric in self.metrics)
+        return [metric.name for metric in self.metrics]
 
 
 @define
@@ -910,6 +910,8 @@ def update_dict(original: dict, param: dict):
 
 
 if __name__ == "__main__":  # pragma: no cover
-    from tests.conftest import TEST_CONFIG_DIR
+    from boa.definitions import ROOT
+
+    TEST_CONFIG_DIR = ROOT / "tests" / "test_configs"
 
     c = BOAConfig.from_jsonlike(pathlib.Path(TEST_CONFIG_DIR / "test_config_generic.yaml"))
diff --git a/boa/config/converters.py b/boa/config/converters.py
@@ -14,6 +14,9 @@
     from .config import BOAMetric
 
 
+STOPPING_STRATEGY_MAPPING = {"improvement": "ImprovementGlobalStoppingStrategy"}
+
+
 def _convert_noton_type(converter, type_, default_if_none=None) -> Any:
     def type_converter(val):
         if default_if_none is not None and val is None:
@@ -66,6 +69,8 @@ def _load_stopping_strategy(d: Optional[dict], module: ModuleType):
     if "type" not in d:
         raise ValueError("Type missing from stopping strategy key")  # can't work with it if type not set
     type_ = d.pop("type")
+    if type_ in STOPPING_STRATEGY_MAPPING:
+        type_ = STOPPING_STRATEGY_MAPPING[type_]
     for key, value in d.items():
         if isinstance(value, dict):
             d[key] = _load_stopping_strategy(d=value, module=module)

diff --git a/boa/controller.py b/boa/controller.py
@@ -114,24 +114,30 @@ def start_logger(self):
         get_logger("ax", filename=str(Path(self.wrapper.experiment_dir) / "optimization.log"))
         return self.logger
 
-    def initialize_scheduler(self, **kwargs) -> tuple[Scheduler, BaseWrapper]:
+    def initialize_scheduler(self, get_exp_kw=None, get_scheduler_kw=None) -> tuple[Scheduler, BaseWrapper]:
         """
         Sets experiment and scheduler
 
         Parameters
         ----------
-        kwargs
-            kwargs to pass to get_experiment and get_scheduler
+        get_exp_kw
+            keyword arguments for :meth:`.get_experiment`
+        get_scheduler_kw
+            keyword arguments for :meth:`.get_scheduler`
 
         Returns
         -------
         returns a tuple with the first element being the scheduler
         and the second element being your wrapper (both initialized
         and ready to go)
         """
+        get_exp_kw = get_exp_kw or {}
+        get_scheduler_kw = get_scheduler_kw or {}
 
-        self.experiment = get_experiment(self.config, WrappedJobRunner(wrapper=self.wrapper), self.wrapper, **kwargs)
-        self.scheduler = get_scheduler(self.experiment, config=self.config, **kwargs)
+        self.experiment = get_experiment(
+            self.config, WrappedJobRunner(wrapper=self.wrapper), self.wrapper, **get_exp_kw
+        )
+        self.scheduler = get_scheduler(self.experiment, config=self.config, **get_scheduler_kw)
         return self.scheduler, self.wrapper
 
     def run(self, scheduler: Scheduler = None, wrapper: BaseWrapper = None) -> Scheduler:

diff --git a/boa/instantiation_base.py b/boa/instantiation_base.py
@@ -5,22 +5,31 @@
 from ax.service.utils.instantiation import InstantiationBase
 
 from boa.config import BOAMetric, BOAObjective
-from boa.metrics.metrics import get_metric_from_config
+from boa.metrics.metrics import PassThroughMetric, get_metric_from_config
 from boa.metrics.modular_metric import ModularMetric
+from boa.wrappers.base_wrapper import BaseWrapper
 
 
 class BoaInstantiationBase(InstantiationBase):
     @classmethod
     def make_optimization_config(
         cls,
         objective: BOAObjective,
+        wrapper: BaseWrapper = None,
         status_quo_defined: bool = False,
         **kwargs,
     ):
+        outcome_constraints = cls.make_outcome_constraints(objective.outcome_constraints, status_quo_defined)
+        for constraint in outcome_constraints:
+            if not isinstance(constraint.metric, ModularMetric) or not getattr(constraint.metric, "wrapper", None):
+                constraint.metric = PassThroughMetric(
+                    name=constraint.metric.name, lower_is_better=constraint.metric.lower_is_better, wrapper=wrapper
+                )
+
         return cls.optimization_config_from_objectives(
-            cls.make_objectives(objective, **kwargs),
+            cls.make_objectives(objective, wrapper=wrapper, **kwargs),
             cls.make_objective_thresholds(objective.objective_thresholds, status_quo_defined),
-            cls.make_outcome_constraints(objective.outcome_constraints, status_quo_defined),
+            outcome_constraints,
         )
 
     @classmethod

diff --git a/boa/logger.py b/boa/logger.py
@@ -48,11 +48,12 @@ def set_handlers(logger, level=DEFAULT_LOG_LEVEL, filename=None):
 
 
 def get_formatter():
-    fmt = "[%(levelname)s %(asctime)s %(processName)s %(threadName)s] %(name)s: %(message)s"
+    fmt = "[%(levelname)s %(asctime)s %(processName)s %(threadName)s {%(filename)s:%(lineno)d}] %(name)s: %(message)s"
     formatter = logging.Formatter(fmt=fmt)
     return formatter
 
 
+# PosixPath('/private/var/folders/10/qs3h5zj10bn52ys456cjq0z40000gn/T/tmpqt74j2js_20240709T180234/run_model.R')
 def build_stream_handler(level: int = DEFAULT_LOG_LEVEL) -> logging.StreamHandler:
     """Build the default stream handler used for most BOA logging. Sets
     default level to INFO, instead of WARNING.

diff --git a/boa/metrics/metrics.py b/boa/metrics/metrics.py
@@ -301,7 +301,7 @@ def get_metric_from_config(config: BOAMetric, instantiate=True, **kwargs) -> Mod
     if config.metric_type == MetricType.METRIC or config.metric_type == MetricType.BOA_METRIC:
         metric = get_metric_by_class_name(instantiate=instantiate, **kw)
     elif config.metric_type == MetricType.SKLEARN_METRIC:
-        kwargs["sklearn_"] = True
+        kw["sklearn_"] = True
         metric = get_metric_by_class_name(instantiate=instantiate, **kw)
     elif config.metric_type == MetricType.SYNTHETIC_METRIC:
         metric = setup_synthetic_metric(instantiate=instantiate, **kw)

diff --git a/boa/metrics/modular_metric.py b/boa/metrics/modular_metric.py
@@ -13,6 +13,7 @@
 
 import pandas as pd
 from ax import Data, Metric, Trial
+from ax.core.metric import MetricFetchE
 from ax.core.types import TParameterization
 from ax.metrics.noisy_function import NoisyFunctionMetric
 from ax.utils.common.result import Err, Ok
@@ -107,6 +108,9 @@ class ModularMetric(NoisyFunctionMetric, metaclass=MetricRegister):
     properties
         Arbitrary dictionary of properties to store. Properties need to be json
         serializable
+    check_for_nans
+        If True, check the values fetched from the wrapper for NaNs (float NaN, None, or the
+        strings "nan"/"na", case-insensitive) and fail the trial if any are found. Unhandled NaNs can cause the optimization to fail.
     kwargs
     """
 
@@ -122,6 +126,7 @@ def __init__(
         wrapper: Optional[BaseWrapper] = None,
         properties: Optional[dict[str]] = None,
         weight: Optional[float] = None,
+        check_for_nans: Optional[bool] = True,
         **kwargs,
     ):
         """"""  # remove init docstring from parent class to stop it showing in sphinx
@@ -151,6 +156,8 @@ def __init__(
             **get_dictionary_from_callable(NoisyFunctionMetric.__init__, kwargs),
         )
         self.properties = properties or {}
+        self._trial_data_cache = {}
+        self.check_for_nans = check_for_nans
 
     @classmethod
     def is_available_while_running(cls) -> bool:
@@ -161,21 +168,35 @@ def weight(self):
         return self._weight
 
     def fetch_trial_data(self, trial: Trial, **kwargs):
-        try:
-            wrapper_kwargs = (
-                self.wrapper._fetch_trial_data(
-                    parameters=trial.arm.parameters,
-                    param_names=self.param_names,
-                    trial=trial,
-                    metric_name=self.name,
-                    **kwargs,
-                )
-                if self.wrapper
-                else {}
+        if trial.index in self._trial_data_cache:
+            return Ok(Data(df=pd.DataFrame(self._trial_data_cache[trial.index])))
+        wrapper_kwargs = (
+            self.wrapper._fetch_trial_data(
+                parameters=trial.arm.parameters,
+                param_names=self.param_names,
+                trial=trial,
+                metric_name=self.name,
+                **kwargs,
             )
-        except IOError:  # ScriptWrapper failed to fetch data
-            trial.mark_failed(unsafe=True)
-            return Ok(Data(df=pd.DataFrame(columns=list(Data.REQUIRED_COLUMNS))))
+            if self.wrapper
+            else {}
+        )
+        if self.check_for_nans:
+            if isinstance(wrapper_kwargs, dict):
+                nan_checks = list(wrapper_kwargs.values())
+            elif isinstance(wrapper_kwargs, list):
+                nan_checks = wrapper_kwargs
+            else:
+                nan_checks = [wrapper_kwargs]
+            for elem in nan_checks:
+                if (
+                    (isinstance(elem, str) and ("nan" == elem.lower() or "na" == elem.lower()))
+                    or (isinstance(elem, float) and pd.isna(elem))
+                    or (elem is None)
+                ):
+                    m = f"NaNs in Results for Trial {trial.index}, failing trial"
+                    return Err(MetricFetchE(message=m, exception=ValueError(m)))
+
         wrapper_kwargs = wrapper_kwargs if wrapper_kwargs is not None else {}
         if wrapper_kwargs is not None and not isinstance(wrapper_kwargs, dict):
             wrapper_kwargs = {"wrapper_args": wrapper_kwargs}
@@ -196,6 +217,10 @@ def fetch_trial_data(self, trial: Trial, **kwargs):
                 trial_df = trial_data.unwrap().df
                 trial_df["sem"] = safe_kwargs["sem"]
                 trial_data = Ok(Data(df=trial_df))
+            if not isinstance(trial_data, Err):
+                self._trial_data_cache[trial.index] = trial_data.unwrap().df.to_dict(
+                    orient="list"
+                )  # the format ax uses to put them in
         finally:
             # We remove the extra parameters from the arms for json serialization
             [arm._parameters.pop("kwargs") for arm in trial.arms_by_name.values()]
@@ -246,7 +271,9 @@ def serialize_init_args(cls, obj: Any) -> dict[str, Any]:
         )
 
     @classmethod
-    def deserialize_init_args(cls, args: dict[str, Any]) -> dict[str, Any]:
+    def deserialize_init_args(
+        cls, args: dict[str, Any], decoder_registry=None, class_decoder_registry=None
+    ) -> dict[str, Any]:
         """Given a dictionary, deserialize the properties needed to initialize the
         object. Used for storage.
         """

diff --git a/environment.yml b/environment.yml
@@ -1,24 +1,27 @@
-name: boa-test
+name: boa
 channels:
-- pytorch
 - conda-forge
 dependencies:
-- python=3.10
+- python=3.11
   # mac x86 or Apple Silicon macs on rosetta python need pytorch>2
   # so if on either of those, it should install pytorch>2 by default
   # but if not and something doesn't work, upgrade pytorch, torchvision,
   # and torchaudio
-- pytorch::pytorch
-- numpy
-- pandas
-- scipy
-- scikit-learn
-- click
-- panel
+  # Windows will need to install pytorch from the pytorch channel first
+- pytorch<3
+- numpy<2
+- pandas<3
+- scipy<2
+- scikit-learn<2
+- click<9
+- panel<2
 - plotly>=5.10.0
 - notebook>=5.3
 - ipywidgets>=7.5
-- ax-platform  >=0.3.1,<=0.3.4
-- ruamel.yaml
-- attrs
-- jinja2
+# A change introduced in ax-platform 0.3.7 causes both run_n_trials and run_all_trials to not run
+# the expected number of trials. Support for 0.3.7 and above is removed until the cause is identified.
+# Once fixed, add those versions back into environment_dev.yml and CI.yaml as well.
+- ax-platform  >=0.3.1,<=0.3.6
+- ruamel.yaml <1
+- attrs<24
+- jinja2<4