Skip to content

Commit

Permalink
Merge pull request #170 from madeline-scyphers/develop
Browse files Browse the repository at this point in the history
0.10.3 Release
  • Loading branch information
madeline-scyphers authored Jul 10, 2024
2 parents 907a941 + 5dad648 commit 9c3c06a
Show file tree
Hide file tree
Showing 26 changed files with 292 additions and 91 deletions.
22 changes: 20 additions & 2 deletions .github/workflows/CI.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,24 @@ jobs:
fail-fast: false
matrix:
include:
# # something in 0.3.7 was introduced that caused both run_n_trials and run_all_trials to not run
# # the expected number of trials. Removing support for 0.3.7 and above until we can figure out what's going on
# - os: ubuntu
# label: linux-64-0.4.0
# prefix: /usr/share/miniconda3/envs/boa
# ax_version: 0.4.0
# - os: ubuntu
# label: linux-64-0.3.7
# prefix: /usr/share/miniconda3/envs/boa
# ax_version: 0.3.7
- os: ubuntu
label: linux-64-0.3.6
prefix: /usr/share/miniconda3/envs/boa
ax_version: 0.3.6
- os: ubuntu
label: linux-64-0.3.5
prefix: /usr/share/miniconda3/envs/boa
ax_version: 0.3.5
- os: ubuntu
label: linux-64-0.3.4
prefix: /usr/share/miniconda3/envs/boa
Expand Down Expand Up @@ -69,7 +87,7 @@ jobs:
with:
miniforge-variant: Mambaforge
miniforge-version: latest
python-version: 3.9
python-version: '3.11'
activate-environment: boa
use-mamba: true

Expand All @@ -84,7 +102,7 @@ jobs:

- name: Update base environment
if: steps.cache.outputs.cache-hit != 'true'
run: mamba env update -n boa -f environment_dev.yml --prune
run: mamba env update -n boa -f environment_dev.yml

- name: update ax versions
if: ${{ matrix.ax_version }} # only update when ax_version is set
Expand Down
27 changes: 13 additions & 14 deletions boa/async_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def run(config_path, scheduler_path, num_trials, experiment_dir=None):
scheduler.options = dataclasses.replace(scheduler.options, total_trials=num_trials)
else:
controller = Controller(config_path=config_path, wrapper=SyntheticWrapper(config=config))
controller.initialize_scheduler()
controller.initialize_scheduler(get_exp_kw={"check_for_nans": False})
scheduler = controller.scheduler

if not scheduler.opt_csv.exists() and scheduler.experiment.trials:
Expand All @@ -140,7 +140,7 @@ def run(config_path, scheduler_path, num_trials, experiment_dir=None):
)

if scheduler.opt_csv.exists():
exp_attach_data_from_opt_csv(list(config.objective.metric_names), scheduler)
exp_attach_data_from_opt_csv(config.objective.metric_names, scheduler)

generator_runs = scheduler.generation_strategy._gen_multiple(
experiment=scheduler.experiment, num_generator_runs=scheduler.wrapper.config.trials
Expand All @@ -156,20 +156,19 @@ def run(config_path, scheduler_path, num_trials, experiment_dir=None):
if scheduler.experiment.fetch_data().df.empty:
trials = scheduler.experiment.trials
metrics = scheduler.experiment.metrics
for metric in metrics.keys():
scheduler.experiment.attach_data(
Data(
df=pd.DataFrame.from_records(
dict(
trial_index=list(trials.keys()),
arm_name=[f"{i}_0" for i in trials.keys()],
metric_name=metric,
mean=None,
sem=0.0,
)
scheduler.experiment.attach_data(
Data(
df=pd.DataFrame(
dict(
trial_index=[i for i in trials.keys() for m in metrics.keys()],
arm_name=[f"{i}_0" for i in trials.keys() for m in metrics.keys()],
metric_name=[m for i in trials.keys() for m in metrics.keys()],
mean=None,
sem=0.0,
)
)
)
)

scheduler.save_data(metrics_to_end=True, ax_kwargs=dict(always_include_field_columns=True))
return scheduler
Expand All @@ -187,7 +186,7 @@ def exp_attach_data_from_opt_csv(metric_names, scheduler):
new_data = df.loc[df["trial_index"].isin(nan_trials)]
if new_data.empty:
return
metric_data = new_data[metric_names].to_dict()
metric_data = new_data[list(metric_names)].to_dict()
if check_min_package_version("ax-platform", "0.3.3"):
kw = dict(combine_with_last_data=True)
else:
Expand Down
7 changes: 2 additions & 5 deletions boa/ax_instantiation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,10 @@ def get_experiment(config: BOAConfig, runner: Runner, wrapper: BaseWrapper = Non
search_space = instantiate_search_space_from_json(config.parameters, config.parameter_constraints)

info_only_metrics = BoaInstantiationBase.get_metrics_from_obj_config(
config.objective, wrapper=wrapper, info_only=True
config.objective, wrapper=wrapper, info_only=True, **kwargs
)

optimization_config = BoaInstantiationBase.make_optimization_config(
config.objective,
wrapper=wrapper,
)
optimization_config = BoaInstantiationBase.make_optimization_config(config.objective, wrapper=wrapper, **kwargs)

exp = Experiment(
search_space=search_space,
Expand Down
6 changes: 4 additions & 2 deletions boa/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def __init__(self, **config):

@property
def metric_names(self):
return (metric.name for metric in self.metrics)
return [metric.name for metric in self.metrics]


@define
Expand Down Expand Up @@ -910,6 +910,8 @@ def update_dict(original: dict, param: dict):


if __name__ == "__main__": # pragma: no cover
from tests.conftest import TEST_CONFIG_DIR
from boa.definitions import ROOT

TEST_CONFIG_DIR = ROOT / "tests" / "test_configs"

c = BOAConfig.from_jsonlike(pathlib.Path(TEST_CONFIG_DIR / "test_config_generic.yaml"))
5 changes: 5 additions & 0 deletions boa/config/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from .config import BOAMetric


STOPPING_STRATEGY_MAPPING = {"improvement": "ImprovementGlobalStoppingStrategy"}


def _convert_noton_type(converter, type_, default_if_none=None) -> Any:
def type_converter(val):
if default_if_none is not None and val is None:
Expand Down Expand Up @@ -66,6 +69,8 @@ def _load_stopping_strategy(d: Optional[dict], module: ModuleType):
if "type" not in d:
raise ValueError("Type missing from stopping strategy key") # can't work with it if type not set
type_ = d.pop("type")
if type_ in STOPPING_STRATEGY_MAPPING:
type_ = STOPPING_STRATEGY_MAPPING[type_]
for key, value in d.items():
if isinstance(value, dict):
d[key] = _load_stopping_strategy(d=value, module=module)
Expand Down
16 changes: 11 additions & 5 deletions boa/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,24 +114,30 @@ def start_logger(self):
get_logger("ax", filename=str(Path(self.wrapper.experiment_dir) / "optimization.log"))
return self.logger

def initialize_scheduler(self, **kwargs) -> tuple[Scheduler, BaseWrapper]:
def initialize_scheduler(self, get_exp_kw=None, get_scheduler_kw=None) -> tuple[Scheduler, BaseWrapper]:
"""
Sets experiment and scheduler
Parameters
----------
kwargs
kwargs to pass to get_experiment and get_scheduler
get_exp_kw
keyword arguments for :meth:`.get_experiment`
get_scheduler_kw
keyword arguments for :meth:`.get_scheduler`
Returns
-------
returns a tuple with the first element being the scheduler
and the second element being your wrapper (both initialized
and ready to go)
"""
get_exp_kw = get_exp_kw or {}
get_scheduler_kw = get_scheduler_kw or {}

self.experiment = get_experiment(self.config, WrappedJobRunner(wrapper=self.wrapper), self.wrapper, **kwargs)
self.scheduler = get_scheduler(self.experiment, config=self.config, **kwargs)
self.experiment = get_experiment(
self.config, WrappedJobRunner(wrapper=self.wrapper), self.wrapper, **get_exp_kw
)
self.scheduler = get_scheduler(self.experiment, config=self.config, **get_scheduler_kw)
return self.scheduler, self.wrapper

def run(self, scheduler: Scheduler = None, wrapper: BaseWrapper = None) -> Scheduler:
Expand Down
15 changes: 12 additions & 3 deletions boa/instantiation_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,31 @@
from ax.service.utils.instantiation import InstantiationBase

from boa.config import BOAMetric, BOAObjective
from boa.metrics.metrics import get_metric_from_config
from boa.metrics.metrics import PassThroughMetric, get_metric_from_config
from boa.metrics.modular_metric import ModularMetric
from boa.wrappers.base_wrapper import BaseWrapper


class BoaInstantiationBase(InstantiationBase):
@classmethod
def make_optimization_config(
cls,
objective: BOAObjective,
wrapper: BaseWrapper = None,
status_quo_defined: bool = False,
**kwargs,
):
outcome_constraints = cls.make_outcome_constraints(objective.outcome_constraints, status_quo_defined)
for constraint in outcome_constraints:
if not isinstance(constraint.metric, ModularMetric) or not getattr(constraint.metric, "wrapper", None):
constraint.metric = PassThroughMetric(
name=constraint.metric.name, lower_is_better=constraint.metric.lower_is_better, wrapper=wrapper
)

return cls.optimization_config_from_objectives(
cls.make_objectives(objective, **kwargs),
cls.make_objectives(objective, wrapper=wrapper, **kwargs),
cls.make_objective_thresholds(objective.objective_thresholds, status_quo_defined),
cls.make_outcome_constraints(objective.outcome_constraints, status_quo_defined),
outcome_constraints,
)

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion boa/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,12 @@ def set_handlers(logger, level=DEFAULT_LOG_LEVEL, filename=None):


def get_formatter():
fmt = "[%(levelname)s %(asctime)s %(processName)s %(threadName)s] %(name)s: %(message)s"
fmt = "[%(levelname)s %(asctime)s %(processName)s %(threadName)s {%(filename)s:%(lineno)d}] %(name)s: %(message)s"
formatter = logging.Formatter(fmt=fmt)
return formatter


# PosixPath('/private/var/folders/10/qs3h5zj10bn52ys456cjq0z40000gn/T/tmpqt74j2js_20240709T180234/run_model.R')
def build_stream_handler(level: int = DEFAULT_LOG_LEVEL) -> logging.StreamHandler:
"""Build the default stream handler used for most BOA logging. Sets
default level to INFO, instead of WARNING.
Expand Down
2 changes: 1 addition & 1 deletion boa/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def get_metric_from_config(config: BOAMetric, instantiate=True, **kwargs) -> Mod
if config.metric_type == MetricType.METRIC or config.metric_type == MetricType.BOA_METRIC:
metric = get_metric_by_class_name(instantiate=instantiate, **kw)
elif config.metric_type == MetricType.SKLEARN_METRIC:
kwargs["sklearn_"] = True
kw["sklearn_"] = True
metric = get_metric_by_class_name(instantiate=instantiate, **kw)
elif config.metric_type == MetricType.SYNTHETIC_METRIC:
metric = setup_synthetic_metric(instantiate=instantiate, **kw)
Expand Down
57 changes: 42 additions & 15 deletions boa/metrics/modular_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import pandas as pd
from ax import Data, Metric, Trial
from ax.core.metric import MetricFetchE
from ax.core.types import TParameterization
from ax.metrics.noisy_function import NoisyFunctionMetric
from ax.utils.common.result import Err, Ok
Expand Down Expand Up @@ -107,6 +108,9 @@ class ModularMetric(NoisyFunctionMetric, metaclass=MetricRegister):
properties
Arbitrary dictionary of properties to store. Properties need to be json
serializable
check_for_nans
If True, check for NaNs in the results of the metric and fail the trial if found.
If nans are not dealt with in some way, they can cause the optimization to fail.
kwargs
"""

Expand All @@ -122,6 +126,7 @@ def __init__(
wrapper: Optional[BaseWrapper] = None,
properties: Optional[dict[str]] = None,
weight: Optional[float] = None,
check_for_nans: Optional[bool] = True,
**kwargs,
):
"""""" # remove init docstring from parent class to stop it showing in sphinx
Expand Down Expand Up @@ -151,6 +156,8 @@ def __init__(
**get_dictionary_from_callable(NoisyFunctionMetric.__init__, kwargs),
)
self.properties = properties or {}
self._trial_data_cache = {}
self.check_for_nans = check_for_nans

@classmethod
def is_available_while_running(cls) -> bool:
Expand All @@ -161,21 +168,35 @@ def weight(self):
return self._weight

def fetch_trial_data(self, trial: Trial, **kwargs):
try:
wrapper_kwargs = (
self.wrapper._fetch_trial_data(
parameters=trial.arm.parameters,
param_names=self.param_names,
trial=trial,
metric_name=self.name,
**kwargs,
)
if self.wrapper
else {}
if trial.index in self._trial_data_cache:
return Ok(Data(df=pd.DataFrame(self._trial_data_cache[trial.index])))
wrapper_kwargs = (
self.wrapper._fetch_trial_data(
parameters=trial.arm.parameters,
param_names=self.param_names,
trial=trial,
metric_name=self.name,
**kwargs,
)
except IOError: # ScriptWrapper failed to fetch data
trial.mark_failed(unsafe=True)
return Ok(Data(df=pd.DataFrame(columns=list(Data.REQUIRED_COLUMNS))))
if self.wrapper
else {}
)
if self.check_for_nans:
if isinstance(wrapper_kwargs, dict):
nan_checks = list(wrapper_kwargs.values())
elif isinstance(wrapper_kwargs, list):
nan_checks = wrapper_kwargs
else:
nan_checks = [wrapper_kwargs]
for elem in nan_checks:
if (
(isinstance(elem, str) and ("nan" == elem.lower() or "na" == elem.lower()))
or (isinstance(elem, float) and pd.isna(elem))
or (elem is None)
):
m = f"NaNs in Results for Trial {trial.index}, failing trial"
return Err(MetricFetchE(message=m, exception=ValueError(m)))

wrapper_kwargs = wrapper_kwargs if wrapper_kwargs is not None else {}
if wrapper_kwargs is not None and not isinstance(wrapper_kwargs, dict):
wrapper_kwargs = {"wrapper_args": wrapper_kwargs}
Expand All @@ -196,6 +217,10 @@ def fetch_trial_data(self, trial: Trial, **kwargs):
trial_df = trial_data.unwrap().df
trial_df["sem"] = safe_kwargs["sem"]
trial_data = Ok(Data(df=trial_df))
if not isinstance(trial_data, Err):
self._trial_data_cache[trial.index] = trial_data.unwrap().df.to_dict(
orient="list"
) # the format ax uses to put them in
finally:
# We remove the extra parameters from the arms for json serialization
[arm._parameters.pop("kwargs") for arm in trial.arms_by_name.values()]
Expand Down Expand Up @@ -246,7 +271,9 @@ def serialize_init_args(cls, obj: Any) -> dict[str, Any]:
)

@classmethod
def deserialize_init_args(cls, args: dict[str, Any]) -> dict[str, Any]:
def deserialize_init_args(
cls, args: dict[str, Any], decoder_registry=None, class_decoder_registry=None
) -> dict[str, Any]:
"""Given a dictionary, deserialize the properties needed to initialize the
object. Used for storage.
"""
Expand Down
31 changes: 17 additions & 14 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
name: boa-test
name: boa
channels:
- pytorch
- conda-forge
dependencies:
- python=3.10
- python=3.11
# mac x86 or Apple Silicon macs on rosetta python need pytorch>2
# so if on either of those, it should install pytorch>2 by default
# but if not and something doesn't work, upgrade pytorch, torchvision,
# and torchaudio
- pytorch::pytorch
- numpy
- pandas
- scipy
- scikit-learn
- click
- panel
# Windows will need to install pytorch from the pytorch channel first
- pytorch<3
- numpy<2
- pandas<3
- scipy<2
- scikit-learn<2
- click<9
- panel<2
- plotly>=5.10.0
- notebook>=5.3
- ipywidgets>=7.5
- ax-platform >=0.3.1,<=0.3.4
- ruamel.yaml
- attrs
- jinja2
# something in 0.3.7 was introduced that caused both run_n_trials and run_all_trials to not run
# the expected number of trials. Removing support for 0.3.7 and above until we can figure out what's going on
# When fixed, add back into environment_dev.yml and CI.yaml
- ax-platform >=0.3.1,<=0.3.6
- ruamel.yaml <1
- attrs<24
- jinja2<4
Loading

0 comments on commit 9c3c06a

Please sign in to comment.