From 157807d5284f0eaccef4b2869a9579389a4e3077 Mon Sep 17 00:00:00 2001 From: Shahar Bar Date: Tue, 12 Aug 2025 13:03:54 +0300 Subject: [PATCH] Add multi-objective support in CMAB and BNN models (#106) ### Changes: * Introduced `CmabBernoulliMO` and `CmabBernoulliMOCC` classes for multi-objective contextual bandit strategies. * Added `BayesianNeuralNetworkMO` and `BayesianNeuralNetworkMOCC` classes for multi-objective Bayesian neural networks. * Updated `ActionsManager` to include multi-objective action managers. * Enhanced tests to cover new multi-objective functionalities and ensure proper behavior with multi-dimensional rewards. * Refactored existing models and tests to accommodate new multi-objective structures. --- pybandits/actions_manager.py | 4 + pybandits/cmab.py | 76 +++++++- pybandits/model.py | 102 +++++++++-- pybandits/pydantic_version_compatibility.py | 9 +- pyproject.toml | 2 +- tests/test_actions_manager.py | 2 +- tests/test_cmab.py | 191 ++++++++++++++++---- tests/test_mab.py | 101 ++++++++++- tests/test_model.py | 106 +++++++++++ tests/test_offline_policy_evaluator.py | 8 +- 10 files changed, 533 insertions(+), 68 deletions(-) diff --git a/pybandits/actions_manager.py b/pybandits/actions_manager.py index 60995947..54a61a70 100644 --- a/pybandits/actions_manager.py +++ b/pybandits/actions_manager.py @@ -21,6 +21,8 @@ BaseBetaMO, BayesianNeuralNetwork, BayesianNeuralNetworkCC, + BayesianNeuralNetworkMO, + BayesianNeuralNetworkMOCC, Beta, BetaCC, BetaMO, @@ -1067,3 +1069,5 @@ def _update_actions( CmabActionsManagerSO = CmabActionsManager[Union[BayesianNeuralNetwork, CmabZoomingModel]] CmabActionsManagerCC = CmabActionsManager[Union[BayesianNeuralNetworkCC, CmabZoomingModelCC]] +CmabActionsManagerMO = CmabActionsManager[BayesianNeuralNetworkMO] +CmabActionsManagerMOCC = CmabActionsManager[BayesianNeuralNetworkMOCC] diff --git a/pybandits/cmab.py b/pybandits/cmab.py index fe76f37b..f9cc4f1c 100644 --- a/pybandits/cmab.py +++ b/pybandits/cmab.py @@ -29,13 +29,24 @@ from pybandits.actions_manager import CmabActionsManager, CmabActionsManagerCC, CmabActionsManagerSO from pybandits.base import ActionId, BinaryReward, CmabPredictions, PositiveProbability, Serializable from pybandits.mab import BaseMab -from pybandits.model import BaseBayesianNeuralNetwork, BnnLayerParams, BnnParams, StudentTArray +from pybandits.model import ( + BaseBayesianNeuralNetwork, + BaseBayesianNeuralNetworkMO, + BayesianNeuralNetworkMO, + BayesianNeuralNetworkMOCC, + BnnLayerParams, + BnnParams, + StudentTArray, +) from pybandits.pydantic_version_compatibility import validate_call from pybandits.quantitative_model import BaseCmabZoomingModel from pybandits.strategy import ( BestActionIdentificationBandit, ClassicBandit, CostControlBandit, + MultiObjectiveBandit, + MultiObjectiveCostControlBandit, + MultiObjectiveStrategy, ) @@ -296,3 +307,66 @@ class CmabBernoulliCC(BaseCmabBernoulli): actions_manager: CmabActionsManagerCC strategy: CostControlBandit _predict_with_proba: bool = True + + +class BaseCmabBernoulliMO(BaseCmabBernoulli, ABC): + """ + Base model for a Contextual Multi-Armed Bandit with Thompson Sampling and Multi-Objective strategy. + + Parameters + ---------- + actions : Dict[ActionId, BaseBayesianNeuralNetworkMO] + The list of possible actions and their associated models. + strategy : MultiObjectiveStrategy + The strategy used to select actions. + """ + + actions: Dict[ActionId, BaseBayesianNeuralNetworkMO] + strategy: MultiObjectiveStrategy + + +class CmabBernoulliMO(BaseCmabBernoulliMO): + """ + Contextual Multi-Armed Bandit with Thompson Sampling and Multi-Objective strategy. + + The reward for an action is a multidimensional vector. Actions are compared using Pareto order between their expected reward vectors. + Pareto optimal actions are those not strictly dominated by any other action. + + Reference + --------- + Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015) + https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem + + Parameters + ---------- + actions : Dict[ActionId, BayesianNeuralNetworkMO] + The list of possible actions and their associated models. + strategy : MultiObjectiveBandit + The strategy used to select actions. + """ + + actions: Dict[ActionId, BayesianNeuralNetworkMO] + strategy: MultiObjectiveBandit + predict_with_proba: bool = False + predict_actions_randomly: bool = False + + +class CmabBernoulliMOCC(BaseCmabBernoulliMO): + """ + Contextual Multi-Armed Bandit with Thompson Sampling for Multi-Objective (MO) and Cost Control (CC) strategy. + + This bandit allows the reward to be a multidimensional vector and includes control of the action cost, merging + Multi-Objective and Cost Control strategies. + + Parameters + ---------- + actions : Dict[ActionId, BayesianNeuralNetworkMOCC] + The list of possible actions and their associated models. + strategy : MultiObjectiveCostControlBandit + The strategy used to select actions. + """ + + actions: Dict[ActionId, BayesianNeuralNetworkMOCC] + strategy: MultiObjectiveCostControlBandit + predict_with_proba: bool = True + predict_actions_randomly: bool = False diff --git a/pybandits/model.py b/pybandits/model.py index d8af4159..cfc2bf92 100644 --- a/pybandits/model.py +++ b/pybandits/model.py @@ -183,23 +183,6 @@ class BaseBetaMO(ModelMO, ABC): models: List[Beta] - @validate_call - def sample_proba(self, n_samples: PositiveInt) -> List[MOProbability]: - """ - Sample the probability of getting a positive reward. - - Parameters - ---------- - n_samples : PositiveInt - Number of samples to draw. - - Returns - ------- - prob: List[MOProbability] - Probabilities of getting a positive reward for each sample and objective. - """ - return [list(p) for p in zip(*[model.sample_proba(n_samples=n_samples) for model in self.models])] - @classmethod def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": """ @@ -854,6 +837,91 @@ class BayesianNeuralNetworkCC(BaseBayesianNeuralNetwork, ModelCC): """ +class BaseBayesianNeuralNetworkMO(ModelMO, ABC): + """ + Base class for Bayesian Neural Network with multi-objective. + + Parameters + ---------- + models : List[BayesianNeuralNetwork] + The list of Bayesian Neural Network models for each objective. + """ + + models: List[BayesianNeuralNetwork] + + @classmethod + def cold_start( + cls, + n_objectives: PositiveInt, + n_features: PositiveInt, + hidden_dim_list: Optional[List[PositiveInt]] = None, + update_method: UpdateMethods = "MCMC", + update_kwargs: Optional[dict] = None, + dist_params_init: Optional[Dict[str, float]] = None, + **kwargs, + ) -> "BayesianNeuralNetworkMO": + """ + Initialize a multi-objective Bayesian Neural Network with a cold start. + + Parameters + ---------- + n_objectives : PositiveInt + Number of objectives (models) to create. + n_features : PositiveInt + Number of input features for each network. + hidden_dim_list : Optional[List[PositiveInt]], optional + List of dimensions for the hidden layers of each network. + update_method : UpdateMethods + Method to update the networks. + update_kwargs : Optional[dict], optional + Additional keyword arguments for the update method. + dist_params_init : Optional[Dict[str, float]], optional + Initial distribution parameters for the network weights and biases. + **kwargs + Additional keyword arguments. + + Returns + ------- + BayesianNeuralNetworkMO + A multi-objective BNN with the specified number of objectives. + """ + models = [ + BayesianNeuralNetwork.cold_start( + n_features=n_features, + hidden_dim_list=hidden_dim_list, + update_method=update_method, + update_kwargs=update_kwargs, + dist_params_init=dist_params_init, + ) + for _ in range(n_objectives) + ] + return cls(models=models, **kwargs) + + +class BayesianNeuralNetworkMO(BaseBayesianNeuralNetworkMO): + """ + Bayesian Neural Network model for multi-objective. + + Parameters + ---------- + models : List[BayesianNeuralNetwork] + The list of Bayesian Neural Network models for each objective. + """ + + +class BayesianNeuralNetworkMOCC(BaseBayesianNeuralNetworkMO, ModelMO, ModelCC): + """ + Bayesian Neural Network model for multi-objective with cost control. + + Parameters + ---------- + models : List[BayesianNeuralNetwork] + The list of Bayesian Neural Network models for each objective. + cost : NonNegativeFloat + Cost associated to the Bayesian Neural Network model. + """ + + class BayesianLogisticRegression(BayesianNeuralNetwork): """ A Bayesian Logistic Regression model that inherits from BayesianNeuralNetwork. diff --git a/pybandits/pydantic_version_compatibility.py b/pybandits/pydantic_version_compatibility.py index 6b5ef594..4ba4d57f 100644 --- a/pybandits/pydantic_version_compatibility.py +++ b/pybandits/pydantic_version_compatibility.py @@ -49,7 +49,7 @@ PYDANTIC_VERSION_2 = "2" -def _get_major_pydantic_version(): +def _get_major_pydantic_version() -> str: """ Get the major version of pydantic. @@ -58,11 +58,8 @@ def _get_major_pydantic_version(): major_version : str The major version of pydantic. """ - try: - major_version = _VERSION.split(".")[0] - return major_version - except Exception as e: - raise ValueError(f"Error getting Pydantic version: {e}") + major_version = _VERSION.split(".")[0] + return major_version pydantic_version = _get_major_pydantic_version() diff --git a/pyproject.toml b/pyproject.toml index cd0f6d8f..4356cab4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pybandits" -version = "4.0.11" +version = "4.0.12" description = "Python Multi-Armed Bandit Library" authors = [ "Dario d'Andrea ", diff --git a/tests/test_actions_manager.py b/tests/test_actions_manager.py index e0d86927..36482cb6 100644 --- a/tests/test_actions_manager.py +++ b/tests/test_actions_manager.py @@ -25,7 +25,7 @@ class DummyActionsManager(ActionsManager): - actions: Dict[ActionId, Union[Beta, BetaMO]] + actions: Dict[ActionId, Union[Beta, BetaMO, SmabZoomingModel]] def _update_actions( self, diff --git a/tests/test_cmab.py b/tests/test_cmab.py index ef81b98f..f9357255 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -36,11 +36,20 @@ import pybandits from pybandits.actions_manager import CmabModelType from pybandits.base import ActionId, Float01, PositiveProbability, PyBanditsBaseModel -from pybandits.cmab import BaseCmabBernoulli, CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC +from pybandits.cmab import ( + BaseCmabBernoulli, + CmabBernoulli, + CmabBernoulliBAI, + CmabBernoulliCC, + CmabBernoulliMO, + CmabBernoulliMOCC, +) from pybandits.model import ( BaseBayesianNeuralNetwork, BayesianNeuralNetwork, BayesianNeuralNetworkCC, + BayesianNeuralNetworkMO, + BayesianNeuralNetworkMOCC, BnnLayerParams, BnnParams, StudentTArray, @@ -51,7 +60,13 @@ ValidationError, ) from pybandits.quantitative_model import BaseCmabZoomingModel, CmabZoomingModel, CmabZoomingModelCC, QuantitativeModel -from pybandits.strategy import BestActionIdentificationBandit, ClassicBandit, CostControlBandit +from pybandits.strategy import ( + BestActionIdentificationBandit, + ClassicBandit, + CostControlBandit, + MultiObjectiveBandit, + MultiObjectiveCostControlBandit, +) from tests.test_actions_manager import REFERENCE_DELTA from tests.utils import ( FakeApproximation, @@ -184,15 +199,21 @@ def _create_actions( hidden_dim_list: List[int], update_method: UpdateMethods, update_kwargs: Optional[Dict[str, Any]], + n_objectives: Optional[PositiveInt] = None, ) -> Tuple[Dict[str, Any], Dict[str, Any]]: if len(self.model_types) < len(action_ids): indices = np.random.randint(0, len(self.model_types), len(action_ids)) self.model_types = [self.model_types[i] for i in indices] - if all(model in [BayesianNeuralNetworkCC, CmabZoomingModelCC] for model in self.model_types): + if all( + model in [BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC, CmabZoomingModelCC] + for model in self.model_types + ): # Generate random costs costs = costs.draw(cost_strategy(n_actions=len(action_ids))) costs = [ - cost if model_type in [BayesianNeuralNetworkCC] else partial(_quantitative_cost, cost=cost) + cost + if model_type in [BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC] + else partial(_quantitative_cost, cost=cost) for cost, model_type in zip(costs, self.model_types) ] else: @@ -202,36 +223,56 @@ def _create_actions( base_model_cold_start_kwargs = dict( n_features=n_features, hidden_dim_list=hidden_dim_list, **model_cold_start_kwargs ) - model_params = BaseBayesianNeuralNetwork.create_model_params( - n_features=n_features, hidden_dim_list=hidden_dim_list - ) - if costs is not None: - # Handle models with costs (BayesianNeuralNetworkCC or CmabZoomingModelCC) - actions_dict = {} - for action_id, model_type, cost in zip(action_ids, self.model_types, costs): - if issubclass(model_type, BayesianNeuralNetworkCC): - actions_dict[action_id] = model_type( - model_params=model_params, - **model_cold_start_kwargs, - cost=cost, + if n_objectives is None: + # Single-objective models + if costs is not None: + # Handle models with costs + return { + action_id: model_type.cold_start( + n_features=n_features, hidden_dim_list=hidden_dim_list, cost=cost, **model_cold_start_kwargs ) - else: # CmabZoomingModelCC - actions_dict[action_id] = model_type.cold_start( + if issubclass(model_type, BayesianNeuralNetworkCC) + else model_type.cold_start( dimension=1, base_model_cold_start_kwargs=base_model_cold_start_kwargs, cost=cost + ) # CmabZoomingModelCC + for action_id, model_type, cost in zip(action_ids, self.model_types, costs) + }, base_model_cold_start_kwargs + else: + # Handle models without costs + return { + action_id: model_type.cold_start( + n_features=n_features, hidden_dim_list=hidden_dim_list, **model_cold_start_kwargs ) + if issubclass(model_type, BayesianNeuralNetwork) + else model_type.cold_start( + dimension=1, base_model_cold_start_kwargs=base_model_cold_start_kwargs + ) # CmabZoomingModel + for action_id, model_type in zip(action_ids, self.model_types) + }, base_model_cold_start_kwargs else: - # Handle models without costs (BayesianNeuralNetwork or CmabZoomingModel) - actions_dict = {} - for action_id, model_type in zip(action_ids, self.model_types): - if issubclass(model_type, BayesianNeuralNetwork): - actions_dict[action_id] = model_type(model_params=model_params, **model_cold_start_kwargs) - else: # CmabZoomingModel - actions_dict[action_id] = model_type.cold_start( - dimension=1, - base_model_cold_start_kwargs=base_model_cold_start_kwargs, + # Multi-objective models + if costs is not None: + return { + action_id: model_type.cold_start( + n_objectives=n_objectives, + n_features=n_features, + hidden_dim_list=hidden_dim_list, + cost=cost, + **model_cold_start_kwargs, ) - return actions_dict, base_model_cold_start_kwargs + for action_id, model_type, cost in zip(action_ids, self.model_types, costs) + }, base_model_cold_start_kwargs + else: + return { + action_id: model_type.cold_start( + n_objectives=n_objectives, + n_features=n_features, + hidden_dim_list=hidden_dim_list, + **model_cold_start_kwargs, + ) + for action_id, model_type in zip(action_ids, self.model_types) + }, base_model_cold_start_kwargs def create_cmab_and_actions( self, @@ -239,6 +280,7 @@ def create_cmab_and_actions( epsilon: Optional[Float01], delta: Optional[PositiveProbability], costs: st.SearchStrategy, + n_objectives: st.SearchStrategy[PositiveInt], exploit_p: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], subsidy_factor: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], n_features: PositiveInt, @@ -246,8 +288,13 @@ def create_cmab_and_actions( update_method: UpdateMethods, update_kwargs: Optional[Dict[str, Any]], ) -> Tuple[BaseCmabBernoulli, Dict[ActionId, CmabModelType], Dict[str, Any]]: + n_objectives = ( + n_objectives.draw(st.integers(min_value=1, max_value=10)) + if self.cmab_class in [CmabBernoulliMO, CmabBernoulliMOCC] + else None + ) actions, base_model_cold_start_kwargs = self._create_actions( - action_ids, costs, n_features, hidden_dim_list, update_method, update_kwargs + action_ids, costs, n_features, hidden_dim_list, update_method, update_kwargs, n_objectives ) default_action = action_ids[0] if epsilon and not delta else None if default_action and isinstance(self.model_types[0], QuantitativeModel): @@ -276,6 +323,9 @@ def create_cmab_and_actions( if any(isinstance(model, BaseBayesianNeuralNetwork) for model in actions.values()): kwargs.update(base_model_cold_start_kwargs) + # For cold start test + if self.cmab_class in [CmabBernoulliMO, CmabBernoulliMOCC]: + kwargs["n_objectives"] = n_objectives return cmab, actions, kwargs @@ -289,6 +339,16 @@ def create_cmab_and_actions( CostControlBandit, [BayesianNeuralNetworkCC, CmabZoomingModelCC], ), + "cmab_mo": ModelTestConfig( + CmabBernoulliMO, + MultiObjectiveBandit, + [BayesianNeuralNetworkMO], + ), + "cmab_mocc": ModelTestConfig( + CmabBernoulliMOCC, + MultiObjectiveCostControlBandit, + [BayesianNeuralNetworkMOCC], + ), } @@ -306,6 +366,7 @@ def create_cmab_and_actions( epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), delta=st.one_of(st.none(), st.just(0.1)), costs=st.data(), + n_objectives=st.data(), n_features=st.integers(min_value=1, max_value=5), hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2), subsidy_factor=st.data(), @@ -319,6 +380,7 @@ def test_cold_start( epsilon: Optional[float], delta, costs, + n_objectives, n_features, hidden_dim_list, exploit_p, @@ -332,6 +394,7 @@ def test_cold_start( epsilon, delta, costs, + n_objectives, exploit_p, subsidy_factor, n_features, @@ -351,9 +414,14 @@ def test_cold_start( action for action, model in zip(action_ids, config.model_types) if issubclass(model, QuantitativeModel) }, } - if all(model in [BayesianNeuralNetworkCC, CmabZoomingModelCC] for model in config.model_types): + if all( + model in [BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC, CmabZoomingModelCC] + for model in config.model_types + ): cold_start_kwargs["action_ids_cost"] = { - action: model.cost for action, model in actions.items() if isinstance(model, (BayesianNeuralNetworkCC)) + action: model.cost + for action, model in actions.items() + if isinstance(model, (BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC)) } cold_start_kwargs["quantitative_action_ids_cost"] = { action: model.cost for action, model in actions.items() if isinstance(model, CmabZoomingModelCC) @@ -370,6 +438,7 @@ def test_cold_start( n_features=st.integers(min_value=1, max_value=5), hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2), costs=st.data(), + n_objectives=st.data(), subsidy_factor=st.data(), exploit_p=st.data(), update_method=st.sampled_from(literal_update_methods), @@ -381,6 +450,7 @@ def test_bad_initialization( n_features: int, hidden_dim_list: List[PositiveInt], costs, + n_objectives, exploit_p, subsidy_factor, update_method, @@ -559,6 +629,11 @@ def test_update( # Test updates with generated data actions_to_update = sample_with_replacement(action_ids, n_samples) # Generate quantities only if there are any QuantitativeModel actions + # Handle multi-objective rewards for MO models + if config.cmab_class in [CmabBernoulliMO, CmabBernoulliMOCC]: + # Multi-objective rewards: list of lists + n_objectives = 2 # Default for testing + reward_data = [[np.random.randint(0, 2) for _ in range(n_objectives)] for _ in range(n_samples)] for_update_kwargs = {"actions": actions_to_update, "rewards": reward_data} if any(isinstance(model, BaseCmabZoomingModel) for model in cmab.actions.values()): quantity_data = np.random.random(size=n_samples).tolist() @@ -837,7 +912,13 @@ def cmab_old_state(draw, CmabClass=None): return state -OLD_STATE_TEST_CONFIGS = {"cmab": CmabBernoulli, "cmab_bai": CmabBernoulliBAI, "cmab_cc": CmabBernoulliCC} +OLD_STATE_TEST_CONFIGS = { + "cmab": CmabBernoulli, + "cmab_bai": CmabBernoulliBAI, + "cmab_cc": CmabBernoulliCC, + "cmab_mo": CmabBernoulliMO, + "cmab_mocc": CmabBernoulliMOCC, +} @pytest.mark.parametrize("CmabClass", OLD_STATE_TEST_CONFIGS.values(), ids=OLD_STATE_TEST_CONFIGS.keys()) @@ -1003,3 +1084,49 @@ def test_cmab_predict_shape_mismatch(dim_list): mab.predict(context=context) with pytest.raises(AttributeError): mab.predict(context=[]) + + +@settings(deadline=500) +@given( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=5), + st.sampled_from(literal_update_methods), + st.just([2]), + st.integers(min_value=2, max_value=3), +) +def test_cmab_mo_update_shape_mismatch(n_samples, n_features, update_method, hidden_dim_list, n_objectives): + actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() + # Multi-objective rewards + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) + + # Create multi-objective models + models_a1 = [ + BayesianNeuralNetwork.cold_start( + n_features=n_features, hidden_dim_list=hidden_dim_list, update_method=update_method + ) + for _ in range(n_objectives) + ] + models_a2 = [ + BayesianNeuralNetwork.cold_start( + n_features=n_features, hidden_dim_list=hidden_dim_list, update_method=update_method + ) + for _ in range(n_objectives) + ] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + + mab = CmabBernoulliMO( + actions={ + "a1": BayesianNeuralNetworkMO(models=models_a1, model_params=model_params), + "a2": BayesianNeuralNetworkMO(models=models_a2, model_params=model_params), + } + ) + + # Test with wrong number of objectives in rewards + wrong_rewards = [[np.random.choice([0, 1]) for _ in range(n_objectives + 1)] for _ in range(n_samples)] + with pytest.raises(AttributeError): + mab.update(context=context, actions=actions, rewards=wrong_rewards) + + # Test with single-objective rewards (should fail for MO model) + single_rewards = np.random.choice([0, 1], size=n_samples).tolist() + with pytest.raises(AttributeError): + mab.update(context=context, actions=actions, rewards=single_rewards) diff --git a/tests/test_mab.py b/tests/test_mab.py index 74d6e964..314a1f48 100644 --- a/tests/test_mab.py +++ b/tests/test_mab.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from typing import Dict, List, Optional, Set, Union +from typing import Dict, List, Optional, Set, Tuple, Union import hypothesis.strategies as st import numpy as np @@ -28,17 +28,17 @@ from hypothesis import given from pytest_mock import MockerFixture -from pybandits.base import ActionId, BinaryReward, Float01, Probability +from pybandits.base import ActionId, BinaryReward, Float01, Probability, UnifiedActionId from pybandits.mab import BaseMab from pybandits.model import Beta, BetaCC from pybandits.pydantic_version_compatibility import ValidationError from pybandits.strategy import ClassicBandit -from tests.test_actions_manager import DummyActionsManager +from tests.test_actions_manager import REFERENCE_DELTA, DummyActionsManager class DummyMab(BaseMab): epsilon: Optional[Float01] = None - default_action: Optional[ActionId] = None + default_action: Optional[UnifiedActionId] = None actions_manager: DummyActionsManager def _update( @@ -57,7 +57,7 @@ def predict( valid_actions = self._get_valid_actions(forbidden_actions) return np.random.choice(valid_actions) - def get_state(self) -> (str, dict): + def get_state(self) -> Tuple[str, dict]: model_name = self.__class__.__name__ state: dict = {"actions": self.actions} return model_name, state @@ -172,3 +172,94 @@ def test_adaptive_window_without_epsilon_fails(adaptive_window_size, epsilon): epsilon=epsilon, default_action="a1", ) + + +######################################################################################################################## + + +# MAB model_post_init validation tests + + +def test_mab_model_post_init_adaptive_window_epsilon_validation(): + """Test model_post_init validation for adaptive window with epsilon greedy requirements.""" + actions = {"action1": Beta(), "action2": Beta()} + + # Test case 1: delta is set but epsilon is None - should raise ValueError + with pytest.raises( + ValueError, match="Adaptive window requires epsilon greedy super strategy with not default action." + ): + DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=None, default_action=None, delta=REFERENCE_DELTA) + + # Test case 2: delta is set, epsilon is provided, but default_action is also provided - should raise ValueError + with pytest.raises( + ValueError, match="Adaptive window requires epsilon greedy super strategy with not default action." + ): + DummyMab( + actions=actions, strategy=ClassicBandit(), epsilon=0.1, default_action="action1", delta=REFERENCE_DELTA + ) + + +def test_mab_model_post_init_default_action_validation(): + """Test model_post_init validation for default action requirements.""" + actions = {"action1": Beta(), "action2": Beta()} + + # Test case 1: epsilon is not provided but default_action is provided - should raise AttributeError + with pytest.raises(AttributeError, match="A default action should only be defined when epsilon is defined."): + DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=None, default_action="action1") + + +def test_mab_model_post_init_invalid_default_action(epsilon=0.1): + """Test model_post_init validation for invalid default action.""" + actions = {"action1": Beta(), "action2": Beta()} + + # Test case: default_action is not in the actions set - should raise AttributeError + with pytest.raises(AttributeError, match="The default action must be valid action defined in the actions set."): + DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action="invalid_action") + + +def test_mab_model_post_init_quantitative_default_action_validation(epsilon=0.1): + """Test model_post_init validation for quantitative default action requirements.""" + + # This test is demonstrating that the current validation logic has an issue: + # When default_action is a tuple, it checks if the entire tuple is in self.actions keys, + # but actions only contains string keys. This causes the validation to fail at line 138-139 + # before it reaches the quantitative validation at lines 140-145. + + # Test case: quantitative default action (tuple) with any actions will fail the basic validation + actions = {"action1": Beta(), "action2": Beta()} + + with pytest.raises(AttributeError, match="The default action must be valid action defined in the actions set."): + DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action=("action1", (0.5, 0.5))) + + +def test_mab_model_post_init_standard_default_action_validation(epsilon=0.1): + """Test model_post_init validation for standard default action requirements.""" + from pybandits.quantitative_model import SmabZoomingModel + + # Create quantitative actions + actions = {"action1": SmabZoomingModel.cold_start(), "action2": SmabZoomingModel.cold_start()} + + # Test case: standard default action (string) with quantitative model - should raise AttributeError + with pytest.raises(AttributeError, match="Standard default action requires a standard action model."): + DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action="action1") + + +def test_mab_model_post_init_valid_configurations(epsilon=0.1): + """Test model_post_init validation with valid configurations.""" + actions = {"action1": Beta(), "action2": Beta()} + + # Valid case 1: No epsilon, no default action, no delta + mab = DummyMab(actions=actions, strategy=ClassicBandit()) + assert mab.epsilon is None + assert mab.default_action is None + + # Valid case 2: Epsilon with valid default action + mab = DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action="action1") + assert mab.epsilon == 0.1 + assert mab.default_action == "action1" + + # Valid case 3: Epsilon without default action and delta (adaptive window) + mab = DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, delta=REFERENCE_DELTA) + assert mab.epsilon == epsilon + assert mab.default_action is None + assert mab.actions_manager.delta == REFERENCE_DELTA diff --git a/tests/test_model.py b/tests/test_model.py index c0adc0a6..39429797 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -30,6 +30,8 @@ BayesianLogisticRegression, BayesianNeuralNetwork, BayesianNeuralNetworkCC, + BayesianNeuralNetworkMO, + BayesianNeuralNetworkMOCC, Beta, BetaCC, BetaMO, @@ -524,3 +526,107 @@ def test_bayesian_logistic_regression_invalid_init(n_features: int, hidden_dim_l with pytest.raises(ValueError, match="The Bayesian Logistic Regression model should have only one layer."): BayesianLogisticRegression.cold_start(n_features=n_features, hidden_dim_list=hidden_dim_list) + + +######################################################################################################################## + + +# BayesianNeuralNetworkMO + + +@settings(deadline=500) +@given( + n_features=st.integers(min_value=1, max_value=3), + hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2), + n_objectives=st.integers(min_value=1, max_value=3), +) +def test_can_init_bayesian_neural_network_mo(n_features, hidden_dim_list, n_objectives): + dim_list = [n_features] + hidden_dim_list + if any(layer_dim <= 0 for layer_dim in dim_list) or n_objectives <= 0: + with pytest.raises((ValidationError, ValueError)): + models = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + BayesianNeuralNetworkMO(models=models, model_params=model_params) + else: + models = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + bnn_mo = BayesianNeuralNetworkMO(models=models, model_params=model_params) + assert len(bnn_mo.models) == n_objectives + assert all(isinstance(model, BayesianNeuralNetwork) for model in bnn_mo.models) + + +@settings(deadline=500) +@given( + n_features=st.integers(min_value=1, max_value=3), + hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2), + n_objectives=st.integers(min_value=1, max_value=3), + n_samples=st.integers(min_value=1, max_value=10), +) +def test_bayesian_neural_network_mo_sample_proba(n_features, hidden_dim_list, n_objectives, n_samples): + models = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + bnn_mo = BayesianNeuralNetworkMO(models=models, model_params=model_params) + + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) + prob_weights = bnn_mo.sample_proba(context=context) + + assert len(prob_weights) == n_samples + for prob_weight in prob_weights: + prob, weight = prob_weight + assert isinstance(prob, list) and len(prob) == n_objectives + assert isinstance(weight, list) and len(weight) == n_objectives + assert all(0 <= p <= 1 for p in prob) + + +@given( + n_features=st.integers(min_value=1, max_value=3), + hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2), + n_objectives=st.integers(min_value=1, max_value=3), + n_samples=st.integers(min_value=1, max_value=5), +) +def test_bayesian_neural_network_mo_update(n_features, hidden_dim_list, n_objectives, n_samples): + models = [ + BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list, update_method="VI") for _ in range(n_objectives) + ] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + bnn_mo = BayesianNeuralNetworkMO(models=models, model_params=model_params) + + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) + rewards = [[np.random.randint(0, 2) for _ in range(n_objectives)] for _ in range(n_samples)] + + # Should not raise any exceptions + bnn_mo.update(context=context, rewards=rewards) + + # Test with invalid rewards shape + invalid_rewards = [[1] * (n_objectives + 1) for _ in range(n_samples)] + with pytest.raises((ValueError, AttributeError)): + bnn_mo.update(context=context, rewards=invalid_rewards) + + +######################################################################################################################## + + +# BayesianNeuralNetworkMOCC + + +@settings(deadline=500) +@given( + n_features=st.integers(min_value=1, max_value=3), + hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2), + n_objectives=st.integers(min_value=1, max_value=3), + cost=st.floats(allow_nan=False, allow_infinity=False), +) +def test_can_init_bayesian_neural_network_mo_cc(n_features, hidden_dim_list, n_objectives, cost): + dim_list = [n_features] + hidden_dim_list + if any(layer_dim <= 0 for layer_dim in dim_list) or n_objectives <= 0 or cost < 0: + with pytest.raises((ValidationError, ValueError)): + models = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + BayesianNeuralNetworkMOCC(models=models, model_params=model_params, cost=cost) + else: + models = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)] + model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list) + bnn_mo_cc = BayesianNeuralNetworkMOCC(models=models, model_params=model_params, cost=cost) + assert len(bnn_mo_cc.models) == n_objectives + assert bnn_mo_cc.cost == cost + assert all(isinstance(model, BayesianNeuralNetwork) for model in bnn_mo_cc.models) diff --git a/tests/test_offline_policy_evaluator.py b/tests/test_offline_policy_evaluator.py index d275912a..8d2913d5 100644 --- a/tests/test_offline_policy_evaluator.py +++ b/tests/test_offline_policy_evaluator.py @@ -14,7 +14,7 @@ import pybandits from pybandits import offline_policy_estimator -from pybandits.cmab import CmabBernoulli, CmabBernoulliCC +from pybandits.cmab import CmabBernoulli, CmabBernoulliCC, CmabBernoulliMO, CmabBernoulliMOCC from pybandits.offline_policy_estimator import BaseOfflinePolicyEstimator from pybandits.offline_policy_evaluator import OfflinePolicyEvaluator from pybandits.smab import ( @@ -250,8 +250,6 @@ def test_running_configuration( visualize = generate_random_bool() verbose = generate_random_bool() - if context and type(reward_feature) is List: - pass # CmabMO and CmabMOCC are not supported yet true_reward_feature = ( f"true_{reward_feature}" if isinstance(reward_feature, str) else [f"true_{r}" for r in reward_feature] ) @@ -279,12 +277,12 @@ def test_running_configuration( if context: if cost_feature: if type(reward_feature) is list: - return # CmabMOCC is not supported yet + mab = CmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=len(reward_feature)) else: mab = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=len(contextual_features)) else: if type(reward_feature) is list: - return # CmabMO is not supported yet + mab = CmabBernoulliMO.cold_start(action_ids=set(unique_actions), n_objectives=len(reward_feature)) else: mab = CmabBernoulli.cold_start(action_ids=set(unique_actions), n_features=len(contextual_features)) else: