From 157807d5284f0eaccef4b2869a9579389a4e3077 Mon Sep 17 00:00:00 2001
From: Shahar Bar <shaharbar1@gmail.com>
Date: Tue, 12 Aug 2025 13:03:54 +0300
Subject: [PATCH] Add multi-objective support in CMAB and BNN models (#106)

### Changes:
* Introduced `CmabBernoulliMO` and `CmabBernoulliMOCC` classes for multi-objective contextual bandit strategies.
* Added `BayesianNeuralNetworkMO` and `BayesianNeuralNetworkMOCC` classes for multi-objective Bayesian neural networks.
* Updated `ActionsManager` to include multi-objective action managers.
* Enhanced tests to cover new multi-objective functionalities and ensure proper behavior with multi-dimensional rewards.
* Refactored existing models and tests to accommodate new multi-objective structures.
---
 pybandits/actions_manager.py                |   4 +
 pybandits/cmab.py                           |  76 +++++++-
 pybandits/model.py                          | 102 +++++++++--
 pybandits/pydantic_version_compatibility.py |   9 +-
 pyproject.toml                              |   2 +-
 tests/test_actions_manager.py               |   2 +-
 tests/test_cmab.py                          | 191 ++++++++++++++++----
 tests/test_mab.py                           | 101 ++++++++++-
 tests/test_model.py                         | 106 +++++++++++
 tests/test_offline_policy_evaluator.py      |   8 +-
 10 files changed, 533 insertions(+), 68 deletions(-)

diff --git a/pybandits/actions_manager.py b/pybandits/actions_manager.py
index 60995947..54a61a70 100644
--- a/pybandits/actions_manager.py
+++ b/pybandits/actions_manager.py
@@ -21,6 +21,8 @@
     BaseBetaMO,
     BayesianNeuralNetwork,
     BayesianNeuralNetworkCC,
+    BayesianNeuralNetworkMO,
+    BayesianNeuralNetworkMOCC,
     Beta,
     BetaCC,
     BetaMO,
@@ -1067,3 +1069,5 @@ def _update_actions(
 
 CmabActionsManagerSO = CmabActionsManager[Union[BayesianNeuralNetwork, CmabZoomingModel]]
 CmabActionsManagerCC = CmabActionsManager[Union[BayesianNeuralNetworkCC, CmabZoomingModelCC]]
+CmabActionsManagerMO = CmabActionsManager[BayesianNeuralNetworkMO]
+CmabActionsManagerMOCC = CmabActionsManager[BayesianNeuralNetworkMOCC]
diff --git a/pybandits/cmab.py b/pybandits/cmab.py
index fe76f37b..f9cc4f1c 100644
--- a/pybandits/cmab.py
+++ b/pybandits/cmab.py
@@ -29,13 +29,24 @@
 from pybandits.actions_manager import CmabActionsManager, CmabActionsManagerCC, CmabActionsManagerSO
 from pybandits.base import ActionId, BinaryReward, CmabPredictions, PositiveProbability, Serializable
 from pybandits.mab import BaseMab
-from pybandits.model import BaseBayesianNeuralNetwork, BnnLayerParams, BnnParams, StudentTArray
+from pybandits.model import (
+    BaseBayesianNeuralNetwork,
+    BaseBayesianNeuralNetworkMO,
+    BayesianNeuralNetworkMO,
+    BayesianNeuralNetworkMOCC,
+    BnnLayerParams,
+    BnnParams,
+    StudentTArray,
+)
 from pybandits.pydantic_version_compatibility import validate_call
 from pybandits.quantitative_model import BaseCmabZoomingModel
 from pybandits.strategy import (
     BestActionIdentificationBandit,
     ClassicBandit,
     CostControlBandit,
+    MultiObjectiveBandit,
+    MultiObjectiveCostControlBandit,
+    MultiObjectiveStrategy,
 )
 
 
@@ -296,3 +307,66 @@ class CmabBernoulliCC(BaseCmabBernoulli):
     actions_manager: CmabActionsManagerCC
     strategy: CostControlBandit
     _predict_with_proba: bool = True
+
+
+class BaseCmabBernoulliMO(BaseCmabBernoulli, ABC):
+    """
+    Base model for a Contextual Multi-Armed Bandit with Thompson Sampling and Multi-Objective strategy.
+
+    Parameters
+    ----------
+    actions : Dict[ActionId, BaseBayesianNeuralNetworkMO]
+        The mapping from each action ID to its associated model.
+    strategy : MultiObjectiveStrategy
+        The strategy used to select actions.
+    """
+
+    actions: Dict[ActionId, BaseBayesianNeuralNetworkMO]
+    strategy: MultiObjectiveStrategy
+
+
+class CmabBernoulliMO(BaseCmabBernoulliMO):
+    """
+    Contextual Multi-Armed Bandit with Thompson Sampling and Multi-Objective strategy.
+
+    The reward for an action is a multidimensional vector. Actions are compared using Pareto order between their expected reward vectors.
+    Pareto optimal actions are those not strictly dominated by any other action.
+
+    Reference
+    ---------
+    Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015)
+    https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem
+
+    Parameters
+    ----------
+    actions : Dict[ActionId, BayesianNeuralNetworkMO]
+        The mapping from each action ID to its associated model.
+    strategy : MultiObjectiveBandit
+        The strategy used to select actions.
+    """
+
+    actions: Dict[ActionId, BayesianNeuralNetworkMO]
+    strategy: MultiObjectiveBandit
+    predict_with_proba: bool = False
+    predict_actions_randomly: bool = False
+
+
+class CmabBernoulliMOCC(BaseCmabBernoulliMO):
+    """
+    Contextual Multi-Armed Bandit with Thompson Sampling for Multi-Objective (MO) and Cost Control (CC) strategy.
+
+    This bandit allows the reward to be a multidimensional vector and includes control of the action cost, merging
+    Multi-Objective and Cost Control strategies.
+
+    Parameters
+    ----------
+    actions : Dict[ActionId, BayesianNeuralNetworkMOCC]
+        The mapping from each action ID to its associated model.
+    strategy : MultiObjectiveCostControlBandit
+        The strategy used to select actions.
+    """
+
+    actions: Dict[ActionId, BayesianNeuralNetworkMOCC]
+    strategy: MultiObjectiveCostControlBandit
+    predict_with_proba: bool = True
+    predict_actions_randomly: bool = False
diff --git a/pybandits/model.py b/pybandits/model.py
index d8af4159..cfc2bf92 100644
--- a/pybandits/model.py
+++ b/pybandits/model.py
@@ -183,23 +183,6 @@ class BaseBetaMO(ModelMO, ABC):
 
     models: List[Beta]
 
-    @validate_call
-    def sample_proba(self, n_samples: PositiveInt) -> List[MOProbability]:
-        """
-        Sample the probability of getting a positive reward.
-
-        Parameters
-        ----------
-        n_samples : PositiveInt
-            Number of samples to draw.
-
-        Returns
-        -------
-        prob: List[MOProbability]
-            Probabilities of getting a positive reward for each sample and objective.
-        """
-        return [list(p) for p in zip(*[model.sample_proba(n_samples=n_samples) for model in self.models])]
-
     @classmethod
     def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO":
         """
@@ -854,6 +837,91 @@ class BayesianNeuralNetworkCC(BaseBayesianNeuralNetwork, ModelCC):
     """
 
 
+class BaseBayesianNeuralNetworkMO(ModelMO, ABC):
+    """
+    Base class for Bayesian Neural Network with multi-objective.
+
+    Parameters
+    ----------
+    models : List[BayesianNeuralNetwork]
+        The list of Bayesian Neural Network models for each objective.
+    """
+
+    models: List[BayesianNeuralNetwork]
+
+    @classmethod
+    def cold_start(
+        cls,
+        n_objectives: PositiveInt,
+        n_features: PositiveInt,
+        hidden_dim_list: Optional[List[PositiveInt]] = None,
+        update_method: UpdateMethods = "MCMC",
+        update_kwargs: Optional[dict] = None,
+        dist_params_init: Optional[Dict[str, float]] = None,
+        **kwargs,
+    ) -> "BayesianNeuralNetworkMO":
+        """
+        Initialize a multi-objective Bayesian Neural Network with a cold start.
+
+        Parameters
+        ----------
+        n_objectives : PositiveInt
+            Number of objectives (models) to create.
+        n_features : PositiveInt
+            Number of input features for each network.
+        hidden_dim_list : Optional[List[PositiveInt]], optional
+            List of dimensions for the hidden layers of each network.
+        update_method : UpdateMethods
+            Method to update the networks.
+        update_kwargs : Optional[dict], optional
+            Additional keyword arguments for the update method.
+        dist_params_init : Optional[Dict[str, float]], optional
+            Initial distribution parameters for the network weights and biases.
+        **kwargs
+            Additional keyword arguments.
+
+        Returns
+        -------
+        BayesianNeuralNetworkMO
+            A multi-objective BNN with the specified number of objectives.
+        """
+        models = [
+            BayesianNeuralNetwork.cold_start(
+                n_features=n_features,
+                hidden_dim_list=hidden_dim_list,
+                update_method=update_method,
+                update_kwargs=update_kwargs,
+                dist_params_init=dist_params_init,
+            )
+            for _ in range(n_objectives)
+        ]
+        return cls(models=models, **kwargs)
+
+
+class BayesianNeuralNetworkMO(BaseBayesianNeuralNetworkMO):
+    """
+    Bayesian Neural Network model for multi-objective.
+
+    Parameters
+    ----------
+    models : List[BayesianNeuralNetwork]
+        The list of Bayesian Neural Network models for each objective.
+    """
+
+
+class BayesianNeuralNetworkMOCC(BaseBayesianNeuralNetworkMO, ModelMO, ModelCC):
+    """
+    Bayesian Neural Network model for multi-objective with cost control.
+
+    Parameters
+    ----------
+    models : List[BayesianNeuralNetwork]
+        The list of Bayesian Neural Network models for each objective.
+    cost : NonNegativeFloat
+        Cost associated to the Bayesian Neural Network model.
+    """
+
+
 class BayesianLogisticRegression(BayesianNeuralNetwork):
     """
     A Bayesian Logistic Regression model that inherits from BayesianNeuralNetwork.
diff --git a/pybandits/pydantic_version_compatibility.py b/pybandits/pydantic_version_compatibility.py
index 6b5ef594..4ba4d57f 100644
--- a/pybandits/pydantic_version_compatibility.py
+++ b/pybandits/pydantic_version_compatibility.py
@@ -49,7 +49,7 @@
 PYDANTIC_VERSION_2 = "2"
 
 
-def _get_major_pydantic_version():
+def _get_major_pydantic_version() -> str:
     """
     Get the major version of pydantic.
 
@@ -58,11 +58,8 @@ def _get_major_pydantic_version():
     major_version : str
         The major version of pydantic.
     """
-    try:
-        major_version = _VERSION.split(".")[0]
-        return major_version
-    except Exception as e:
-        raise ValueError(f"Error getting Pydantic version: {e}")
+    major_version = _VERSION.split(".")[0]
+    return major_version
 
 
 pydantic_version = _get_major_pydantic_version()
diff --git a/pyproject.toml b/pyproject.toml
index cd0f6d8f..4356cab4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "4.0.11"
+version = "4.0.12"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea <dariod@playtika.com>",
diff --git a/tests/test_actions_manager.py b/tests/test_actions_manager.py
index e0d86927..36482cb6 100644
--- a/tests/test_actions_manager.py
+++ b/tests/test_actions_manager.py
@@ -25,7 +25,7 @@
 
 
 class DummyActionsManager(ActionsManager):
-    actions: Dict[ActionId, Union[Beta, BetaMO]]
+    actions: Dict[ActionId, Union[Beta, BetaMO, SmabZoomingModel]]
 
     def _update_actions(
         self,
diff --git a/tests/test_cmab.py b/tests/test_cmab.py
index ef81b98f..f9357255 100644
--- a/tests/test_cmab.py
+++ b/tests/test_cmab.py
@@ -36,11 +36,20 @@
 import pybandits
 from pybandits.actions_manager import CmabModelType
 from pybandits.base import ActionId, Float01, PositiveProbability, PyBanditsBaseModel
-from pybandits.cmab import BaseCmabBernoulli, CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC
+from pybandits.cmab import (
+    BaseCmabBernoulli,
+    CmabBernoulli,
+    CmabBernoulliBAI,
+    CmabBernoulliCC,
+    CmabBernoulliMO,
+    CmabBernoulliMOCC,
+)
 from pybandits.model import (
     BaseBayesianNeuralNetwork,
     BayesianNeuralNetwork,
     BayesianNeuralNetworkCC,
+    BayesianNeuralNetworkMO,
+    BayesianNeuralNetworkMOCC,
     BnnLayerParams,
     BnnParams,
     StudentTArray,
@@ -51,7 +60,13 @@
     ValidationError,
 )
 from pybandits.quantitative_model import BaseCmabZoomingModel, CmabZoomingModel, CmabZoomingModelCC, QuantitativeModel
-from pybandits.strategy import BestActionIdentificationBandit, ClassicBandit, CostControlBandit
+from pybandits.strategy import (
+    BestActionIdentificationBandit,
+    ClassicBandit,
+    CostControlBandit,
+    MultiObjectiveBandit,
+    MultiObjectiveCostControlBandit,
+)
 from tests.test_actions_manager import REFERENCE_DELTA
 from tests.utils import (
     FakeApproximation,
@@ -184,15 +199,21 @@ def _create_actions(
         hidden_dim_list: List[int],
         update_method: UpdateMethods,
         update_kwargs: Optional[Dict[str, Any]],
+        n_objectives: Optional[PositiveInt] = None,
     ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         if len(self.model_types) < len(action_ids):
             indices = np.random.randint(0, len(self.model_types), len(action_ids))
             self.model_types = [self.model_types[i] for i in indices]
-        if all(model in [BayesianNeuralNetworkCC, CmabZoomingModelCC] for model in self.model_types):
+        if all(
+            model in [BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC, CmabZoomingModelCC]
+            for model in self.model_types
+        ):
             # Generate random costs
             costs = costs.draw(cost_strategy(n_actions=len(action_ids)))
             costs = [
-                cost if model_type in [BayesianNeuralNetworkCC] else partial(_quantitative_cost, cost=cost)
+                cost
+                if model_type in [BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC]
+                else partial(_quantitative_cost, cost=cost)
                 for cost, model_type in zip(costs, self.model_types)
             ]
         else:
@@ -202,36 +223,56 @@ def _create_actions(
         base_model_cold_start_kwargs = dict(
             n_features=n_features, hidden_dim_list=hidden_dim_list, **model_cold_start_kwargs
         )
-        model_params = BaseBayesianNeuralNetwork.create_model_params(
-            n_features=n_features, hidden_dim_list=hidden_dim_list
-        )
 
-        if costs is not None:
-            # Handle models with costs (BayesianNeuralNetworkCC or CmabZoomingModelCC)
-            actions_dict = {}
-            for action_id, model_type, cost in zip(action_ids, self.model_types, costs):
-                if issubclass(model_type, BayesianNeuralNetworkCC):
-                    actions_dict[action_id] = model_type(
-                        model_params=model_params,
-                        **model_cold_start_kwargs,
-                        cost=cost,
+        if n_objectives is None:
+            # Single-objective models
+            if costs is not None:
+                # Handle models with costs
+                return {
+                    action_id: model_type.cold_start(
+                        n_features=n_features, hidden_dim_list=hidden_dim_list, cost=cost, **model_cold_start_kwargs
                     )
-                else:  # CmabZoomingModelCC
-                    actions_dict[action_id] = model_type.cold_start(
+                    if issubclass(model_type, BayesianNeuralNetworkCC)
+                    else model_type.cold_start(
                         dimension=1, base_model_cold_start_kwargs=base_model_cold_start_kwargs, cost=cost
+                    )  # CmabZoomingModelCC
+                    for action_id, model_type, cost in zip(action_ids, self.model_types, costs)
+                }, base_model_cold_start_kwargs
+            else:
+                # Handle models without costs
+                return {
+                    action_id: model_type.cold_start(
+                        n_features=n_features, hidden_dim_list=hidden_dim_list, **model_cold_start_kwargs
                     )
+                    if issubclass(model_type, BayesianNeuralNetwork)
+                    else model_type.cold_start(
+                        dimension=1, base_model_cold_start_kwargs=base_model_cold_start_kwargs
+                    )  # CmabZoomingModel
+                    for action_id, model_type in zip(action_ids, self.model_types)
+                }, base_model_cold_start_kwargs
         else:
-            # Handle models without costs (BayesianNeuralNetwork or CmabZoomingModel)
-            actions_dict = {}
-            for action_id, model_type in zip(action_ids, self.model_types):
-                if issubclass(model_type, BayesianNeuralNetwork):
-                    actions_dict[action_id] = model_type(model_params=model_params, **model_cold_start_kwargs)
-                else:  # CmabZoomingModel
-                    actions_dict[action_id] = model_type.cold_start(
-                        dimension=1,
-                        base_model_cold_start_kwargs=base_model_cold_start_kwargs,
+            # Multi-objective models
+            if costs is not None:
+                return {
+                    action_id: model_type.cold_start(
+                        n_objectives=n_objectives,
+                        n_features=n_features,
+                        hidden_dim_list=hidden_dim_list,
+                        cost=cost,
+                        **model_cold_start_kwargs,
                     )
-        return actions_dict, base_model_cold_start_kwargs
+                    for action_id, model_type, cost in zip(action_ids, self.model_types, costs)
+                }, base_model_cold_start_kwargs
+            else:
+                return {
+                    action_id: model_type.cold_start(
+                        n_objectives=n_objectives,
+                        n_features=n_features,
+                        hidden_dim_list=hidden_dim_list,
+                        **model_cold_start_kwargs,
+                    )
+                    for action_id, model_type in zip(action_ids, self.model_types)
+                }, base_model_cold_start_kwargs
 
     def create_cmab_and_actions(
         self,
@@ -239,6 +280,7 @@ def create_cmab_and_actions(
         epsilon: Optional[Float01],
         delta: Optional[PositiveProbability],
         costs: st.SearchStrategy,
+        n_objectives: st.SearchStrategy[PositiveInt],
         exploit_p: Union[st.SearchStrategy[Optional[Float01]], Optional[float]],
         subsidy_factor: Union[st.SearchStrategy[Optional[Float01]], Optional[float]],
         n_features: PositiveInt,
@@ -246,8 +288,13 @@ def create_cmab_and_actions(
         update_method: UpdateMethods,
         update_kwargs: Optional[Dict[str, Any]],
     ) -> Tuple[BaseCmabBernoulli, Dict[ActionId, CmabModelType], Dict[str, Any]]:
+        n_objectives = (
+            n_objectives.draw(st.integers(min_value=1, max_value=10))
+            if self.cmab_class in [CmabBernoulliMO, CmabBernoulliMOCC]
+            else None
+        )
         actions, base_model_cold_start_kwargs = self._create_actions(
-            action_ids, costs, n_features, hidden_dim_list, update_method, update_kwargs
+            action_ids, costs, n_features, hidden_dim_list, update_method, update_kwargs, n_objectives
         )
         default_action = action_ids[0] if epsilon and not delta else None
         if default_action and isinstance(self.model_types[0], QuantitativeModel):
@@ -276,6 +323,9 @@ def create_cmab_and_actions(
         if any(isinstance(model, BaseBayesianNeuralNetwork) for model in actions.values()):
             kwargs.update(base_model_cold_start_kwargs)
 
+        # For cold start test
+        if self.cmab_class in [CmabBernoulliMO, CmabBernoulliMOCC]:
+            kwargs["n_objectives"] = n_objectives
         return cmab, actions, kwargs
 
 
@@ -289,6 +339,16 @@ def create_cmab_and_actions(
         CostControlBandit,
         [BayesianNeuralNetworkCC, CmabZoomingModelCC],
     ),
+    "cmab_mo": ModelTestConfig(
+        CmabBernoulliMO,
+        MultiObjectiveBandit,
+        [BayesianNeuralNetworkMO],
+    ),
+    "cmab_mocc": ModelTestConfig(
+        CmabBernoulliMOCC,
+        MultiObjectiveCostControlBandit,
+        [BayesianNeuralNetworkMOCC],
+    ),
 }
 
 
@@ -306,6 +366,7 @@ def create_cmab_and_actions(
     epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)),
     delta=st.one_of(st.none(), st.just(0.1)),
     costs=st.data(),
+    n_objectives=st.data(),
     n_features=st.integers(min_value=1, max_value=5),
     hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2),
     subsidy_factor=st.data(),
@@ -319,6 +380,7 @@ def test_cold_start(
     epsilon: Optional[float],
     delta,
     costs,
+    n_objectives,
     n_features,
     hidden_dim_list,
     exploit_p,
@@ -332,6 +394,7 @@ def test_cold_start(
         epsilon,
         delta,
         costs,
+        n_objectives,
         exploit_p,
         subsidy_factor,
         n_features,
@@ -351,9 +414,14 @@ def test_cold_start(
             action for action, model in zip(action_ids, config.model_types) if issubclass(model, QuantitativeModel)
         },
     }
-    if all(model in [BayesianNeuralNetworkCC, CmabZoomingModelCC] for model in config.model_types):
+    if all(
+        model in [BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC, CmabZoomingModelCC]
+        for model in config.model_types
+    ):
         cold_start_kwargs["action_ids_cost"] = {
-            action: model.cost for action, model in actions.items() if isinstance(model, (BayesianNeuralNetworkCC))
+            action: model.cost
+            for action, model in actions.items()
+            if isinstance(model, (BayesianNeuralNetworkCC, BayesianNeuralNetworkMOCC))
         }
         cold_start_kwargs["quantitative_action_ids_cost"] = {
             action: model.cost for action, model in actions.items() if isinstance(model, CmabZoomingModelCC)
@@ -370,6 +438,7 @@ def test_cold_start(
     n_features=st.integers(min_value=1, max_value=5),
     hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2),
     costs=st.data(),
+    n_objectives=st.data(),
     subsidy_factor=st.data(),
     exploit_p=st.data(),
     update_method=st.sampled_from(literal_update_methods),
@@ -381,6 +450,7 @@ def test_bad_initialization(
     n_features: int,
     hidden_dim_list: List[PositiveInt],
     costs,
+    n_objectives,
     exploit_p,
     subsidy_factor,
     update_method,
@@ -559,6 +629,11 @@ def test_update(
     # Test updates with generated data
     actions_to_update = sample_with_replacement(action_ids, n_samples)
     # Generate quantities only if there are any QuantitativeModel actions
+    # Handle multi-objective rewards for MO models
+    if config.cmab_class in [CmabBernoulliMO, CmabBernoulliMOCC]:
+        # Multi-objective rewards: list of lists
+        n_objectives = 2  # Default for testing
+        reward_data = [[np.random.randint(0, 2) for _ in range(n_objectives)] for _ in range(n_samples)]
     for_update_kwargs = {"actions": actions_to_update, "rewards": reward_data}
     if any(isinstance(model, BaseCmabZoomingModel) for model in cmab.actions.values()):
         quantity_data = np.random.random(size=n_samples).tolist()
@@ -837,7 +912,13 @@ def cmab_old_state(draw, CmabClass=None):
     return state
 
 
-OLD_STATE_TEST_CONFIGS = {"cmab": CmabBernoulli, "cmab_bai": CmabBernoulliBAI, "cmab_cc": CmabBernoulliCC}
+OLD_STATE_TEST_CONFIGS = {
+    "cmab": CmabBernoulli,
+    "cmab_bai": CmabBernoulliBAI,
+    "cmab_cc": CmabBernoulliCC,
+    "cmab_mo": CmabBernoulliMO,
+    "cmab_mocc": CmabBernoulliMOCC,
+}
 
 
 @pytest.mark.parametrize("CmabClass", OLD_STATE_TEST_CONFIGS.values(), ids=OLD_STATE_TEST_CONFIGS.keys())
@@ -1003,3 +1084,49 @@ def test_cmab_predict_shape_mismatch(dim_list):
         mab.predict(context=context)
     with pytest.raises(AttributeError):
         mab.predict(context=[])
+
+
+@settings(deadline=500)
+@given(
+    st.integers(min_value=1, max_value=100),
+    st.integers(min_value=1, max_value=5),
+    st.sampled_from(literal_update_methods),
+    st.just([2]),
+    st.integers(min_value=2, max_value=3),
+)
+def test_cmab_mo_update_shape_mismatch(n_samples, n_features, update_method, hidden_dim_list, n_objectives):
+    actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
+    # Random context matrix reused by both update calls below
+    context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
+
+    # Create multi-objective models
+    models_a1 = [
+        BayesianNeuralNetwork.cold_start(
+            n_features=n_features, hidden_dim_list=hidden_dim_list, update_method=update_method
+        )
+        for _ in range(n_objectives)
+    ]
+    models_a2 = [
+        BayesianNeuralNetwork.cold_start(
+            n_features=n_features, hidden_dim_list=hidden_dim_list, update_method=update_method
+        )
+        for _ in range(n_objectives)
+    ]
+    model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list)
+
+    mab = CmabBernoulliMO(
+        actions={
+            "a1": BayesianNeuralNetworkMO(models=models_a1, model_params=model_params),
+            "a2": BayesianNeuralNetworkMO(models=models_a2, model_params=model_params),
+        }
+    )
+
+    # Test with wrong number of objectives in rewards
+    wrong_rewards = [[np.random.choice([0, 1]) for _ in range(n_objectives + 1)] for _ in range(n_samples)]
+    with pytest.raises(AttributeError):
+        mab.update(context=context, actions=actions, rewards=wrong_rewards)
+
+    # Test with single-objective rewards (should fail for MO model)
+    single_rewards = np.random.choice([0, 1], size=n_samples).tolist()
+    with pytest.raises(AttributeError):
+        mab.update(context=context, actions=actions, rewards=single_rewards)
diff --git a/tests/test_mab.py b/tests/test_mab.py
index 74d6e964..314a1f48 100644
--- a/tests/test_mab.py
+++ b/tests/test_mab.py
@@ -20,7 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from typing import Dict, List, Optional, Set, Union
+from typing import Dict, List, Optional, Set, Tuple, Union
 
 import hypothesis.strategies as st
 import numpy as np
@@ -28,17 +28,17 @@
 from hypothesis import given
 from pytest_mock import MockerFixture
 
-from pybandits.base import ActionId, BinaryReward, Float01, Probability
+from pybandits.base import ActionId, BinaryReward, Float01, Probability, UnifiedActionId
 from pybandits.mab import BaseMab
 from pybandits.model import Beta, BetaCC
 from pybandits.pydantic_version_compatibility import ValidationError
 from pybandits.strategy import ClassicBandit
-from tests.test_actions_manager import DummyActionsManager
+from tests.test_actions_manager import REFERENCE_DELTA, DummyActionsManager
 
 
 class DummyMab(BaseMab):
     epsilon: Optional[Float01] = None
-    default_action: Optional[ActionId] = None
+    default_action: Optional[UnifiedActionId] = None
     actions_manager: DummyActionsManager
 
     def _update(
@@ -57,7 +57,7 @@ def predict(
         valid_actions = self._get_valid_actions(forbidden_actions)
         return np.random.choice(valid_actions)
 
-    def get_state(self) -> (str, dict):
+    def get_state(self) -> Tuple[str, dict]:
         model_name = self.__class__.__name__
         state: dict = {"actions": self.actions}
         return model_name, state
@@ -172,3 +172,94 @@ def test_adaptive_window_without_epsilon_fails(adaptive_window_size, epsilon):
             epsilon=epsilon,
             default_action="a1",
         )
+
+
+########################################################################################################################
+
+
+# MAB model_post_init validation tests
+
+
+def test_mab_model_post_init_adaptive_window_epsilon_validation():
+    """Test model_post_init validation for adaptive window with epsilon greedy requirements."""
+    actions = {"action1": Beta(), "action2": Beta()}
+
+    # Test case 1: delta is set but epsilon is None - should raise ValueError
+    with pytest.raises(
+        ValueError, match="Adaptive window requires epsilon greedy super strategy with not default action."
+    ):
+        DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=None, default_action=None, delta=REFERENCE_DELTA)
+
+    # Test case 2: delta is set, epsilon is provided, but default_action is also provided - should raise ValueError
+    with pytest.raises(
+        ValueError, match="Adaptive window requires epsilon greedy super strategy with not default action."
+    ):
+        DummyMab(
+            actions=actions, strategy=ClassicBandit(), epsilon=0.1, default_action="action1", delta=REFERENCE_DELTA
+        )
+
+
+def test_mab_model_post_init_default_action_validation():
+    """Test model_post_init validation for default action requirements."""
+    actions = {"action1": Beta(), "action2": Beta()}
+
+    # Test case 1: epsilon is not provided but default_action is provided - should raise AttributeError
+    with pytest.raises(AttributeError, match="A default action should only be defined when epsilon is defined."):
+        DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=None, default_action="action1")
+
+
+def test_mab_model_post_init_invalid_default_action(epsilon=0.1):
+    """Test model_post_init validation for invalid default action."""
+    actions = {"action1": Beta(), "action2": Beta()}
+
+    # Test case: default_action is not in the actions set - should raise AttributeError
+    with pytest.raises(AttributeError, match="The default action must be valid action defined in the actions set."):
+        DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action="invalid_action")
+
+
+def test_mab_model_post_init_quantitative_default_action_validation(epsilon=0.1):
+    """Test model_post_init validation for quantitative default action requirements."""
+
+    # This test is demonstrating that the current validation logic has an issue:
+    # When default_action is a tuple, it checks if the entire tuple is in self.actions keys,
+    # but actions only contains string keys. This causes the validation to fail at lines 138-139
+    # before it reaches the quantitative validation at lines 140-145.
+
+    # Test case: quantitative default action (tuple) with any actions will fail the basic validation
+    actions = {"action1": Beta(), "action2": Beta()}
+
+    with pytest.raises(AttributeError, match="The default action must be valid action defined in the actions set."):
+        DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action=("action1", (0.5, 0.5)))
+
+
+def test_mab_model_post_init_standard_default_action_validation(epsilon=0.1):
+    """Test model_post_init validation for standard default action requirements."""
+    from pybandits.quantitative_model import SmabZoomingModel
+
+    # Create quantitative actions
+    actions = {"action1": SmabZoomingModel.cold_start(), "action2": SmabZoomingModel.cold_start()}
+
+    # Test case: standard default action (string) with quantitative model - should raise AttributeError
+    with pytest.raises(AttributeError, match="Standard default action requires a standard action model."):
+        DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action="action1")
+
+
+def test_mab_model_post_init_valid_configurations(epsilon=0.1):
+    """Test model_post_init validation with valid configurations."""
+    actions = {"action1": Beta(), "action2": Beta()}
+
+    # Valid case 1: No epsilon, no default action, no delta
+    mab = DummyMab(actions=actions, strategy=ClassicBandit())
+    assert mab.epsilon is None
+    assert mab.default_action is None
+
+    # Valid case 2: Epsilon with valid default action
+    mab = DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action="action1")
+    assert mab.epsilon == 0.1
+    assert mab.default_action == "action1"
+
+    # Valid case 3: Epsilon with delta (adaptive window) and no default action
+    mab = DummyMab(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, delta=REFERENCE_DELTA)
+    assert mab.epsilon == epsilon
+    assert mab.default_action is None
+    assert mab.actions_manager.delta == REFERENCE_DELTA
diff --git a/tests/test_model.py b/tests/test_model.py
index c0adc0a6..39429797 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -30,6 +30,8 @@
     BayesianLogisticRegression,
     BayesianNeuralNetwork,
     BayesianNeuralNetworkCC,
+    BayesianNeuralNetworkMO,
+    BayesianNeuralNetworkMOCC,
     Beta,
     BetaCC,
     BetaMO,
@@ -524,3 +526,107 @@ def test_bayesian_logistic_regression_invalid_init(n_features: int, hidden_dim_l
 
     with pytest.raises(ValueError, match="The Bayesian Logistic Regression model should have only one layer."):
         BayesianLogisticRegression.cold_start(n_features=n_features, hidden_dim_list=hidden_dim_list)
+
+
+########################################################################################################################
+
+
+# BayesianNeuralNetworkMO
+
+
+@settings(deadline=500)
+@given(
+    n_features=st.integers(min_value=1, max_value=3),
+    hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2),
+    n_objectives=st.integers(min_value=1, max_value=3),
+)
+def test_can_init_bayesian_neural_network_mo(n_features, hidden_dim_list, n_objectives):
+    """Check that a multi-objective BNN can be assembled from one single-objective BNN per objective.
+
+    The strategies above only generate layer sizes and objective counts of at least 1, so there is
+    no invalid-input path to exercise here: construction is always expected to succeed.
+    """
+    models = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)]
+    model_params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list)
+
+    bnn_mo = BayesianNeuralNetworkMO(models=models, model_params=model_params)
+
+    assert len(bnn_mo.models) == n_objectives
+    assert all(isinstance(model, BayesianNeuralNetwork) for model in bnn_mo.models)
+
+
+@settings(deadline=500)
+@given(
+    n_features=st.integers(min_value=1, max_value=3),
+    hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2),
+    n_objectives=st.integers(min_value=1, max_value=3),
+    n_samples=st.integers(min_value=1, max_value=10),
+)
+def test_bayesian_neural_network_mo_sample_proba(n_features, hidden_dim_list, n_objectives, n_samples):
+    """Sampling yields, per context row, one probability in [0, 1] and one weight for each objective."""
+    networks = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)]
+    bnn_mo = BayesianNeuralNetworkMO(
+        models=networks, model_params=BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list)
+    )
+    context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
+    samples = bnn_mo.sample_proba(context=context)
+
+    assert len(samples) == n_samples
+    for probs, weights in samples:
+        assert isinstance(probs, list) and len(probs) == n_objectives
+        assert isinstance(weights, list) and len(weights) == n_objectives
+        assert all(0 <= p <= 1 for p in probs)
+
+
+@given(
+    n_features=st.integers(min_value=1, max_value=3),
+    hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2),
+    n_objectives=st.integers(min_value=1, max_value=3),
+    n_samples=st.integers(min_value=1, max_value=5),
+)
+def test_bayesian_neural_network_mo_update(n_features, hidden_dim_list, n_objectives, n_samples):
+    """Update accepts one reward per objective for every sample and rejects rows with an extra reward."""
+    networks = [
+        BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list, update_method="VI") for _ in range(n_objectives)
+    ]
+    bnn_mo = BayesianNeuralNetworkMO(
+        models=networks, model_params=BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list)
+    )
+    context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
+    # Well-formed rewards: one binary outcome per objective for every sample; the call must succeed.
+    valid_rewards = [[np.random.randint(0, 2) for _ in range(n_objectives)] for _ in range(n_samples)]
+    bnn_mo.update(context=context, rewards=valid_rewards)
+
+    # Malformed rewards: every row carries one reward more than there are objectives.
+    oversized_rewards = [[1] * (n_objectives + 1) for _ in range(n_samples)]
+    with pytest.raises((ValueError, AttributeError)):
+        bnn_mo.update(context=context, rewards=oversized_rewards)
+
+
+########################################################################################################################
+
+
+# BayesianNeuralNetworkMOCC
+
+
+@settings(deadline=500)
+@given(
+    n_features=st.integers(min_value=1, max_value=3),
+    hidden_dim_list=st.lists(st.integers(min_value=1, max_value=3), min_size=0, max_size=2),
+    n_objectives=st.integers(min_value=1, max_value=3),
+    cost=st.floats(allow_nan=False, allow_infinity=False),
+)
+def test_can_init_bayesian_neural_network_mo_cc(n_features, hidden_dim_list, n_objectives, cost):
+    """Building a cost-aware multi-objective BNN succeeds for valid inputs and raises for invalid ones."""
+    if cost < 0 or n_objectives <= 0 or any(dim <= 0 for dim in (n_features, *hidden_dim_list)):
+        with pytest.raises((ValidationError, ValueError)):
+            networks = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)]
+            params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list)
+            BayesianNeuralNetworkMOCC(models=networks, model_params=params, cost=cost)
+        return
+    networks = [BayesianNeuralNetwork.cold_start(n_features, hidden_dim_list) for _ in range(n_objectives)]
+    params = BayesianNeuralNetwork.create_model_params(n_features, hidden_dim_list)
+    bnn_mo_cc = BayesianNeuralNetworkMOCC(models=networks, model_params=params, cost=cost)
+    assert len(bnn_mo_cc.models) == n_objectives
+    assert bnn_mo_cc.cost == cost
+    assert all(isinstance(net, BayesianNeuralNetwork) for net in bnn_mo_cc.models)
diff --git a/tests/test_offline_policy_evaluator.py b/tests/test_offline_policy_evaluator.py
index d275912a..8d2913d5 100644
--- a/tests/test_offline_policy_evaluator.py
+++ b/tests/test_offline_policy_evaluator.py
@@ -14,7 +14,7 @@
 
 import pybandits
 from pybandits import offline_policy_estimator
-from pybandits.cmab import CmabBernoulli, CmabBernoulliCC
+from pybandits.cmab import CmabBernoulli, CmabBernoulliCC, CmabBernoulliMO, CmabBernoulliMOCC
 from pybandits.offline_policy_estimator import BaseOfflinePolicyEstimator
 from pybandits.offline_policy_evaluator import OfflinePolicyEvaluator
 from pybandits.smab import (
@@ -250,8 +250,6 @@ def test_running_configuration(
     visualize = generate_random_bool()
     verbose = generate_random_bool()
 
-    if context and type(reward_feature) is List:
-        pass  # CmabMO and CmabMOCC are not supported yet
     true_reward_feature = (
         f"true_{reward_feature}" if isinstance(reward_feature, str) else [f"true_{r}" for r in reward_feature]
     )
@@ -279,12 +277,12 @@ def test_running_configuration(
     if context:
         if cost_feature:
             if type(reward_feature) is list:
-                return  # CmabMOCC is not supported yet
+                mab = CmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=len(reward_feature))
             else:
                 mab = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=len(contextual_features))
         else:
             if type(reward_feature) is list:
-                return  # CmabMO is not supported yet
+                mab = CmabBernoulliMO.cold_start(action_ids=set(unique_actions), n_objectives=len(reward_feature))
             else:
                 mab = CmabBernoulli.cold_start(action_ids=set(unique_actions), n_features=len(contextual_features))
     else: