cMAB Fast Update via Variational Inference

### Changes * Edited BaseBayesianLogisticRegression and inheritors on model.py to support variational inference by adding fast_inference control parameter on class attributes and adding control arguments on update method. * Edited BaseBayesianLogisticRegression to allow faster update via vectorization of PyMC operations. * Edited "update" UTs on test_cmab.py to support new inference mode. * Edited cMABs cold start function to support new inference mode. * Removed redundant test_execution_time.py. * Edited version on pyproject.toml.
PlaytikaOSS · Sep 23, 2024 · bdf207e · bdf207e
1 parent fcd0896
commit bdf207e
Show file tree

Hide file tree

Showing 7 changed files with 255 additions and 406 deletions.
diff --git a/pybandits/base.py b/pybandits/base.py
@@ -104,8 +104,21 @@ class BaseMab(PyBanditsBaseModel, ABC):
     @field_validator("actions", mode="before")
     @classmethod
     def at_least_2_actions_are_defined(cls, v):
+        # validate that at least 2 actions are defined
         if len(v) < 2:
             raise AttributeError("At least 2 actions should be defined.")
+        # validate that all actions are of the same configuration
+        action_models = list(v.values())
+        first_action = action_models[0]
+        first_action_type = type(first_action)
+        first_action_state_keys = first_action.model_dump(mode="json").keys()
+        if any(
+            not isinstance(action, first_action_type)
+            or action.model_dump(mode="json").keys() != first_action_state_keys
+            for action in action_models[1:]
+        ):
+            raise AttributeError("All actions should follow the same configuration.")
+
         return v
 
     @model_validator(mode="after")

diff --git a/pybandits/cmab.py b/pybandits/cmab.py
@@ -32,6 +32,7 @@
     BaseBayesianLogisticRegression,
     BayesianLogisticRegression,
     BayesianLogisticRegressionCC,
+    UpdateMethods,
     create_bayesian_logistic_regression_cc_cold_start,
     create_bayesian_logistic_regression_cold_start,
 )
@@ -63,7 +64,8 @@ class BaseCmabBernoulli(BaseMab):
     predict_with_proba: bool
     predict_actions_randomly: bool
 
-    @field_validator("actions")
+    @field_validator("actions", mode="after")
+    @classmethod
     def check_bayesian_logistic_regression_models_len(cls, v):
         blr_betas_len = [len(b.betas) for b in v.values()]
         if not all(blr_betas_len[0] == x for x in blr_betas_len):
@@ -329,6 +331,8 @@ def create_cmab_bernoulli_cold_start(
     n_features: PositiveInt,
     epsilon: Optional[Float01] = None,
     default_action: Optional[ActionId] = None,
+    update_method: UpdateMethods = "MCMC",
+    update_kwargs: Optional[dict] = None,
 ) -> CmabBernoulli:
     """
     Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default
@@ -347,15 +351,21 @@ def create_cmab_bernoulli_cold_start(
     default_action: Optional[ActionId]
         The default action to select with a probability of epsilon when using the epsilon-greedy approach.
         If `default_action` is None, a random action from the action set will be selected with a probability of epsilon.
+    update_method: UpdateMethods, defaults to MCMC
+        Whether to utilize MCMC or variational inference for the Bayesian inference on update.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method of each of the action models.
 
     Returns
     -------
     cmab: CmabBernoulli
         Contextual Multi-Armed Bandit with strategy = ClassicBandit
     """
     actions = {}
-    for a in set(action_ids):
-        actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+    for action_id in set(action_ids):
+        actions[action_id] = create_bayesian_logistic_regression_cold_start(
+            n_betas=n_features, update_method=update_method, update_kwargs=update_kwargs
+        )
     mab = CmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action)
     mab.predict_actions_randomly = True
     return mab
@@ -368,6 +378,8 @@ def create_cmab_bernoulli_bai_cold_start(
     exploit_p: Optional[Float01] = None,
     epsilon: Optional[Float01] = None,
     default_action: Optional[ActionId] = None,
+    update_method: UpdateMethods = "MCMC",
+    update_kwargs: Optional[dict] = None,
 ) -> CmabBernoulliBAI:
     """
     Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action
@@ -395,6 +407,10 @@ def create_cmab_bernoulli_bai_cold_start(
     default_action: Optional[ActionId]
         The default action to select with a probability of epsilon when using the epsilon-greedy approach.
         If `default_action` is None, a random action from the action set will be selected with a probability of epsilon.
+    update_method: UpdateMethods, defaults to MCMC
+        Whether to utilize MCMC or variational inference for the Bayesian inference on update.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method of each of the action models.
 
     Returns
     -------
@@ -403,8 +419,14 @@ def create_cmab_bernoulli_bai_cold_start(
     """
     actions = {}
     for a in set(action_ids):
-        actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
-    mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon, default_action=default_action)
+        actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features, update_method=update_method)
+    mab = CmabBernoulliBAI(
+        actions=actions,
+        exploit_p=exploit_p,
+        epsilon=epsilon,
+        default_action=default_action,
+        update_kwargs=update_kwargs,
+    )
     mab.predict_actions_randomly = True
     return mab
 
@@ -416,6 +438,8 @@ def create_cmab_bernoulli_cc_cold_start(
     subsidy_factor: Optional[Float01] = None,
     epsilon: Optional[Float01] = None,
     default_action: Optional[ActionId] = None,
+    update_method: UpdateMethods = "MCMC",
+    update_kwargs: Optional[dict] = None,
 ) -> CmabBernoulliCC:
     """
     Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control
@@ -449,6 +473,10 @@ def create_cmab_bernoulli_cc_cold_start(
     default_action: Optional[ActionId]
         The default action to select with a probability of epsilon when using the epsilon-greedy approach.
         If `default_action` is None, a random action from the action set will be selected with a probability of epsilon.
+    update_method: UpdateMethods, defaults to MCMC
+        Whether to utilize MCMC or variational inference for the Bayesian inference on update.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method.
 
     Returns
     -------
@@ -457,7 +485,9 @@ def create_cmab_bernoulli_cc_cold_start(
     """
     actions = {}
     for a, cost in action_ids_cost.items():
-        actions[a] = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost)
+        actions[a] = create_bayesian_logistic_regression_cc_cold_start(
+            n_betas=n_features, cost=cost, update_method=update_method, update_kwargs=update_kwargs
+        )
     mab = CmabBernoulliCC(
         actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon, default_action=default_action
     )