From f6ffd58fb8c2e7b4fd8557da7abdd6bfd337b661 Mon Sep 17 00:00:00 2001
From: Shahar Bar <shaharb@playtika.com>
Date: Mon, 2 Sep 2024 10:23:29 +0300
Subject: [PATCH] cMAB Fast Update via Variational Inference

 ### Changes
 * Edited BaseBayesianLogisticRegression and its subclasses in model.py to support variational inference by adding a fast_inference class attribute and moving the sampling arguments of the update method into a new update_kwargs class attribute.
 * Edited BaseBayesianLogisticRegression to allow faster update via vectorization of PyMC operations.
 * Edited "update" UTs on test_cmab.py to support new inference mode.
 * Edited the cMAB cold start functions to support the new inference mode.
 * Removed redundant test_execution_time.py.
 * Edited version on pyproject.toml.
---
 pybandits/cmab.py            |  26 ++-
 pybandits/model.py           | 176 ++++++++++++-------
 pyproject.toml               |   2 +-
 tests/test_cmab.py           | 101 ++++++++---
 tests/test_execution_time.py | 316 -----------------------------------
 5 files changed, 218 insertions(+), 403 deletions(-)
 delete mode 100644 tests/test_execution_time.py

diff --git a/pybandits/cmab.py b/pybandits/cmab.py
index d26a9b9..1beab7b 100644
--- a/pybandits/cmab.py
+++ b/pybandits/cmab.py
@@ -63,7 +63,8 @@ class BaseCmabBernoulli(BaseMab):
     predict_with_proba: bool
     predict_actions_randomly: bool
 
-    @field_validator("actions")
+    @field_validator("actions", mode="after")
+    @classmethod
     def check_bayesian_logistic_regression_models_len(cls, v):
         blr_betas_len = [len(b.betas) for b in v.values()]
         if not all(blr_betas_len[0] == x for x in blr_betas_len):
@@ -329,6 +330,7 @@ def create_cmab_bernoulli_cold_start(
     n_features: PositiveInt,
     epsilon: Optional[Float01] = None,
     default_action: Optional[ActionId] = None,
+    fast_inference: bool = False,
 ) -> CmabBernoulli:
     """
     Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default
@@ -347,6 +349,8 @@ def create_cmab_bernoulli_cold_start(
     default_action: Optional[ActionId]
         The default action to select with a probability of epsilon when using the epsilon-greedy approach.
         If `default_action` is None, a random action from the action set will be selected with a probability of epsilon.
+    fast_inference: bool, defaults to False
+        Whether to utilize MCMC (False) or variational inference (True) for the Bayesian inference on update steps.
 
     Returns
     -------
@@ -354,8 +358,10 @@ def create_cmab_bernoulli_cold_start(
         Contextual Multi-Armed Bandit with strategy = ClassicBandit
     """
     actions = {}
-    for a in set(action_ids):
-        actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+    for action_id in set(action_ids):
+        actions[action_id] = create_bayesian_logistic_regression_cold_start(
+            n_betas=n_features, fast_inference=fast_inference
+        )
     mab = CmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action)
     mab.predict_actions_randomly = True
     return mab
@@ -368,6 +374,7 @@ def create_cmab_bernoulli_bai_cold_start(
     exploit_p: Optional[Float01] = None,
     epsilon: Optional[Float01] = None,
     default_action: Optional[ActionId] = None,
+    fast_inference: bool = False,
 ) -> CmabBernoulliBAI:
     """
     Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action
@@ -395,6 +402,9 @@ def create_cmab_bernoulli_bai_cold_start(
     default_action: Optional[ActionId]
         The default action to select with a probability of epsilon when using the epsilon-greedy approach.
         If `default_action` is None, a random action from the action set will be selected with a probability of epsilon.
+    fast_inference: bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
 
     Returns
     -------
@@ -403,7 +413,7 @@ def create_cmab_bernoulli_bai_cold_start(
     """
     actions = {}
     for a in set(action_ids):
-        actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+        actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference)
     mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon, default_action=default_action)
     mab.predict_actions_randomly = True
     return mab
@@ -416,6 +426,7 @@ def create_cmab_bernoulli_cc_cold_start(
     subsidy_factor: Optional[Float01] = None,
     epsilon: Optional[Float01] = None,
     default_action: Optional[ActionId] = None,
+    fast_inference: bool = False,
 ) -> CmabBernoulliCC:
     """
     Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control
@@ -449,6 +460,9 @@ def create_cmab_bernoulli_cc_cold_start(
     default_action: Optional[ActionId]
         The default action to select with a probability of epsilon when using the epsilon-greedy approach.
         If `default_action` is None, a random action from the action set will be selected with a probability of epsilon.
+    fast_inference: bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
 
     Returns
     -------
@@ -457,7 +471,9 @@ def create_cmab_bernoulli_cc_cold_start(
     """
     actions = {}
     for a, cost in action_ids_cost.items():
-        actions[a] = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost)
+        actions[a] = create_bayesian_logistic_regression_cc_cold_start(
+            n_betas=n_features, cost=cost, fast_inference=fast_inference
+        )
     mab = CmabBernoulliCC(
         actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon, default_action=default_action
     )
diff --git a/pybandits/model.py b/pybandits/model.py
index c94ba1f..3de91b9 100644
--- a/pybandits/model.py
+++ b/pybandits/model.py
@@ -22,9 +22,11 @@
 
 
 from random import betavariate
-from typing import List, Tuple
+from typing import List, Optional, Tuple, Union
 
-from numpy import array, c_, exp, insert, mean, multiply, ones, sqrt, std
+import numpy as np
+import pymc.math as pmath
+from numpy import array, c_, insert, mean, multiply, ones, sqrt, std
 from numpy.typing import ArrayLike
 from pydantic import (
     Field,
@@ -34,11 +36,10 @@
     model_validator,
     validate_call,
 )
-from pymc import Bernoulli, Data, Deterministic, sample
+from pymc import Bernoulli, Data, Deterministic, fit, sample
 from pymc import Model as PymcModel
 from pymc import StudentT as PymcStudentT
-from pymc.math import sigmoid
-from pytensor.tensor import dot
+from pytensor.tensor import TensorVariable, dot
 from scipy.stats import t
 
 from pybandits.base import BinaryReward, Model, Probability, PyBanditsBaseModel
@@ -231,16 +232,62 @@ class BaseBayesianLogisticRegression(Model):
 
     Parameters
     ----------
-    alpha: StudentT
+    alpha : StudentT
         Student's t-distribution of the alpha coefficient.
-    betas: StudentT
+    betas : StudentT
         Student's t-distributions of the betas coefficients.
-    params_sample: Dict
-        Parameters for the function pymc.sample()
+    fast_inference : bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method.
     """
 
     alpha: StudentT
     betas: List[StudentT] = Field(..., min_items=1)
+    fast_inference: bool = False
+    update_kwargs: Optional[dict] = None
+    _default_update_kwargs = dict(draws=1000, progressbar=False, return_inferencedata=False)
+    _default_mcmc_kwargs = dict(
+        tune=500,
+        draws=1000,
+        chains=2,
+        init="adapt_diag",
+        cores=1,
+        target_accept=0.95,
+        progressbar=False,
+        return_inferencedata=False,
+    )
+    _default_variational_inference_kwargs = dict(method="advi")
+
+    @model_validator(mode="after")
+    def arrange_update_kwargs(self):
+        if self.update_kwargs is None:
+            self.update_kwargs = self._default_update_kwargs
+        if self.fast_inference:
+            self.update_kwargs = {**self._default_variational_inference_kwargs, **self.update_kwargs}
+        else:
+            self.update_kwargs = {**self._default_mcmc_kwargs, **self.update_kwargs}
+        return self
+
+    @classmethod
+    def _stable_sigmoid(cls, x: Union[np.ndarray, TensorVariable]) -> Union[np.ndarray, TensorVariable]:
+        """
+        Vectorized sigmoid function that avoids overflow and underflow.
+        Compatible with both numpy arrays and PyMC (PyTensor) tensors.
+        Parameters
+        ----------
+        x : Union[np.ndarray, TensorVariable]
+            Input values.
+
+        Returns
+        -------
+        prob : Union[np.ndarray, TensorVariable]
+            Sigmoid function applied to the input values.
+        """
+        backend = np if isinstance(x, np.ndarray) else pmath
+        prob = backend.where(x >= 0, 1 / (1 + backend.exp(-x)), backend.exp(x) / (1 + backend.exp(x)))
+        return prob
 
     @validate_call(config=dict(arbitrary_types_allowed=True))
     def check_context_matrix(self, context: ArrayLike):
@@ -249,12 +296,12 @@ def check_context_matrix(self, context: ArrayLike):
 
         Parameters
         ----------
-        context: ArrayLike of shape (n_samples, n_features)
+        context : ArrayLike of shape (n_samples, n_features)
             Matrix of contextual features.
 
         Returns
         -------
-        context: pandas DataFrame of shape (n_samples, n_features)
+        context : pandas DataFrame of shape (n_samples, n_features)
             Matrix of contextual features.
         """
         try:
@@ -304,25 +351,12 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]:
         weighted_sum = multiply(context_ext, coeff.T).sum(axis=1)
 
         # compute the probability with the sigmoid function
-        prob = 1.0 / (1.0 + exp(-weighted_sum))
+        prob = self._stable_sigmoid(weighted_sum)
 
         return prob, weighted_sum
 
     @validate_call(config=dict(arbitrary_types_allowed=True))
-    def update(
-        self,
-        context: ArrayLike,
-        rewards: List[BinaryReward],
-        tune=500,
-        draws=1000,
-        chains=2,
-        init="adapt_diag",
-        cores=2,
-        target_accept=0.95,
-        progressbar=False,
-        return_inferencedata=False,
-        **kwargs,
-    ):
+    def update(self, context: ArrayLike, rewards: List[BinaryReward]):
         """
         Update the model parameters.
 
@@ -344,40 +378,39 @@ def update(
             # if model was never updated priors_parameters = default arguments
             # else priors_parameters are calculated from traces of the previous update
             alpha = PymcStudentT("alpha", mu=self.alpha.mu, sigma=self.alpha.sigma, nu=self.alpha.nu)
-            betas = [
-                PymcStudentT("beta" + str(i), mu=self.betas[i].mu, sigma=self.betas[i].sigma, nu=self.betas[i].nu)
-                for i in range(len(self.betas))
-            ]
+            beta_mu = [b.mu for b in self.betas]
+            beta_sigma = [b.sigma for b in self.betas]
+            beta_nu = [b.nu for b in self.betas]
+            betas = PymcStudentT("betas", mu=beta_mu, sigma=beta_sigma, nu=beta_nu, shape=len(self.betas))
 
-            context = Data("context", context)
-            rewards = Data("rewards", rewards)
+            context = Data("context", context, mutable=False)
+            rewards = Data("rewards", rewards, mutable=False)
 
             # Likelihood (sampling distribution) of observations
             weighted_sum = Deterministic("weighted_sum", alpha + dot(betas, context.T))
-            p = Deterministic("p", sigmoid(weighted_sum))
+            p = Deterministic("p", self._stable_sigmoid(weighted_sum))
 
             # Bernoulli random vector with probability of success given by sigmoid function and actual data as observed
             _ = Bernoulli("likelihood", p=p, observed=rewards)
 
             # update traces object by sampling from posterior distribution
-            trace = sample(
-                tune=tune,
-                draws=draws,
-                chains=chains,
-                init=init,
-                cores=cores,
-                target_accept=target_accept,
-                progressbar=progressbar,
-                return_inferencedata=return_inferencedata,
-                **kwargs,
-            )
+            if self.fast_inference:
+                # variational inference
+                update_kwargs = self.update_kwargs.copy()
+                approx = fit(method=update_kwargs.pop("method"))
+                trace = approx.sample(**update_kwargs)
+            else:
+                # MCMC
+                trace = sample(**self.update_kwargs)
 
             # compute mean and std of the coefficients distributions
             self.alpha.mu = mean(trace["alpha"])
             self.alpha.sigma = std(trace["alpha"], ddof=1)
-            for i in range(len(self.betas)):
-                self.betas[i].mu = mean(trace["beta" + str(i)])
-                self.betas[i].sigma = std(trace["beta" + str(i)], ddof=1)
+            betas_mu = mean(trace["betas"], axis=0)
+            betas_std = std(trace["betas"], axis=0, ddof=1)
+            self.betas = [
+                StudentT(mu=mu, sigma=sigma, nu=beta.nu) for mu, sigma, beta in zip(betas_mu, betas_std, self.betas)
+            ]
 
 
 class BayesianLogisticRegression(BaseBayesianLogisticRegression):
@@ -392,12 +425,15 @@ class BayesianLogisticRegression(BaseBayesianLogisticRegression):
 
     Parameters
     ----------
-    alpha: StudentT
+    alpha : StudentT
         Student's t-distribution of the alpha coefficient.
-    betas: StudentT
+    betas : StudentT
         Student's t-distributions of the betas coefficients.
-    params_sample: Dict
-        Parameters for the function pymc.sample()
+    fast_inference : bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method.
     """
 
 
@@ -417,8 +453,11 @@ class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression):
         Student's t-distribution of the alpha coefficient.
     betas: StudentT
         Student's t-distributions of the betas coefficients.
-    params_sample: Dict
-        Parameters for the function pymc.sample()
+    fast_inference : bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method.
     cost: NonNegativeFloat
         Cost associated to the Bayesian Logistic Regression model.
     """
@@ -426,7 +465,9 @@ class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression):
     cost: NonNegativeFloat
 
 
-def create_bayesian_logistic_regression_cold_start(n_betas: PositiveInt) -> BayesianLogisticRegression:
+def create_bayesian_logistic_regression_cold_start(
+    n_betas: PositiveInt, fast_inference: bool = False, update_kwargs: Optional[dict] = None
+) -> BayesianLogisticRegression:
     """
     Utility function to create a Bayesian Logistic Regression model, with default parameters.
 
@@ -441,17 +482,27 @@ def create_bayesian_logistic_regression_cold_start(n_betas: PositiveInt) -> Baye
     n_betas : PositiveInt
         The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected
         after in the context matrix.
+    fast_inference : bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method.
 
     Returns
     -------
     blr: BayesianLogisticRegression
         The Bayesian Logistic Regression model.
     """
-    return BayesianLogisticRegression(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)])
+    return BayesianLogisticRegression(
+        alpha=StudentT(),
+        betas=[StudentT() for _ in range(n_betas)],
+        fast_inference=fast_inference,
+        update_kwargs=update_kwargs,
+    )
 
 
 def create_bayesian_logistic_regression_cc_cold_start(
-    n_betas: PositiveInt, cost: NonNegativeFloat
+    n_betas: PositiveInt, cost: NonNegativeFloat, fast_inference: bool = False, update_kwargs: Optional[dict] = None
 ) -> BayesianLogisticRegressionCC:
     """
     Utility function to create a Bayesian Logistic Regression model with cost control, with default parameters.
@@ -469,10 +520,21 @@ def create_bayesian_logistic_regression_cc_cold_start(
         after in the context matrix.
     cost: NonNegativeFloat
         Cost associated to the Bayesian Logistic Regression model.
+    fast_inference : bool, defaults to False
+        Whether to utilize standard MCMC (False) or faster variational inference (True)
+        for the Bayesian inference on update steps.
+    update_kwargs : Optional[dict], uses default values if not specified
+        Additional arguments to pass to the update method.
 
     Returns
     -------
     blr: BayesianLogisticRegressionCC
         The Bayesian Logistic Regression model.
     """
-    return BayesianLogisticRegressionCC(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)], cost=cost)
+    return BayesianLogisticRegressionCC(
+        alpha=StudentT(),
+        betas=[StudentT() for _ in range(n_betas)],
+        cost=cost,
+        fast_inference=fast_inference,
+        update_kwargs=update_kwargs,
+    )
diff --git a/pyproject.toml b/pyproject.toml
index 2304681..912d75d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "0.4.1"
+version = "0.5.0"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea <dariod@playtika.com>",
diff --git a/tests/test_cmab.py b/tests/test_cmab.py
index 5fe15e4..e434ffd 100644
--- a/tests/test_cmab.py
+++ b/tests/test_cmab.py
@@ -148,18 +148,30 @@ def test_cmab_init_with_wrong_blr_models(a, b):
         )
 
 
-def test_cmab_update(n_samples=100, n_features=3):
+@settings(deadline=60000)
+@given(st.just(100), st.just(3), st.booleans())
+def test_cmab_update(n_samples, n_features, fast_inference):
     actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
     rewards = np.random.choice([0, 1], size=n_samples).tolist()
 
     def run_update(context):
-        mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features)
+        mab = create_cmab_bernoulli_cold_start(
+            action_ids={"a1", "a2"}, n_features=n_features, fast_inference=fast_inference
+        )
         assert all(
-            [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)]
+            [
+                mab.actions[a]
+                == create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference)
+                for a in set(actions)
+            ]
         )
         mab.update(context=context, actions=actions, rewards=rewards)
         assert all(
-            [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)]
+            [
+                mab.actions[a]
+                != create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference)
+                for a in set(actions)
+            ]
         )
         assert not mab.predict_actions_randomly
 
@@ -179,26 +191,40 @@ def run_update(context):
     run_update(context=context)
 
 
-def test_cmab_update_not_all_actions(n_samples=100, n_feat=3):
+@settings(deadline=10000)
+@given(st.just(100), st.just(3), st.booleans())
+def test_cmab_update_not_all_actions(n_samples, n_feat, fast_inference):
     actions = np.random.choice(["a3", "a4"], size=n_samples).tolist()
     rewards = np.random.choice([0, 1], size=n_samples).tolist()
     context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_feat))
-    mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat)
+    mab = create_cmab_bernoulli_cold_start(
+        action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat, fast_inference=fast_inference
+    )
 
     mab.update(context=context, actions=actions, rewards=rewards)
-    assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat)
-    assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat)
-    assert mab.actions["a3"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat)
-    assert mab.actions["a4"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat)
+    assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(
+        n_betas=n_feat, fast_inference=fast_inference
+    )
+    assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(
+        n_betas=n_feat, fast_inference=fast_inference
+    )
+    assert mab.actions["a3"] != create_bayesian_logistic_regression_cold_start(
+        n_betas=n_feat, fast_inference=fast_inference
+    )
+    assert mab.actions["a4"] != create_bayesian_logistic_regression_cold_start(
+        n_betas=n_feat, fast_inference=fast_inference
+    )
 
 
 @settings(deadline=500)
-@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100))
-def test_cmab_update_shape_mismatch(n_samples, n_features):
+@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100), st.booleans())
+def test_cmab_update_shape_mismatch(n_samples, n_features, fast_inference):
     actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
     rewards = np.random.choice([0, 1], size=n_samples).tolist()
     context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
-    mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features)
+    mab = create_cmab_bernoulli_cold_start(
+        action_ids={"a1", "a2"}, n_features=n_features, fast_inference=fast_inference
+    )
 
     with pytest.raises(AttributeError):  # actions shape mismatch
         mab.update(context=context, actions=actions[1:], rewards=rewards)
@@ -369,6 +395,7 @@ def test_cmab_get_state(mu, sigma, n_features):
                             min_size=3,
                             max_size=3,
                         ),
+                        "fast_inference": st.booleans(),
                     },
                 ),
                 min_size=2,
@@ -381,8 +408,8 @@ def test_cmab_from_state(state):
     cmab = CmabBernoulli.from_state(state)
     assert isinstance(cmab, CmabBernoulli)
 
-    expected_actions = state["actions"]
     actual_actions = to_serializable_dict(cmab.actions)  # Normalize the dict
+    expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()}
     assert expected_actions == actual_actions
 
     # Ensure get_state and from_state compatibility
@@ -513,18 +540,30 @@ def test_cmab_bai_predict(n_samples, n_features):
     assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples
 
 
-def test_cmab_bai_update(n_samples=100, n_features=3):
+@settings(deadline=10000)
+@given(st.just(100), st.just(3), st.booleans())
+def test_cmab_bai_update(n_samples, n_features, fast_inference):
     actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
     rewards = np.random.choice([0, 1], size=n_samples).tolist()
     context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
-    mab = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=n_features)
+    mab = create_cmab_bernoulli_bai_cold_start(
+        action_ids={"a1", "a2"}, n_features=n_features, fast_inference=fast_inference
+    )
     assert mab.predict_actions_randomly
     assert all(
-        [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)]
+        [
+            mab.actions[a]
+            == create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference)
+            for a in set(actions)
+        ]
     )
     mab.update(context=context, actions=actions, rewards=rewards)
     assert all(
-        [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)]
+        [
+            mab.actions[a]
+            != create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference)
+            for a in set(actions)
+        ]
     )
     assert not mab.predict_actions_randomly
 
@@ -587,6 +626,7 @@ def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01):
                             min_size=3,
                             max_size=3,
                         ),
+                        "fast_inference": st.booleans(),
                     },
                 ),
                 min_size=2,
@@ -603,9 +643,10 @@ def test_cmab_bai_from_state(state):
     cmab = CmabBernoulliBAI.from_state(state)
     assert isinstance(cmab, CmabBernoulliBAI)
 
-    expected_actions = state["actions"]
     actual_actions = to_serializable_dict(cmab.actions)  # Normalize the dict
+    expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()}
     assert expected_actions == actual_actions
+
     expected_exploit_p = (
         state["strategy"].get("exploit_p", 0.5) if state["strategy"].get("exploit_p") is not None else 0.5
     )  # Covers both not existing and existing + None
@@ -743,22 +784,32 @@ def test_cmab_cc_predict(n_samples, n_features):
     assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples
 
 
-def test_cmab_cc_update(n_samples=100, n_features=3):
+@settings(deadline=10000)
+@given(st.just(100), st.just(3), st.booleans())
+def test_cmab_cc_update(n_samples, n_features, fast_inference):
     actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
     rewards = np.random.choice([0, 1], size=n_samples).tolist()
     context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
-    mab = create_cmab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features)
+    mab = create_cmab_bernoulli_cc_cold_start(
+        action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features, fast_inference=fast_inference
+    )
     assert mab.predict_actions_randomly
     assert all(
         [
-            mab.actions[a] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10)
+            mab.actions[a]
+            == create_bayesian_logistic_regression_cc_cold_start(
+                n_betas=n_features, cost=10, fast_inference=fast_inference
+            )
             for a in set(actions)
         ]
     )
     mab.update(context=context, actions=actions, rewards=rewards)
     assert all(
         [
-            mab.actions[a] != create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10)
+            mab.actions[a]
+            != create_bayesian_logistic_regression_cc_cold_start(
+                n_betas=n_features, cost=10, fast_inference=fast_inference
+            )
             for a in set(actions)
         ]
     )
@@ -830,6 +881,7 @@ def test_cmab_cc_get_state(
                             max_size=3,
                         ),
                         "cost": st.floats(min_value=0),
+                        "fast_inference": st.booleans(),
                     },
                 ),
                 min_size=2,
@@ -846,9 +898,10 @@ def test_cmab_cc_from_state(state):
     cmab = CmabBernoulliCC.from_state(state)
     assert isinstance(cmab, CmabBernoulliCC)
 
-    expected_actions = state["actions"]
     actual_actions = to_serializable_dict(cmab.actions)  # Normalize the dict
+    expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()}
     assert expected_actions == actual_actions
+
     expected_subsidy_factor = (
         state["strategy"].get("subsidy_factor", 0.5) if state["strategy"].get("subsidy_factor") is not None else 0.5
     )  # Covers both not existing and existing + None
diff --git a/tests/test_execution_time.py b/tests/test_execution_time.py
deleted file mode 100644
index 647d6ac..0000000
--- a/tests/test_execution_time.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# # MIT License
-# #
-# # Copyright (c) 2022 Playtika Ltd.
-# #
-# # Permission is hereby granted, free of charge, to any person obtaining a copy
-# # of this software and associated documentation files (the "Software"), to deal
-# # in the Software without restriction, including without limitation the rights
-# # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# # copies of the Software, and to permit persons to whom the Software is
-# # furnished to do so, subject to the following conditions:
-# #
-# # The above copyright notice and this permission notice shall be included in all
-# # copies or substantial portions of the Software.
-# #
-# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# # SOFTWARE.
-
-# import time
-
-# import numpy as np
-
-# from pybandits.core.cmab import Cmab
-
-# verbose = True
-
-
-# def run_cmab_predict_streaming(n_actions, n_features, n_samples, n_iterations, n_jobs, sampling, params_sample,
-#                                verbose=False):
-#     """
-#     This function executes the following steps:
-#         - initialize cmab with input params
-#         - simulate first batch of users with actions and rewards
-#         - update cmab with first batch
-#         - predict 1 sample at time (i.e. in streaming) with sampling (sampling=True) or without (sampling=False)
-#         - return the mean and std of the prediction time.
-#     """
-
-#     # params
-#     size_first_batch = 1000
-#     actions_ids = ['action' + str(i + 1) for i in range(n_actions)]
-
-#     # init model
-#     cmab = Cmab(n_features=n_features, actions_ids=actions_ids, n_jobs=n_jobs, params_sample=params_sample)
-
-#     # simulate first batch
-#     X = 2 * np.random.random_sample((size_first_batch, n_features)) - 1  # float in the interval (-1, 1)
-#     actions, _ = cmab.predict(X)
-#     rewards = np.random.randint(2, size=size_first_batch)
-
-#     # update
-#     start = time.time()
-#     cmab.update(X=X, actions=actions, rewards=rewards)
-#     end = time.time()
-#     t = end - start
-#     if verbose:
-#         print('\nUpdate with n_actions = {}, n_features = {}, size_first_batch = {}. Time = {:.6f} sec.'
-#               .format(n_actions, n_features, size_first_batch, t))
-
-#     # predict 1 sample at time
-#     t = []
-#     for i in range(n_iterations):
-#         x = 2 * np.random.random_sample((n_samples, n_features)) - 1  # floats in the interval (-1, 1)
-#         if sampling:
-#             start = time.time()
-#             _, _ = cmab.predict(x)
-#             end = time.time()
-#         else:
-#             start = time.time()
-#             _, _ = cmab.fast_predict(x)
-#             end = time.time()
-#         t.append(end-start)
-#     mu_t, simga_t = np.mean(t), np.std(t)
-
-#     if verbose:
-#         print('Predict of n_actions={}, n_features={}, n_samples={}, n_iterations={}, sampling={}. '
-#               '\nmean execution time = {:.6f} sec, std execution time = {:.6f} sec '
-#               .format(n_actions, n_features, n_samples, n_iterations, sampling, mu_t, simga_t))
-
-#     return mu_t, simga_t
-
-
-# def test_cmab_time_predict_before_update():
-#     """ Test cmab.predict() in steaming before the first update(). """
-#     # input
-#     n_iteration = 10000
-#     n_actions = 1000
-#     n_samples = 1
-#     n_features = 1000
-#     actions_ids = ['action' + str(i + 1) for i in range(n_actions)]
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # init model
-#     cmab = Cmab(n_features=n_features, actions_ids=actions_ids, params_sample=params_sample)
-
-#     # predict
-#     t = []
-#     for i in range(n_iteration):
-#         x = 2 * np.random.random_sample((n_samples, n_features)) - 1  # float in the interval (-1, 1)
-#         start = time.time()
-#         _, _ = cmab.predict(x)
-#         end = time.time()
-#         t.append(end - start)
-#     mu_t, simga_t = np.mean(t), np.std(t)
-
-#     if verbose:
-#         print('\nPredict before the first update of n_samples={}, n_actions={}, n_features={}, n_iteration={}'
-#               '\nmean execution time = {:.6f} sec, std execution time = {:.6f} sec '
-#               .format(n_samples, n_iteration, n_actions, n_features, mu_t, simga_t))
-
-
-# # test with fast predict
-
-# def test_cmab_time_predict_2_2_1_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 2
-#     n_samples = 1
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_2_2_10000_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 2
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_2_5_10000_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 5
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_2_100_10000_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 100
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_5_2_10000_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 5
-#     n_features = 2
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_20_2_1_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 20
-#     n_features = 2
-#     n_samples = 1
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_20_100_1_fp():
-#     """ Test cmab.fast_predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 20
-#     n_features = 100
-#     n_samples = 1
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = False
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# # test with sampling
-
-# def test_cmab_time_predict_2_2_1_w_s():
-#     """ Test cmab.predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 2
-#     n_samples = 1
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = True
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_2_2_10000_w_s():
-#     """ Test cmab.predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 2
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = True
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_2_5_10000_w_s():
-#     """ Test cmab.predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 5
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = True
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)
-
-
-# def test_cmab_time_predict_2_100_10000_w_s():
-#     """ Test cmab.predict() in steaming after the first update(). """
-#     # input
-#     n_actions = 2
-#     n_features = 100
-#     n_samples = 10000
-#     n_iterations = 10
-#     n_jobs = n_actions
-#     sampling = True
-#     params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95,
-#                      'progressbar': False}
-
-#     # run test
-#     mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples,
-#                                                n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling,
-#                                                params_sample=params_sample, verbose=verbose)